Upgrade liburing to liburing-2.8 am: 42074fb771

Original change: https://android-review.googlesource.com/c/platform/external/liburing/+/3408663

Change-Id: I817e53d9ea4d79c623040bf7e41bd70e67f674d5
Signed-off-by: Automerger Merge Worker <[email protected]>
diff --git a/.github/actions/codespell/stopwords b/.github/actions/codespell/stopwords
new file mode 100644
index 0000000..1e968cd
--- /dev/null
+++ b/.github/actions/codespell/stopwords
@@ -0,0 +1,7 @@
+bu
+cancelation
+cancelations
+cant
+pring
+sring
+wont
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 88192ff..d585831 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -7,7 +7,7 @@
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
 
     strategy:
       fail-fast: false
@@ -19,6 +19,15 @@
             cxx_pkg: g++-x86-64-linux-gnu
             cc: x86_64-linux-gnu-gcc
             cxx: x86_64-linux-gnu-g++
+            sanitize: 0
+
+          # x86-64 gcc asan
+          - arch: x86_64
+            cc_pkg: gcc-x86-64-linux-gnu
+            cxx_pkg: g++-x86-64-linux-gnu
+            cc: x86_64-linux-gnu-gcc
+            cxx: x86_64-linux-gnu-g++
+            sanitize: 1
 
           # x86-64 clang
           - arch: x86_64
@@ -26,6 +35,9 @@
             cxx_pkg: clang
             cc: clang
             cxx: clang++
+            liburing_extra_flags: -Wshorten-64-to-32
+            extra_flags: -Wmissing-prototypes -Wstrict-prototypes -Wunreachable-code-loop-increment -Wunreachable-code -Wmissing-variable-declarations -Wextra-semi-stmt
+            sanitize: 0
 
           # x86 (32-bit) gcc
           - arch: i686
@@ -33,6 +45,7 @@
             cxx_pkg: g++-i686-linux-gnu
             cc: i686-linux-gnu-gcc
             cxx: i686-linux-gnu-g++
+            sanitize: 0
 
           # aarch64 gcc
           - arch: aarch64
@@ -40,6 +53,7 @@
             cxx_pkg: g++-aarch64-linux-gnu
             cc: aarch64-linux-gnu-gcc
             cxx: aarch64-linux-gnu-g++
+            sanitize: 0
 
           # arm (32-bit) gcc
           - arch: arm
@@ -47,6 +61,15 @@
             cxx_pkg: g++-arm-linux-gnueabi
             cc: arm-linux-gnueabi-gcc
             cxx: arm-linux-gnueabi-g++
+            sanitize: 0
+
+          # riscv64
+          - arch: riscv64
+            cc_pkg: gcc-riscv64-linux-gnu
+            cxx_pkg: g++-riscv64-linux-gnu
+            cc: riscv64-linux-gnu-gcc
+            cxx: riscv64-linux-gnu-g++
+            sanitize: 0
 
           # powerpc64
           - arch: powerpc64
@@ -54,6 +77,7 @@
             cxx_pkg: g++-powerpc64-linux-gnu
             cc: powerpc64-linux-gnu-gcc
             cxx: powerpc64-linux-gnu-g++
+            sanitize: 0
 
           # powerpc
           - arch: powerpc
@@ -61,6 +85,7 @@
             cxx_pkg: g++-powerpc-linux-gnu
             cc: powerpc-linux-gnu-gcc
             cxx: powerpc-linux-gnu-g++
+            sanitize: 0
 
           # alpha
           - arch: alpha
@@ -68,6 +93,7 @@
             cxx_pkg: g++-alpha-linux-gnu
             cc: alpha-linux-gnu-gcc
             cxx: alpha-linux-gnu-g++
+            sanitize: 0
 
           # mips64
           - arch: mips64
@@ -75,6 +101,7 @@
             cxx_pkg: g++-mips64-linux-gnuabi64
             cc: mips64-linux-gnuabi64-gcc
             cxx: mips64-linux-gnuabi64-g++
+            sanitize: 0
 
           # mips
           - arch: mips
@@ -82,21 +109,35 @@
             cxx_pkg: g++-mips-linux-gnu
             cc: mips-linux-gnu-gcc
             cxx: mips-linux-gnu-g++
+            sanitize: 0
+
+          # hppa
+          - arch: hppa
+            cc_pkg: gcc-hppa-linux-gnu
+            cxx_pkg: g++-hppa-linux-gnu
+            cc: hppa-linux-gnu-gcc
+            cxx: hppa-linux-gnu-g++
+            sanitize: 0
 
     env:
-      FLAGS: -g -O2 -Wall -Wextra -Werror
+      FLAGS: -g -O3 -Wall -Wextra -Werror -Wno-sign-compare ${{matrix.extra_flags}}
+      SANITIZE: ${{matrix.sanitize}}
+
+      # Flags for building sources in src/ dir only.
+      LIBURING_CFLAGS: ${{matrix.liburing_extra_flags}}
 
     steps:
     - name: Checkout source
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
 
     - name: Install Compilers
       run: |
         if [[ "${{matrix.cc_pkg}}" == "clang" ]]; then \
             wget https://apt.llvm.org/llvm.sh -O /tmp/llvm.sh; \
-            sudo bash /tmp/llvm.sh 15; \
-            sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 400; \
-            sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 400; \
+            sudo apt-get purge --auto-remove llvm python3-lldb-14 llvm-14 -y; \
+            sudo bash /tmp/llvm.sh 17; \
+            sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-17 400; \
+            sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-17 400; \
         else \
             sudo apt-get update -y; \
             sudo apt-get install -y ${{matrix.cc_pkg}} ${{matrix.cxx_pkg}}; \
@@ -108,19 +149,16 @@
         ${{matrix.cxx}} --version;
 
     - name: Build
+      if: ${{matrix.sanitize == '0'}}
       run: |
         ./configure --cc=${{matrix.cc}} --cxx=${{matrix.cxx}};
         make -j$(nproc) V=1 CPPFLAGS="-Werror" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS";
 
-    - name: Build nolibc
+    - name: Build
+      if: ${{matrix.sanitize == '1'}}
       run: |
-        if [[ "${{matrix.arch}}" == "x86_64" || "${{matrix.arch}}" == "i686" ]]; then \
-            make clean; \
-            ./configure --cc=${{matrix.cc}} --cxx=${{matrix.cxx}} --nolibc; \
-            make -j$(nproc) V=1 CPPFLAGS="-Werror" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS"; \
-        else \
-            echo "Skipping nolibc build, this arch doesn't support building liburing without libc"; \
-        fi;
+        ./configure --cc=${{matrix.cc}} --cxx=${{matrix.cxx}} --enable-sanitizer;
+        make -j$(nproc) V=1 CPPFLAGS="-Werror" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS";
 
     - name: Test install command
       run: |
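
Illustration (not part of the patch): the new sanitize=1 matrix entries
build with ./configure --enable-sanitizer, so the library and its bundled
programs are compiled and linked with -fsanitize=address,undefined (see
the examples/Makefile hunk further down). The hypothetical standalone
program below, which is not liburing code, shows the class of defect that
job turns into a hard CI failure.

```c
/* Hypothetical demo, not liburing code: under
 * -fsanitize=address,undefined this aborts with a heap-buffer-overflow
 * report instead of silently corrupting the heap. */
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *buf = malloc(16);

	if (!buf)
		return 1;
	memset(buf, 0, 17);	/* one byte past the 16-byte allocation */
	free(buf);
	return 0;
}
```
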
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 0000000..11209e4
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+name: Codespell
+
+on:
+  # Trigger the workflow on push or pull requests.
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-24.04
+
+    steps:
+    - name: Checkout source
+      uses: actions/checkout@v4
+
+    - name: Install codespell
+      run: |
+        sudo apt-get update -y
+        sudo apt-get install -y codespell
+
+    - name: Display codespell version
+      run: codespell --version
+
+    - name: Execute codespell
+      run: codespell --ignore-words=.github/actions/codespell/stopwords .
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
index 8873f0b..b97e9d1 100644
--- a/.github/workflows/shellcheck.yml
+++ b/.github/workflows/shellcheck.yml
@@ -7,11 +7,11 @@
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
 
     steps:
     - name: Checkout source
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
 
     - name: Display shellcheck version
       run: shellcheck --version
diff --git a/.gitignore b/.gitignore
index b5acffd..8edb52d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,12 +9,24 @@
 
 /src/liburing.a
 /src/liburing.so*
+/src/liburing-ffi.a
+/src/liburing-ffi.so*
 /src/include/liburing/compat.h
+/src/include/liburing/io_uring_version.h
 
+/examples/io_uring-close-test
 /examples/io_uring-cp
 /examples/io_uring-test
+/examples/io_uring-udp
 /examples/link-cp
+/examples/napi-busy-poll-client
+/examples/napi-busy-poll-server
 /examples/ucontext-cp
+/examples/poll-bench
+/examples/proxy
+/examples/send-zerocopy
+/examples/rsrc-update-bench
+/examples/kdigest
 
 /test/*.t
 /test/*.dmesg
@@ -25,5 +37,6 @@
 config.log
 
 liburing.pc
+liburing-ffi.pc
 
 cscope.out
diff --git a/CHANGELOG b/CHANGELOG
index 01cb677..ca1056e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,101 @@
+liburing-2.8 release
+- Add support for incrementally/partially consumed provided buffers,
+  usable with the provided buffer ring support.
+- Add support for foo_and_wait_min_timeout(), where it's possible to
+  define a minimum timeout for waiting to get batches of completions,
+  but if that fails, extend for a longer timeout without having any
+  extra context switches.
+- Add support for using different clock sources for completion waiting.
+- Greatly increased coverage of test cases, with test case improvements
+  and fixes.
+- Man page updates
+- Don't leak _GNU_SOURCE via pkg-config --cflags
+- Support for address sanitizer
+- Add examples/kdigest sample program
+- Add discard helper, test, and man page
+
+liburing-2.7 release
+
+- Man page updates
+- Sync with kernel 6.10
+	- send/recv bundle support
+	- accept nowait and CQE_F_MORE
+- Add and update test cases
+- Fix io_uring_queue_init_mem() returning a value that was too small,
+  potentially causing memory corruption in userspace by overwriting
+  64 bytes beyond the returned value. Also add test case for that.
+- Add 64-bit length variants of io_uring_prep_{m,f}advise()
+- Add BIND/LISTEN support and helpers / man pages
+- Add io_uring_enable_rings.3 man page
+- Fix bug in io_uring_prep_read_multishot()
+- Fixup bundle test cases
+- Add fixed-hugepage test case
+- Fix io_uring_prep_fixed_fd_install.3 man page
+- Note 'len' == 0 requirement in io_uring_prep_send.3 man page
+- Fix some test cases for skipping on older kernels
+
+liburing-2.6 release
+
+- Add getsockopt and setsockopt socket commands
+- Add test cases to test/hardlink
+- Man page fixes
+- Add futex support, and test cases
+- Add waitid support, and test cases
+- Add read multishot, and test cases
+- Add support for IORING_SETUP_NO_SQARRAY
+- Use IORING_SETUP_NO_SQARRAY as the default
+- Add support for IORING_OP_FIXED_FD_INSTALL
+- Add io_uring_prep_fixed_fd_install() helper
+- Support for napi busy polling
+- Improve/add test cases
+- Man page fixes
+- Add sample 'proxy' example
+
+liburing-2.5 release
+
+- Add support for io_uring_prep_cmd_sock()
+- Add support for application allocated ring memory, for placing rings
+  in huge mem. Available through io_uring_queue_init_mem().
+- Add support for registered ring fds
+- Various documentation updates
+- Various fixes
+
+liburing-2.4 release
+
+- Add io_uring_{major,minor,check}_version() functions.
+- Add IO_URING_{MAJOR,MINOR,CHECK}_VERSION() macros.
+- FFI support (for non-C/C++ languages integration).
+- Add io_uring_prep_msg_ring_cqe_flags() function.
+- Deprecate --nolibc configure option.
+- CONFIG_NOLIBC is always enabled on x86-64, x86, and aarch64.
+- Add support for IORING_REGISTER_USE_REGISTERED_RING and use if available.
+- Add io_uring_close_ring_fd() function.
+- Add io_uring_prep_msg_ring_fd_alloc function.
+- Add io_uring_free_buf_ring() and io_uring_setup_buf_ring() functions.
+- Ensure that io_uring_prep_accept_direct(), io_uring_prep_openat_direct(),
+  io_uring_prep_openat2_direct(), io_uring_prep_msg_ring_fd(), and
+  io_uring_prep_socket_direct() factor in being called with
+  IORING_FILE_INDEX_ALLOC for allocating a direct descriptor.
+- Add io_uring_prep_sendto() function.
+- Add io_uring_prep_cmd_sock() function.
+
+liburing-2.3 release
+
+- Support non-libc build for aarch64.
+- Add io_uring_{enter,enter2,register,setup} syscall functions.
+- Add sync cancel interface, io_uring_register_sync_cancel().
+- Fix return value of io_uring_submit_and_wait_timeout() to match the
+  man page.
+- Improvements to the regression tests
+- Add support and test case for passthrough IO
+- Add recv and recvmsg multishot helpers and support
+- Add documentation and support for IORING_SETUP_DEFER_TASKRUN
+- Fix potential missing kernel entry with IORING_SETUP_IOPOLL
+- Add support and documentation for zero-copy network transmit
+- Various optimizations
+- Many cleanups
+- Many man page additions and updates
+
 liburing-2.2 release
 
 - Support non-libc builds.
@@ -6,7 +104,7 @@
 - Add support for multishot accept.
 - io_uring_register_files() will set RLIMIT_NOFILE if necessary.
 - Add support for registered ring fds, io_uring_register_ring_fd(),
-  reducingthe overhead of an io_uring_enter() system call.
+  reducing the overhead of an io_uring_enter() system call.
 - Add support for the message ring opcode.
 - Add support for newer request cancelation features.
 - Add support for IORING_SETUP_COOP_TASKRUN, which can help reduce the
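
Illustration (not part of the patch): a minimal sketch of the 2.8
min-timeout wait described above, assuming liburing-2.8's
io_uring_submit_and_wait_min_timeout() helper and its liburing.h
signature (ring, cqe pointer, batch size, overall timeout, minimum wait
in usec, sigmask).

```c
/* Sketch, under the assumptions above: wait up to 50 usec for a batch
 * of 8 completions; if the batch doesn't materialize in that window,
 * keep waiting up to 1 sec for whatever arrives, without extra context
 * switches in between. */
#include <liburing.h>
#include <stddef.h>

static int wait_for_batch(struct io_uring *ring)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_cqe *cqe;

	return io_uring_submit_and_wait_min_timeout(ring, &cqe, 8, &ts,
						    50, NULL);
}
```
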
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..8bbce78
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,165 @@
+Introduction
+============
+
+liburing welcomes contributions, whether they be bug fixes, features, or
+documentation additions/updates. However, we do have some rules in place
+to govern the sanity of the project, and all contributions should follow
+the guidelines in this document. The main reasons for the rules are:
+
+1) Keep the code consistent
+2) Keep the git repository consistent
+3) Maintain bisectability
+
+Coding style
+============
+
+Generally, all the code in liburing should follow the same style. A few
+known exceptions exist, like syzbot test cases that got committed rather
+than re-writing them in a saner format. Any change you make, please
+follow the style of the code around you.
+
+Commit format
+=============
+
+Each commit should do one thing, and one thing only. If you find yourself,
+in the commit message, adding phrases like "Also do [...]" or "While in
+here [...]", then that's a sign that the change should have been split
+into multiple commits. If your change includes some refactoring of code to
+make your change possible, then that refactoring should be a separate
+commit, done first. That means this preparatory commit won't have any
+functional changes, and hence should be a no-op. It also means that your
+main commit, with the change that you actually care about, will be smaller
+and easier to review.
+
+Each commit must stand on its own in terms of what it provides, and how it
+works. Lots of changes are just a single commit, but for something a bit
+more involved, it's not uncommon to have a pull request contain multiple
+commits. Make each commit as simple as possible, and not any simpler. We'd
+much rather see 10 simple commits than 2 more complicated ones. If you
+stumble across something that needs fixing while making an unrelated
+change, then please make that change as a separate commit, explaining why
+it's being made.
+
+Each commit in a series must be buildable; it's not enough that the end
+result is buildable. See reason 3 in the introduction for why that's the
+case.
+
+No fixup commits! Sometimes people post a change and errors are pointed
+out in the commit, and the author then does a followup fix for that
+error. This isn't acceptable; please squash fixup commits into the
+commit that introduced the problem in the first place. This is done by
+amending the fix into the original commit that caused the issue. You can
+do that with git rebase -i <sha> and arrange the commit order such that
+the fixup is right after the original commit, and then use 's' (for
+squash) to squash the fixup into the original commit. Don't forget to
+edit the commit message while doing that, as git will combine the two
+commit messages into one. Or you can do it manually. Once done, force
+push your rewritten git history. See reasons 1-3 in the introduction
+for why that is.
+
+Commit message
+==============
+
+A good commit message explains the WHY of a commit - the reason for the
+commit to exist. Don't explain what the code in the commit does; that should
+be readily apparent from just reading the code. If that isn't the case,
+then a comment in the code is going to be more useful than a lengthy
+explanation in the commit message. liburing commits use the following
+format:
+
+Title
+
+Body of commit
+
+Signed-off-by: ```My Identity <[email protected]>```
+
+That is, a descriptive title on the first line, then an empty line, then
+the body of the commit message, then an empty line, and finally an SOB
+tag. The signed-off-by exists to provide proof of origin, see the
+[DCO](https://developercertificate.org/).
+
+Example commit:
+
+```
+commit 0fe5c09195c0918f89582dd6ff098a58a0bdf62a
+Author: Jens Axboe <[email protected]>
+Date:   Fri Sep 6 15:54:04 2024 -0600
+
+    configure: fix ublk_cmd header check
+    
+    The previous commit is mixing private structures and defines with public
+    uapi ones. Testing for UBLK_U_CMD_START_DEV is fine, CTRL_CMD_HAS_DATA
+    is not. And struct ublk_ctrl_cmd_data is not a public struct.
+    
+    Fixes: 83bc535a3118 ("configure: don't enable ublk if modern commands not available")
+    Signed-off-by: Jens Axboe <[email protected]>
+```
+
+Since this change is pretty trivial, a lengthy explanation of the
+reasoning need not be given. However, for more complicated changes, the
+reasoning should be spelled out in more detail.
+
+A Fixes line can be added if this commit fixes an issue in a previous
+commit. That kind of meta data can be useful down the line for finding
+dependencies between commits. Adding the following to your .gitconfig:
+
+```
+[pretty]
+	fixes = Fixes: %h (\"%s\")
+```
+
+and running ```git fixes <sha>``` will then generate the correctly
+formatted Fixes line for the commit. Likewise, other meta data can be:
+
+Link: https://somesite/somewhere
+
+can be useful to link to a discussion around the issue that led to this
+commit, perhaps a bug report. This can be a GitHub issue as well. If a
+commit closes/solves a GitHub issue, then:
+
+Closes: https://github.com/axboe/liburing/issues/XXXX
+
+can also be used.
+
+Each commit message should be formatted so each full line is 72-74 chars
+wide. For many of us, GitHub is not the primary location, and git log is
+often used in a terminal to browse the repo. Breaking lines at 72-74
+characters retains readability in an xterm/terminal.
+
+Pull Requests
+=============
+
+The git repository itself is the canonical location for information. It's
+quite fine to provide a lengthy explanation for a pull request on GitHub,
+however please ensure that this doesn't come at the expense of the commit
+messages themselves being lacking. The commit messages should stand on
+their own and contain everything that you'd otherwise put in the PR
+message. If you've worked on projects that send patches before, consider
+the PR message similar to the cover letter for a series of patches.
+
+Most contributors seem to use GH for sending patches, which is fine. If
+you prefer using email, then patches can also be sent to the io_uring
+mailing list: [email protected].
+
+liburing doesn't squash/rebase-on-merge, or other heinous practices
+sometimes seen elsewhere. Whatever sha your commit has in your tree is
+what it'll have in the upstream tree. Patches are applied directly, and
+pull requests are merged with a merge commit. If meta data needs to go
+into the merge commit, then it will go into the merge commit message.
+This means that you don't need to continually rebase your changes on top
+of the master branch.
+
+Testing changes
+===============
+
+You should ALWAYS test your changes, no matter how trivial or obviously
+correct they may seem. Nobody is infallible, and making mistakes is only
+human.
+
+liburing contains a wide variety of functional tests. If you make changes
+to liburing, then you should run the test cases. This is done by building
+the repo and running ```make runtests```. Note that some of the liburing
+tests test for defects in older kernels, and hence it's possible that they
+will crash on an outdated kernel that doesn't contain fixes from the
+stable kernel tree. If in doubt, building and running the tests in a vm is
+encouraged.
diff --git a/METADATA b/METADATA
index e5962aa..4e158f0 100644
--- a/METADATA
+++ b/METADATA
@@ -1,17 +1,20 @@
-name: "liburing"
-description:
-    "io_uring userspace library"
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update external/liburing
+# For more info, check https://cs.android.com/android/platform/superproject/main/+/main:tools/external_updater/README.md
 
+name: "liburing"
+description: "io_uring userspace library"
 third_party {
-  url {
-    type: HOMEPAGE
-    value: "https://github.com/axboe/liburing"
-  }
-  url {
-    type: GIT
-    value: "https://github.com/axboe/liburing"
-  }
-  version: "2.2"
-  last_upgrade_date { year: 2022 month: 10 day: 28 }
   license_type: NOTICE
+  last_upgrade_date {
+    year: 2024
+    month: 12
+    day: 11
+  }
+  homepage: "https://github.com/axboe/liburing"
+  identifier {
+    type: "Git"
+    value: "https://github.com/axboe/liburing"
+    version: "liburing-2.8"
+  }
 }
diff --git a/Makefile b/Makefile
index 686be4f..6a2d63b 100644
--- a/Makefile
+++ b/Makefile
@@ -11,11 +11,11 @@
 	@$(MAKE) -C test
 	@$(MAKE) -C examples
 
-.PHONY: all install default clean test
-.PHONY: FORCE cscope
+library:
+	@$(MAKE) -C src
 
-partcheck: all
-	@echo "make partcheck => TODO add tests with out kernel support"
+.PHONY: all install default clean test library
+.PHONY: FORCE cscope
 
 runtests: all
 	@$(MAKE) -C test runtests
@@ -25,7 +25,7 @@
 	@$(MAKE) -C test runtests-parallel
 
 config-host.mak: configure
-	@if [ ! -e "$@" ]; then					\
+	+@if [ ! -e "$@" ]; then				\
 	  echo "Running configure ...";				\
 	  ./configure;						\
 	else							\
@@ -45,13 +45,14 @@
 	    -e "s%@VERSION@%$(VERSION)%g" \
 	    $< >$@
 
-install: $(NAME).pc
+install: $(NAME).pc $(NAME)-ffi.pc
 	@$(MAKE) -C src install prefix=$(DESTDIR)$(prefix) \
 		includedir=$(DESTDIR)$(includedir) \
 		libdir=$(DESTDIR)$(libdir) \
 		libdevdir=$(DESTDIR)$(libdevdir) \
 		relativelibdir=$(relativelibdir)
 	$(INSTALL) -D -m 644 $(NAME).pc $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc
+	$(INSTALL) -D -m 644 $(NAME)-ffi.pc $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME)-ffi.pc
 	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man2
 	$(INSTALL) -m 644 man/*.2 $(DESTDIR)$(mandir)/man2
 	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man3
@@ -59,11 +60,22 @@
 	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man7
 	$(INSTALL) -m 644 man/*.7 $(DESTDIR)$(mandir)/man7
 
+uninstall:
+	@$(MAKE) -C src uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)
+	@rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc
+	@rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME)-ffi.pc
+	@rm -rf $(DESTDIR)$(mandir)/man2/io_uring*.2
+	@rm -rf $(DESTDIR)$(mandir)/man3/io_uring*.3
+	@rm -rf $(DESTDIR)$(mandir)/man7/io_uring*.7
+
 install-tests:
 	@$(MAKE) -C test install prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)
 
+uninstall-tests:
+	@$(MAKE) -C test uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)
+
 clean:
-	@rm -f config-host.mak config-host.h cscope.out $(NAME).pc test/*.dmesg
+	@rm -f config-host.mak config-host.h cscope.out $(NAME).pc $(NAME)-ffi.pc test/*.dmesg
 	@$(MAKE) -C src clean
 	@$(MAKE) -C test clean
 	@$(MAKE) -C examples clean
diff --git a/Makefile.common b/Makefile.common
index 27fc233..ea75c34 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -3,4 +3,5 @@
 SPECFILE=$(TOP)/$(NAME).spec
 VERSION=$(shell awk '/Version:/ { print $$2 }' $(SPECFILE))
 VERSION_MAJOR=$(shell echo $(VERSION) | cut -d. -f1)
+VERSION_MINOR=$(shell echo $(VERSION) | cut -d. -f2)
 TAG = $(NAME)-$(VERSION)
diff --git a/README b/README
index 80d2b3d..3e41a8f 100644
--- a/README
+++ b/README
@@ -47,6 +47,54 @@
 pass on older kernels, and may even crash or hang older kernels!
 
 
+Building liburing
+-----------------
+
+    #
+    # Prepare build config (optional).
+    #
+    #  --cc  specifies the C   compiler.
+    #  --cxx specifies the C++ compiler.
+    #
+    ./configure --cc=gcc --cxx=g++;
+
+    #
+    # Build liburing.
+    #
+    make -j$(nproc);
+
+    #
+    # Install liburing (headers, shared/static libs, and manpage).
+    #
+    sudo make install;
+
+See './configure --help' for more information about build config options.
+
+
+FFI support
+-----------
+
+By default, the build results in 4 lib files:
+
+    2 shared libs:
+
+        liburing.so
+        liburing-ffi.so
+
+    2 static libs:
+
+        liburing.a
+        liburing-ffi.a
+
+Languages and applications that can't use 'static inline' functions in
+liburing.h should use the FFI variants.
+
+liburing's main public interface lives in liburing.h as 'static inline'
+functions. Users wishing to consume liburing purely as a binary dependency
+should link against liburing-ffi. It contains definitions for every 'static
+inline' function.
+
+
 License
 -------
 
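
Illustration (not part of the patch): what "can't use 'static inline'
functions" means in practice. A non-C language binding resolves real
exported symbols from the FFI library instead of compiling the header;
the hypothetical program below mimics that with dlopen() (the
liburing-ffi.so.2 soname is an assumption).

```c
/* Hypothetical sketch of what an FFI binding does: io_uring_get_sqe()
 * is only a 'static inline' in liburing.h, but a real exported symbol
 * in liburing-ffi. */
#include <dlfcn.h>
#include <stdio.h>

int main(void)
{
	void *lib = dlopen("liburing-ffi.so.2", RTLD_NOW);

	if (!lib) {
		fprintf(stderr, "dlopen: %s\n", dlerror());
		return 1;
	}
	printf("io_uring_get_sqe -> %p\n", dlsym(lib, "io_uring_get_sqe"));
	dlclose(lib);
	return 0;
}
```
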
diff --git a/configure b/configure
index 2c2441b..4794306 100755
--- a/configure
+++ b/configure
@@ -10,7 +10,7 @@
   case "$opt" in
   --help|-h) show_help=yes
   ;;
-  --prefix=*) prefix="$optarg"
+  --prefix=*) prefix="$(realpath -s $optarg)"
   ;;
   --includedir=*) includedir="$optarg"
   ;;
@@ -26,7 +26,9 @@
   ;;
   --cxx=*) cxx="$optarg"
   ;;
-  --nolibc) liburing_nolibc="yes"
+  --use-libc) use_libc=yes
+  ;;
+  --enable-sanitizer) use_sanitizer=yes
   ;;
   *)
     echo "ERROR: unknown option $opt"
@@ -75,7 +77,8 @@
   --datadir=PATH           install shared data in PATH [$datadir]
   --cc=CMD                 use CMD as the C compiler
   --cxx=CMD                use CMD as the C++ compiler
-  --nolibc                 build liburing without libc
+  --use-libc               use libc for liburing (useful for hardening)
+  --enable-sanitizer       compile liburing with the address and undefined behaviour sanitizers. (useful for debugging)
 EOF
 exit 0
 fi
@@ -115,7 +118,7 @@
 }
 
 # Default CFLAGS
-CFLAGS="-D_GNU_SOURCE -include config-host.h"
+CFLAGS="-D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -include config-host.h"
 BUILD_CFLAGS=""
 
 # Print configure header at the top of $config_host_h
@@ -202,6 +205,15 @@
 print_and_output_mak "mandir" "$mandir"
 print_and_output_mak "datadir" "$datadir"
 
+####################################################
+# Check for correct compiler runtime library to link with
+libgcc_link_flag="-lgcc"
+if $cc -print-libgcc-file-name >/dev/null 2>&1; then
+  libgcc_link_flag="$($cc $CFLAGS $LDFLAGS -print-libgcc-file-name)"
+fi
+print_and_output_mak "libgcc_link_flag" "$libgcc_link_flag"
+####################################################
+
 ##########################################
 # check for compiler -Wstringop-overflow
 stringop_overflow="no"
@@ -368,30 +380,138 @@
 print_config "has_ucontext" "$has_ucontext"
 
 ##########################################
-# check for memfd_create(2)
-has_memfd_create="no"
+# Check NVME_URING_CMD support
+nvme_uring_cmd="no"
 cat > $TMPC << EOF
-#include <sys/mman.h>
-int main(int argc, char **argv)
+#include <linux/nvme_ioctl.h>
+int main(void)
 {
-  int memfd = memfd_create("test", 0);
+  struct nvme_uring_cmd *cmd;
+
+  return sizeof(struct nvme_uring_cmd);
+}
+EOF
+if compile_prog "" "" "nvme uring cmd"; then
+  nvme_uring_cmd="yes"
+fi
+print_config "NVMe uring command support" "$nvme_uring_cmd"
+
+##########################################
+# Check futexv support
+futexv="no"
+cat > $TMPC << EOF
+#include <linux/futex.h>
+#include <unistd.h>
+#include <string.h>
+int main(void)
+{
+  struct futex_waitv fw;
+
+  memset(&fw, FUTEX_32, sizeof(fw));
+
+  return sizeof(struct futex_waitv);
+}
+EOF
+if compile_prog "" "" "futexv"; then
+  futexv="yes"
+fi
+print_config "futex waitv support" "$futexv"
+
+##########################################
+# Check block discard cmd support
+discard_cmd="no"
+cat > $TMPC << EOF
+#include <linux/blkdev.h>
+int main(void)
+{
+  return BLOCK_URING_CMD_DISCARD;
+}
+EOF
+if compile_prog "" "" "discard command"; then
+  discard_cmd="yes"
+fi
+print_config "io_uring discard command support" "$discard_cmd"
+
+##########################################
+# Check idtype_t support
+has_idtype_t="no"
+cat > $TMPC << EOF
+#include <sys/wait.h>
+int main(void)
+{
+  idtype_t v;
   return 0;
 }
 EOF
-if compile_prog "-Werror=implicit-function-declaration" "" "has_memfd_create"; then
-  has_memfd_create="yes"
+if compile_prog "" "" "idtype_t"; then
+  has_idtype_t="yes"
 fi
-print_config "has_memfd_create" "$has_memfd_create"
-
+print_config "has_idtype_t" "$has_idtype_t"
 
 #############################################################################
+liburing_nolibc="no"
+if test "$use_libc" != "yes"; then
+
+  #
+  # Currently, CONFIG_NOLIBC only supports x86-64, x86 (32-bit), aarch64 and riscv64.
+  #
+  cat > $TMPC << EOF
+int main(void){
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)
+  return 0;
+#else
+#error libc is needed
+#endif
+}
+EOF
+
+  if compile_prog "" "" "nolibc"; then
+    liburing_nolibc="yes"
+  fi
+fi
+
+print_config "nolibc" "$liburing_nolibc";
+#############################################################################
+
+####################################################
+# Most Android devices don't have sys/fanotify.h
+has_fanotify="no"
+cat > $TMPC << EOF
+#include <sys/fanotify.h>
+int main(void)
+{
+  return 0;
+}
+EOF
+if compile_prog "" "" "fanotify"; then
+  has_fanotify="yes"
+fi
+print_config "has_fanotify" "$has_fanotify"
+####################################################
+
+##########################################
+# check for ublk headers
+ublk_header="no"
+cat > $TMPC << EOF
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/ublk_cmd.h>
+int main(int argc, char **argv)
+{
+  struct ublksrv_ctrl_cmd cmd = { };
+
+  cmd.addr = UBLK_U_CMD_START_DEV;
+  return cmd.queue_id;
+}
+EOF
+if compile_prog "" "" "ublk_header"; then
+  ublk_header="yes"
+fi
+print_config "ublk_header" "$ublk_header"
+
 if test "$liburing_nolibc" = "yes"; then
   output_sym "CONFIG_NOLIBC"
-else
-  liburing_nolibc="no"
 fi
-print_config "liburing_nolibc" "$liburing_nolibc"
-
 if test "$__kernel_rwf_t" = "yes"; then
   output_sym "CONFIG_HAVE_KERNEL_RWF_T"
 fi
@@ -419,8 +539,23 @@
 if test "$array_bounds" = "yes"; then
   output_sym "CONFIG_HAVE_ARRAY_BOUNDS"
 fi
-if test "$has_memfd_create" = "yes"; then
-  output_sym "CONFIG_HAVE_MEMFD_CREATE"
+if test "$nvme_uring_cmd" = "yes"; then
+  output_sym "CONFIG_HAVE_NVME_URING"
+fi
+if test "$has_fanotify" = "yes"; then
+  output_sym "CONFIG_HAVE_FANOTIFY"
+fi
+if test "$futexv" = "yes"; then
+  output_sym "CONFIG_HAVE_FUTEXV"
+fi
+if test "$ublk_header" = "yes"; then
+  output_sym "CONFIG_HAVE_UBLK_HEADER"
+fi
+if test "$use_sanitizer" = "yes"; then
+  output_sym "CONFIG_USE_SANITIZER"
+  print_config "use sanitizer" "yes"
+else
+  print_config "use sanitizer" "no"
 fi
 
 echo "CC=$cc" >> $config_host_mak
@@ -428,6 +563,24 @@
 echo "CXX=$cxx" >> $config_host_mak
 print_config "CXX" "$cxx"
 
+# generate io_uring_version.h
+# Reset MAKEFLAGS
+MAKEFLAGS=
+MAKE_PRINT_VARS="include Makefile.common\nprint-%%: ; @echo \$(\$*)\n"
+VERSION_MAJOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MAJOR)
+VERSION_MINOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MINOR)
+io_uring_version_h="src/include/liburing/io_uring_version.h"
+cat > $io_uring_version_h << EOF
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_VERSION_H
+#define LIBURING_VERSION_H
+
+#define IO_URING_VERSION_MAJOR $VERSION_MAJOR
+#define IO_URING_VERSION_MINOR $VERSION_MINOR
+
+#endif
+EOF
+
 # generate compat.h
 compat_h="src/include/liburing/compat.h"
 cat > $compat_h << EOF
@@ -452,10 +605,15 @@
 	long long	tv_nsec;
 };
 
+/* <linux/time_types.h> is not available, so it can't be included */
+#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
+
 EOF
 else
 cat >> $compat_h << EOF
 #include <linux/time_types.h>
+/* <linux/time_types.h> is included above and not needed again */
+#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
 
 EOF
 fi
@@ -481,6 +639,49 @@
 
 EOF
 fi
+if test "$futexv" != "yes"; then
+cat >> $compat_h << EOF
+#include <inttypes.h>
+
+#define FUTEX_32	2
+#define FUTEX_WAITV_MAX	128
+
+struct futex_waitv {
+	uint64_t	val;
+	uint64_t	uaddr;
+	uint32_t	flags;
+	uint32_t	__reserved;
+};
+
+EOF
+fi
+
+if test "$has_idtype_t" != "yes"; then
+cat >> $compat_h << EOF
+typedef enum
+{
+  P_ALL,		/* Wait for any child.  */
+  P_PID,		/* Wait for specified process.  */
+  P_PGID		/* Wait for members of process group.  */
+} idtype_t;
+EOF
+fi
+
+if test "$discard_cmd" != "yes"; then
+cat >> $compat_h << EOF
+
+#include <linux/ioctl.h>
+
+#ifndef BLOCK_URING_CMD_DISCARD
+#define BLOCK_URING_CMD_DISCARD                        _IO(0x12, 0)
+#endif
+
+EOF
+else cat >> $compat_h << EOF
+#include <linux/blkdev.h>
+
+EOF
+fi
 
 cat >> $compat_h << EOF
 #endif
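
Illustration (not part of the patch): consuming the io_uring_version.h
that configure now generates. Only the two macros written by the block
above are used, so this compiles against any tree that carries the
generated header (liburing.h pulls it in).

```c
/* Sketch: compile-time gating on the generated version macros. */
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	printf("built against liburing %d.%d\n",
	       IO_URING_VERSION_MAJOR, IO_URING_VERSION_MINOR);
#if IO_URING_VERSION_MAJOR > 2 || \
    (IO_URING_VERSION_MAJOR == 2 && IO_URING_VERSION_MINOR >= 8)
	puts("headers are 2.8 or newer");
#endif
	return 0;
}
```
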
diff --git a/debian/changelog b/debian/changelog
index f0032e3..4c06b4f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,14 @@
+liburing (2.2-1) stable; urgency=low
+
+  * Update to 2.2
+  * Bump up so version to 2
+  * Drop liburing1-udeb
+  * Package using dh instead of using dh_* helpers manually
+  * Add linux header dependency to liburing-dev
+  * Bump up debhelper-compat level to 13
+
+ -- Kefu Chai <[email protected]>  Sun, 16 Oct 2022 16:30:48 +0800
+
 liburing (0.7-1) stable; urgency=low
 
   * Update to 0.7
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
index 831a314..e79ff11 100644
--- a/debian/control
+++ b/debian/control
@@ -2,13 +2,14 @@
 Section: libs
 Priority: optional
 Maintainer: Liu Changcheng <[email protected]>
-Build-Depends: debhelper (>=9)
+Build-Depends:
+  debhelper-compat (= 13)
 Standards-Version: 4.1.4
 Homepage: https://git.kernel.dk/cgit/liburing/tree/README
 Vcs-Git: https://git.kernel.dk/liburing
 Vcs-Browser: https://git.kernel.dk/cgit/liburing/
 
-Package: liburing1
+Package: liburing2
 Architecture: linux-any
 Multi-Arch: same
 Pre-Depends: ${misc:Pre-Depends}
@@ -16,33 +17,23 @@
 Description: userspace library for using io_uring
  io_uring is kernel feature to improve development
  The newese Linux IO interface, io_uring could improve
- system performance a lot. liburing is the userpace
+ system performance a lot. liburing is the userspace
  library to use io_uring feature.
  .
  This package contains the shared library.
 
-Package: liburing1-udeb
-Package-Type: udeb
-Section: debian-installer
-Architecture: linux-any
-Depends: ${misc:Depends}, ${shlibs:Depends},
-Description: userspace library for using io_uring
- io_uring is kernel feature to improve development
- The newese Linux IO interface, io_uring could improve
- system performance a lot. liburing is the userpace
- library to use io_uring feature.
- .
- This package contains the udeb shared library.
-
 Package: liburing-dev
 Section: libdevel
 Architecture: linux-any
 Multi-Arch: same
-Depends: ${misc:Depends}, liburing1 (= ${binary:Version}),
+Depends:
+  ${misc:Depends},
+  liburing2 (= ${binary:Version}),
+  linux-libc-dev (>= 5.1)
 Description: userspace library for using io_uring
  io_uring is kernel feature to improve development
  The newese Linux IO interface, io_uring could improve
- system performance a lot. liburing is the userpace
+ system performance a lot. liburing is the userspace
  library to use io_uring feature.
  .
  This package contains the static library and the header files.
diff --git a/debian/liburing-dev.manpages b/debian/liburing-dev.manpages
index fbbee23..ff885fd 100644
--- a/debian/liburing-dev.manpages
+++ b/debian/liburing-dev.manpages
@@ -1,6 +1,5 @@
-man/io_uring_setup.2
-man/io_uring_enter.2
-man/io_uring_register.2
-man/io_uring_queue_exit.3
-man/io_uring_queue_init.3
-man/io_uring_get_sqe.3
+usr/share/man/man2/io_uring_*.2
+usr/share/man/man3/io_uring_*.3
+usr/share/man/man7/io_uring.7
+usr/share/man/man3/IO_URING_*.3
+usr/share/man/man3/__io_uring_*.3
diff --git a/debian/liburing1-udeb.install b/debian/liburing1-udeb.install
deleted file mode 100644
index 622f9ef..0000000
--- a/debian/liburing1-udeb.install
+++ /dev/null
@@ -1 +0,0 @@
-lib/*/lib*.so.*
diff --git a/debian/liburing1.install b/debian/liburing1.install
deleted file mode 100644
index 622f9ef..0000000
--- a/debian/liburing1.install
+++ /dev/null
@@ -1 +0,0 @@
-lib/*/lib*.so.*
diff --git a/debian/liburing1.symbols b/debian/liburing1.symbols
deleted file mode 100644
index 29109f2..0000000
--- a/debian/liburing1.symbols
+++ /dev/null
@@ -1,32 +0,0 @@
-liburing.so.1 liburing1 #MINVER#
- (symver)LIBURING_0.1 0.1-1
- io_uring_get_sqe@LIBURING_0.1 0.1-1
- io_uring_queue_exit@LIBURING_0.1 0.1-1
- io_uring_queue_init@LIBURING_0.1 0.1-1
- io_uring_queue_mmap@LIBURING_0.1 0.1-1
- io_uring_register_buffers@LIBURING_0.1 0.1-1
- io_uring_register_eventfd@LIBURING_0.1 0.1-1
- io_uring_register_eventfd_async@LIBURING_0.6 0.6-1
- io_uring_register_files@LIBURING_0.1 0.1-1
- io_uring_submit@LIBURING_0.1 0.1-1
- io_uring_submit_and_wait@LIBURING_0.1 0.1-1
- io_uring_unregister_buffers@LIBURING_0.1 0.1-1
- io_uring_unregister_files@LIBURING_0.1 0.1-1
- (symver)LIBURING_0.2 0.2-1
- __io_uring_get_cqe@LIBURING_0.2 0.2-1
- io_uring_queue_init_params@LIBURING_0.2 0.2-1
- io_uring_register_files_update@LIBURING_0.2 0.2-1
- io_uring_peek_batch_cqe@LIBURING_0.2 0.2-1
- io_uring_wait_cqe_timeout@LIBURING_0.2 0.2-1
- io_uring_wait_cqes@LIBURING_0.2 0.2-1
- (symver)LIBURING_0.3 0.3-1
- (symver)LIBURING_0.4 0.4-1
- (symver)LIBURING_0.5 0.5-1
- (symver)LIBURING_0.6 0.6-1
- (symver)LIBURING_0.7 0.7-1
- io_uring_get_probe@LIBURING_0.4 0.4-1
- io_uring_get_probe_ring@LIBURING_0.4 0.4-1
- io_uring_register_personality@LIBURING_0.4 0.4-1
- io_uring_register_probe@LIBURING_0.4 0.4-1
- io_uring_ring_dontfork@LIBURING_0.4 0.4-1
- io_uring_unregister_personality@LIBURING_0.4 0.4-1
diff --git a/debian/liburing2.install b/debian/liburing2.install
new file mode 100644
index 0000000..3ddde58
--- /dev/null
+++ b/debian/liburing2.install
@@ -0,0 +1 @@
+usr/lib/*/lib*.so.*
diff --git a/debian/liburing2.symbols b/debian/liburing2.symbols
new file mode 100644
index 0000000..725a039
--- /dev/null
+++ b/debian/liburing2.symbols
@@ -0,0 +1,56 @@
+liburing.so.2 liburing2 #MINVER#
+ LIBURING_2.0@LIBURING_2.0 0.7-1
+ LIBURING_2.1@LIBURING_2.1 0.7-1
+ LIBURING_2.2@LIBURING_2.2 0.7-1
+ LIBURING_2.3@LIBURING_2.3 0.7-1
+ __io_uring_get_cqe@LIBURING_2.0 0.7-1
+ __io_uring_sqring_wait@LIBURING_2.0 0.7-1
+ io_uring_enter2@LIBURING_2.3 0.7-1
+ io_uring_enter@LIBURING_2.3 0.7-1
+ io_uring_free_probe@LIBURING_2.0 0.7-1
+ io_uring_get_events@LIBURING_2.3 0.7-1
+ io_uring_get_probe@LIBURING_2.0 0.7-1
+ io_uring_get_probe_ring@LIBURING_2.0 0.7-1
+ io_uring_get_sqe@LIBURING_2.0 0.7-1
+ io_uring_mlock_size@LIBURING_2.1 0.7-1
+ io_uring_mlock_size_params@LIBURING_2.1 0.7-1
+ io_uring_peek_batch_cqe@LIBURING_2.0 0.7-1
+ io_uring_queue_exit@LIBURING_2.0 0.7-1
+ io_uring_queue_init@LIBURING_2.0 0.7-1
+ io_uring_queue_init_params@LIBURING_2.0 0.7-1
+ io_uring_queue_mmap@LIBURING_2.0 0.7-1
+ io_uring_register@LIBURING_2.3 0.7-1
+ io_uring_register_buf_ring@LIBURING_2.2 0.7-1
+ io_uring_register_buffers@LIBURING_2.0 0.7-1
+ io_uring_register_buffers_sparse@LIBURING_2.2 0.7-1
+ io_uring_register_buffers_tags@LIBURING_2.1 0.7-1
+ io_uring_register_buffers_update_tag@LIBURING_2.1 0.7-1
+ io_uring_register_eventfd@LIBURING_2.0 0.7-1
+ io_uring_register_eventfd_async@LIBURING_2.0 0.7-1
+ io_uring_register_file_alloc_range@LIBURING_2.3 0.7-1
+ io_uring_register_files@LIBURING_2.0 0.7-1
+ io_uring_register_files_sparse@LIBURING_2.2 0.7-1
+ io_uring_register_files_tags@LIBURING_2.1 0.7-1
+ io_uring_register_files_update@LIBURING_2.0 0.7-1
+ io_uring_register_files_update_tag@LIBURING_2.1 0.7-1
+ io_uring_register_iowq_aff@LIBURING_2.1 0.7-1
+ io_uring_register_iowq_max_workers@LIBURING_2.1 0.7-1
+ io_uring_register_personality@LIBURING_2.0 0.7-1
+ io_uring_register_probe@LIBURING_2.0 0.7-1
+ io_uring_register_ring_fd@LIBURING_2.2 0.7-1
+ io_uring_register_sync_cancel@LIBURING_2.3 0.7-1
+ io_uring_ring_dontfork@LIBURING_2.0 0.7-1
+ io_uring_setup@LIBURING_2.3 0.7-1
+ io_uring_submit@LIBURING_2.0 0.7-1
+ io_uring_submit_and_get_events@LIBURING_2.3 0.7-1
+ io_uring_submit_and_wait@LIBURING_2.0 0.7-1
+ io_uring_submit_and_wait_timeout@LIBURING_2.2 0.7-1
+ io_uring_unregister_buf_ring@LIBURING_2.2 0.7-1
+ io_uring_unregister_buffers@LIBURING_2.0 0.7-1
+ io_uring_unregister_eventfd@LIBURING_2.0 0.7-1
+ io_uring_unregister_files@LIBURING_2.0 0.7-1
+ io_uring_unregister_iowq_aff@LIBURING_2.1 0.7-1
+ io_uring_unregister_personality@LIBURING_2.0 0.7-1
+ io_uring_unregister_ring_fd@LIBURING_2.2 0.7-1
+ io_uring_wait_cqe_timeout@LIBURING_2.0 0.7-1
+ io_uring_wait_cqes@LIBURING_2.0 0.7-1
diff --git a/debian/rules b/debian/rules
index 1a334b3..cdc0a60 100755
--- a/debian/rules
+++ b/debian/rules
@@ -5,77 +5,25 @@
 
 DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow
 DEB_CFLAGS_MAINT_PREPEND = -Wall
+DEB_BUILD_OPTIONS += nocheck
 
 include /usr/share/dpkg/default.mk
 include /usr/share/dpkg/buildtools.mk
 
-export CC
+%:
+	dh $@ --parallel
 
-lib := liburing1
-libdbg := $(lib)-dbg
-libudeb := $(lib)-udeb
-libdev := liburing-dev
+override_dh_auto_configure:
+	./configure \
+	  --prefix=/usr \
+	  --includedir=/usr/include \
+	  --datadir=/usr/share \
+	  --mandir=/usr/share/man \
+	  --libdir=/usr/lib/$(DEB_HOST_MULTIARCH) \
+	  --libdevdir=/usr/lib/$(DEB_HOST_MULTIARCH) \
+	  --cc=$(CC)
 
-build-indep:
-
-build-arch:
-	dh_testdir
-
-	$(MAKE) CPPFLAGS="$(CPPFLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)"
-
-build: build-indep build-arch
-
-clean:
-	dh_testdir
-	dh_testroot
-
-	$(MAKE) clean
-
-	dh_clean
-
-check-arch: build-arch
-	dh_testdir
-
+override_dh_auto_test:
 ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-	$(MAKE) CPPFLAGS="$(CPPFLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" \
-	        partcheck
+	$(MAKE) runtests
 endif
-
-install-arch: check-arch
-	dh_testdir
-	dh_testroot
-	dh_clean
-	dh_installdirs
-
-	$(MAKE) install \
-	  DESTDIR=$(CURDIR)/debian/tmp \
-	  libdir=/lib/$(DEB_HOST_MULTIARCH) \
-	  libdevdir=/usr/lib/$(DEB_HOST_MULTIARCH) \
-	  relativelibdir=/lib/$(DEB_HOST_MULTIARCH)/
-
-binary: binary-indep binary-arch
-
-binary-indep:
-	# Nothing to do.
-
-binary-arch: install-arch
-	dh_testdir
-	dh_testroot
-	dh_install -a
-	dh_installdocs -a
-	dh_installexamples -a
-	dh_installman -a
-	dh_lintian -a
-	dh_link -a
-	dh_strip -a --ddeb-migration='$(libdbg) (<< 0.3)'
-	dh_compress -a
-	dh_fixperms -a
-	dh_makeshlibs -a --add-udeb '$(libudeb)'
-	dh_shlibdeps -a
-	dh_installdeb -a
-	dh_gencontrol -a
-	dh_md5sums -a
-	dh_builddeb -a
-
-.PHONY: clean build-indep build-arch build
-.PHONY: install-arch binary-indep binary-arch binary
diff --git a/examples/Makefile b/examples/Makefile
index 95a45f9..0452820 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -10,10 +10,28 @@
 include ../config-host.mak
 endif
 
+LDFLAGS ?=
+override LDFLAGS += -L../src/ -luring -lpthread
+
+ifeq ($(CONFIG_USE_SANITIZER),y)
+	override CFLAGS += -fsanitize=address,undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls
+	override CPPFLAGS += -fsanitize=address,undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls
+	override LDFLAGS += -fsanitize=address,undefined
+endif
+
 example_srcs := \
+	io_uring-close-test.c \
 	io_uring-cp.c \
 	io_uring-test.c \
-	link-cp.c
+	io_uring-udp.c \
+	link-cp.c \
+	napi-busy-poll-client.c \
+	napi-busy-poll-server.c \
+	poll-bench.c \
+	send-zerocopy.c \
+	rsrc-update-bench.c \
+	proxy.c \
+	kdigest.c
 
 all_targets :=
 
@@ -21,16 +39,20 @@
 ifdef CONFIG_HAVE_UCONTEXT
 	example_srcs += ucontext-cp.c
 endif
-all_targets += ucontext-cp
+all_targets += ucontext-cp helpers.o
 
 example_targets := $(patsubst %.c,%,$(patsubst %.cc,%,$(example_srcs)))
 all_targets += $(example_targets)
 
+helpers = helpers.o
 
 all: $(example_targets)
 
-%: %.c ../src/liburing.a
-	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(LDFLAGS)
+helpers.o: helpers.c
+	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
+
+%: %.c $(helpers) ../src/liburing.a
+	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS)
 
 clean:
 	@rm -f $(all_targets)
diff --git a/examples/helpers.c b/examples/helpers.c
new file mode 100644
index 0000000..b70ce7c
--- /dev/null
+++ b/examples/helpers.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+#include <fcntl.h>
+#include <stdint.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "helpers.h"
+
+int setup_listening_socket(int port, int ipv6)
+{
+	struct sockaddr_in srv_addr = { };
+	struct sockaddr_in6 srv_addr6 = { };
+	int fd, enable, ret, domain;
+
+	if (ipv6)
+		domain = AF_INET6;
+	else
+		domain = AF_INET;
+
+	fd = socket(domain, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket()");
+		return -1;
+	}
+
+	enable = 1;
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+	if (ret < 0) {
+		perror("setsockopt(SO_REUSEADDR)");
+		return -1;
+	}
+
+	if (ipv6) {
+		srv_addr6.sin6_family = AF_INET6;
+		srv_addr6.sin6_port = htons(port);
+		srv_addr6.sin6_addr = in6addr_any;
+		ret = bind(fd, (const struct sockaddr *)&srv_addr6, sizeof(srv_addr6));
+	} else {
+		srv_addr.sin_family = AF_INET;
+		srv_addr.sin_port = htons(port);
+		srv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+		ret = bind(fd, (const struct sockaddr *)&srv_addr, sizeof(srv_addr));
+	}
+
+	if (ret < 0) {
+		perror("bind()");
+		return -1;
+	}
+
+	if (listen(fd, 1024) < 0) {
+		perror("listen()");
+		return -1;
+	}
+
+	return fd;
+}
diff --git a/examples/helpers.h b/examples/helpers.h
new file mode 100644
index 0000000..9b1cf34
--- /dev/null
+++ b/examples/helpers.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_EX_HELPERS_H
+#define LIBURING_EX_HELPERS_H
+
+int setup_listening_socket(int port, int ipv6);
+
+#endif
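
Illustration (not part of the patch): how the example programs are meant
to consume the new helper. The port number here is arbitrary.

```c
/* Sketch: bind an IPv4 TCP listener via the helper above and accept a
 * single client. */
#include <unistd.h>
#include <sys/socket.h>
#include "helpers.h"

int main(void)
{
	int cfd, lfd = setup_listening_socket(8080, 0 /* !ipv6 */);

	if (lfd < 0)
		return 1;
	cfd = accept(lfd, NULL, NULL);	/* blocks for one connection */
	if (cfd >= 0)
		close(cfd);
	close(lfd);
	return 0;
}
```
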
diff --git a/examples/io_uring-close-test.c b/examples/io_uring-close-test.c
new file mode 100644
index 0000000..3936d41
--- /dev/null
+++ b/examples/io_uring-close-test.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Simple app that demonstrates how to set up an io_uring interface and use it
+ * via a registered ring fd, without leaving the original fd open.
+ *
+ * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-close-test io_uring-close-test.c -luring
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "liburing.h"
+
+#define QD	4
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int i, fd, ret, pending, done;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct iovec *iovecs;
+	struct stat sb;
+	ssize_t fsize;
+	off_t offset;
+	void *buf;
+
+	if (argc < 2) {
+		printf("%s: file\n", argv[0]);
+		return 1;
+	}
+
+	ret = io_uring_queue_init(QD, &ring, 0);
+	if (ret < 0) {
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+		return 1;
+	}
+
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret < 0) {
+		fprintf(stderr, "register_ring_fd: %s\n", strerror(-ret));
+		return 1;
+	}
+	ret = io_uring_close_ring_fd(&ring);
+	if (ret < 0) {
+		fprintf(stderr, "close_ring_fd: %s\n", strerror(-ret));
+		return 1;
+	}
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return 1;
+	}
+
+	if (fstat(fd, &sb) < 0) {
+		perror("fstat");
+		return 1;
+	}
+
+	fsize = 0;
+	iovecs = calloc(QD, sizeof(struct iovec));
+	for (i = 0; i < QD; i++) {
+		if (posix_memalign(&buf, 4096, 4096))
+			return 1;
+		iovecs[i].iov_base = buf;
+		iovecs[i].iov_len = 4096;
+		fsize += 4096;
+	}
+
+	offset = 0;
+	i = 0;
+	do {
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe)
+			break;
+		io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset);
+		offset += iovecs[i].iov_len;
+		i++;
+		if (offset > sb.st_size)
+			break;
+	} while (1);
+
+	ret = io_uring_submit(&ring);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
+		return 1;
+	} else if (ret != i) {
+		fprintf(stderr, "io_uring_submit submitted less %d\n", ret);
+		return 1;
+	}
+
+	done = 0;
+	pending = ret;
+	fsize = 0;
+	for (i = 0; i < pending; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret));
+			return 1;
+		}
+
+		done++;
+		ret = 0;
+		if (cqe->res != 4096 && cqe->res + fsize != sb.st_size) {
+			fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res);
+			ret = 1;
+		}
+		fsize += cqe->res;
+		io_uring_cqe_seen(&ring, cqe);
+		if (ret)
+			break;
+	}
+
+	printf("Submitted=%d, completed=%d, bytes=%lu\n", pending, done,
+						(unsigned long) fsize);
+	close(fd);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
diff --git a/examples/io_uring-test.c b/examples/io_uring-test.c
index 1a68536..d3fcc9e 100644
--- a/examples/io_uring-test.c
+++ b/examples/io_uring-test.c
@@ -69,7 +69,7 @@
 		io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset);
 		offset += iovecs[i].iov_len;
 		i++;
-		if (offset > sb.st_size)
+		if (offset >= sb.st_size)
 			break;
 	} while (1);
 
diff --git a/examples/io_uring-udp.c b/examples/io_uring-udp.c
new file mode 100644
index 0000000..4697af1
--- /dev/null
+++ b/examples/io_uring-udp.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netinet/udp.h>
+#include <arpa/inet.h>
+
+#include "liburing.h"
+
+#define QD 64
+#define BUF_SHIFT 12 /* 4k */
+#define CQES (QD * 16)
+#define BUFFERS CQES
+#define CONTROLLEN 0
+
+struct sendmsg_ctx {
+	struct msghdr msg;
+	struct iovec iov;
+};
+
+struct ctx {
+	struct io_uring ring;
+	struct io_uring_buf_ring *buf_ring;
+	unsigned char *buffer_base;
+	struct msghdr msg;
+	int buf_shift;
+	int af;
+	bool verbose;
+	struct sendmsg_ctx send[BUFFERS];
+	size_t buf_ring_size;
+};
+
+static size_t buffer_size(struct ctx *ctx)
+{
+	return 1U << ctx->buf_shift;
+}
+
+static unsigned char *get_buffer(struct ctx *ctx, int idx)
+{
+	return ctx->buffer_base + (idx << ctx->buf_shift);
+}
+
+static int setup_buffer_pool(struct ctx *ctx)
+{
+	int ret, i;
+	void *mapped;
+	struct io_uring_buf_reg reg = { .ring_addr = 0,
+					.ring_entries = BUFFERS,
+					.bgid = 0 };
+
+	ctx->buf_ring_size = (sizeof(struct io_uring_buf) + buffer_size(ctx)) * BUFFERS;
+	mapped = mmap(NULL, ctx->buf_ring_size, PROT_READ | PROT_WRITE,
+		      MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+	if (mapped == MAP_FAILED) {
+		fprintf(stderr, "buf_ring mmap: %s\n", strerror(errno));
+		return -1;
+	}
+	ctx->buf_ring = (struct io_uring_buf_ring *)mapped;
+
+	io_uring_buf_ring_init(ctx->buf_ring);
+
+	reg = (struct io_uring_buf_reg) {
+		.ring_addr = (unsigned long)ctx->buf_ring,
+		.ring_entries = BUFFERS,
+		.bgid = 0
+	};
+	ctx->buffer_base = (unsigned char *)ctx->buf_ring +
+			   sizeof(struct io_uring_buf) * BUFFERS;
+
+	ret = io_uring_register_buf_ring(&ctx->ring, &reg, 0);
+	if (ret) {
+		fprintf(stderr, "buf_ring init failed: %s\n"
+				"NB This requires a kernel version >= 6.0\n",
+				strerror(-ret));
+		return ret;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		io_uring_buf_ring_add(ctx->buf_ring, get_buffer(ctx, i), buffer_size(ctx), i,
+				      io_uring_buf_ring_mask(BUFFERS), i);
+	}
+	io_uring_buf_ring_advance(ctx->buf_ring, BUFFERS);
+
+	return 0;
+}
+
+static int setup_context(struct ctx *ctx)
+{
+	struct io_uring_params params;
+	int ret;
+
+	memset(&params, 0, sizeof(params));
+	params.cq_entries = QD * 8;
+	params.flags = IORING_SETUP_SUBMIT_ALL | IORING_SETUP_COOP_TASKRUN |
+		       IORING_SETUP_CQSIZE;
+
+	ret = io_uring_queue_init_params(QD, &ctx->ring, &params);
+	if (ret < 0) {
+		fprintf(stderr, "queue_init failed: %s\n"
+				"NB: This requires a kernel version >= 6.0\n",
+				strerror(-ret));
+		return ret;
+	}
+
+	ret = setup_buffer_pool(ctx);
+	if (ret)
+		io_uring_queue_exit(&ctx->ring);
+
+	memset(&ctx->msg, 0, sizeof(ctx->msg));
+	ctx->msg.msg_namelen = sizeof(struct sockaddr_storage);
+	ctx->msg.msg_controllen = CONTROLLEN;
+	return ret;
+}
+
+static int setup_sock(int af, int port)
+{
+	int ret;
+	int fd;
+	uint16_t nport = port <= 0 ? 0 : htons(port);
+
+	fd = socket(af, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "sock_init: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (af == AF_INET6) {
+		struct sockaddr_in6 addr6 = {
+			.sin6_family = af,
+			.sin6_port = nport,
+			.sin6_addr = IN6ADDR_ANY_INIT
+		};
+
+		ret = bind(fd, (struct sockaddr *) &addr6, sizeof(addr6));
+	} else {
+		struct sockaddr_in addr = {
+			.sin_family = af,
+			.sin_port = nport,
+			.sin_addr = { INADDR_ANY }
+		};
+
+		ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+	}
+
+	if (ret) {
+		fprintf(stderr, "sock_bind: %s\n", strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	if (port <= 0) {
+		int port;
+		struct sockaddr_storage s;
+		socklen_t sz = sizeof(s);
+
+		if (getsockname(fd, (struct sockaddr *)&s, &sz)) {
+			fprintf(stderr, "getsockname failed\n");
+			close(fd);
+			return -1;
+		}
+
+		port = ntohs(((struct sockaddr_in *)&s)->sin_port);
+		fprintf(stderr, "port bound to %d\n", port);
+	}
+
+	return fd;
+}
+
+static void cleanup_context(struct ctx *ctx)
+{
+	munmap(ctx->buf_ring, ctx->buf_ring_size);
+	io_uring_queue_exit(&ctx->ring);
+}
+
+static bool get_sqe(struct ctx *ctx, struct io_uring_sqe **sqe)
+{
+	*sqe = io_uring_get_sqe(&ctx->ring);
+
+	if (!*sqe) {
+		io_uring_submit(&ctx->ring);
+		*sqe = io_uring_get_sqe(&ctx->ring);
+	}
+	if (!*sqe) {
+		fprintf(stderr, "cannot get sqe\n");
+		return true;
+	}
+	return false;
+}
+
+static int add_recv(struct ctx *ctx, int idx)
+{
+	struct io_uring_sqe *sqe;
+
+	if (get_sqe(ctx, &sqe))
+		return -1;
+
+	io_uring_prep_recvmsg_multishot(sqe, idx, &ctx->msg, MSG_TRUNC);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = 0;
+	io_uring_sqe_set_data64(sqe, BUFFERS + 1);
+	return 0;
+}
+
+static void recycle_buffer(struct ctx *ctx, int idx)
+{
+	io_uring_buf_ring_add(ctx->buf_ring, get_buffer(ctx, idx), buffer_size(ctx), idx,
+			      io_uring_buf_ring_mask(BUFFERS), 0);
+	io_uring_buf_ring_advance(ctx->buf_ring, 1);
+}
+
+static int process_cqe_send(struct ctx *ctx, struct io_uring_cqe *cqe)
+{
+	int idx = cqe->user_data;
+
+	if (cqe->res < 0)
+		fprintf(stderr, "bad send %s\n", strerror(-cqe->res));
+	recycle_buffer(ctx, idx);
+	return 0;
+}
+
+static int process_cqe_recv(struct ctx *ctx, struct io_uring_cqe *cqe,
+			    int fdidx)
+{
+	int ret, idx;
+	struct io_uring_recvmsg_out *o;
+	struct io_uring_sqe *sqe;
+
+	if (!(cqe->flags & IORING_CQE_F_MORE)) {
+		ret = add_recv(ctx, fdidx);
+		if (ret)
+			return ret;
+	}
+
+	if (cqe->res == -ENOBUFS)
+		return 0;
+
+	if (!(cqe->flags & IORING_CQE_F_BUFFER) || cqe->res < 0) {
+		fprintf(stderr, "recv cqe bad res %d\n", cqe->res);
+		if (cqe->res == -EFAULT || cqe->res == -EINVAL)
+			fprintf(stderr,
+				"NB: This requires a kernel version >= 6.0\n");
+		return -1;
+	}
+	idx = cqe->flags >> 16;
+
+	o = io_uring_recvmsg_validate(get_buffer(ctx, cqe->flags >> 16),
+				      cqe->res, &ctx->msg);
+	if (!o) {
+		fprintf(stderr, "bad recvmsg\n");
+		return -1;
+	}
+	if (o->namelen > ctx->msg.msg_namelen) {
+		fprintf(stderr, "truncated name\n");
+		recycle_buffer(ctx, idx);
+		return 0;
+	}
+	if (o->flags & MSG_TRUNC) {
+		unsigned int r;
+
+		r = io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg);
+		fprintf(stderr, "truncated msg need %u received %u\n",
+				o->payloadlen, r);
+		recycle_buffer(ctx, idx);
+		return 0;
+	}
+
+	if (ctx->verbose) {
+		struct sockaddr_in *addr = io_uring_recvmsg_name(o);
+		struct sockaddr_in6 *addr6 = (void *)addr;
+		char buff[INET6_ADDRSTRLEN + 1];
+		const char *name;
+		void *paddr;
+
+		if (ctx->af == AF_INET6)
+			paddr = &addr6->sin6_addr;
+		else
+			paddr = &addr->sin_addr;
+
+		name = inet_ntop(ctx->af, paddr, buff, sizeof(buff));
+		if (!name)
+			name = "<INVALID>";
+
+		fprintf(stderr, "received %u bytes %d from [%s]:%d\n",
+			io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg),
+			o->namelen, name, (int)ntohs(addr->sin_port));
+	}
+
+	if (get_sqe(ctx, &sqe))
+		return -1;
+
+	ctx->send[idx].iov = (struct iovec) {
+		.iov_base = io_uring_recvmsg_payload(o, &ctx->msg),
+		.iov_len =
+			io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg)
+	};
+	ctx->send[idx].msg = (struct msghdr) {
+		.msg_namelen = o->namelen,
+		.msg_name = io_uring_recvmsg_name(o),
+		.msg_control = NULL,
+		.msg_controllen = 0,
+		.msg_iov = &ctx->send[idx].iov,
+		.msg_iovlen = 1
+	};
+
+	io_uring_prep_sendmsg(sqe, fdidx, &ctx->send[idx].msg, 0);
+	io_uring_sqe_set_data64(sqe, idx);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	return 0;
+}
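+
+/*
+ * Dispatch completions based on user_data: send completions carry their
+ * buffer index, which is always < BUFFERS, while the multishot recvmsg is
+ * armed with user_data BUFFERS + 1 in add_recv(). The value alone tells
+ * the two apart.
+ */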
+static int process_cqe(struct ctx *ctx, struct io_uring_cqe *cqe, int fdidx)
+{
+	if (cqe->user_data < BUFFERS)
+		return process_cqe_send(ctx, cqe);
+	else
+		return process_cqe_recv(ctx, cqe, fdidx);
+}
+
+int main(int argc, char *argv[])
+{
+	struct ctx ctx;
+	int ret;
+	int port = -1;
+	int sockfd;
+	int opt;
+	struct io_uring_cqe *cqes[CQES];
+	unsigned int count, i;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.verbose = false;
+	ctx.af = AF_INET;
+	ctx.buf_shift = BUF_SHIFT;
+
+	while ((opt = getopt(argc, argv, "6vp:b:")) != -1) {
+		switch (opt) {
+		case '6':
+			ctx.af = AF_INET6;
+			break;
+		case 'p':
+			port = atoi(optarg);
+			break;
+		case 'b':
+			ctx.buf_shift = atoi(optarg);
+			break;
+		case 'v':
+			ctx.verbose = true;
+			break;
+		default:
+			fprintf(stderr, "Usage: %s [-p port] "
+					"[-b log2(BufferSize)] [-6] [-v]\n",
+					argv[0]);
+			exit(-1);
+		}
+	}
+
+	sockfd = setup_sock(ctx.af, port);
+	if (sockfd < 0)
+		return 1;
+
+	if (setup_context(&ctx)) {
+		close(sockfd);
+		return 1;
+	}
+
+	ret = io_uring_register_files(&ctx.ring, &sockfd, 1);
+	if (ret) {
+		fprintf(stderr, "register files: %s\n", strerror(-ret));
+		return -1;
+	}
+
+	ret = add_recv(&ctx, 0);
+	if (ret)
+		return 1;
+
+	while (true) {
+		ret = io_uring_submit_and_wait(&ctx.ring, 1);
+		if (ret == -EINTR)
+			continue;
+		if (ret < 0) {
+			fprintf(stderr, "submit and wait failed %d\n", ret);
+			break;
+		}
+
+		count = io_uring_peek_batch_cqe(&ctx.ring, &cqes[0], CQES);
+		for (i = 0; i < count; i++) {
+			ret = process_cqe(&ctx, cqes[i], 0);
+			if (ret)
+				goto cleanup;
+		}
+		io_uring_cq_advance(&ctx.ring, count);
+	}
+
+cleanup:
+	cleanup_context(&ctx);
+	close(sockfd);
+	return ret;
+}
diff --git a/examples/kdigest.c b/examples/kdigest.c
new file mode 100644
index 0000000..37c8513
--- /dev/null
+++ b/examples/kdigest.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Proof-of-concept for doing file digests using the kernel's AF_ALG API.
+ * Needs a bit of error handling.
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/if_alg.h>
+#include "liburing.h"
+
+#define QD		64
+#define WAIT_BATCH	(QD / 8)
+#define BS		(64*1024)
+
+#define BGID		1
+#define BID_MASK	(QD - 1)
+
+enum req_state {
+	IO_INIT = 0,
+	IO_READ,
+	IO_READ_COMPLETE,
+	IO_WRITE,
+	IO_WRITE_COMPLETE,
+};
+
+struct req {
+	off_t offset;
+	enum req_state state;
+	struct iovec iov;
+};
+
+struct kdigest {
+	struct io_uring ring;
+	struct io_uring_buf_ring *br;
+	struct req reqs[QD];
+	/* heap allocated, aligned QD*BS buffer */
+	uint8_t *bufs;
+};
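+
+/*
+ * Each of the QD requests owns one BS-sized slice of 'bufs' and walks the
+ * req_state sequence above: read a chunk from the input file, send it to
+ * the AF_ALG socket, then get recycled for the next chunk.
+ */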
+
+static int infd, outfd;
+
+static int get_file_size(int fd, size_t *size)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0)
+		return -1;
+	if (S_ISREG(st.st_mode)) {
+		*size = st.st_size;
+	} else if (S_ISBLK(st.st_mode)) {
+		unsigned long long bytes;
+
+		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+			return -1;
+
+		*size = bytes;
+	} else {
+		return -1;
+	}
+
+	return 0;
+}
+
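+/*
+ * Reap available completions. Each CQE advances its request one state
+ * (IO_READ -> IO_READ_COMPLETE, IO_WRITE -> IO_WRITE_COMPLETE), which is
+ * why the plain req->state++ below suffices. Completed sends also shrink
+ * the outstanding output byte count.
+ */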
+static int reap_completions(struct io_uring *ring, int *inflight,
+			    size_t *outsize)
+{
+	struct io_uring_cqe *cqe;
+	unsigned head;
+	int ret = 0, nr;
+
+	nr = 0;
+	io_uring_for_each_cqe(ring, head, cqe) {
+		struct req *req;
+
+		req = io_uring_cqe_get_data(cqe);
+		assert(req->state == IO_READ || req->state == IO_WRITE);
+		if (cqe->res < 0) {
+			fprintf(stderr, "%s: cqe error %d\n",
+				req->state == IO_WRITE ? "send" : "read",
+				cqe->res);
+			*outsize = 0;
+			ret = 1;
+			break;
+		}
+
+		(*inflight)--;
+		req->state++;
+		if (req->state == IO_WRITE_COMPLETE)
+			*outsize -= cqe->res;
+		nr++;
+	}
+
+	io_uring_cq_advance(ring, nr);
+	return ret;
+}
+
+/*
+ * Add buffers to the outgoing ring, and submit a single bundle send that
+ * will finish when all of them have completed.
+ */
+static void submit_sends_br(struct kdigest *kdigest, int *write_idx,
+			    int *inflight)
+{
+	struct io_uring_buf_ring *br = kdigest->br;
+	struct req *req, *first_req = NULL;
+	struct io_uring_sqe *sqe;
+	int nr = 0;
+
+	/*
+	 * Find any completed reads, and add the buffers to the outgoing
+	 * send ring. That will serialize the data sent.
+	 */
+	while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
+		req = &kdigest->reqs[*write_idx];
+		io_uring_buf_ring_add(br, req->iov.iov_base, req->iov.iov_len,
+					*write_idx, BID_MASK, nr++);
+		/*
+		 * Mark the first one as a write/send, as it serves as the
+		 * "barrier" in the array. The rest can be marked complete
+		 * upfront, if there's more in this bundle, as the first
+		 * will serve as the stopping point.
+		 */
+		if (!first_req) {
+			req->state = IO_WRITE;
+			first_req = req;
+		} else {
+			req->state = IO_WRITE_COMPLETE;
+		}
+		*write_idx = (*write_idx + 1) % QD;
+	}
+
+	/*
+	 * If any completed reads were found and we added buffers, advance
+	 * the buffer ring and prepare a single bundle send for all of them.
+	 */
+	if (first_req) {
+		io_uring_buf_ring_advance(br, nr);
+
+		sqe = io_uring_get_sqe(&kdigest->ring);
+		io_uring_prep_send_bundle(sqe, outfd, 0, MSG_MORE);
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = BGID;
+		io_uring_sqe_set_data(sqe, first_req);
+		(*inflight)++;
+	}
+}
+
+/*
+ * Serialize multiple writes with IOSQE_IO_LINK. Not the most efficient
+ * way, as it's both more expensive on the kernel side to handle links, and
+ * if there's bundle support, all of the below can be done with a single
+ * send rather than multiple ones.
+ */
+static void submit_sends_linked(struct kdigest *kdigest, int *write_idx,
+				int *inflight)
+{
+	struct io_uring_sqe *sqe;
+	struct req *req;
+
+	/* Queue up any possible writes. Link flag ensures ordering. */
+	sqe = NULL;
+	while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
+		if (sqe)
+			sqe->flags |= IOSQE_IO_LINK;
+
+		req = &kdigest->reqs[*write_idx];
+		req->state = IO_WRITE;
+		sqe = io_uring_get_sqe(&kdigest->ring);
+		io_uring_prep_send(sqe, outfd, req->iov.iov_base,
+					req->iov.iov_len, MSG_MORE);
+		io_uring_sqe_set_data(sqe, req);
+		(*inflight)++;
+
+		*write_idx = (*write_idx + 1) % QD;
+	}
+}
+
+static void submit_sends(struct kdigest *kdigest, int *write_idx, int *inflight)
+{
+	if (kdigest->br)
+		submit_sends_br(kdigest, write_idx, inflight);
+	else
+		submit_sends_linked(kdigest, write_idx, inflight);
+}
+
+static int digest_file(struct kdigest *kdigest, size_t insize)
+{
+	struct io_uring *ring = &kdigest->ring;
+	off_t read_off = 0;
+	size_t outsize = insize;
+	int read_idx = 0, write_idx = 0, inflight = 0;
+
+	while (outsize) {
+		struct io_uring_sqe *sqe;
+		struct req *req;
+		int to_wait;
+
+		submit_sends(kdigest, &write_idx, &inflight);
+
+		/* Queue up any reads. Completions may arrive out of order. */
+		while (insize && (kdigest->reqs[read_idx].state == IO_INIT
+		    || kdigest->reqs[read_idx].state == IO_WRITE_COMPLETE)) {
+			size_t this_size = (insize < BS ? insize : BS);
+
+			req = &kdigest->reqs[read_idx];
+			req->state = IO_READ;
+			req->offset = read_off;
+			req->iov.iov_base = &kdigest->bufs[read_idx * BS];
+			req->iov.iov_len = this_size;
+
+			sqe = io_uring_get_sqe(ring);
+			io_uring_prep_read(sqe, infd, req->iov.iov_base,
+						req->iov.iov_len, read_off);
+			io_uring_sqe_set_data(sqe, req);
+
+			read_off += this_size;
+			insize -= this_size;
+			inflight++;
+
+			read_idx = (read_idx + 1) % QD;
+		}
+
+		/* wait for roughly half the queued requests to complete before resubmitting */
+		for (to_wait = (inflight >> 1) | 1; to_wait; to_wait--) {
+			int ret, wait_nr;
+
+			wait_nr = inflight;
+			if (wait_nr > WAIT_BATCH)
+				wait_nr = WAIT_BATCH;
+
+			ret = io_uring_submit_and_wait(ring, wait_nr);
+			if (ret < 0) {
+				fprintf(stderr, "wait cqe: %s\n",
+					strerror(-ret));
+				return 1;
+			}
+
+			if (reap_completions(ring, &inflight, &outsize))
+				return 1;
+		}
+	}
+	assert(!inflight);
+
+	return 0;
+}
+
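+/*
+ * With AF_ALG, reading from the hash socket returns the digest of the
+ * data sent so far, so a single recv retrieves the final result.
+ */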
+static int get_result(struct kdigest *kdigest, const char *alg, const char *file)
+{
+	struct io_uring *ring = &kdigest->ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+	/* reuse I/O buf block to stash hash result */
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_recv(sqe, outfd, kdigest->bufs, BS, 0);
+
+	if (io_uring_submit_and_wait(ring, 1) < 0)
+		return 1;
+
+	ret = io_uring_peek_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "peek cqe: %s\n", strerror(-ret));
+		return 1;
+	}
+
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe error: %s\n", strerror(-cqe->res));
+		goto err;
+	}
+
+	fprintf(stdout, "uring %s%s(%s) returned(len=%u): ",
+		kdigest->br ? "bundled " : "", alg, file, cqe->res);
+	for (i = 0; i < cqe->res; i++)
+		fprintf(stdout, "%02x", kdigest->bufs[i]);
+	putc('\n', stdout);
+	ret = 0;
+err:
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	const char *alg;
+	const char *infile;
+	size_t alg_len, insize;
+	struct sockaddr_alg sa = {
+		.salg_family = AF_ALG,
+		.salg_type = "hash",
+	};
+	struct kdigest kdigest = { };
+	struct io_uring_params p = { };
+	int sfd, ret;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s algorithm infile\n", argv[0]);
+		return 1;
+	}
+
+	alg = argv[1];
+	infile = argv[2];
+	alg_len = strlen(alg);
+	if (alg_len >= sizeof(sa.salg_name)) {
+		fprintf(stderr, "algorithm name too long\n");
+		return 1;
+	}
+	/* +1 for null terminator */
+	memcpy(sa.salg_name, alg, alg_len + 1);
+
+	infd = open(infile, O_RDONLY);
+	if (infd < 0) {
+		perror("open infile");
+		return 1;
+	}
+
+	sfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+	if (sfd < 0) {
+		if (errno == EAFNOSUPPORT)
+			fprintf(stderr, "kernel AF_ALG support not available. "
+				"CONFIG_CRYPTO_USER_API_HASH required.\n");
+		else
+			perror("AF_ALG socket");
+		return 1;
+	}
+
+	if (bind(sfd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		if (errno == ENOENT)
+			fprintf(stderr, "AF_ALG bind(%s): hash not available. "
+				"See /proc/crypto hash algorithm list.\n",
+				alg);
+		else
+			fprintf(stderr, "AF_ALG bind(%s): %s\n",
+				alg, strerror(errno));
+		return 1;
+	}
+
+	outfd = accept(sfd, NULL, 0);
+	if (outfd < 0) {
+		perror("AF_ALG accept");
+		return 1;
+	}
+
+	if (posix_memalign((void **)&kdigest.bufs, 4096, QD * BS)) {
+		fprintf(stderr, "failed to alloc I/O bufs\n");
+		return 1;
+	}
+
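+	/*
+	 * Prefer SINGLE_ISSUER + DEFER_TASKRUN; if the kernel rejects those
+	 * flags, retry once with no flags set for wider compatibility.
+	 */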
+	p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+	do {
+		ret = io_uring_queue_init_params(QD, &kdigest.ring, &p);
+		if (!ret)
+			break;
+		if (!p.flags) {
+			fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+			return 1;
+		}
+		p.flags = 0;
+	} while (1);
+
+	/* use send bundles, if available */
+	if (p.features & IORING_FEAT_RECVSEND_BUNDLE) {
+		kdigest.br = io_uring_setup_buf_ring(&kdigest.ring, QD, BGID, 0, &ret);
+		if (!kdigest.br) {
+			fprintf(stderr, "Failed setting up bundle buffer ring: %d\n", ret);
+			return 1;
+		}
+	}
+
+	if (get_file_size(infd, &insize))
+		return 1;
+
+	ret = digest_file(&kdigest, insize);
+	if (ret) {
+		fprintf(stderr, "%s digest failed\n", alg);
+		return 1;
+	}
+
+	ret = get_result(&kdigest, alg, infile);
+	if (ret) {
+		fprintf(stderr, "failed to retrieve %s digest result\n", alg);
+		return 1;
+	}
+
+	if (kdigest.br)
+		io_uring_free_buf_ring(&kdigest.ring, kdigest.br, QD, BGID);
+	io_uring_queue_exit(&kdigest.ring);
+	free(kdigest.bufs);
+	if (close(infd) < 0)
+		ret |= 1;
+	if (close(sfd) < 0)
+		ret |= 1;
+	if (close(outfd) < 0)
+		ret |= 1;
+	return ret;
+}
diff --git a/examples/napi-busy-poll-client.c b/examples/napi-busy-poll-client.c
new file mode 100644
index 0000000..43dcca8
--- /dev/null
+++ b/examples/napi-busy-poll-client.c
@@ -0,0 +1,509 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Simple ping/pong client which can use the io_uring NAPI support.
+ *
+ * Needs root privileges to set the SCHED_FIFO scheduling class; it will
+ * still work without that, just without realtime scheduling.
+ *
+ * Example:
+ *
+ * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 -p4444 \
+ *	-b -t10 -u
+ *
+ * send and receive 100k packets, using NAPI.
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <float.h>
+#include <getopt.h>
+#include <liburing.h>
+#include <math.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <netinet/in.h>
+
+#define MAXBUFLEN 100
+#define PORTNOLEN 10
+#define ADDRLEN   80
+#define RINGSIZE  1024
+
+#define printable(ch) (isprint((unsigned char)ch) ? ch : '#')
+
+enum {
+	IOURING_RECV,
+	IOURING_SEND,
+	IOURING_RECVMSG,
+	IOURING_SENDMSG
+};
+
+struct ctx
+{
+	struct io_uring ring;
+	union {
+		struct sockaddr_in6 saddr6;
+		struct sockaddr_in saddr;
+	};
+
+	int sockfd;
+	int buffer_len;
+	int num_pings;
+	bool napi_check;
+
+	union {
+		char buffer[MAXBUFLEN];
+		struct timespec ts;
+	};
+
+	int rtt_index;
+	double *rtt;
+};
+
+struct options
+{
+	int  num_pings;
+	__u32  timeout;
+
+	bool sq_poll;
+	bool defer_tw;
+	bool busy_loop;
+	bool prefer_busy_poll;
+	bool ipv6;
+
+	char port[PORTNOLEN];
+	char addr[ADDRLEN];
+};
+
+static struct option longopts[] =
+{
+	{"address"  , 1, NULL, 'a'},
+	{"busy"     , 0, NULL, 'b'},
+	{"help"     , 0, NULL, 'h'},
+	{"num_pings", 1, NULL, 'n'},
+	{"port"     , 1, NULL, 'p'},
+	{"prefer"   , 1, NULL, 'u'},
+	{"sqpoll"   , 0, NULL, 's'},
+	{"timeout"  , 1, NULL, 't'},
+	{NULL       , 0, NULL,  0 }
+};
+
+static void printUsage(const char *name)
+{
+	fprintf(stderr,
+	"Usage: %s [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]"
+	" [-b|--busy] [-n|--num_pings pings] [-t|--timeout busy-poll-timeout] [-u|--prefer] [-6] [-h|--help]\n"
+	"--address\n"
+	"-a        : remote IP address\n"
+	"--busy\n"
+	"-b        : busy poll io_uring instead of blocking.\n"
+	"--num_pings\n"
+	"-n        : number of pings\n"
+	"--port\n"
+	"-p        : port\n"
+	"--sqpoll\n"
+	"-s        : Configure io_uring to use SQPOLL thread\n"
+	"--timeout\n"
+	"-t        : Configure NAPI busy poll timeout\n"
+	"--prefer\n"
+	"-u        : prefer NAPI busy poll\n"
+	"-6        : use IPV6\n"
+	"--help\n"
+	"-h        : Display this usage message\n\n",
+	name);
+}
+
+static void printError(const char *msg, int opt)
+{
+	if (msg && opt)
+		fprintf(stderr, "%s (-%c)\n", msg, printable(opt));
+}
+
+static void setProcessScheduler(void)
+{
+	struct sched_param param;
+
+	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+	if (sched_setscheduler(0, SCHED_FIFO, &param) < 0)
+		fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n",
+			errno, strerror(errno));
+}
+
+static double diffTimespec(const struct timespec *time1, const struct timespec *time0)
+{
+	return (time1->tv_sec - time0->tv_sec)
+		+ (time1->tv_nsec - time0->tv_nsec) / 1000000000.0;
+}
+
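+/*
+ * user_data layout (64 bits):
+ *
+ *	bits 56..63: operation type
+ *	bits  0..31: fd
+ *
+ * decodeUserData() below reverses the split; bits 32..55 stay unused.
+ */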
+static uint64_t encodeUserData(char type, int fd)
+{
+	return (uint32_t)fd | ((uint64_t)type << 56);
+}
+
+static void decodeUserData(uint64_t data, char *type, int *fd)
+{
+	*type = data >> 56;
+	*fd   = data & 0xffffffffU;
+}
+
+static const char *opTypeToStr(char type)
+{
+	const char *res;
+
+	switch (type) {
+	case IOURING_RECV:
+		res = "IOURING_RECV";
+		break;
+	case IOURING_SEND:
+		res = "IOURING_SEND";
+		break;
+	case IOURING_RECVMSG:
+		res = "IOURING_RECVMSG";
+		break;
+	case IOURING_SENDMSG:
+		res = "IOURING_SENDMSG";
+		break;
+	default:
+		res = "Unknown";
+	}
+
+	return res;
+}
+
+static void reportNapi(struct ctx *ctx)
+{
+	unsigned int napi_id = 0;
+	socklen_t len = sizeof(napi_id);
+
+	getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
+	if (napi_id)
+		printf(" napi id: %d\n", napi_id);
+	else
+		printf(" unassigned napi id\n");
+
+	ctx->napi_check = true;
+}
+
+static void sendPing(struct ctx *ctx)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
+
+	clock_gettime(CLOCK_REALTIME, (struct timespec *)ctx->buffer);
+	io_uring_prep_send(sqe, ctx->sockfd, ctx->buffer, sizeof(struct timespec), 0);
+	sqe->user_data = encodeUserData(IOURING_SEND, ctx->sockfd);
+}
+
+static void receivePing(struct ctx *ctx)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
+
+	io_uring_prep_recv(sqe, ctx->sockfd, ctx->buffer, MAXBUFLEN, 0);
+	sqe->user_data = encodeUserData(IOURING_RECV, ctx->sockfd);
+}
+
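+/*
+ * The ping payload is the send timestamp itself. When an echo arrives,
+ * the echoed timestamp (ctx->ts, unioned with the receive buffer) is
+ * saved, a fresh timestamp is written for the next ping, and the RTT is
+ * the difference between the fresh timestamp and the echoed one.
+ */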
+static void recordRTT(struct ctx *ctx)
+{
+	struct timespec startTs = ctx->ts;
+
+	// Send next ping.
+	sendPing(ctx);
+
+	// Store round-trip time.
+	ctx->rtt[ctx->rtt_index] = diffTimespec(&ctx->ts, &startTs);
+	ctx->rtt_index++;
+}
+
+static void printStats(struct ctx *ctx)
+{
+	double minRTT    = DBL_MAX;
+	double maxRTT    = 0.0;
+	double avgRTT    = 0.0;
+	double stddevRTT = 0.0;
+
+	// Calculate min, max, avg.
+	for (int i = 0; i < ctx->rtt_index; i++) {
+		if (ctx->rtt[i] < minRTT)
+			minRTT = ctx->rtt[i];
+		if (ctx->rtt[i] > maxRTT)
+			maxRTT = ctx->rtt[i];
+
+		avgRTT += ctx->rtt[i];
+	}
+	avgRTT /= ctx->rtt_index;
+
+	// Calculate mean absolute deviation (reported as mdev).
+	for (int i = 0; i < ctx->rtt_index; i++)
+		stddevRTT += fabs(ctx->rtt[i] - avgRTT);
+	stddevRTT /= ctx->rtt_index;
+
+	fprintf(stdout, " rtt(us) min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f\n",
+		minRTT * 1000000, avgRTT * 1000000, maxRTT * 1000000, stddevRTT * 1000000);
+}
+
+static int completion(struct ctx *ctx, struct io_uring_cqe *cqe)
+{
+	char type;
+	int  fd;
+	int  res = cqe->res;
+
+	decodeUserData(cqe->user_data, &type, &fd);
+	if (res < 0) {
+		fprintf(stderr, "unexpected %s failure: (%d) %s\n",
+			opTypeToStr(type), -res, strerror(-res));
+		return -1;
+	}
+
+	switch (type) {
+	case IOURING_SEND:
+		receivePing(ctx);
+		break;
+	case IOURING_RECV:
+		if (res != sizeof(struct timespec)) {
+			fprintf(stderr, "unexpected ping reply len: %d\n", res);
+			abort();
+		}
+
+		if (!ctx->napi_check) {
+			reportNapi(ctx);
+			sendPing(ctx);
+		} else {
+			recordRTT(ctx);
+		}
+
+		--ctx->num_pings;
+		break;
+
+	default:
+		fprintf(stderr, "unexpected %s completion\n",
+			opTypeToStr(type));
+		return -1;
+		break;
+	}
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct ctx       ctx;
+	struct options   opt;
+	struct __kernel_timespec *tsPtr;
+	struct __kernel_timespec ts;
+	struct io_uring_params params;
+	struct io_uring_napi napi;
+	int flag, ret, af;
+
+	memset(&opt, 0, sizeof(struct options));
+
+	// Process flags.
+	while ((flag = getopt_long(argc, argv, ":hs:bua:n:p:t:6d:", longopts, NULL)) != -1) {
+		switch (flag) {
+		case 'a':
+			strcpy(opt.addr, optarg);
+			break;
+		case 'b':
+			opt.busy_loop = true;
+			break;
+		case 'h':
+			printUsage(argv[0]);
+			exit(0);
+			break;
+		case 'n':
+			opt.num_pings = atoi(optarg) + 1;
+			break;
+		case 'p':
+			strcpy(opt.port, optarg);
+			break;
+		case 's':
+			opt.sq_poll = !!atoi(optarg);
+			break;
+		case 't':
+			opt.timeout = atoi(optarg);
+			break;
+		case 'u':
+			opt.prefer_busy_poll = true;
+			break;
+		case '6':
+			opt.ipv6 = true;
+			break;
+		case 'd':
+			opt.defer_tw = !!atoi(optarg);
+			break;
+		case ':':
+			printError("Missing argument", optopt);
+			printUsage(argv[0]);
+			exit(-1);
+			break;
+		case '?':
+			printError("Unrecognized option", optopt);
+			printUsage(argv[0]);
+			exit(-1);
+			break;
+
+		default:
+			fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n");
+			exit(-1);
+			break;
+		}
+	}
+
+	if (strlen(opt.addr) == 0) {
+		fprintf(stderr, "address option is mandatory\n");
+		printUsage(argv[0]);
+		exit(1);
+	}
+
+	if (opt.ipv6) {
+		af = AF_INET6;
+		ctx.saddr6.sin6_port   = htons(atoi(opt.port));
+		ctx.saddr6.sin6_family = AF_INET6;
+	} else {
+		af = AF_INET;
+		ctx.saddr.sin_port   = htons(atoi(opt.port));
+		ctx.saddr.sin_family = AF_INET;
+	}
+
+	if (opt.ipv6)
+		ret = inet_pton(af, opt.addr, &ctx.saddr6.sin6_addr);
+	else
+		ret = inet_pton(af, opt.addr, &ctx.saddr.sin_addr);
+	if (ret <= 0) {
+		fprintf(stderr, "inet_pton error for %s\n", opt.addr);
+		printUsage(argv[0]);
+		exit(1);
+	}
+
+	// Connect to server.
+	fprintf(stdout, "Connecting to %s... (port=%s) to send %d pings\n", opt.addr, opt.port, opt.num_pings - 1);
+
+	if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) {
+		fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno));
+		exit(1);
+	}
+
+	if (opt.ipv6)
+		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6));
+	else
+		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in));
+	if (ret < 0) {
+		fprintf(stderr, "connect() failed: (%d) %s\n", errno, strerror(errno));
+		exit(1);
+	}
+
+	// Setup ring.
+	memset(&params, 0, sizeof(params));
+	memset(&ts, 0, sizeof(ts));
+	memset(&napi, 0, sizeof(napi));
+
+	params.flags = IORING_SETUP_SINGLE_ISSUER;
+	if (opt.defer_tw) {
+		params.flags |= IORING_SETUP_DEFER_TASKRUN;
+	} else if (opt.sq_poll) {
+		params.flags = IORING_SETUP_SQPOLL;
+		params.sq_thread_idle = 50;
+	} else {
+		params.flags |= IORING_SETUP_COOP_TASKRUN;
+	}
+
+	ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
+			ret, strerror(-ret));
+		exit(1);
+	}
+
+	if (opt.timeout || opt.prefer_busy_poll) {
+		napi.prefer_busy_poll = opt.prefer_busy_poll;
+		napi.busy_poll_to = opt.timeout;
+
+		ret = io_uring_register_napi(&ctx.ring, &napi);
+		if (ret) {
+			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
+			exit(1);
+		}
+	}
+
+	if (opt.busy_loop)
+		tsPtr = &ts;
+	else
+		tsPtr = NULL;
+
+	// Use realtime scheduler.
+	setProcessScheduler();
+
+	// Copy payload.
+	clock_gettime(CLOCK_REALTIME, &ctx.ts);
+
+	// Setup context.
+	ctx.napi_check = false;
+	ctx.buffer_len = sizeof(struct timespec);
+	ctx.num_pings  = opt.num_pings;
+
+	ctx.rtt_index = 0;
+	ctx.rtt = (double *)malloc(sizeof(double) * opt.num_pings);
+	if (!ctx.rtt) {
+		fprintf(stderr, "Cannot allocate results array\n");
+		exit(1);
+	}
+
+	// Send initial message to get napi id.
+	sendPing(&ctx);
+
+	while (ctx.num_pings != 0) {
+		int res;
+		unsigned num_completed = 0;
+		unsigned head;
+		struct io_uring_cqe *cqe;
+
+		do {
+			res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
+			if (res >= 0)
+				break;
+			else if (res == -ETIME)
+				continue;
+			fprintf(stderr, "submit_and_wait: %d\n", res);
+			exit(1);
+		} while (1);
+
+		io_uring_for_each_cqe(&ctx.ring, head, cqe) {
+			++num_completed;
+			if (completion(&ctx, cqe))
+				goto out;
+		}
+
+		if (num_completed)
+			io_uring_cq_advance(&ctx.ring, num_completed);
+	}
+
+	printStats(&ctx);
+
+out:
+	// Clean up.
+	if (opt.timeout || opt.prefer_busy_poll) {
+		ret = io_uring_unregister_napi(&ctx.ring, &napi);
+		if (ret)
+			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
+		if (opt.timeout          != napi.busy_poll_to ||
+		    opt.prefer_busy_poll != napi.prefer_busy_poll) {
+			fprintf(stderr, "Expected busy poll to = %d, got %d\n",
+				opt.timeout, napi.busy_poll_to);
+			fprintf(stderr, "Expected prefer busy poll = %d, got %d\n",
+				opt.prefer_busy_poll, napi.prefer_busy_poll);
+		}
+	} else {
+		ret = io_uring_unregister_napi(&ctx.ring, NULL);
+		if (ret)
+			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
+	}
+
+	io_uring_queue_exit(&ctx.ring);
+	free(ctx.rtt);
+	close(ctx.sockfd);
+	return 0;
+}
diff --git a/examples/napi-busy-poll-server.c b/examples/napi-busy-poll-server.c
new file mode 100644
index 0000000..584632a
--- /dev/null
+++ b/examples/napi-busy-poll-server.c
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Simple ping/pong backend which can use the io_uring NAPI support.
+ *
+ * Needs root privileges to set the SCHED_FIFO scheduling class; it will
+ * still work without that, just without realtime scheduling.
+ *
+ * Example:
+ *
+ * sudo examples/napi-busy-poll-server -l -a 192.168.2.2 -n100000 \
+ *	-p4444 -t10 -b -u
+ *
+ * will respond to 100k packets, using NAPI.
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <liburing.h>
+#include <math.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <netinet/in.h>
+
+#define MAXBUFLEN 100
+#define PORTNOLEN 10
+#define ADDRLEN   80
+#define RINGSIZE  1024
+
+#define printable(ch) (isprint((unsigned char)ch) ? ch : '#')
+
+enum {
+	IOURING_RECV,
+	IOURING_SEND,
+	IOURING_RECVMSG,
+	IOURING_SENDMSG
+};
+
+struct ctx
+{
+	struct io_uring     ring;
+	union {
+		struct sockaddr_in6 saddr6;
+		struct sockaddr_in saddr;
+	};
+	struct iovec        iov;
+	struct msghdr       msg;
+
+	int sockfd;
+	int buffer_len;
+	int num_pings;
+	bool napi_check;
+
+	union {
+		char buffer[MAXBUFLEN];
+		struct timespec ts;
+	};
+};
+
+struct options
+{
+	int  num_pings;
+	__u32 timeout;
+
+	bool listen;
+	bool defer_tw;
+	bool sq_poll;
+	bool busy_loop;
+	bool prefer_busy_poll;
+	bool ipv6;
+
+	char port[PORTNOLEN];
+	char addr[ADDRLEN];
+};
+
+static struct options opt;
+
+static struct option longopts[] =
+{
+	{"address"  , 1, NULL, 'a'},
+	{"busy"     , 0, NULL, 'b'},
+	{"help"     , 0, NULL, 'h'},
+	{"listen"   , 0, NULL, 'l'},
+	{"num_pings", 1, NULL, 'n'},
+	{"port"     , 1, NULL, 'p'},
+	{"prefer"   , 1, NULL, 'u'},
+	{"sqpoll"   , 0, NULL, 's'},
+	{"timeout"  , 1, NULL, 't'},
+	{NULL       , 0, NULL,  0 }
+};
+
+static void printUsage(const char *name)
+{
+	fprintf(stderr,
+	"Usage: %s [-l|--listen] [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]"
+	" [-b|--busy] [-n|--num_pings pings] [-t|--timeout busy-poll-timeout] [-u|--prefer] [-6] [-h|--help]\n"
+	"--listen\n"
+	"-l        : Server mode\n"
+	"--address\n"
+	"-a        : local IP address to bind to\n"
+	"--busy\n"
+	"-b        : busy poll io_uring instead of blocking.\n"
+	"--num_pings\n"
+	"-n        : number of pings\n"
+	"--port\n"
+	"-p        : port\n"
+	"--sqpoll\n"
+	"-s        : Configure io_uring to use SQPOLL thread\n"
+	"--timeout\n"
+	"-t        : Configure NAPI busy poll timeout\n"
+	"--prefer\n"
+	"-u        : prefer NAPI busy poll\n"
+	"-6        : use IPV6\n"
+	"--help\n"
+	"-h        : Display this usage message\n\n",
+	name);
+}
+
+static void printError(const char *msg, int opt)
+{
+	if (msg && opt)
+		fprintf(stderr, "%s (-%c)\n", msg, printable(opt));
+}
+
+static void setProcessScheduler(void)
+{
+	struct sched_param param;
+
+	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+	if (sched_setscheduler(0, SCHED_FIFO, &param) < 0)
+		fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n",
+			errno, strerror(errno));
+}
+
+static uint64_t encodeUserData(char type, int fd)
+{
+	return (uint32_t)fd | ((__u64)type << 56);
+}
+
+static void decodeUserData(uint64_t data, char *type, int *fd)
+{
+	*type = data >> 56;
+	*fd   = data & 0xffffffffU;
+}
+
+static const char *opTypeToStr(char type)
+{
+	const char *res;
+
+	switch (type) {
+	case IOURING_RECV:
+		res = "IOURING_RECV";
+		break;
+	case IOURING_SEND:
+		res = "IOURING_SEND";
+		break;
+	case IOURING_RECVMSG:
+		res = "IOURING_RECVMSG";
+		break;
+	case IOURING_SENDMSG:
+		res = "IOURING_SENDMSG";
+		break;
+	default:
+		res = "Unknown";
+	}
+
+	return res;
+}
+
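+/*
+ * SO_INCOMING_NAPI_ID reports the NAPI id of the device queue that the
+ * socket's last packet arrived on; io_uring's NAPI busy polling tracks
+ * the same ids, so a nonzero value here indicates busy polling can take
+ * effect.
+ */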
+static void reportNapi(struct ctx *ctx)
+{
+	unsigned int napi_id = 0;
+	socklen_t len = sizeof(napi_id);
+
+	getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
+	if (napi_id)
+		printf(" napi id: %d\n", napi_id);
+	else
+		printf(" unassigned napi id\n");
+
+	ctx->napi_check = true;
+}
+
+static void sendPing(struct ctx *ctx)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
+
+	io_uring_prep_sendmsg(sqe, ctx->sockfd, &ctx->msg, 0);
+	sqe->user_data = encodeUserData(IOURING_SENDMSG, ctx->sockfd);
+}
+
+static void receivePing(struct ctx *ctx)
+{
+	struct io_uring_sqe *sqe;
+
+	memset(&ctx->msg, 0, sizeof(struct msghdr));
+	if (opt.ipv6) {
+		ctx->msg.msg_name    = &ctx->saddr6;
+		ctx->msg.msg_namelen = sizeof(struct sockaddr_in6);
+	} else {
+		ctx->msg.msg_name    = &ctx->saddr;
+		ctx->msg.msg_namelen = sizeof(struct sockaddr_in);
+	}
+	ctx->iov.iov_base    = ctx->buffer;
+	ctx->iov.iov_len     = MAXBUFLEN;
+	ctx->msg.msg_iov     = &ctx->iov;
+	ctx->msg.msg_iovlen  = 1;
+
+	sqe = io_uring_get_sqe(&ctx->ring);
+	io_uring_prep_recvmsg(sqe, ctx->sockfd, &ctx->msg, 0);
+	sqe->user_data = encodeUserData(IOURING_RECVMSG, ctx->sockfd);
+}
+
+static void completion(struct ctx *ctx, struct io_uring_cqe *cqe)
+{
+	char type;
+	int  fd;
+	int  res = cqe->res;
+
+	decodeUserData(cqe->user_data, &type, &fd);
+	if (res < 0) {
+		fprintf(stderr, "unexpected %s failure: (%d) %s\n",
+			opTypeToStr(type), -res, strerror(-res));
+		abort();
+	}
+
+	switch (type) {
+	case IOURING_SENDMSG:
+		receivePing(ctx);
+		--ctx->num_pings;
+		break;
+	case IOURING_RECVMSG:
+		ctx->iov.iov_len = res;
+		sendPing(ctx);
+		if (!ctx->napi_check)
+			reportNapi(ctx);
+		break;
+	default:
+		fprintf(stderr, "unexpected %s completion\n",
+			opTypeToStr(type));
+		abort();
+		break;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int flag;
+	struct ctx       ctx;
+	struct __kernel_timespec *tsPtr;
+	struct __kernel_timespec ts;
+	struct io_uring_params params;
+	struct io_uring_napi napi;
+	int ret, af;
+
+	memset(&opt, 0, sizeof(struct options));
+
+	// Process flags.
+	while ((flag = getopt_long(argc, argv, ":lhs:bua:n:p:t:6d:", longopts, NULL)) != -1) {
+		switch (flag) {
+		case 'a':
+			strcpy(opt.addr, optarg);
+			break;
+		case 'b':
+			opt.busy_loop = true;
+			break;
+		case 'h':
+			printUsage(argv[0]);
+			exit(0);
+			break;
+		case 'l':
+			opt.listen = true;
+			break;
+		case 'n':
+			opt.num_pings = atoi(optarg) + 1;
+			break;
+		case 'p':
+			strcpy(opt.port, optarg);
+			break;
+		case 's':
+			opt.sq_poll = !!atoi(optarg);
+			break;
+		case 't':
+			opt.timeout = atoi(optarg);
+			break;
+		case 'u':
+			opt.prefer_busy_poll = true;
+			break;
+		case '6':
+			opt.ipv6 = true;
+			break;
+		case 'd':
+			opt.defer_tw = !!atoi(optarg);
+			break;
+		case ':':
+			printError("Missing argument", optopt);
+			printUsage(argv[0]);
+			exit(-1);
+			break;
+		case '?':
+			printError("Unrecognized option", optopt);
+			printUsage(argv[0]);
+			exit(-1);
+			break;
+
+		default:
+			fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n");
+			exit(-1);
+			break;
+		}
+	}
+
+	if (strlen(opt.addr) == 0) {
+		fprintf(stderr, "address option is mandatory\n");
+		printUsage(argv[0]);
+		exit(1);
+	}
+
+	if (opt.ipv6) {
+		af = AF_INET6;
+		ctx.saddr6.sin6_port   = htons(atoi(opt.port));
+		ctx.saddr6.sin6_family = AF_INET6;
+	} else {
+		af = AF_INET;
+		ctx.saddr.sin_port   = htons(atoi(opt.port));
+		ctx.saddr.sin_family = AF_INET;
+	}
+
+	if (opt.ipv6)
+		ret = inet_pton(AF_INET6, opt.addr, &ctx.saddr6.sin6_addr);
+	else
+		ret = inet_pton(AF_INET, opt.addr, &ctx.saddr.sin_addr);
+	if (ret <= 0) {
+		fprintf(stderr, "inet_pton error for %s\n", opt.addr);
+		printUsage(argv[0]);
+		exit(1);
+	}
+
+	// Bind to the given address and wait for pings.
+	fprintf(stdout, "Listening on %s:%s...\n", opt.addr, opt.port);
+
+	if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) {
+		fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno));
+		exit(1);
+	}
+
+	if (opt.ipv6)
+		ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6));
+	else
+		ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in));
+	if (ret < 0) {
+		fprintf(stderr, "bind() failed: (%d) %s\n", errno, strerror(errno));
+		exit(1);
+	}
+
+	// Setup ring.
+	memset(&params, 0, sizeof(params));
+	memset(&ts, 0, sizeof(ts));
+	memset(&napi, 0, sizeof(napi));
+
+	params.flags = IORING_SETUP_SINGLE_ISSUER;
+	if (opt.defer_tw) {
+		params.flags |= IORING_SETUP_DEFER_TASKRUN;
+	} else if (opt.sq_poll) {
+		params.flags = IORING_SETUP_SQPOLL;
+		params.sq_thread_idle = 50;
+	} else {
+		params.flags |= IORING_SETUP_COOP_TASKRUN;
+	}
+
+	ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
+			ret, strerror(-ret));
+		exit(1);
+	}
+
+	if (opt.timeout || opt.prefer_busy_poll) {
+		napi.prefer_busy_poll = opt.prefer_busy_poll;
+		napi.busy_poll_to = opt.timeout;
+
+		ret = io_uring_register_napi(&ctx.ring, &napi);
+		if (ret) {
+			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
+			exit(1);
+		}
+	}
+
+	if (opt.busy_loop)
+		tsPtr = &ts;
+	else
+		tsPtr = NULL;
+
+	// Use realtime scheduler.
+	setProcessScheduler();
+
+	// Copy payload.
+	clock_gettime(CLOCK_REALTIME, &ctx.ts);
+
+	// Setup context.
+	ctx.napi_check = false;
+	ctx.buffer_len = sizeof(struct timespec);
+	ctx.num_pings  = opt.num_pings;
+
+	// Receive initial message to get napi id.
+	receivePing(&ctx);
+
+	while (ctx.num_pings != 0) {
+		int res;
+		unsigned int num_completed = 0;
+		unsigned int head;
+		struct io_uring_cqe *cqe;
+
+		do {
+			res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
+			if (res >= 0)
+				break;
+			else if (res == -ETIME)
+				continue;
+			fprintf(stderr, "submit_and_wait: %d\n", res);
+			exit(1);
+		} while (1);
+
+		io_uring_for_each_cqe(&ctx.ring, head, cqe) {
+			++num_completed;
+			completion(&ctx, cqe);
+		}
+
+		if (num_completed)
+			io_uring_cq_advance(&ctx.ring, num_completed);
+	}
+
+	// Clean up.
+	if (opt.timeout || opt.prefer_busy_poll) {
+		ret = io_uring_unregister_napi(&ctx.ring, &napi);
+		if (ret)
+			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
+	}
+
+	io_uring_queue_exit(&ctx.ring);
+	close(ctx.sockfd);
+	return 0;
+}
diff --git a/examples/poll-bench.c b/examples/poll-bench.c
new file mode 100644
index 0000000..e3c0052
--- /dev/null
+++ b/examples/poll-bench.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: MIT */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <poll.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+
+static char buf[4096];
+static unsigned long runtime_ms = 10000;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
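+/*
+ * Benchmark loop: arm 'qd' poll requests on the pipe's read end, wake
+ * them all at once with a single one-byte write, drain that byte again,
+ * then reap the 'qd' completions. Repeat until runtime_ms has elapsed
+ * and report requests per second.
+ */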
+int main(void)
+{
+	unsigned long tstop;
+	unsigned long nr_reqs = 0;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int pipe1[2];
+	int ret, i, qd = 32;
+
+	if (pipe(pipe1) != 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	ret = io_uring_queue_init(1024, &ring, IORING_SETUP_SINGLE_ISSUER);
+	if (ret == -EINVAL) {
+		fprintf(stderr, "can't single\n");
+		ret = io_uring_queue_init(1024, &ring, 0);
+	}
+	if (ret) {
+		fprintf(stderr, "child: ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = io_uring_register_files(&ring, pipe1, 2);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_register_files failed\n");
+		return 1;
+	}
+
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_register_ring_fd failed\n");
+		return 1;
+	}
+
+	tstop = gettimeofday_ms() + runtime_ms;
+	do {
+		for (i = 0; i < qd; i++) {
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_poll_add(sqe, 0, POLLIN);
+			sqe->flags |= IOSQE_FIXED_FILE;
+			sqe->user_data = 1;
+		}
+
+		ret = io_uring_submit(&ring);
+		if (ret != qd) {
+			fprintf(stderr, "child: sqe submit failed: %d\n", ret);
+			return 1;
+		}
+
+		ret = write(pipe1[1], buf, 1);
+		if (ret != 1) {
+			fprintf(stderr, "write failed %i\n", errno);
+			return 1;
+		}
+		ret = read(pipe1[0], buf, 1);
+		if (ret != 1) {
+			fprintf(stderr, "read failed %i\n", errno);
+			return 1;
+		}
+
+		for (i = 0; i < qd; i++) {
+			ret = io_uring_wait_cqe(&ring, &cqe);
+			if (ret < 0) {
+				fprintf(stderr, "child: wait completion %d\n", ret);
+				break;
+			}
+			io_uring_cqe_seen(&ring, cqe);
+			nr_reqs++;
+		}
+	} while (gettimeofday_ms() < tstop);
+
+	fprintf(stderr, "requests/s: %lu\n", nr_reqs * 1000UL / runtime_ms);
+	return 0;
+}
diff --git a/examples/proxy.c b/examples/proxy.c
new file mode 100644
index 0000000..91c7f12
--- /dev/null
+++ b/examples/proxy.c
@@ -0,0 +1,2528 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Sample program that can act either as a packet sink, where it just receives
+ * packets and doesn't do anything with them, or it can act as a proxy where it
+ * receives packets and then sends them to a new destination. The proxy can
+ * be unidirectional (-B0), or bi-directional (-B1).
+ *
+ * Examples:
+ *
+ * Act as a proxy, listening on port 4444, and send data to 192.168.2.6 on port
+ * 4445. Use multishot receive, DEFER_TASKRUN, and fixed files
+ *
+ * 	./proxy -m1 -r4444 -H 192.168.2.6 -p4445
+ *
+ * Same as above, but utilize send bundles (-C1, requires -u1 send_ring)
+ * with ring provided send buffers, as well as recv bundles (-c1).
+ *
+ * 	./proxy -m1 -c1 -u1 -C1 -r4444 -H 192.168.2.6 -p4445
+ *
+ * Act as a bi-directional proxy, listening on port 8888, and send data back
+ * and forth between host and 192.168.2.6 on port 22. Use multishot receive,
+ * DEFER_TASKRUN, fixed files, and buffers of size 1500.
+ *
+ * 	./proxy -m1 -B1 -b1500 -r8888 -H 192.168.2.6 -p22
+ *
+ * Act as a sink, listening on port 4445, using multishot receive, DEFER_TASKRUN,
+ * and fixed files:
+ *
+ * 	./proxy -m1 -s1 -r4445
+ *
+ * Run with -h to see a list of options, and their defaults.
+ *
+ * (C) 2024 Jens Axboe <[email protected]>
+ *
+ */
+#include <fcntl.h>
+#include <stdint.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <locale.h>
+#include <assert.h>
+#include <pthread.h>
+#include <liburing.h>
+
+#include "proxy.h"
+#include "helpers.h"
+
+/*
+ * Will go away once/if bundles are upstreamed and we put the generic
+ * definitions in the kernel header.
+ */
+#ifndef IORING_RECVSEND_BUNDLE
+#define IORING_RECVSEND_BUNDLE		(1U << 4)
+#endif
+#ifndef IORING_FEAT_SEND_BUF_SELECT
+#define IORING_FEAT_SEND_BUF_SELECT	(1U << 14)
+#endif
+
+static int cur_bgid = 1;
+static int nr_conns;
+static int open_conns;
+static long page_size;
+
+static unsigned long event_loops;
+static unsigned long events;
+
+static int recv_mshot = 1;
+static int sqpoll;
+static int defer_tw = 1;
+static int is_sink;
+static int fixed_files = 1;
+static char *host = "192.168.3.2";
+static int send_port = 4445;
+static int receive_port = 4444;
+static int buf_size = 32;
+static int buf_ring_inc;
+static int bidi;
+static int ipv6;
+static int napi;
+static int napi_timeout;
+static int wait_batch = 1;
+static int wait_usec = 1000000;
+static int rcv_msg;
+static int snd_msg;
+static int snd_zc;
+static int send_ring = -1;
+static int snd_bundle;
+static int rcv_bundle;
+static int use_huge;
+static int ext_stat;
+static int verbose;
+
+static int nr_bufs = 256;
+static int br_mask;
+
+static int ring_size = 128;
+
+static pthread_mutex_t thread_lock;
+static struct timeval last_housekeeping;
+
+/*
+ * For sendmsg/recvmsg. recvmsg just has a single vec, sendmsg will have
+ * two vecs - one that is currently submitted and being sent, and one that
+ * is being prepared. When a new sendmsg is issued, we'll swap which one we
+ * use. For send, even though we don't pass in the iovec itself, we use the
+ * vec to serialize the sends to avoid reordering.
+ */
+struct msg_vec {
+	struct iovec *iov;
+	/* length of allocated vec */
+	int vec_size;
+	/* length currently being used */
+	int iov_len;
+	/* only for send, current index we're processing */
+	int cur_iov;
+};
+
+struct io_msg {
+	struct msghdr msg;
+	struct msg_vec vecs[2];
+	/* current msg_vec being prepared */
+	int vec_index;
+};
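+
+/*
+ * vecs[vec_index] is the entry currently being prepared with new buffers,
+ * while the other entry holds the iovecs of the send that is inflight;
+ * issuing a new sendmsg swaps which of the two is active.
+ */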
+
+/*
+ * Per socket stats per connection. For bi-directional, we'll have both
+ * sends and receives on each socket, this helps track them separately.
+ * For sink or one directional, each of the two stats will be only sends
+ * or receives, not both.
+ */
+struct conn_dir {
+	int index;
+
+	int pending_shutdown;
+	int pending_send;
+	int pending_recv;
+
+	int snd_notif;
+
+	int out_buffers;
+
+	int rcv, rcv_shrt, rcv_enobufs, rcv_mshot;
+	int snd, snd_shrt, snd_enobufs, snd_busy, snd_mshot;
+
+	int snd_next_bid;
+	int rcv_next_bid;
+
+	int *rcv_bucket;
+	int *snd_bucket;
+
+	unsigned long in_bytes, out_bytes;
+
+	/* only ever have a single recv pending */
+	struct io_msg io_rcv_msg;
+
+	/* one send that is inflight, and one being prepared for the next one */
+	struct io_msg io_snd_msg;
+};
+
+enum {
+	CONN_F_STARTED		= 1,
+	CONN_F_DISCONNECTING	= 2,
+	CONN_F_DISCONNECTED	= 4,
+	CONN_F_PENDING_SHUTDOWN	= 8,
+	CONN_F_STATS_SHOWN	= 16,
+	CONN_F_END_TIME		= 32,
+	CONN_F_REAPED		= 64,
+};
+
+/*
+ * buffer ring belonging to a connection
+ */
+struct conn_buf_ring {
+	struct io_uring_buf_ring *br;
+	void *buf;
+	int bgid;
+};
+
+struct conn {
+	struct io_uring ring;
+
+	/* receive side buffer ring, new data arrives here */
+	struct conn_buf_ring in_br;
+	/* if send_ring is used, outgoing data to send */
+	struct conn_buf_ring out_br;
+
+	int tid;
+	int in_fd, out_fd;
+	int pending_cancels;
+	int flags;
+
+	struct conn_dir cd[2];
+
+	struct timeval start_time, end_time;
+
+	union {
+		struct sockaddr_in addr;
+		struct sockaddr_in6 addr6;
+	};
+
+	pthread_t thread;
+	pthread_barrier_t startup_barrier;
+};
+
+#define MAX_CONNS	1024
+static struct conn conns[MAX_CONNS];
+
+#define vlog(str, ...) do {						\
+	if (verbose)							\
+		printf(str, ##__VA_ARGS__);				\
+} while (0)
+
+static int prep_next_send(struct io_uring *ring, struct conn *c,
+			  struct conn_dir *cd, int fd);
+static void *thread_main(void *data);
+
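+/*
+ * The connection tid is encoded in the low bits of user_data (see
+ * __raw_encode() and proxy.h), so any CQE can be mapped straight back to
+ * its owning connection.
+ */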
+static struct conn *cqe_to_conn(struct io_uring_cqe *cqe)
+{
+	struct userdata ud = { .val = cqe->user_data };
+
+	return &conns[ud.op_tid & TID_MASK];
+}
+
+static struct conn_dir *cqe_to_conn_dir(struct conn *c,
+					struct io_uring_cqe *cqe)
+{
+	int fd = cqe_to_fd(cqe);
+
+	return &c->cd[fd != c->in_fd];
+}
+
+static int other_dir_fd(struct conn *c, int fd)
+{
+	if (c->in_fd == fd)
+		return c->out_fd;
+	return c->in_fd;
+}
+
+/* currently active msg_vec */
+static struct msg_vec *msg_vec(struct io_msg *imsg)
+{
+	return &imsg->vecs[imsg->vec_index];
+}
+
+static struct msg_vec *snd_msg_vec(struct conn_dir *cd)
+{
+	return msg_vec(&cd->io_snd_msg);
+}
+
+/*
+ * Goes from accept new connection -> create socket, connect to end
+ * point, prepare recv, on receive do send (unless sink). If either ends
+ * disconnects, we transition to shutdown and then close.
+ */
+enum {
+	__ACCEPT	= 1,
+	__SOCK		= 2,
+	__CONNECT	= 3,
+	__RECV		= 4,
+	__RECVMSG	= 5,
+	__SEND		= 6,
+	__SENDMSG	= 7,
+	__SHUTDOWN	= 8,
+	__CANCEL	= 9,
+	__CLOSE		= 10,
+	__FD_PASS	= 11,
+	__NOP		= 12,
+	__STOP		= 13,
+};
+
+struct error_handler {
+	const char *name;
+	int (*error_fn)(struct error_handler *, struct io_uring *, struct io_uring_cqe *);
+};
+
+static int recv_error(struct error_handler *err, struct io_uring *ring,
+		      struct io_uring_cqe *cqe);
+static int send_error(struct error_handler *err, struct io_uring *ring,
+		      struct io_uring_cqe *cqe);
+
+static int default_error(struct error_handler *err,
+			 struct io_uring __attribute__((__unused__)) *ring,
+			 struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+
+	fprintf(stderr, "%d: %s error %s\n", c->tid, err->name, strerror(-cqe->res));
+	fprintf(stderr, "fd=%d, bid=%d\n", cqe_to_fd(cqe), cqe_to_bid(cqe));
+	return 1;
+}
+
+/*
+ * Move error handling out of the normal handling path, cleanly separating
+ * them. If an opcode doesn't need any error handling, set it to NULL. If
+ * it wants to stop the connection at that point and not do anything else,
+ * then the default handler can be used. Only receive has proper error
+ * handling, as we can get -ENOBUFS which is not a fatal condition. It just
+ * means we need to wait on buffer replenishing before re-arming the receive.
+ */
+static struct error_handler error_handlers[] = {
+	{ .name = "NULL",	.error_fn = NULL, },
+	{ .name = "ACCEPT",	.error_fn = default_error, },
+	{ .name = "SOCK",	.error_fn = default_error, },
+	{ .name = "CONNECT",	.error_fn = default_error, },
+	{ .name = "RECV",	.error_fn = recv_error, },
+	{ .name = "RECVMSG",	.error_fn = recv_error, },
+	{ .name = "SEND",	.error_fn = send_error, },
+	{ .name = "SENDMSG",	.error_fn = send_error, },
+	{ .name = "SHUTDOWN",	.error_fn = NULL, },
+	{ .name = "CANCEL",	.error_fn = NULL, },
+	{ .name = "CLOSE",	.error_fn = NULL, },
+	{ .name = "FD_PASS",	.error_fn = default_error, },
+	{ .name = "NOP",	.error_fn = NULL, },
+	{ .name = "STOP",	.error_fn = default_error, },
+};
+
+static void free_buffer_ring(struct io_uring *ring, struct conn_buf_ring *cbr)
+{
+	if (!cbr->br)
+		return;
+
+	io_uring_free_buf_ring(ring, cbr->br, nr_bufs, cbr->bgid);
+	cbr->br = NULL;
+	if (use_huge)
+		munmap(cbr->buf, buf_size * nr_bufs);
+	else
+		free(cbr->buf);
+}
+
+static void free_buffer_rings(struct io_uring *ring, struct conn *c)
+{
+	free_buffer_ring(ring, &c->in_br);
+	free_buffer_ring(ring, &c->out_br);
+}
+
+/*
+ * Setup a ring provided buffer ring for each connection. If we get -ENOBUFS
+ * on receive, for multishot receive we'll wait for half the provided buffers
+ * to be returned by pending sends, then re-arm the multishot receive. If
+ * this happens too frequently (see enobufs= stat), then the ring size is
+ * likely too small. Use -nXX to make it bigger. See recv_enobufs().
+ *
+ * The alternative here would be to use the older style provided buffers,
+ * where you simply setup a buffer group and use SQEs with
+ * io_uring_prep_provide_buffers() to add to the pool. But that approach is
+ * slower and has been deprecated by using the faster ring provided buffers.
+ */
+static int setup_recv_ring(struct io_uring *ring, struct conn *c)
+{
+	struct conn_buf_ring *cbr = &c->in_br;
+	int br_flags = 0;
+	int ret, i;
+	size_t len;
+	void *ptr;
+
+	len = buf_size * nr_bufs;
+	if (use_huge) {
+		cbr->buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
+				MAP_PRIVATE|MAP_HUGETLB|MAP_HUGE_2MB|MAP_ANONYMOUS,
+				-1, 0);
+		if (cbr->buf == MAP_FAILED) {
+			perror("mmap");
+			return 1;
+		}
+	} else {
+		if (posix_memalign(&cbr->buf, page_size, len)) {
+			perror("posix memalign");
+			return 1;
+		}
+	}
+	if (buf_ring_inc)
+		br_flags = IOU_PBUF_RING_INC;
+	cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, br_flags, &ret);
+	if (!cbr->br) {
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	ptr = cbr->buf;
+	for (i = 0; i < nr_bufs; i++) {
+		vlog("%d: add bid %d, data %p\n", c->tid, i, ptr);
+		io_uring_buf_ring_add(cbr->br, ptr, buf_size, i, br_mask, i);
+		ptr += buf_size;
+	}
+	io_uring_buf_ring_advance(cbr->br, nr_bufs);
+	printf("%d: recv buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs);
+	return 0;
+}
+
+/*
+ * If 'send_ring' is used and the kernel supports it, we can skip serializing
+ * sends as the data will be ordered regardless. This reduces the send handling
+ * complexity, as buffers can always be added to the outgoing ring and will be
+ * processed in the order in which they were added.
+ */
+static int setup_send_ring(struct io_uring *ring, struct conn *c)
+{
+	struct conn_buf_ring *cbr = &c->out_br;
+	int br_flags = 0;
+	int ret;
+
+	if (buf_ring_inc)
+		br_flags = IOU_PBUF_RING_INC;
+	cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, br_flags, &ret);
+	if (!cbr->br) {
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	printf("%d: send buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs);
+	return 0;
+}
+
+static int setup_send_zc(struct io_uring *ring, struct conn *c)
+{
+	struct iovec *iovs;
+	void *buf;
+	int i, ret;
+
+	if (snd_msg)
+		return 0;
+
+	buf = c->in_br.buf;
+	iovs = calloc(nr_bufs, sizeof(struct iovec));
+	for (i = 0; i < nr_bufs; i++) {
+		iovs[i].iov_base = buf;
+		iovs[i].iov_len = buf_size;
+		buf += buf_size;
+	}
+
+	ret = io_uring_register_buffers(ring, iovs, nr_bufs);
+	if (ret) {
+		fprintf(stderr, "failed registering buffers: %d\n", ret);
+		free(iovs);
+		return ret;
+	}
+	free(iovs);
+	return 0;
+}
+
+/*
+ * Setup an input and output buffer ring.
+ */
+static int setup_buffer_rings(struct io_uring *ring, struct conn *c)
+{
+	int ret;
+
+	/* no locking needed on cur_bgid, parent serializes setup */
+	c->in_br.bgid = cur_bgid++;
+	c->out_br.bgid = cur_bgid++;
+	c->out_br.br = NULL;
+
+	ret = setup_recv_ring(ring, c);
+	if (ret)
+		return ret;
+	if (is_sink)
+		return 0;
+	if (snd_zc) {
+		ret = setup_send_zc(ring, c);
+		if (ret)
+			return ret;
+	}
+	if (send_ring) {
+		ret = setup_send_ring(ring, c);
+		if (ret) {
+			free_buffer_ring(ring, &c->in_br);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+struct bucket_stat {
+	int nr_packets;
+	int count;
+};
+
+static int stat_cmp(const void *p1, const void *p2)
+{
+	const struct bucket_stat *b1 = p1;
+	const struct bucket_stat *b2 = p2;
+
+	if (b1->count < b2->count)
+		return 1;
+	else if (b1->count > b2->count)
+		return -1;
+	return 0;
+}
+
+static void show_buckets(struct conn_dir *cd)
+{
+	unsigned long snd_total, rcv_total;
+	struct bucket_stat *rstat, *sstat;
+	int i;
+
+	if (!cd->rcv_bucket || !cd->snd_bucket)
+		return;
+
+	rstat = calloc(nr_bufs + 1, sizeof(struct bucket_stat));
+	sstat = calloc(nr_bufs + 1, sizeof(struct bucket_stat));
+
+	snd_total = rcv_total = 0;
+	for (i = 0; i <= nr_bufs; i++) {
+		snd_total += cd->snd_bucket[i];
+		sstat[i].nr_packets = i;
+		sstat[i].count = cd->snd_bucket[i];
+		rcv_total += cd->rcv_bucket[i];
+		rstat[i].nr_packets = i;
+		rstat[i].count = cd->rcv_bucket[i];
+	}
+
+	if (!snd_total && !rcv_total) {
+		free(sstat);
+		free(rstat);
+		return;
+	}
+	if (snd_total)
+		qsort(sstat, nr_bufs, sizeof(struct bucket_stat), stat_cmp);
+	if (rcv_total)
+		qsort(rstat, nr_bufs, sizeof(struct bucket_stat), stat_cmp);
+
+	printf("\t Packets per recv/send:\n");
+	for (i = 0; i <= nr_bufs; i++) {
+		double snd_prc = 0.0, rcv_prc = 0.0;
+		if (!rstat[i].count && !sstat[i].count)
+			continue;
+		if (rstat[i].count)
+			rcv_prc = 100.0 * (rstat[i].count / (double) rcv_total);
+		if (sstat[i].count)
+			snd_prc = 100.0 * (sstat[i].count / (double) snd_total);
+		printf("\t bucket(%3d/%3d): rcv=%u (%.2f%%) snd=%u (%.2f%%)\n",
+				rstat[i].nr_packets, sstat[i].nr_packets,
+				rstat[i].count, rcv_prc,
+				sstat[i].count, snd_prc);
+	}
+
+	free(sstat);
+	free(rstat);
+}
+
+static void __show_stats(struct conn *c)
+{
+	unsigned long msec, qps;
+	unsigned long bytes, bw;
+	struct conn_dir *cd;
+	int i;
+
+	if (c->flags & (CONN_F_STATS_SHOWN | CONN_F_REAPED))
+		return;
+	if (!(c->flags & CONN_F_STARTED))
+		return;
+
+	if (!(c->flags & CONN_F_END_TIME))
+		gettimeofday(&c->end_time, NULL);
+
+	msec = (c->end_time.tv_sec - c->start_time.tv_sec) * 1000;
+	msec += (c->end_time.tv_usec - c->start_time.tv_usec) / 1000;
+
+	qps = 0;
+	for (i = 0; i < 2; i++)
+		qps += c->cd[i].rcv + c->cd[i].snd;
+
+	if (!qps)
+		return;
+
+	if (msec)
+		qps = (qps * 1000) / msec;
+
+	printf("Conn %d/(in_fd=%d, out_fd=%d): qps=%lu, msec=%lu\n", c->tid,
+					c->in_fd, c->out_fd, qps, msec);
+
+	bytes = 0;
+	for (i = 0; i < 2; i++) {
+		cd = &c->cd[i];
+
+		if (!cd->in_bytes && !cd->out_bytes && !cd->snd && !cd->rcv)
+			continue;
+
+		bytes += cd->in_bytes;
+		bytes += cd->out_bytes;
+
+		printf("\t%3d: rcv=%u (short=%u, enobufs=%d), snd=%u (short=%u,"
+			" busy=%u, enobufs=%d)\n", i, cd->rcv, cd->rcv_shrt,
+			cd->rcv_enobufs, cd->snd, cd->snd_shrt, cd->snd_busy,
+			cd->snd_enobufs);
+		printf("\t   : in_bytes=%lu (Kb %lu), out_bytes=%lu (Kb %lu)\n",
+			cd->in_bytes, cd->in_bytes >> 10,
+			cd->out_bytes, cd->out_bytes >> 10);
+		printf("\t   : mshot_rcv=%d, mshot_snd=%d\n", cd->rcv_mshot,
+			cd->snd_mshot);
+		show_buckets(cd);
+
+	}
+	if (msec) {
+		bytes *= 8UL;
+		bw = bytes / 1000;
+		bw /= msec;
+		printf("\tBW=%'luMbit\n", bw);
+	}
+
+	c->flags |= CONN_F_STATS_SHOWN;
+}
+
+static void show_stats(void)
+{
+	float events_per_loop = 0.0;
+	static int stats_shown;
+	int i;
+
+	if (stats_shown)
+		return;
+
+	if (events)
+		events_per_loop = (float) events / (float) event_loops;
+
+	printf("Event loops: %lu, events %lu, events per loop %.2f\n", event_loops,
+							events, events_per_loop);
+
+	for (i = 0; i < MAX_CONNS; i++) {
+		struct conn *c = &conns[i];
+
+		__show_stats(c);
+	}
+	stats_shown = 1;
+}
+
+static void sig_int(int __attribute__((__unused__)) sig)
+{
+	printf("\n");
+	show_stats();
+	exit(1);
+}
+
+/*
+ * Special cased for SQPOLL only, as we don't control when SQEs are consumed if
+ * that is used. Hence we may need to wait for the SQPOLL thread to keep up
+ * until we can get a new SQE. All other cases will break immediately, with a
+ * fresh SQE.
+ *
+ * If we grossly undersized our SQ ring, getting a NULL sqe can happen even
+ * for the !SQPOLL case if we're handling a lot of CQEs in our event loop
+ * and multishot isn't used. We can do io_uring_submit() to flush what we
+ * have here. Only caveat here is that if linked requests are used, SQEs
+ * would need to be allocated upfront as a link chain is only valid within
+ * a single submission cycle.
+ */
+static struct io_uring_sqe *get_sqe(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+
+	do {
+		sqe = io_uring_get_sqe(ring);
+		if (sqe)
+			break;
+		if (!sqpoll)
+			io_uring_submit(ring);
+		else
+			io_uring_sqring_wait(ring);
+	} while (1);
+
+	return sqe;
+}
+
+/*
+ * See __encode_userdata() for how we encode sqe->user_data, which is passed
+ * back as cqe->user_data at completion time.
+ */
+static void encode_userdata(struct io_uring_sqe *sqe, struct conn *c, int op,
+			    int bid, int fd)
+{
+	__encode_userdata(sqe, c->tid, op, bid, fd);
+}
+
+static void __submit_receive(struct io_uring *ring, struct conn *c,
+			     struct conn_dir *cd, int fd)
+{
+	struct conn_buf_ring *cbr = &c->in_br;
+	struct io_uring_sqe *sqe;
+
+	vlog("%d: submit receive fd=%d\n", c->tid, fd);
+
+	assert(!cd->pending_recv);
+	cd->pending_recv = 1;
+
+	/*
+	 * For both recv and multishot receive, we use the ring provided
+	 * buffers. These are handed to the application ahead of time, and
+	 * are consumed when a receive triggers. Note that the address and
+	 * length of the receive are set to NULL/0, and we assign the
+	 * sqe->buf_group to tell the kernel which buffer group ID to pick
+	 * a buffer from. Finally, IOSQE_BUFFER_SELECT is set to tell the
+	 * kernel that we want a buffer picked for this request, we are not
+	 * passing one in with the request.
+	 */
+	sqe = get_sqe(ring);
+	if (rcv_msg) {
+		struct io_msg *imsg = &cd->io_rcv_msg;
+		struct msghdr *msg = &imsg->msg;
+
+		memset(msg, 0, sizeof(*msg));
+		msg->msg_iov = msg_vec(imsg)->iov;
+		msg->msg_iovlen = msg_vec(imsg)->iov_len;
+
+		if (recv_mshot) {
+			cd->rcv_mshot++;
+			io_uring_prep_recvmsg_multishot(sqe, fd, &imsg->msg, 0);
+		} else {
+			io_uring_prep_recvmsg(sqe, fd, &imsg->msg, 0);
+		}
+	} else {
+		if (recv_mshot) {
+			cd->rcv_mshot++;
+			io_uring_prep_recv_multishot(sqe, fd, NULL, 0, 0);
+		} else {
+			io_uring_prep_recv(sqe, fd, NULL, 0, 0);
+		}
+	}
+	encode_userdata(sqe, c, __RECV, 0, fd);
+	sqe->buf_group = cbr->bgid;
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	if (fixed_files)
+		sqe->flags |= IOSQE_FIXED_FILE;
+	if (rcv_bundle)
+		sqe->ioprio |= IORING_RECVSEND_BUNDLE;
+}
+
+/*
+ * One directional just arms receive on our in_fd
+ */
+static void submit_receive(struct io_uring *ring, struct conn *c)
+{
+	__submit_receive(ring, c, &c->cd[0], c->in_fd);
+}
+
+/*
+ * Bi-directional arms receive on both in and out fd
+ */
+static void submit_bidi_receive(struct io_uring *ring, struct conn *c)
+{
+	__submit_receive(ring, c, &c->cd[0], c->in_fd);
+	__submit_receive(ring, c, &c->cd[1], c->out_fd);
+}
+
+/*
+ * We hit -ENOBUFS, which means that we ran out of buffers in our current
+ * provided buffer group. This can happen if there's an imbalance between the
+ * receives coming in and the sends being processed, particularly with multishot
+ * receive as they can trigger very quickly. If this happens, defer arming a
+ * new receive until we've replenished half of the buffer pool by processing
+ * pending sends.
+ */
+static void recv_enobufs(struct io_uring *ring, struct conn *c,
+			 struct conn_dir *cd, int fd)
+{
+	vlog("%d: enobufs hit\n", c->tid);
+
+	cd->rcv_enobufs++;
+
+	/*
+	 * If we're a sink, just rearm the receive. If we're not, try to
+	 * prep the next send - if one was prepared, its completion will
+	 * kick the receive rearm. Otherwise, rearm the receive now.
+	 */
+	if (!is_sink) {
+		int do_recv_arm = 1;
+
+		if (!cd->pending_send)
+			do_recv_arm = !prep_next_send(ring, c, cd, fd);
+		if (do_recv_arm)
+			__submit_receive(ring, c, &c->cd[0], c->in_fd);
+	} else {
+		__submit_receive(ring, c, &c->cd[0], c->in_fd);
+	}
+}
+
+/*
+ * Kill this socket - submit a shutdown and link a close to it. We don't
+ * care about shutdown status, so mark it as not needing to post a CQE unless
+ * it fails.
+ */
+static void queue_shutdown_close(struct io_uring *ring, struct conn *c, int fd)
+{
+	struct io_uring_sqe *sqe1, *sqe2;
+
+	/*
+	 * On the off chance that we run out of SQEs after the first one,
+	 * grab two upfront. This is to prevent our link from breaking if
+	 * get_sqe() ends up doing submissions to free up an SQE, as links
+	 * are not valid across separate submissions.
+	 */
+	sqe1 = get_sqe(ring);
+	sqe2 = get_sqe(ring);
+
+	io_uring_prep_shutdown(sqe1, fd, SHUT_RDWR);
+	if (fixed_files)
+		sqe1->flags |= IOSQE_FIXED_FILE;
+	sqe1->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;
+	encode_userdata(sqe1, c, __SHUTDOWN, 0, fd);
+
+	if (fixed_files)
+		io_uring_prep_close_direct(sqe2, fd);
+	else
+		io_uring_prep_close(sqe2, fd);
+	encode_userdata(sqe2, c, __CLOSE, 0, fd);
+}
+
+/*
+ * This connection is going away - queue a cancel for any pending recv we
+ * have on this ring. For completeness, we issue a cancel for any request
+ * we have pending for both in_fd and out_fd.
+ */
+static void queue_cancel(struct io_uring *ring, struct conn *c)
+{
+	struct io_uring_sqe *sqe;
+	int flags = 0;
+
+	if (fixed_files)
+		flags |= IORING_ASYNC_CANCEL_FD_FIXED;
+
+	sqe = get_sqe(ring);
+	io_uring_prep_cancel_fd(sqe, c->in_fd, flags);
+	encode_userdata(sqe, c, __CANCEL, 0, c->in_fd);
+	c->pending_cancels++;
+
+	if (c->out_fd != -1) {
+		sqe = get_sqe(ring);
+		io_uring_prep_cancel_fd(sqe, c->out_fd, flags);
+		encode_userdata(sqe, c, __CANCEL, 0, c->out_fd);
+		c->pending_cancels++;
+	}
+
+	io_uring_submit(ring);
+}
+
+static int pending_shutdown(struct conn *c)
+{
+	return c->cd[0].pending_shutdown + c->cd[1].pending_shutdown;
+}
+
+static bool should_shutdown(struct conn *c)
+{
+	if (!pending_shutdown(c))
+		return false;
+	if (is_sink)
+		return true;
+	if (!bidi)
+		return c->cd[0].in_bytes == c->cd[1].out_bytes;
+
+	/*
+	 * For bidi, both directions must have fully drained: each side's
+	 * receives must match the other side's sends.
+	 */
+	if (c->cd[0].rcv != c->cd[1].snd)
+		return false;
+	if (c->cd[1].rcv != c->cd[0].snd)
+		return false;
+
+	return true;
+}
+
+/*
+ * Close this connection - send a ring message to the connection with intent
+ * to stop. When the client gets the message, it will initiate the stop.
+ */
+static void __close_conn(struct io_uring *ring, struct conn *c)
+{
+	struct io_uring_sqe *sqe;
+	uint64_t user_data;
+
+	printf("Client %d: queueing stop\n", c->tid);
+
+	user_data = __raw_encode(c->tid, __STOP, 0, 0);
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_msg_ring(sqe, c->ring.ring_fd, 0, user_data, 0);
+	encode_userdata(sqe, c, __NOP, 0, 0);
+	io_uring_submit(ring);
+}
+
+static void close_cd(struct conn *c, struct conn_dir *cd)
+{
+	cd->pending_shutdown = 1;
+
+	if (cd->pending_send)
+		return;
+
+	if (!(c->flags & CONN_F_PENDING_SHUTDOWN)) {
+		gettimeofday(&c->end_time, NULL);
+		c->flags |= CONN_F_PENDING_SHUTDOWN | CONN_F_END_TIME;
+	}
+}
+
+/*
+ * We're done with this buffer, add it back to our pool so the kernel is
+ * free to use it again.
+ */
+static int replenish_buffer(struct conn_buf_ring *cbr, int bid, int offset)
+{
+	void *this_buf = cbr->buf + bid * buf_size;
+
+	assert(bid < nr_bufs);
+
+	io_uring_buf_ring_add(cbr->br, this_buf, buf_size, bid, br_mask, offset);
+	return buf_size;
+}
+
+/*
+ * Iterate buffers from '*bid' and with a total size of 'bytes' and add them
+ * back to our receive ring so they can be reused for new receives.
+ */
+static int replenish_buffers(struct conn *c, int *bid, int bytes)
+{
+	struct conn_buf_ring *cbr = &c->in_br;
+	int nr_packets = 0;
+
+	while (bytes) {
+		int this_len = replenish_buffer(cbr, *bid, nr_packets);
+
+		if (this_len > bytes)
+			this_len = bytes;
+		bytes -= this_len;
+
+		*bid = (*bid + 1) & (nr_bufs - 1);
+		nr_packets++;
+	}
+
+	io_uring_buf_ring_advance(cbr->br, nr_packets);
+	return nr_packets;
+}
+
+static void free_mvec(struct msg_vec *mvec)
+{
+	free(mvec->iov);
+	mvec->iov = NULL;
+}
+
+static void init_mvec(struct msg_vec *mvec)
+{
+	memset(mvec, 0, sizeof(*mvec));
+	mvec->iov = malloc(sizeof(struct iovec));
+	mvec->vec_size = 1;
+}
+
+static void init_msgs(struct conn_dir *cd)
+{
+	memset(&cd->io_snd_msg, 0, sizeof(cd->io_snd_msg));
+	memset(&cd->io_rcv_msg, 0, sizeof(cd->io_rcv_msg));
+	init_mvec(&cd->io_snd_msg.vecs[0]);
+	init_mvec(&cd->io_snd_msg.vecs[1]);
+	init_mvec(&cd->io_rcv_msg.vecs[0]);
+}
+
+static void free_msgs(struct conn_dir *cd)
+{
+	free_mvec(&cd->io_snd_msg.vecs[0]);
+	free_mvec(&cd->io_snd_msg.vecs[1]);
+	free_mvec(&cd->io_rcv_msg.vecs[0]);
+}
+
+/*
+ * Multishot accept completion triggered. If we're acting as a sink, we're
+ * good to go. Just issue a receive for that case. If we're acting as a proxy,
+ * then start opening a socket that we can use to connect to the other end.
+ */
+static int handle_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c;
+	int i;
+
+	if (nr_conns == MAX_CONNS) {
+		fprintf(stderr, "max clients reached %d\n", nr_conns);
+		return 1;
+	}
+
+	/* main thread handles this, which is obviously serialized */
+	c = &conns[nr_conns];
+	c->tid = nr_conns++;
+	c->in_fd = -1;
+	c->out_fd = -1;
+
+	for (i = 0; i < 2; i++) {
+		struct conn_dir *cd = &c->cd[i];
+
+		cd->index = i;
+		cd->snd_next_bid = -1;
+		cd->rcv_next_bid = -1;
+		if (ext_stat) {
+			cd->rcv_bucket = calloc(nr_bufs + 1, sizeof(int));
+			cd->snd_bucket = calloc(nr_bufs + 1, sizeof(int));
+		}
+		init_msgs(cd);
+	}
+
+	printf("New client: id=%d, in=%d\n", c->tid, c->in_fd);
+	gettimeofday(&c->start_time, NULL);
+
+	pthread_barrier_init(&c->startup_barrier, NULL, 2);
+	pthread_create(&c->thread, NULL, thread_main, c);
+
+	/*
+	 * Wait for thread to have its ring setup, then either assign the fd
+	 * if it's non-fixed, or pass the fixed one
+	 */
+	pthread_barrier_wait(&c->startup_barrier);
+	if (!fixed_files) {
+		c->in_fd = cqe->res;
+	} else {
+		struct io_uring_sqe *sqe;
+		uint64_t user_data;
+
+		/*
+		 * Ring has just been setup, we'll use index 0 as the descriptor
+		 * value.
+		 */
+		user_data = __raw_encode(c->tid, __FD_PASS, 0, 0);
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_msg_ring_fd(sqe, c->ring.ring_fd, cqe->res, 0,
+						user_data, 0);
+		encode_userdata(sqe, c, __NOP, 0, cqe->res);
+	}
+
+	return 0;
+}
+
+/*
+ * Our socket request completed, issue a connect request to the other end.
+ */
+static int handle_sock(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	vlog("%d: sock: res=%d\n", c->tid, cqe->res);
+
+	c->out_fd = cqe->res;
+
+	if (ipv6) {
+		memset(&c->addr6, 0, sizeof(c->addr6));
+		c->addr6.sin6_family = AF_INET6;
+		c->addr6.sin6_port = htons(send_port);
+		ret = inet_pton(AF_INET6, host, &c->addr6.sin6_addr);
+	} else {
+		memset(&c->addr, 0, sizeof(c->addr));
+		c->addr.sin_family = AF_INET;
+		c->addr.sin_port = htons(send_port);
+		ret = inet_pton(AF_INET, host, &c->addr.sin_addr);
+	}
+	if (ret <= 0) {
+		if (!ret)
+			fprintf(stderr, "host not in right format\n");
+		else
+			perror("inet_pton");
+		return 1;
+	}
+
+	sqe = get_sqe(ring);
+	if (ipv6) {
+		io_uring_prep_connect(sqe, c->out_fd,
+					(struct sockaddr *) &c->addr6,
+					sizeof(c->addr6));
+	} else {
+		io_uring_prep_connect(sqe, c->out_fd,
+					(struct sockaddr *) &c->addr,
+					sizeof(c->addr));
+	}
+	encode_userdata(sqe, c, __CONNECT, 0, c->out_fd);
+	if (fixed_files)
+		sqe->flags |= IOSQE_FIXED_FILE;
+	return 0;
+}
+
+/*
+ * Connection to the other end is done, submit a receive to start receiving
+ * data. If we're a bidirectional proxy, issue a receive on both ends. If not,
+ * then just a single recv will do.
+ */
+static int handle_connect(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+
+	pthread_mutex_lock(&thread_lock);
+	open_conns++;
+	pthread_mutex_unlock(&thread_lock);
+
+	if (bidi)
+		submit_bidi_receive(ring, c);
+	else
+		submit_receive(ring, c);
+
+	return 0;
+}
+
+/*
+ * Append new segment to our currently active msg_vec. This will be submitted
+ * as a sendmsg (with all of it), or as separate sends, later. If we're using
+ * send_ring, then we won't hit this path. Instead, outgoing buffers are
+ * added directly to our outgoing send buffer ring.
+ */
+static void send_append_vec(struct conn_dir *cd, void *data, int len)
+{
+	struct msg_vec *mvec = snd_msg_vec(cd);
+
+	if (mvec->iov_len == mvec->vec_size) {
+		mvec->vec_size <<= 1;
+		mvec->iov = realloc(mvec->iov, mvec->vec_size * sizeof(struct iovec));
+	}
+
+	mvec->iov[mvec->iov_len].iov_base = data;
+	mvec->iov[mvec->iov_len].iov_len = len;
+	mvec->iov_len++;
+}
+
+/*
+ * Queue a send based on the data received in this cqe, which came from
+ * a completed receive operation.
+ */
+static void send_append(struct conn *c, struct conn_dir *cd, void *data,
+			int bid, int len)
+{
+	vlog("%d: send %d (%p, bid %d)\n", c->tid, len, data, bid);
+
+	assert(bid < nr_bufs);
+
+	/* if using provided buffers for send, add it upfront */
+	if (send_ring) {
+		struct conn_buf_ring *cbr = &c->out_br;
+
+		io_uring_buf_ring_add(cbr->br, data, len, bid, br_mask, 0);
+		io_uring_buf_ring_advance(cbr->br, 1);
+	} else {
+		send_append_vec(cd, data, len);
+	}
+}
+
+/*
+ * For everything but recvmsg with multishot, a zero-byte receive marks the
+ * end. For recvmsg with multishot, we always get the header regardless,
+ * hence a "zero receive" is one whose size matches the header.
+ */
+static int recv_done_res(int res)
+{
+	if (!res)
+		return 1;
+	if (rcv_msg && recv_mshot && res == sizeof(struct io_uring_recvmsg_out))
+		return 1;
+	return 0;
+}
+
+static int recv_inc(struct conn *c, struct conn_dir *cd, int *bid,
+		    struct io_uring_cqe *cqe)
+{
+	struct conn_buf_ring *cbr = &c->out_br;
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	void *data;
+
+	if (!cqe->res)
+		return 0;
+	if (cqe->flags & IORING_CQE_F_BUF_MORE)
+		return 0;
+
+	data = in_cbr->buf + *bid * buf_size;
+	if (is_sink) {
+		io_uring_buf_ring_add(in_cbr->br, data, buf_size, *bid, br_mask, 0);
+		io_uring_buf_ring_advance(in_cbr->br, 1);
+	} else if (send_ring) {
+		io_uring_buf_ring_add(cbr->br, data, buf_size, *bid, br_mask, 0);
+		io_uring_buf_ring_advance(cbr->br, 1);
+	} else {
+		send_append(c, cd, data, *bid, buf_size);
+	}
+	*bid = (*bid + 1) & (nr_bufs - 1);
+	return 1;
+}
+
+/*
+ * Any receive that isn't recvmsg with multishot can be handled the same way.
+ * Iterate from '*bid' and 'in_bytes' in total, and append the data to the
+ * outgoing queue.
+ */
+static int recv_bids(struct conn *c, struct conn_dir *cd, int *bid, int in_bytes)
+{
+	struct conn_buf_ring *cbr = &c->out_br;
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	struct io_uring_buf *buf;
+	int nr_packets = 0;
+
+	while (in_bytes) {
+		int this_bytes;
+		void *data;
+
+		buf = &in_cbr->br->bufs[*bid];
+		data = (void *) (unsigned long) buf->addr;
+		this_bytes = buf->len;
+		if (this_bytes > in_bytes)
+			this_bytes = in_bytes;
+
+		in_bytes -= this_bytes;
+
+		if (send_ring)
+			io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid,
+						br_mask, nr_packets);
+		else
+			send_append(c, cd, data, *bid, this_bytes);
+
+		*bid = (*bid + 1) & (nr_bufs - 1);
+		nr_packets++;
+	}
+
+	if (send_ring)
+		io_uring_buf_ring_advance(cbr->br, nr_packets);
+
+	return nr_packets;
+}
+
+/*
+ * Special handling of recvmsg with multishot
+ */
+static int recv_mshot_msg(struct conn *c, struct conn_dir *cd, int *bid,
+			  int in_bytes)
+{
+	struct conn_buf_ring *cbr = &c->out_br;
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	struct io_uring_buf *buf;
+	int nr_packets = 0;
+
+	while (in_bytes) {
+		struct io_uring_recvmsg_out *pdu;
+		int this_bytes;
+		void *data;
+
+		buf = &in_cbr->br->bufs[*bid];
+
+		/*
+		 * multishot recvmsg puts a header in front of the data - we
+		 * have to take that into account for the send setup, and
+		 * adjust the actual data read to not take this metadata into
+		 * account. For this use case, namelen and controllen will not
+		 * be set. If they were, they would need to be factored in too.
+		 */
+		buf->len -= sizeof(struct io_uring_recvmsg_out);
+		in_bytes -= sizeof(struct io_uring_recvmsg_out);
+
+		pdu = (void *) (unsigned long) buf->addr;
+		vlog("pdu namelen %d, controllen %d, payload %d flags %x\n",
+				pdu->namelen, pdu->controllen, pdu->payloadlen,
+				pdu->flags);
+		data = (void *) (pdu + 1);
+
+		this_bytes = pdu->payloadlen;
+		if (this_bytes > in_bytes)
+			this_bytes = in_bytes;
+
+		in_bytes -= this_bytes;
+
+		if (send_ring)
+			io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid,
+						br_mask, nr_packets);
+		else
+			send_append(c, cd, data, *bid, this_bytes);
+
+		*bid = (*bid + 1) & (nr_bufs - 1);
+		nr_packets++;
+	}
+
+	if (send_ring)
+		io_uring_buf_ring_advance(cbr->br, nr_packets);
+
+	return nr_packets;
+}
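+
+/*
+ * For reference, the layout of a buffer filled by multishot recvmsg, as
+ * handled above (namelen and controllen are 0 in this example, so name
+ * and control data take no space):
+ *
+ *	+------------------------------+---------------------------+
+ *	| struct io_uring_recvmsg_out  | payloadlen bytes of data  |
+ *	+------------------------------+---------------------------+
+ *
+ * This is why sizeof(struct io_uring_recvmsg_out) is subtracted from both
+ * the buffer length and the remaining byte count.
+ */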
+
+static int __handle_recv(struct io_uring *ring, struct conn *c,
+			 struct conn_dir *cd, struct io_uring_cqe *cqe)
+{
+	struct conn_dir *ocd = &c->cd[!cd->index];
+	int bid, nr_packets;
+
+	/*
+	 * Not having a buffer attached should only happen if we get a zero
+	 * sized receive, because the other end closed the connection. It
+	 * cannot happen otherwise, as all our receives are using provided
+	 * buffers and hence it's not possible to return a CQE with a non-zero
+	 * result and not have a buffer attached.
+	 */
+	if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+		cd->pending_recv = 0;
+
+		if (!recv_done_res(cqe->res)) {
+			fprintf(stderr, "no buffer assigned, res=%d\n", cqe->res);
+			return 1;
+		}
+start_close:
+		prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe)));
+		close_cd(c, cd);
+		return 0;
+	}
+
+	if (cqe->res && cqe->res < buf_size)
+		cd->rcv_shrt++;
+
+	bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+
+	/*
+	 * BIDI will use the same buffer pool and do receive on both CDs,
+	 * so can't reliably check. TODO.
+	 */
+	if (!bidi && cd->rcv_next_bid != -1 && bid != cd->rcv_next_bid) {
+		fprintf(stderr, "recv bid %d, wanted %d\n", bid, cd->rcv_next_bid);
+		goto start_close;
+	}
+
+	vlog("%d: recv: bid=%d, res=%d, cflags=%x\n", c->tid, bid, cqe->res, cqe->flags);
+	/*
+	 * If we're a sink, we're done here. Just replenish the buffer back
+	 * to the pool. For proxy mode, we will send the data to the other
+	 * end and the buffer will be replenished once the send is done with
+	 * it.
+	 */
+	if (buf_ring_inc)
+		nr_packets = recv_inc(c, ocd, &bid, cqe);
+	else if (is_sink)
+		nr_packets = replenish_buffers(c, &bid, cqe->res);
+	else if (rcv_msg && recv_mshot)
+		nr_packets = recv_mshot_msg(c, ocd, &bid, cqe->res);
+	else
+		nr_packets = recv_bids(c, ocd, &bid, cqe->res);
+
+	if (cd->rcv_bucket)
+		cd->rcv_bucket[nr_packets]++;
+
+	if (!is_sink) {
+		ocd->out_buffers += nr_packets;
+		assert(ocd->out_buffers <= nr_bufs);
+	}
+
+	cd->rcv++;
+	cd->rcv_next_bid = bid;
+
+	/*
+	 * If IORING_CQE_F_MORE isn't set, then this is either a normal recv
+	 * that needs rearming, or it's a multishot that won't post any further
+	 * completions. Setup a new one for these cases.
+	 */
+	if (!(cqe->flags & IORING_CQE_F_MORE)) {
+		cd->pending_recv = 0;
+		if (recv_done_res(cqe->res))
+			goto start_close;
+		if (is_sink || !ocd->pending_send)
+			__submit_receive(ring, c, &c->cd[0], c->in_fd);
+	}
+
+	/*
+	 * Submit a send if we won't get anymore notifications from this
+	 * recv, or if we have nr_bufs / 2 queued up. If BIDI mode, send
+	 * every buffer. We assume this is interactive mode, and hence don't
+	 * delay anything.
+	 */
+	if (((!ocd->pending_send && (bidi || (ocd->out_buffers >= nr_bufs / 2))) ||
+	    !(cqe->flags & IORING_CQE_F_MORE)) && !is_sink)
+		prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe)));
+
+	if (!recv_done_res(cqe->res))
+		cd->in_bytes += cqe->res;
+	return 0;
+}
+
+static int handle_recv(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct conn_dir *cd = cqe_to_conn_dir(c, cqe);
+
+	return __handle_recv(ring, c, cd, cqe);
+}
+
+static int recv_error(struct error_handler *err, struct io_uring *ring,
+		      struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct conn_dir *cd = cqe_to_conn_dir(c, cqe);
+
+	cd->pending_recv = 0;
+
+	if (cqe->res != -ENOBUFS)
+		return default_error(err, ring, cqe);
+
+	recv_enobufs(ring, c, cd, other_dir_fd(c, cqe_to_fd(cqe)));
+	return 0;
+}
+
+static void submit_send(struct io_uring *ring, struct conn *c,
+			struct conn_dir *cd, int fd, void *data, int len,
+			int bid, int flags)
+{
+	struct io_uring_sqe *sqe;
+	int bgid = c->out_br.bgid;
+
+	if (cd->pending_send)
+		return;
+	cd->pending_send = 1;
+
+	flags |= MSG_WAITALL | MSG_NOSIGNAL;
+
+	sqe = get_sqe(ring);
+	if (snd_msg) {
+		struct io_msg *imsg = &cd->io_snd_msg;
+
+		if (snd_zc) {
+			io_uring_prep_sendmsg_zc(sqe, fd, &imsg->msg, flags);
+			cd->snd_notif++;
+		} else {
+			io_uring_prep_sendmsg(sqe, fd, &imsg->msg, flags);
+		}
+	} else if (send_ring) {
+		io_uring_prep_send(sqe, fd, NULL, 0, flags);
+	} else if (!snd_zc) {
+		io_uring_prep_send(sqe, fd, data, len, flags);
+	} else {
+		io_uring_prep_send_zc(sqe, fd, data, len, flags, 0);
+		sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+		sqe->buf_index = bid;
+		cd->snd_notif++;
+	}
+	encode_userdata(sqe, c, __SEND, bid, fd);
+	if (fixed_files)
+		sqe->flags |= IOSQE_FIXED_FILE;
+	if (send_ring) {
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = bgid;
+	}
+	if (snd_bundle) {
+		sqe->ioprio |= IORING_RECVSEND_BUNDLE;
+		cd->snd_mshot++;
+	} else if (send_ring)
+		cd->snd_mshot++;
+}
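+
+/*
+ * Note on the setup above: with IORING_RECVSEND_BUNDLE (and with
+ * send_ring in general), a single send can consume multiple buffers from
+ * the group in one operation, with cqe->res holding the total byte count.
+ * That's why the send completion handlers walk buffers until all of
+ * cqe->res has been accounted for.
+ */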
+
+/*
+ * Prepare the next send request, if we need to. If one is already pending,
+ * or if we're a sink and we don't need to do sends, then there's nothing
+ * to do.
+ *
+ * Return 1 if another send completion is expected, 0 if not.
+ */
+static int prep_next_send(struct io_uring *ring, struct conn *c,
+			   struct conn_dir *cd, int fd)
+{
+	int bid;
+
+	if (cd->pending_send || is_sink)
+		return 0;
+	if (!cd->out_buffers)
+		return 0;
+
+	bid = cd->snd_next_bid;
+	if (bid == -1)
+		bid = 0;
+
+	if (send_ring) {
+		/*
+		 * send_ring mode is easy, there's nothing to do but submit
+		 * our next send request. That will empty the entire outgoing
+		 * queue.
+		 */
+		submit_send(ring, c, cd, fd, NULL, 0, bid, 0);
+		return 1;
+	} else if (snd_msg) {
+		/*
+		 * For sendmsg mode, submit our currently prepared iovec, if
+		 * we have one, and swap our iovecs so that any further
+		 * receives will start preparing that one.
+		 */
+		struct io_msg *imsg = &cd->io_snd_msg;
+
+		if (!msg_vec(imsg)->iov_len)
+			return 0;
+		imsg->msg.msg_iov = msg_vec(imsg)->iov;
+		imsg->msg.msg_iovlen = msg_vec(imsg)->iov_len;
+		msg_vec(imsg)->iov_len = 0;
+		imsg->vec_index = !imsg->vec_index;
+		submit_send(ring, c, cd, fd, NULL, 0, bid, 0);
+		return 1;
+	} else {
+		/*
+		 * send without send_ring - submit the next available vec,
+		 * if any. If this vec is the last one in the current series,
+		 * then swap to the next vec. We flag each send with MSG_MORE,
+		 * unless this is the last part of the current vec.
+		 */
+		struct io_msg *imsg = &cd->io_snd_msg;
+		struct msg_vec *mvec = msg_vec(imsg);
+		int flags = !snd_zc ? MSG_MORE : 0;
+		struct iovec *iov;
+
+		if (mvec->iov_len == mvec->cur_iov)
+			return 0;
+		imsg->msg.msg_iov = msg_vec(imsg)->iov;
+		iov = &mvec->iov[mvec->cur_iov];
+		mvec->cur_iov++;
+		if (mvec->cur_iov == mvec->iov_len) {
+			mvec->iov_len = 0;
+			mvec->cur_iov = 0;
+			imsg->vec_index = !imsg->vec_index;
+			flags = 0;
+		}
+		submit_send(ring, c, cd, fd, iov->iov_base, iov->iov_len, bid, flags);
+		return 1;
+	}
+}
+
+static int handle_send_inc(struct conn *c, struct conn_dir *cd, int bid,
+			   struct io_uring_cqe *cqe)
+{
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	int ret = 0;
+	void *data;
+
+	if (!cqe->res)
+		goto out;
+	if (cqe->flags & IORING_CQE_F_BUF_MORE)
+		return 0;
+
+	assert(cqe->res <= buf_size);
+	cd->out_bytes += cqe->res;
+
+	data = in_cbr->buf + bid * buf_size;
+	io_uring_buf_ring_add(in_cbr->br, data, buf_size, bid, br_mask, 0);
+	io_uring_buf_ring_advance(in_cbr->br, 1);
+	bid = (bid + 1) & (nr_bufs - 1);
+	ret = 1;
+out:
+	if (pending_shutdown(c))
+		close_cd(c, cd);
+
+	return ret;
+}
+
+/*
+ * Handle a send completion when an outgoing send ring is used. Get the
+ * buffers from the receive side, and add them back to the incoming buffer
+ * ring so they can be reused for receives.
+ */
+static int handle_send_ring(struct conn *c, struct conn_dir *cd, int bid,
+			    int bytes)
+{
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	struct conn_buf_ring *out_cbr = &c->out_br;
+	int i = 0;
+
+	while (bytes) {
+		struct io_uring_buf *buf = &out_cbr->br->bufs[bid];
+		int this_bytes;
+		void *this_buf;
+
+		this_bytes = buf->len;
+		if (this_bytes > bytes)
+			this_bytes = bytes;
+
+		cd->out_bytes += this_bytes;
+
+		vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes);
+
+		this_buf = in_cbr->buf + bid * buf_size;
+		io_uring_buf_ring_add(in_cbr->br, this_buf, buf_size, bid, br_mask, i);
+		/*
+		 * Find the provided buffer that the receive consumed, and
+		 * which we then used for the send, and add it back to the
+		 * pool so it can get picked by another receive. Once the send
+		 * is done, we're done with it.
+		 */
+		bid = (bid + 1) & (nr_bufs - 1);
+		bytes -= this_bytes;
+		i++;
+	}
+	cd->snd_next_bid = bid;
+	io_uring_buf_ring_advance(in_cbr->br, i);
+
+	if (pending_shutdown(c))
+		close_cd(c, cd);
+
+	return i;
+}
+
+/*
+ * sendmsg, or send without a ring. Just add the buffers back to the
+ * incoming ring for receives.
+ */
+static int handle_send_buf(struct conn *c, struct conn_dir *cd, int bid,
+			   int bytes)
+{
+	struct conn_buf_ring *in_cbr = &c->in_br;
+	int i = 0;
+
+	while (bytes) {
+		struct io_uring_buf *buf = &in_cbr->br->bufs[bid];
+		int this_bytes;
+
+		this_bytes = bytes;
+		if (this_bytes > buf->len)
+			this_bytes = buf->len;
+
+		vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes);
+
+		cd->out_bytes += this_bytes;
+		/* each recvmsg mshot packet has this header overhead */
+		if (rcv_msg && recv_mshot)
+			cd->out_bytes += sizeof(struct io_uring_recvmsg_out);
+		replenish_buffer(in_cbr, bid, i);
+		bid = (bid + 1) & (nr_bufs - 1);
+		bytes -= this_bytes;
+		i++;
+	}
+	io_uring_buf_ring_advance(in_cbr->br, i);
+	cd->snd_next_bid = bid;
+	return i;
+}
+
+static int __handle_send(struct io_uring *ring, struct conn *c,
+			 struct conn_dir *cd, struct io_uring_cqe *cqe)
+{
+	struct conn_dir *ocd;
+	int bid, nr_packets;
+
+	if (send_ring) {
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "no buffer in send?! %d\n", cqe->res);
+			return 1;
+		}
+		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+	} else {
+		bid = cqe_to_bid(cqe);
+	}
+
+	/*
+	 * CQE notifications only happen with send/sendmsg zerocopy. They
+	 * tell us that the data has been acked, and hence that the buffer
+	 * is now free to reuse. Waiting on an ACK for each packet will slow
+	 * us down tremendously, so do all of our sends and then wait for
+	 * the ACKs to come in. They tend to come in bundles anyway. Once
+	 * all acks are done (cd->snd_notif == 0), then fire off the next
+	 * receive.
+	 */
+	if (cqe->flags & IORING_CQE_F_NOTIF) {
+		cd->snd_notif--;
+	} else {
+		if (cqe->res && cqe->res < buf_size)
+			cd->snd_shrt++;
+
+		/*
+		 * BIDI will use the same buffer pool and do sends on both CDs,
+		 * so can't reliably check. TODO.
+		 */
+		if (!bidi && send_ring && cd->snd_next_bid != -1 &&
+		    bid != cd->snd_next_bid) {
+			fprintf(stderr, "send bid %d, wanted %d at %lu\n", bid,
+					cd->snd_next_bid, cd->out_bytes);
+			goto out_close;
+		}
+
+		assert(bid <= nr_bufs);
+
+		vlog("send: got %d, %lu\n", cqe->res, cd->out_bytes);
+
+		if (buf_ring_inc)
+			nr_packets = handle_send_inc(c, cd, bid, cqe);
+		else if (send_ring)
+			nr_packets = handle_send_ring(c, cd, bid, cqe->res);
+		else
+			nr_packets = handle_send_buf(c, cd, bid, cqe->res);
+
+		if (cd->snd_bucket)
+			cd->snd_bucket[nr_packets]++;
+
+		cd->out_buffers -= nr_packets;
+		assert(cd->out_buffers >= 0);
+
+		cd->snd++;
+	}
+
+	if (!(cqe->flags & IORING_CQE_F_MORE)) {
+		int do_recv_arm;
+
+		cd->pending_send = 0;
+
+		/*
+		 * send done - see if the current vec has data to submit, and
+		 * do so if it does. if it doesn't have data yet, nothing to
+		 * do.
+		 */
+		do_recv_arm = !prep_next_send(ring, c, cd, cqe_to_fd(cqe));
+
+		ocd = &c->cd[!cd->index];
+		if (!cd->snd_notif && do_recv_arm && !ocd->pending_recv) {
+			int fd = other_dir_fd(c, cqe_to_fd(cqe));
+
+			__submit_receive(ring, c, ocd, fd);
+		}
+out_close:
+		if (pending_shutdown(c))
+			close_cd(c, cd);
+	}
+
+	vlog("%d: pending sends %d\n", c->tid, cd->pending_send);
+	return 0;
+}
+
+static int handle_send(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct conn_dir *cd = cqe_to_conn_dir(c, cqe);
+
+	return __handle_send(ring, c, cd, cqe);
+}
+
+static int send_error(struct error_handler *err, struct io_uring *ring,
+		      struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct conn_dir *cd = cqe_to_conn_dir(c, cqe);
+
+	cd->pending_send = 0;
+
+	/* res can have high bit set */
+	if (cqe->flags & IORING_CQE_F_NOTIF)
+		return handle_send(ring, cqe);
+	if (cqe->res != -ENOBUFS)
+		return default_error(err, ring, cqe);
+
+	cd->snd_enobufs++;
+	return 0;
+}
+
+/*
+ * We don't expect to get here, as we marked the shutdown request with
+ * skipping posting a CQE on success. If it does trigger, that means the
+ * shutdown failed and that our close has not been done. Log the shutdown
+ * error and issue a new, separate close.
+ */
+static int handle_shutdown(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	struct io_uring_sqe *sqe;
+	int fd = cqe_to_fd(cqe);
+
+	fprintf(stderr, "Got shutdown notification on fd %d\n", fd);
+
+	if (!cqe->res)
+		fprintf(stderr, "Unexpected success shutdown CQE\n");
+	else if (cqe->res < 0)
+		fprintf(stderr, "Shutdown got %s\n", strerror(-cqe->res));
+
+	sqe = get_sqe(ring);
+	if (fixed_files)
+		io_uring_prep_close_direct(sqe, fd);
+	else
+		io_uring_prep_close(sqe, fd);
+	encode_userdata(sqe, c, __CLOSE, 0, fd);
+	return 0;
+}
+
+/*
+ * Final stage of a connection - the shutdown and close have finished. Mark
+ * it as disconnected and let the main loop reap it.
+ */
+static int handle_close(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	int fd = cqe_to_fd(cqe);
+
+	printf("Closed client: id=%d, in_fd=%d, out_fd=%d\n", c->tid, c->in_fd, c->out_fd);
+	if (fd == c->in_fd)
+		c->in_fd = -1;
+	else if (fd == c->out_fd)
+		c->out_fd = -1;
+
+	if (c->in_fd == -1 && c->out_fd == -1) {
+		c->flags |= CONN_F_DISCONNECTED;
+
+		pthread_mutex_lock(&thread_lock);
+		__show_stats(c);
+		open_conns--;
+		pthread_mutex_unlock(&thread_lock);
+		free_buffer_rings(ring, c);
+		free_msgs(&c->cd[0]);
+		free_msgs(&c->cd[1]);
+		free(c->cd[0].rcv_bucket);
+		free(c->cd[0].snd_bucket);
+	}
+
+	return 0;
+}
+
+static int handle_cancel(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	int fd = cqe_to_fd(cqe);
+
+	c->pending_cancels--;
+
+	vlog("%d: got cancel fd %d, refs %d\n", c->tid, fd, c->pending_cancels);
+
+	if (!c->pending_cancels) {
+		queue_shutdown_close(ring, c, c->in_fd);
+		if (c->out_fd != -1)
+			queue_shutdown_close(ring, c, c->out_fd);
+		io_uring_submit(ring);
+	}
+
+	return 0;
+}
+
+static void open_socket(struct conn *c)
+{
+	if (is_sink) {
+		pthread_mutex_lock(&thread_lock);
+		open_conns++;
+		pthread_mutex_unlock(&thread_lock);
+
+		submit_receive(&c->ring, c);
+	} else {
+		struct io_uring_sqe *sqe;
+		int domain;
+
+		if (ipv6)
+			domain = AF_INET6;
+		else
+			domain = AF_INET;
+
+		/*
+		 * If fixed_files is set, proxy will use fixed files for any new
+		 * file descriptors it instantiates. Fixed files, or fixed
+		 * descriptors, are io_uring private file descriptors. They
+		 * cannot be accessed outside of io_uring. io_uring holds a
+		 * fixed reference to them, which means that we do not need to
+		 * grab per-request references to them. Particularly for
+		 * threaded applications, grabbing and dropping file references
+		 * for each operation can be costly as the file table is shared.
+		 * This generally shows up as fget/fput related overhead in any
+		 * workload profiles.
+		 *
+		 * Fixed descriptors are passed in via the 'fd' field just like
+		 * regular descriptors, and then marked as such by setting the
+		 * IOSQE_FIXED_FILE flag in the sqe->flags field. Some helpers
+		 * do that automatically, like the below, others will need it
+		 * set manually if they don't have a *direct*() helper.
+		 *
+		 * For operations that instantiate them, like the opening of a
+		 * direct socket, the application may either ask the kernel to
+		 * find a free one (as is done below), or the application may
+		 * manage the space itself and pass in an index for a currently
+		 * free slot in the table. If the kernel is asked to allocate a
+		 * free direct descriptor, note that io_uring does not abide by
+		 * the POSIX mandated "lowest free must be returned". It may
+		 * return any free descriptor of its choosing.
+		 */
+		sqe = get_sqe(&c->ring);
+		if (fixed_files)
+			io_uring_prep_socket_direct_alloc(sqe, domain, SOCK_STREAM, 0, 0);
+		else
+			io_uring_prep_socket(sqe, domain, SOCK_STREAM, 0, 0);
+		encode_userdata(sqe, c, __SOCK, 0, 0);
+	}
+}
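+
+/*
+ * A sketch of the alternative mentioned in the comment above, where the
+ * application manages the direct descriptor space itself and passes in
+ * an index ('pick_free_slot' is a hypothetical helper that returns a free
+ * index the app tracks):
+ *
+ *	unsigned slot = pick_free_slot();
+ *
+ *	sqe = get_sqe(ring);
+ *	io_uring_prep_socket_direct(sqe, domain, SOCK_STREAM, 0, slot, 0);
+ */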
+
+/*
+ * Start of connection, we got our in descriptor.
+ */
+static int handle_fd_pass(struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+	int fd = cqe_to_fd(cqe);
+
+	vlog("%d: got fd pass %d\n", c->tid, fd);
+	c->in_fd = fd;
+	open_socket(c);
+	return 0;
+}
+
+static int handle_stop(struct io_uring_cqe *cqe)
+{
+	struct conn *c = cqe_to_conn(cqe);
+
+	printf("Client %d: queueing shutdown\n", c->tid);
+	queue_cancel(&c->ring, c);
+	return 0;
+}
+
+/*
+ * Called for each CQE that we receive. Decode the request type that it
+ * came from, and call the appropriate handler.
+ */
+static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	int ret;
+
+	/*
+	 * Unlikely, but there's an error in this CQE. If an error handler
+	 * is defined, call it, and that will deal with it. If no error
+	 * handler is defined, the opcode handler either doesn't care or will
+	 * handle it on its own.
+	 */
+	if (cqe->res < 0) {
+		struct error_handler *err = &error_handlers[cqe_to_op(cqe)];
+
+		if (err->error_fn)
+			return err->error_fn(err, ring, cqe);
+	}
+
+	switch (cqe_to_op(cqe)) {
+	case __ACCEPT:
+		ret = handle_accept(ring, cqe);
+		break;
+	case __SOCK:
+		ret = handle_sock(ring, cqe);
+		break;
+	case __CONNECT:
+		ret = handle_connect(ring, cqe);
+		break;
+	case __RECV:
+	case __RECVMSG:
+		ret = handle_recv(ring, cqe);
+		break;
+	case __SEND:
+	case __SENDMSG:
+		ret = handle_send(ring, cqe);
+		break;
+	case __CANCEL:
+		ret = handle_cancel(ring, cqe);
+		break;
+	case __SHUTDOWN:
+		ret = handle_shutdown(ring, cqe);
+		break;
+	case __CLOSE:
+		ret = handle_close(ring, cqe);
+		break;
+	case __FD_PASS:
+		ret = handle_fd_pass(cqe);
+		break;
+	case __STOP:
+		ret = handle_stop(cqe);
+		break;
+	case __NOP:
+		ret = 0;
+		break;
+	default:
+		fprintf(stderr, "bad user data %lx\n", (long) cqe->user_data);
+		return 1;
+	}
+
+	return ret;
+}
+
+static void house_keeping(struct io_uring *ring)
+{
+	static unsigned long last_bytes;
+	unsigned long bytes, elapsed;
+	struct conn *c;
+	int i, j;
+
+	vlog("House keeping entered\n");
+
+	bytes = 0;
+	for (i = 0; i < nr_conns; i++) {
+		c = &conns[i];
+
+		for (j = 0; j < 2; j++) {
+			struct conn_dir *cd = &c->cd[j];
+
+			bytes += cd->in_bytes + cd->out_bytes;
+		}
+		if (c->flags & CONN_F_DISCONNECTED) {
+			vlog("%d: disconnected\n", i);
+
+			if (!(c->flags & CONN_F_REAPED)) {
+				void *ret;
+
+				pthread_join(c->thread, &ret);
+				c->flags |= CONN_F_REAPED;
+			}
+			continue;
+		}
+		if (c->flags & CONN_F_DISCONNECTING)
+			continue;
+
+		if (should_shutdown(c)) {
+			__close_conn(ring, c);
+			c->flags |= CONN_F_DISCONNECTING;
+		}
+	}
+
+	elapsed = mtime_since_now(&last_housekeeping);
+	if (bytes && elapsed >= 900) {
+		unsigned long bw;
+
+		bw = (8 * (bytes - last_bytes) / 1000UL) / elapsed;
+		if (bw) {
+			if (open_conns)
+				printf("Bandwidth (threads=%d): %'luMbit\n", open_conns, bw);
+			gettimeofday(&last_housekeeping, NULL);
+			last_bytes = bytes;
+		}
+	}
+}
+
+/*
+ * Event loop shared between the parent, and the connections. Could be
+ * split in two, as they don't handle the same types of events. For the per
+ * connection loop, 'c' is valid. For the main loop, it's NULL.
+ */
+static int __event_loop(struct io_uring *ring, struct conn *c)
+{
+	struct __kernel_timespec active_ts, idle_ts;
+	int flags;
+
+	idle_ts.tv_sec = 0;
+	idle_ts.tv_nsec = 100000000LL;
+	active_ts = idle_ts;
+	if (wait_usec > 1000000) {
+		active_ts.tv_sec = wait_usec / 1000000;
+		wait_usec -= active_ts.tv_sec * 1000000;
+	}
+	active_ts.tv_nsec = wait_usec * 1000;
+
+	gettimeofday(&last_housekeeping, NULL);
+
+	flags = 0;
+	while (1) {
+		struct __kernel_timespec *ts = &idle_ts;
+		struct io_uring_cqe *cqe;
+		unsigned int head;
+		int ret, i, to_wait;
+
+		/*
+		 * If wait_batch is set higher than 1, then we'll wait on
+		 * that amount of CQEs to be posted each loop. If used with
+		 * DEFER_TASKRUN, this can provide a substantial reduction
+		 * in context switch rate as the task isn't woken until the
+		 * requested number of events can be returned.
+		 *
+		 * Can be used with -t to set a wait_usec timeout as well.
+		 * For example, if an application can deal with 250 usec
+		 * of wait latencies, it can set -w8 -t250 which will cause
+		 * io_uring to return when either 8 events have been received,
+		 * or if 250 usec of waiting has passed.
+		 *
+		 * If we don't have any open connections, wait on just 1
+		 * always.
+		 */
+		to_wait = 1;
+		if (open_conns && !flags) {
+			ts = &active_ts;
+			to_wait = wait_batch;
+		}
+
+		vlog("Submit and wait for %d\n", to_wait);
+		ret = io_uring_submit_and_wait_timeout(ring, &cqe, to_wait, ts, NULL);
+
+		if (*ring->cq.koverflow)
+			printf("overflow %u\n", *ring->cq.koverflow);
+		if (*ring->sq.kflags & IORING_SQ_CQ_OVERFLOW)
+			printf("saw overflow\n");
+
+		vlog("Submit and wait: %d\n", ret);
+
+		i = flags = 0;
+		io_uring_for_each_cqe(ring, head, cqe) {
+			if (handle_cqe(ring, cqe))
+				return 1;
+			flags |= cqe_to_conn(cqe)->flags;
+			++i;
+		}
+
+		vlog("Handled %d events\n", i);
+
+		/*
+		 * Advance the CQ ring for seen events when we've processed
+		 * all of them in this loop. This can also be done with
+		 * io_uring_cqe_seen() in each handler above, which just marks
+		 * that single CQE as seen. However, it's more efficient to
+		 * mark a batch as seen when we're done with that batch.
+		 */
+		if (i) {
+			io_uring_cq_advance(ring, i);
+			events += i;
+		}
+
+		event_loops++;
+		if (c) {
+			if (c->flags & CONN_F_DISCONNECTED)
+				break;
+		} else {
+			house_keeping(ring);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Main event loop. Submit our multishot accept request, and then just loop
+ * around handling incoming connections.
+ */
+static int parent_loop(struct io_uring *ring, int fd)
+{
+	struct io_uring_sqe *sqe;
+
+	/*
+	 * proxy provides a way to use either multishot receive or not, but
+	 * for accept, we always use multishot. A multishot accept request
+	 * needs only be armed once, and then it'll trigger a completion and
+	 * post a CQE whenever a new connection is accepted. No need to do
+	 * anything else, unless the multishot accept terminates. This happens
+	 * if it encounters an error. Applications should check for
+	 * IORING_CQE_F_MORE in cqe->flags - this tells you if more completions
+	 * are expected from this request or not. Non-multishot never have
+	 * this set, where multishot will always have this set unless an error
+	 * occurs.
+	 */
+	sqe = get_sqe(ring);
+	if (fixed_files)
+		io_uring_prep_multishot_accept_direct(sqe, fd, NULL, NULL, 0);
+	else
+		io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0);
+	__encode_userdata(sqe, 0, __ACCEPT, 0, fd);
+
+	return __event_loop(ring, NULL);
+}
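+
+/*
+ * Should the multishot accept terminate (IORING_CQE_F_MORE cleared in its
+ * CQE), it would need re-arming before further connections can be
+ * accepted. A sketch of that check, which this example doesn't implement:
+ *
+ *	if (!(cqe->flags & IORING_CQE_F_MORE)) {
+ *		sqe = get_sqe(ring);
+ *		io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0);
+ *		__encode_userdata(sqe, 0, __ACCEPT, 0, fd);
+ *	}
+ */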
+
+static int init_ring(struct io_uring *ring, int nr_files)
+{
+	struct io_uring_params params;
+	int ret;
+
+	/*
+	 * By default, set us up with a big CQ ring. Not strictly needed
+	 * here, but it's very important to never overflow the CQ ring.
+	 * Events will not be dropped if this happens, but it does slow
+	 * the application down when dealing with overflowed events.
+	 *
+	 * Set SINGLE_ISSUER, which tells the kernel that only one thread
+	 * is doing IO submissions. This enables certain optimizations in
+	 * the kernel.
+	 */
+	memset(&params, 0, sizeof(params));
+	params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_CLAMP;
+	params.flags |= IORING_SETUP_CQSIZE;
+	params.cq_entries = 1024;
+
+	/*
+	 * If use_huge is set, setup the ring with IORING_SETUP_NO_MMAP. This
+	 * means that the application allocates the memory for the ring, and
+	 * the kernel maps it. The alternative is having the kernel allocate
+	 * the memory, and then liburing will mmap it. But we can't really
+	 * support huge pages that way. If this fails, then ensure that the
+	 * system has huge pages set aside upfront.
+	 */
+	if (use_huge)
+		params.flags |= IORING_SETUP_NO_MMAP;
+
+	/*
+	 * DEFER_TASKRUN decouples async event reaping and retrying from
+	 * regular system calls. If this isn't set, then io_uring uses
+	 * normal task_work for this. task_work is always being run on any
+	 * exit to userspace. Real applications do more than just call IO
+	 * related system calls, and hence we can be running this work way
+	 * too often. Using DEFER_TASKRUN defers any task_work running to
+	 * when the application enters the kernel anyway to wait on new
+	 * events. It's generally the preferred and recommended way to setup
+	 * a ring.
+	 */
+	if (defer_tw) {
+		params.flags |= IORING_SETUP_DEFER_TASKRUN;
+		sqpoll = 0;
+	}
+
+	/*
+	 * SQPOLL offloads any request submission and retry operations to a
+	 * dedicated thread. This enables an application to do IO without
+	 * ever having to enter the kernel itself. The SQPOLL thread will
+	 * stay busy as long as there's work to do, and go to sleep if
+	 * sq_thread_idle msecs have passed. If it's running, submitting new
+	 * IO just needs to make them visible to the SQPOLL thread; it need
+	 * not enter the kernel. For submission, the application will only
+	 * enter the kernel if the SQPOLL thread has been idle long enough
+	 * that it has gone to sleep.
+	 *
+	 * Waiting on events still requires entering the kernel, if none are
+	 * available. The application may also use io_uring_peek_cqe() to
+	 * check for new events without entering the kernel, as completions
+	 * will be continually produced to the CQ ring by the SQPOLL thread
+	 * as they occur.
+	 */
+	if (sqpoll) {
+		params.flags |= IORING_SETUP_SQPOLL;
+		params.sq_thread_idle = 1000;
+		defer_tw = 0;
+	}
+
+	/*
+	 * If neither DEFER_TASKRUN nor SQPOLL is used, set COOP_TASKRUN. This
+	 * avoids heavy signal based notifications, which can force an
+	 * application to enter the kernel and process it as soon as they
+	 * occur.
+	 */
+	if (!sqpoll && !defer_tw)
+		params.flags |= IORING_SETUP_COOP_TASKRUN;
+
+	/*
+	 * The SQ ring size need not be larger than any batch of requests
+	 * that need to be prepared before submit. Normally in a loop we'd
+	 * only need a few, if any, particularly if multishot is used.
+	 */
+	ret = io_uring_queue_init_params(ring_size, ring, &params);
+	if (ret) {
+		fprintf(stderr, "%s\n", strerror(-ret));
+		return 1;
+	}
+
+	/*
+	 * If send serialization is available and no option was given to use
+	 * it or not, default it to on. If it was turned on and the kernel
+	 * doesn't support it, turn it off.
+	 */
+	if (params.features & IORING_FEAT_SEND_BUF_SELECT) {
+		if (send_ring == -1)
+			send_ring = 1;
+	} else {
+		if (send_ring == 1) {
+			fprintf(stderr, "Kernel doesn't support ring provided "
+				"buffers for sends, disabled\n");
+		}
+		send_ring = 0;
+	}
+
+	if (!send_ring && snd_bundle) {
+		fprintf(stderr, "Can't use send bundle without send_ring\n");
+		snd_bundle = 0;
+	}
+
+	if (fixed_files) {
+		/*
+		 * If fixed files are used, we need to allocate a fixed file
+		 * table upfront where new direct descriptors can be managed.
+		 */
+		ret = io_uring_register_files_sparse(ring, nr_files);
+		if (ret) {
+			fprintf(stderr, "file register: %d\n", ret);
+			return 1;
+		}
+
+		/*
+		 * If fixed files are used, we also register the ring fd. See
+		 * comment near io_uring_prep_socket_direct_alloc() further
+		 * down. This avoids the fget/fput overhead associated with
+		 * the io_uring_enter(2) system call itself, which is used to
+		 * submit and wait on events.
+		 */
+		ret = io_uring_register_ring_fd(ring);
+		if (ret != 1) {
+			fprintf(stderr, "ring register: %d\n", ret);
+			return 1;
+		}
+	}
+
+	if (napi) {
+		struct io_uring_napi n = {
+			.prefer_busy_poll = napi > 1 ? 1 : 0,
+			.busy_poll_to = napi_timeout,
+		};
+
+		ret = io_uring_register_napi(ring, &n);
+		if (ret) {
+			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
+			if (ret != -EINVAL)
+				return 1;
+			fprintf(stderr, "NAPI not available, turned off\n");
+		}
+	}
+
+	return 0;
+}
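+
+/*
+ * For comparison, an application that just wants the recommended defaults
+ * described above, without any of the optional features, could get by
+ * with something as small as:
+ *
+ *	struct io_uring ring;
+ *	int ret;
+ *
+ *	ret = io_uring_queue_init(64, &ring, IORING_SETUP_SINGLE_ISSUER |
+ *					     IORING_SETUP_DEFER_TASKRUN);
+ */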
+
+static void *thread_main(void *data)
+{
+	struct conn *c = data;
+	int ret;
+
+	c->flags |= CONN_F_STARTED;
+
+	/* we need a max of 4 descriptors for each client */
+	ret = init_ring(&c->ring, 4);
+	if (ret)
+		goto done;
+
+	if (setup_buffer_rings(&c->ring, c))
+		goto done;
+
+	/*
+	 * If we're using fixed files, then we need to wait for the parent
+	 * to install the c->in_fd into our direct descriptor table. When
+	 * that happens, we'll set things up. If we're not using fixed files,
+	 * we can set up the receive or connect now.
+	 */
+	if (!fixed_files)
+		open_socket(c);
+
+	/* we're ready */
+	pthread_barrier_wait(&c->startup_barrier);
+
+	__event_loop(&c->ring, c);
+done:
+	return NULL;
+}
+
+static void usage(const char *name)
+{
+	printf("%s:\n", name);
+	printf("\t-m:\t\tUse multishot receive (%d)\n", recv_mshot);
+	printf("\t-d:\t\tUse DEFER_TASKRUN (%d)\n", defer_tw);
+	printf("\t-S:\t\tUse SQPOLL (%d)\n", sqpoll);
+	printf("\t-f:\t\tUse only fixed files (%d)\n", fixed_files);
+	printf("\t-a:\t\tUse huge pages for the ring (%d)\n", use_huge);
+	printf("\t-t:\t\tTimeout for waiting on CQEs (usec) (%d)\n", wait_usec);
+	printf("\t-w:\t\tNumber of CQEs to wait for each loop (%d)\n", wait_batch);
+	printf("\t-B:\t\tUse bi-directional mode (%d)\n", bidi);
+	printf("\t-s:\t\tAct only as a sink (%d)\n", is_sink);
+	printf("\t-q:\t\tRing size to use (%d)\n", ring_size);
+	printf("\t-H:\t\tHost to connect to (%s)\n", host);
+	printf("\t-r:\t\tPort to receive on (%d)\n", receive_port);
+	printf("\t-p:\t\tPort to connect to (%d)\n", send_port);
+	printf("\t-6:\t\tUse IPv6 (%d)\n", ipv6);
+	printf("\t-N:\t\tUse NAPI polling (%d)\n", napi);
+	printf("\t-T:\t\tNAPI timeout (usec) (%d)\n", napi_timeout);
+	printf("\t-b:\t\tSend/receive buf size (%d)\n", buf_size);
+	printf("\t-n:\t\tNumber of provided buffers (pow2) (%d)\n", nr_bufs);
+	printf("\t-u:\t\tUse provided buffers for send (%d)\n", send_ring);
+	printf("\t-C:\t\tUse bundles for send (%d)\n", snd_bundle);
+	printf("\t-z:\t\tUse zerocopy send (%d)\n", snd_zc);
+	printf("\t-c:\t\tUse bundles for recv (%d)\n", snd_bundle);
+	printf("\t-M:\t\tUse sendmsg (%d)\n", snd_msg);
+	printf("\t-M:\t\tUse recvmsg (%d)\n", rcv_msg);
+	printf("\t-x:\t\tShow extended stats (%d)\n", ext_stat);
+	printf("\t-V:\t\tIncrease verbosity (%d)\n", verbose);
+}
+
+/*
+ * Option parsing and ring / net setup
+ */
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct sigaction sa = { };
+	const char *optstring;
+	int opt, ret, fd;
+
+	setlocale(LC_NUMERIC, "en_US");
+
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 0) {
+		perror("sysconf(_SC_PAGESIZE)");
+		return 1;
+	}
+
+	pthread_mutex_init(&thread_lock, NULL);
+
+	optstring = "m:d:S:s:b:f:H:r:p:n:B:N:T:w:t:M:R:u:c:C:q:a:x:z:i:6Vh?";
+	while ((opt = getopt(argc, argv, optstring)) != -1) {
+		switch (opt) {
+		case 'm':
+			recv_mshot = !!atoi(optarg);
+			break;
+		case 'S':
+			sqpoll = !!atoi(optarg);
+			break;
+		case 'd':
+			defer_tw = !!atoi(optarg);
+			break;
+		case 'b':
+			buf_size = atoi(optarg);
+			break;
+		case 'n':
+			nr_bufs = atoi(optarg);
+			break;
+		case 'u':
+			send_ring = !!atoi(optarg);
+			break;
+		case 'c':
+			rcv_bundle = !!atoi(optarg);
+			break;
+		case 'C':
+			snd_bundle = !!atoi(optarg);
+			break;
+		case 'w':
+			wait_batch = atoi(optarg);
+			break;
+		case 't':
+			wait_usec = atoi(optarg);
+			break;
+		case 's':
+			is_sink = !!atoi(optarg);
+			break;
+		case 'f':
+			fixed_files = !!atoi(optarg);
+			break;
+		case 'H':
+			host = strdup(optarg);
+			break;
+		case 'r':
+			receive_port = atoi(optarg);
+			break;
+		case 'p':
+			send_port = atoi(optarg);
+			break;
+		case 'B':
+			bidi = !!atoi(optarg);
+			break;
+		case 'N':
+			napi = !!atoi(optarg);
+			break;
+		case 'T':
+			napi_timeout = atoi(optarg);
+			break;
+		case '6':
+			ipv6 = true;
+			break;
+		case 'M':
+			snd_msg = !!atoi(optarg);
+			break;
+		case 'z':
+			snd_zc = !!atoi(optarg);
+			break;
+		case 'R':
+			rcv_msg = !!atoi(optarg);
+			break;
+		case 'q':
+			ring_size = atoi(optarg);
+			break;
+		case 'i':
+			buf_ring_inc = !!atoi(optarg);
+			break;
+		case 'a':
+			use_huge = !!atoi(optarg);
+			break;
+		case 'x':
+			ext_stat = !!atoi(optarg);
+			break;
+		case 'V':
+			verbose++;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (bidi && is_sink) {
+		fprintf(stderr, "Can't be both bidi proxy and sink\n");
+		return 1;
+	}
+	if (snd_msg && sqpoll) {
+		fprintf(stderr, "SQPOLL with msg variants disabled\n");
+		snd_msg = 0;
+	}
+	if (rcv_msg && rcv_bundle) {
+		fprintf(stderr, "Can't use bundles with recvmsg\n");
+		rcv_msg = 0;
+	}
+	if (snd_msg && snd_bundle) {
+		fprintf(stderr, "Can't use bundles with sendmsg\n");
+		snd_msg = 0;
+	}
+	if (snd_msg && send_ring) {
+		fprintf(stderr, "Can't use send ring sendmsg\n");
+		snd_msg = 0;
+	}
+	if (snd_zc && (send_ring || snd_bundle)) {
+		fprintf(stderr, "Can't use send zc with bundles or ring\n");
+		send_ring = snd_bundle = 0;
+	}
+	/*
+	 * For recvmsg w/multishot, we waste some data at the head of the
+	 * packet every time. Adjust the buffer size to account for that,
+	 * so that we still hand out 'buf_size' bytes of actual payload.
+	 */
+	if (rcv_msg && recv_mshot) {
+		fprintf(stderr, "Adjusted buf size for recvmsg w/multishot\n");
+		buf_size += sizeof(struct io_uring_recvmsg_out);
+	}
+
+	br_mask = nr_bufs - 1;
+
+	fd = setup_listening_socket(receive_port, ipv6);
+	if (is_sink)
+		send_port = -1;
+
+	if (fd == -1)
+		return 1;
+
+	atexit(show_stats);
+	sa.sa_handler = sig_int;
+	sa.sa_flags = SA_RESTART;
+	sigaction(SIGINT, &sa, NULL);
+
+	ret = init_ring(&ring, MAX_CONNS * 3);
+	if (ret)
+		return ret;
+
+	printf("Backend: sqpoll=%d, defer_tw=%d, fixed_files=%d, "
+		"is_sink=%d, buf_size=%d, nr_bufs=%d, host=%s, send_port=%d, "
+		"receive_port=%d, napi=%d, napi_timeout=%d, huge_page=%d\n",
+			sqpoll, defer_tw, fixed_files, is_sink,
+			buf_size, nr_bufs, host, send_port, receive_port,
+			napi, napi_timeout, use_huge);
+	printf(" recv options: recvmsg=%d, recv_mshot=%d, recv_bundle=%d\n",
+			rcv_msg, recv_mshot, rcv_bundle);
+	printf(" send options: sendmsg=%d, send_ring=%d, send_bundle=%d, "
+		"send_zerocopy=%d\n", snd_msg, send_ring, snd_bundle,
+			snd_zc);
+
+	return parent_loop(&ring, fd);
+}
diff --git a/examples/proxy.h b/examples/proxy.h
new file mode 100644
index 0000000..3fa187b
--- /dev/null
+++ b/examples/proxy.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_PROXY_H
+#define LIBURING_PROXY_H
+
+#include <sys/time.h>
+
+/*
+ * Generic opcode agnostic encoding to sqe/cqe->user_data
+ */
+struct userdata {
+	union {
+		struct {
+			uint16_t op_tid; /* 4 bits op, 12 bits tid */
+			uint16_t bid;
+			uint16_t fd;
+		};
+		uint64_t val;
+	};
+};
+
+#define OP_SHIFT	(12)
+#define TID_MASK	((1U << 12) - 1)
+
+/*
+ * Packs the information that we will need at completion time into the
+ * sqe->user_data field, which is passed back in the completion in
+ * cqe->user_data. Some apps would need more space than this - in fact,
+ * I'd love to pack the requested IO size in here as well. It's not
+ * uncommon to see apps use this field as just a cookie that either
+ * indexes a data structure at completion time, or simply holds a
+ * pointer to the associated structure.
+ */
+static inline void __encode_userdata(struct io_uring_sqe *sqe, int tid, int op,
+				     int bid, int fd)
+{
+	struct userdata ud = {
+		.op_tid = (op << OP_SHIFT) | tid,
+		.bid = bid,
+		.fd = fd
+	};
+
+	io_uring_sqe_set_data64(sqe, ud.val);
+}
+
+static inline uint64_t __raw_encode(int tid, int op, int bid, int fd)
+{
+	struct userdata ud = {
+		.op_tid = (op << OP_SHIFT) | tid,
+		.bid = bid,
+		.fd = fd
+	};
+
+	return ud.val;
+}
+
+static inline int cqe_to_op(struct io_uring_cqe *cqe)
+{
+	struct userdata ud = { .val = cqe->user_data };
+
+	return ud.op_tid >> OP_SHIFT;
+}
+
+static inline int cqe_to_bid(struct io_uring_cqe *cqe)
+{
+	struct userdata ud = { .val = cqe->user_data };
+
+	return ud.bid;
+}
+
+static inline int cqe_to_fd(struct io_uring_cqe *cqe)
+{
+	struct userdata ud = { .val = cqe->user_data };
+
+	return ud.fd;
+}
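+
+/*
+ * Example round trip with hypothetical values - encoding tid 3, op 2,
+ * bid 7, fd 5 packs into a single 64-bit cookie, and each decoder above
+ * recovers its field:
+ *
+ *	uint64_t val = __raw_encode(3, 2, 7, 5);
+ *	struct io_uring_cqe cqe = { .user_data = val };
+ *
+ *	cqe_to_op(&cqe) == 2; cqe_to_bid(&cqe) == 7; cqe_to_fd(&cqe) == 5;
+ */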
+
+static unsigned long long mtime_since(const struct timeval *s,
+				      const struct timeval *e)
+{
+	long long sec, usec;
+
+	sec = e->tv_sec - s->tv_sec;
+	usec = (e->tv_usec - s->tv_usec);
+	if (sec > 0 && usec < 0) {
+		sec--;
+		usec += 1000000;
+	}
+
+	sec *= 1000;
+	usec /= 1000;
+	return sec + usec;
+}
+
+static unsigned long long mtime_since_now(struct timeval *tv)
+{
+	struct timeval end;
+
+	gettimeofday(&end, NULL);
+	return mtime_since(tv, &end);
+}
+
+#endif
diff --git a/examples/rsrc-update-bench.c b/examples/rsrc-update-bench.c
new file mode 100644
index 0000000..5e3cd99
--- /dev/null
+++ b/examples/rsrc-update-bench.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: MIT */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+
+static unsigned long runtime_ms = 10000;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+int main(void)
+{
+	unsigned long tstop;
+	unsigned long nr_reqs = 0;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int pipe1[2];
+	int ret, i, qd = 32;
+	int table_size = 128;
+
+	if (pipe(pipe1) != 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	ret = io_uring_queue_init(1024, &ring, IORING_SETUP_SINGLE_ISSUER |
+					       IORING_SETUP_DEFER_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init failed: %d\n", ret);
+		return 1;
+	}
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_register_ring_fd failed\n");
+		return 1;
+	}
+	ret = io_uring_register_files_sparse(&ring, table_size);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_register_files_sparse failed\n");
+		return 1;
+	}
+
+	for (i = 0; i < table_size; i++) {
+		ret = io_uring_register_files_update(&ring, i, pipe1, 1);
+		if (ret < 0) {
+			fprintf(stderr, "io_uring_register_files_update failed\n");
+			return 1;
+		}
+	}
+
+	srand(time(NULL));
+
+	tstop = gettimeofday_ms() + runtime_ms;
+	do {
+		int off = rand();
+
+		for (i = 0; i < qd; i++) {
+			int roff = (off + i) % table_size;
+
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_files_update(sqe, pipe1, 1, roff);
+		}
+
+		ret = io_uring_submit(&ring);
+		if (ret != qd) {
+			fprintf(stderr, "child: sqe submit failed: %d\n", ret);
+			return 1;
+		}
+
+		for (i = 0; i < qd; i++) {
+			ret = io_uring_wait_cqe(&ring, &cqe);
+			if (ret < 0) {
+				fprintf(stderr, "child: wait completion %d\n", ret);
+				break;
+			}
+			io_uring_cqe_seen(&ring, cqe);
+			nr_reqs++;
+		}
+	} while (gettimeofday_ms() < tstop);
+
+	fprintf(stderr, "max updates/s: %lu\n", nr_reqs * 1000UL / runtime_ms);
+
+	io_uring_queue_exit(&ring);
+	close(pipe1[0]);
+	close(pipe1[1]);
+	return 0;
+}
diff --git a/examples/send-zerocopy.c b/examples/send-zerocopy.c
new file mode 100644
index 0000000..691a0cc
--- /dev/null
+++ b/examples/send-zerocopy.c
@@ -0,0 +1,658 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <poll.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <signal.h>
+
+#include "liburing.h"
+
+#define ZC_TAG 0xfffffffULL
+#define MAX_SUBMIT_NR 512
+#define MAX_THREADS 100
+
+struct thread_data {
+	pthread_t thread;
+	void *ret;
+	int idx;
+	unsigned long long packets;
+	unsigned long long bytes;
+	unsigned long long dt_ms;
+	struct sockaddr_storage dst_addr;
+	int fd;
+};
+
+static bool cfg_reg_ringfd = true;
+static bool cfg_fixed_files = 1;
+static bool cfg_zc = 1;
+static int  cfg_nr_reqs = 8;
+static bool cfg_fixed_buf = 1;
+static bool cfg_hugetlb = 0;
+static bool cfg_defer_taskrun = 0;
+static int  cfg_cpu = -1;
+static bool cfg_rx = 0;
+static unsigned  cfg_nr_threads = 1;
+
+static int  cfg_family		= PF_UNSPEC;
+static int  cfg_type		= 0;
+static int  cfg_payload_len;
+static int  cfg_port		= 8000;
+static int  cfg_runtime_ms	= 4200;
+static bool cfg_rx_poll		= false;
+
+static socklen_t cfg_alen;
+static char *str_addr = NULL;
+
+static char payload_buf[IP_MAXPACKET] __attribute__((aligned(4096)));
+static char *payload;
+static struct thread_data threads[MAX_THREADS];
+static pthread_barrier_t barrier;
+
+static bool should_stop = false;
+
+static void sigint_handler(__attribute__((__unused__)) int sig)
+{
+	/* kill if should_stop can't unblock threads fast enough */
+	if (should_stop)
+		_exit(-1);
+	should_stop = true;
+}
+
+/*
+ * Implementation of error(3), prints an error message and exits.
+ */
+static void t_error(int status, int errnum, const char *format, ...)
+{
+	va_list args;
+	va_start(args, format);
+
+	vfprintf(stderr, format, args);
+	if (errnum)
+		fprintf(stderr, ": %s", strerror(errnum));
+
+	fprintf(stderr, "\n");
+	va_end(args);
+	exit(status);
+}
+
+static void set_cpu_affinity(void)
+{
+	cpu_set_t mask;
+
+	if (cfg_cpu == -1)
+		return;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cfg_cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		t_error(1, errno, "unable to pin cpu\n");
+}
+
+static void set_iowq_affinity(struct io_uring *ring)
+{
+	cpu_set_t mask;
+	int ret;
+
+	if (cfg_cpu == -1)
+		return;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cfg_cpu, &mask);
+	ret = io_uring_register_iowq_aff(ring, 1, &mask);
+	if (ret)
+		t_error(1, -ret, "unable to set io-wq affinity\n");
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		t_error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+	int port = cfg_port;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			t_error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			t_error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		t_error(1, 0, "illegal domain");
+	}
+}
+
+static int do_poll(int fd, int events)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = events;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	ret = poll(&pfd, 1, -1);
+	if (ret == -1)
+		t_error(1, errno, "poll");
+
+	return ret && (pfd.revents & events);
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static int do_flush_tcp(struct thread_data *td, int fd)
+{
+	int ret;
+
+	/* MSG_TRUNC flushes up to len bytes */
+	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+	if (ret == -1 && errno == EAGAIN)
+		return 0;
+	if (ret == -1)
+		t_error(1, errno, "flush");
+	if (!ret)
+		return 1;
+
+	td->packets++;
+	td->bytes += ret;
+	return 0;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static int do_flush_datagram(struct thread_data *td, int fd)
+{
+	long ret, off = 0;
+	char buf[64];
+
+	/* MSG_TRUNC will return full datagram length */
+	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+	if (ret == -1 && errno == EAGAIN)
+		return 0;
+
+	if (ret == -1)
+		t_error(1, errno, "recv");
+	if (ret != cfg_payload_len)
+		t_error(1, 0, "recv: ret=%ld != %d", ret, cfg_payload_len);
+	if ((unsigned long) ret > sizeof(buf) - off)
+		ret = sizeof(buf) - off;
+	if (memcmp(buf + off, payload, ret))
+		t_error(1, 0, "recv: data mismatch");
+
+	td->packets++;
+	td->bytes += cfg_payload_len;
+	return 0;
+}
+
+static void do_setup_rx(int domain, int type, int protocol)
+{
+	struct sockaddr_storage addr = {};
+	struct thread_data *td;
+	int listen_fd, fd;
+	unsigned int i;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		t_error(1, errno, "socket r");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+	setup_sockaddr(cfg_family, str_addr, &addr);
+
+	if (bind(fd, (void *)&addr, cfg_alen))
+		t_error(1, errno, "bind");
+
+	if (type != SOCK_STREAM) {
+		if (cfg_nr_threads != 1)
+			t_error(1, 0, "udp rx cant multithread");
+		threads[0].fd = fd;
+		return;
+	}
+
+	listen_fd = fd;
+	if (listen(listen_fd, cfg_nr_threads))
+		t_error(1, errno, "listen");
+
+	for (i = 0; i < cfg_nr_threads; i++) {
+		td = &threads[i];
+
+		fd = accept(listen_fd, NULL, NULL);
+		if (fd == -1)
+			t_error(1, errno, "accept");
+		td->fd = fd;
+	}
+
+	if (close(listen_fd))
+		t_error(1, errno, "close listen sock");
+}
+
+static void *do_rx(void *arg)
+{
+	struct thread_data *td = arg;
+	const int cfg_receiver_wait_ms = 400;
+	uint64_t tstop;
+	int ret, fd = td->fd;
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
+	do {
+		if (cfg_type == SOCK_STREAM)
+			ret = do_flush_tcp(td, fd);
+		else
+			ret = do_flush_datagram(td, fd);
+
+		if (ret)
+			break;
+
+		do_poll(fd, POLLIN);
+	} while (gettimeofday_ms() < tstop);
+
+	if (close(fd))
+		t_error(1, errno, "close");
+	pthread_exit(&td->ret);
+	return NULL;
+}
+
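+/* Fetch a CQE: peek the ring first to avoid a syscall, else wait for one */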
+static inline struct io_uring_cqe *wait_cqe_fast(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	unsigned head;
+	int ret;
+
+	io_uring_for_each_cqe(ring, head, cqe)
+		return cqe;
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret)
+		t_error(1, ret, "wait cqe");
+	return cqe;
+}
+
+static void do_tx(struct thread_data *td, int domain, int type, int protocol)
+{
+	const int notif_slack = 128;
+	struct io_uring ring;
+	struct iovec iov;
+	uint64_t tstart;
+	int i, fd, ret;
+	int compl_cqes = 0;
+	int ring_flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_SINGLE_ISSUER;
+	unsigned loop = 0;
+
+	if (cfg_defer_taskrun)
+		ring_flags |= IORING_SETUP_DEFER_TASKRUN;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		t_error(1, errno, "socket t");
+
+	if (connect(fd, (void *)&td->dst_addr, cfg_alen))
+		t_error(1, errno, "connect, idx %i", td->idx);
+
+	ret = io_uring_queue_init(512, &ring, ring_flags);
+	if (ret)
+		t_error(1, ret, "io_uring: queue init");
+
+	set_cpu_affinity();
+	set_iowq_affinity(&ring);
+
+	if (cfg_fixed_files) {
+		ret = io_uring_register_files(&ring, &fd, 1);
+		if (ret < 0)
+			t_error(1, ret, "io_uring: files registration");
+	}
+	if (cfg_reg_ringfd) {
+		ret = io_uring_register_ring_fd(&ring);
+		if (ret < 0)
+			t_error(1, ret, "io_uring: io_uring_register_ring_fd");
+	}
+
+	iov.iov_base = payload;
+	iov.iov_len = cfg_payload_len;
+
+	ret = io_uring_register_buffers(&ring, &iov, 1);
+	if (ret)
+		t_error(1, ret, "io_uring: buffer registration");
+
+	if (cfg_rx_poll) {
+		struct io_uring_sqe *sqe;
+
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_poll_add(sqe, fd, POLLIN);
+
+		ret = io_uring_submit(&ring);
+		if (ret != 1)
+			t_error(1, ret, "submit poll");
+	}
+
+	pthread_barrier_wait(&barrier);
+
+	tstart = gettimeofday_ms();
+	do {
+		struct io_uring_sqe *sqe;
+		struct io_uring_cqe *cqe;
+		unsigned buf_idx = 0;
+		unsigned msg_flags = MSG_WAITALL;
+
+		for (i = 0; i < cfg_nr_reqs; i++) {
+			sqe = io_uring_get_sqe(&ring);
+
+			if (!cfg_zc)
+				io_uring_prep_send(sqe, fd, payload,
+						   cfg_payload_len, 0);
+			else {
+				io_uring_prep_send_zc(sqe, fd, payload,
+						     cfg_payload_len, msg_flags, 0);
+				if (cfg_fixed_buf) {
+					sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+					sqe->buf_index = buf_idx;
+				}
+			}
+			sqe->user_data = 1;
+			if (cfg_fixed_files) {
+				sqe->fd = 0;
+				sqe->flags |= IOSQE_FIXED_FILE;
+			}
+		}
+
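+		/*
+		 * With DEFER_TASKRUN, CQEs are only posted when the task asks
+		 * for events; once enough notification CQEs are outstanding,
+		 * flush them as part of the submit.
+		 */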
+		if (cfg_defer_taskrun && compl_cqes >= notif_slack)
+			ret = io_uring_submit_and_get_events(&ring);
+		else
+			ret = io_uring_submit(&ring);
+
+		if (ret != cfg_nr_reqs)
+			t_error(1, ret, "submit");
+
+		for (i = 0; i < cfg_nr_reqs; i++) {
+			cqe = wait_cqe_fast(&ring);
+
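+			/*
+			 * Zero-copy sends post a separate notification CQE
+			 * (F_NOTIF) once the kernel is done with the buffer;
+			 * it doesn't count against the request completions.
+			 */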
+			if (cqe->flags & IORING_CQE_F_NOTIF) {
+				if (cqe->flags & IORING_CQE_F_MORE)
+					t_error(1, -EINVAL, "F_MORE notif");
+				compl_cqes--;
+				i--;
+				io_uring_cqe_seen(&ring, cqe);
+				continue;
+			}
+			if (cqe->flags & IORING_CQE_F_MORE)
+				compl_cqes++;
+
+			if (cqe->res >= 0) {
+				td->packets++;
+				td->bytes += cqe->res;
+			} else if (cqe->res == -ECONNREFUSED || cqe->res == -EPIPE ||
+				   cqe->res == -ECONNRESET) {
+				fprintf(stderr, "Connection failure\n");
+				goto out_fail;
+			} else if (cqe->res != -EAGAIN) {
+				t_error(1, cqe->res, "send failed");
+			}
+			io_uring_cqe_seen(&ring, cqe);
+		}
+		if (should_stop)
+			break;
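+		/* consult the clock only every 16 batches of requests */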
+	} while ((++loop % 16 != 0) || gettimeofday_ms() < tstart + cfg_runtime_ms);
+
+	td->dt_ms = gettimeofday_ms() - tstart;
+
+out_fail:
+	shutdown(fd, SHUT_RDWR);
+	if (close(fd))
+		t_error(1, errno, "close");
+
+	while (compl_cqes) {
+		struct io_uring_cqe *cqe = wait_cqe_fast(&ring);
+
+		io_uring_cqe_seen(&ring, cqe);
+		compl_cqes--;
+	}
+	io_uring_queue_exit(&ring);
+}
+
+static void *do_test(void *arg)
+{
+	struct thread_data *td = arg;
+	int protocol = 0;
+
+	setup_sockaddr(cfg_family, str_addr, &td->dst_addr);
+
+	do_tx(td, cfg_family, cfg_type, protocol);
+	pthread_exit(&td->ret);
+	return NULL;
+}
+
+static void usage(const char *filepath)
+{
+	printf("Usage:\t%s <protocol> <ip-version> -D<addr> [options]\n", filepath);
+	printf("\t%s <protocol> <ip-version> -R [options]\n\n", filepath);
+
+	printf("  -4\t\tUse IPv4\n");
+	printf("  -6\t\tUse IPv6\n");
+	printf("  -D <address>\tDestination address\n");
+	printf("  -p <port>\tServer port to listen on/connect to\n");
+	printf("  -s <size>\tBytes per request\n");
+	printf("  -n <nr>\tNumber of parallel requests\n");
+	printf("  -z <mode>\tZerocopy mode, 0 to disable, enabled otherwise\n");
+	printf("  -b <mode>\tUse registered buffers\n");
+	printf("  -l <mode>\tUse huge pages\n");
+	printf("  -d\t\tUse defer taskrun\n");
+	printf("  -C <cpu>\tPin to the specified CPU\n");
+	printf("  -T <nr>\tNumber of threads to use for sending\n");
+	printf("  -R\t\tPlay the server role\n");
+	printf("  -t <seconds>\tTime in seconds\n");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = IP_MAXPACKET -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	int c;
+	char *daddr = NULL;
+
+	if (argc <= 1) {
+		usage(argv[0]);
+		exit(0);
+	}
+
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46D:p:s:t:n:z:b:l:dC:T:Ry")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				t_error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				t_error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'n':
+			cfg_nr_reqs = strtoul(optarg, NULL, 0);
+			break;
+		case 'z':
+			cfg_zc = strtoul(optarg, NULL, 0);
+			break;
+		case 'b':
+			cfg_fixed_buf = strtoul(optarg, NULL, 0);
+			break;
+		case 'l':
+			cfg_hugetlb = strtoul(optarg, NULL, 0);
+			break;
+		case 'd':
+			cfg_defer_taskrun = 1;
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'T':
+			cfg_nr_threads = strtol(optarg, NULL, 0);
+			if (cfg_nr_threads > MAX_THREADS)
+				t_error(1, 0, "too many threads\n");
+			break;
+		case 'R':
+			cfg_rx = 1;
+			break;
+		case 'y':
+			cfg_rx_poll = 1;
+			break;
+		}
+	}
+
+	if (cfg_nr_reqs > MAX_SUBMIT_NR)
+		t_error(1, 0, "-n: submit batch nr exceeds max (%d)", MAX_SUBMIT_NR);
+	if (cfg_payload_len > max_payload_len)
+		t_error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+
+	str_addr = daddr;
+
+	if (optind != argc - 1)
+		usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long long tsum = 0;
+	unsigned long long packets = 0, bytes = 0;
+	struct thread_data *td;
+	const char *cfg_test;
+	unsigned int i;
+	void *res;
+
+	parse_opts(argc, argv);
+	set_cpu_affinity();
+
+	payload = payload_buf;
+	if (cfg_hugetlb) {
+		payload = mmap(NULL, 2*1024*1024, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
+				-1, 0);
+		if (payload == MAP_FAILED) {
+			fprintf(stderr, "hugetlb alloc failed\n");
+			return 1;
+		}
+	}
+
+	cfg_test = argv[argc - 1];
+	if (!strcmp(cfg_test, "tcp"))
+		cfg_type = SOCK_STREAM;
+	else if (!strcmp(cfg_test, "udp"))
+		cfg_type = SOCK_DGRAM;
+	else
+		t_error(1, 0, "unknown cfg_test %s", cfg_test);
+
+	pthread_barrier_init(&barrier, NULL, cfg_nr_threads);
+
+	for (i = 0; i < IP_MAXPACKET; i++)
+		payload[i] = 'a' + (i % 26);
+
+	for (i = 0; i < cfg_nr_threads; i++) {
+		td = &threads[i];
+		td->idx = i;
+	}
+
+	if (cfg_rx)
+		do_setup_rx(cfg_family, cfg_type, 0);
+
+	if (!cfg_rx)
+		signal(SIGINT, sigint_handler);
+
+	for (i = 0; i < cfg_nr_threads; i++)
+		pthread_create(&threads[i].thread, NULL,
+				!cfg_rx ? do_test : do_rx, &threads[i]);
+
+	for (i = 0; i < cfg_nr_threads; i++) {
+		td = &threads[i];
+		pthread_join(td->thread, &res);
+		packets += td->packets;
+		bytes += td->bytes;
+		tsum += td->dt_ms;
+	}
+	tsum = tsum / cfg_nr_threads;
+
+	if (!tsum) {
+		printf("The run is too short, can't gather stats\n");
+	} else {
+		printf("packets=%llu (MB=%llu), rps=%llu (MB/s=%llu)\n",
+			packets, bytes >> 20,
+			packets * 1000 / tsum,
+			(bytes >> 20) * 1000 / tsum);
+	}
+	pthread_barrier_destroy(&barrier);
+	return 0;
+}
diff --git a/examples/ucontext-cp.c b/examples/ucontext-cp.c
index 281013f..d17aae7 100644
--- a/examples/ucontext-cp.c
+++ b/examples/ucontext-cp.c
@@ -68,23 +68,8 @@
 DEFINE_AWAIT_OP(writev)
 #undef DEFINE_AWAIT_OP
 
-int await_poll(async_context *pctx, int fd, short poll_mask) {
-	struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring);
-	struct io_uring_cqe *cqe;
-	if (!sqe)
-		return -1;
-
-	io_uring_prep_poll_add(sqe, fd, poll_mask);
-	io_uring_sqe_set_data(sqe, pctx);
-	swapcontext(&pctx->ctx_fnew, &pctx->ctx_main);
-	io_uring_peek_cqe(pctx->ring, &cqe);
-	assert(cqe);
-	io_uring_cqe_seen(pctx->ring, cqe);
-
-	return cqe->res;
-}
-
-int await_delay(async_context *pctx, time_t seconds) {
+static int await_delay(async_context *pctx, time_t seconds)
+{
 	struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring);
 	struct io_uring_cqe *cqe;
 	struct __kernel_timespec ts = {
diff --git a/liburing-ffi.pc.in b/liburing-ffi.pc.in
new file mode 100644
index 0000000..71943ac
--- /dev/null
+++ b/liburing-ffi.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=${prefix}
+libdir=@libdir@
+includedir=@includedir@
+
+Name: @NAME@
+Version: @VERSION@
+Description: io_uring FFI library
+URL: https://git.kernel.dk/cgit/liburing/
+
+Libs: -L${libdir} -luring-ffi
+Cflags: -I${includedir}
diff --git a/liburing.pc.in b/liburing.pc.in
index e621939..2c879b2 100644
--- a/liburing.pc.in
+++ b/liburing.pc.in
@@ -6,7 +6,7 @@
 Name: @NAME@
 Version: @VERSION@
 Description: io_uring library
-URL: http://git.kernel.dk/cgit/liburing/
+URL: https://git.kernel.dk/cgit/liburing/
 
 Libs: -L${libdir} -luring
 Cflags: -I${includedir}
diff --git a/liburing.spec b/liburing.spec
index df62d2f..ee2f4f8 100644
--- a/liburing.spec
+++ b/liburing.spec
@@ -1,5 +1,5 @@
 Name: liburing
-Version: 2.2
+Version: 2.8
 Release: 1%{?dist}
 Summary: Linux-native io_uring I/O access library
 License: (GPLv2 with exceptions and LGPLv2+) or MIT
@@ -27,7 +27,7 @@
 
 %build
 %set_build_flags
-./configure --prefix=%{_prefix} --libdir=/%{_libdir} --libdevdir=/%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir}
+./configure --prefix=%{_prefix} --libdir=%{_libdir} --libdevdir=%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir}
 
 %make_build
 
diff --git a/make-debs.sh b/make-debs.sh
index 01d563c..aa0974d 100755
--- a/make-debs.sh
+++ b/make-debs.sh
@@ -13,16 +13,18 @@
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 #
 set -xe
 
 # Create dir for build
 base=${1:-/tmp/release}
-codename=$(lsb_release -sc)
+distro=unstable
 releasedir=$base/$(lsb_release -si)/liburing
 rm -rf $releasedir
 mkdir -p $releasedir
+HEAD=$(which head)
+DCH=$(which dch)
 
 src_dir=$(readlink -e `basename $0`)
 liburing_dir=$(dirname $src_dir)
@@ -38,12 +40,12 @@
 git clean -dxf
 
 # Change changelog if it's needed
-cur_ver=`head -l debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'`
+cur_ver=`$HEAD < debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'`
 if [ "$cur_ver" != "$version-1" ]; then
-	dch -D $codename --force-distribution -b -v "$version-1" "new version"
+	$DCH -D $distro --force-distribution -b -v "$version-1" "new version"
 fi
 
-# Create tar archieve
+# Create tar archive
 cd ../
 tar cvzf ${outfile}.tar.gz ${outfile}
 ln -s ${outfile}.tar.gz ${orgfile}.orig.tar.gz
diff --git a/man/IO_URING_CHECK_VERSION.3 b/man/IO_URING_CHECK_VERSION.3
new file mode 120000
index 0000000..21bbf45
--- /dev/null
+++ b/man/IO_URING_CHECK_VERSION.3
@@ -0,0 +1 @@
+io_uring_check_version.3
\ No newline at end of file
diff --git a/man/IO_URING_VERSION_MAJOR.3 b/man/IO_URING_VERSION_MAJOR.3
new file mode 120000
index 0000000..21bbf45
--- /dev/null
+++ b/man/IO_URING_VERSION_MAJOR.3
@@ -0,0 +1 @@
+io_uring_check_version.3
\ No newline at end of file
diff --git a/man/IO_URING_VERSION_MINOR.3 b/man/IO_URING_VERSION_MINOR.3
new file mode 120000
index 0000000..21bbf45
--- /dev/null
+++ b/man/IO_URING_VERSION_MINOR.3
@@ -0,0 +1 @@
+io_uring_check_version.3
\ No newline at end of file
diff --git a/man/__io_uring_buf_ring_cq_advance.3 b/man/__io_uring_buf_ring_cq_advance.3
new file mode 120000
index 0000000..4b3a1e5
--- /dev/null
+++ b/man/__io_uring_buf_ring_cq_advance.3
@@ -0,0 +1 @@
+io_uring_buf_ring_cq_advance.3
\ No newline at end of file
diff --git a/man/io_uring.7 b/man/io_uring.7
index 8c71d93..ee1ff56 100644
--- a/man/io_uring.7
+++ b/man/io_uring.7
@@ -2,7 +2,7 @@
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
 
-.TH IO_URING 7 2020-07-26 "Linux" "Linux Programmer's Manual"
+.TH io_uring 7 2020-07-26 "Linux" "Linux Programmer's Manual"
 .SH NAME
 io_uring \- Asynchronous I/O facility
 .SH SYNOPSIS
@@ -55,6 +55,16 @@
 the equivalent of a system call you would have made otherwise,
 if you were not using
 .BR io_uring .
+For instance,
+a SQE with the
+.I opcode
+set to
+.B IORING_OP_READ
+will request a read operation to be issued that is similar to the
+.BR read (2)
+system call. Refer to the opcode documentation in
+.BR io_uring_enter (2)
+for all supported opcodes and their properties.
 You can add more than one SQE to the queue depending on the number of
 operations you want to request.
 .IP \(bu
@@ -79,21 +89,9 @@
 call's equivalent,
 had you used it directly without using 
 .BR io_uring .
-For instance,
-a read operation under 
-.BR io_uring ,
-started with the
-.BR IORING_OP_READ
-operation, issues the equivalent of the
-.BR read (2) 
-system call. In practice, it mixes the semantics of
-.BR pread (2)
-and
-.BR preadv2 (2)
-in that it takes an explicit offset, and supports using -1 for the offset to
-indicate that the current file position should be used instead of passing in
-an explicit offset. See the opcode documentation for more details. Given that
-io_uring is an async interface,
+Given that
+.B io_uring
+is an async interface,
 .I errno
 is never used for passing back error information. Instead,
 .I res
@@ -101,15 +99,15 @@
 of success, and in case of error
 .I res
 will contain
-.I -errno .
+.IR -errno .
 For example, if the normal read system call would have returned -1 and set
 .I errno
 to
-.B EINVAL ,
+.BR EINVAL ,
 then
 .I res
 would contain
-.B -EINVAL .
+.BR -EINVAL .
 If the normal system call would have returned a read size of 1024, then
 .I res
 would contain 1024.
@@ -146,6 +144,17 @@
 .IP \(bu
 The kernel adds CQEs to the tail of the CQ.
 You read CQEs off the head of the queue.
+.PP
+It should be noted that, depending on the configuration, io_uring's behavior
+can deviate from the behavior outlined above: a CQE may not be posted for
+every SQE when
+.B IOSQE_CQE_SKIP_SUCCESS
+is set in the SQE, multiple CQEs may be posted for a single SQE for multi
+shot operations, and an
+.BR io_uring_enter (2)
+syscall may be required to make the kernel begin processing newly added SQEs
+when using submission queue polling.
+
 .SS Submission queue polling
 One of the goals of 
 .B io_uring
@@ -223,50 +232,83 @@
 .in +4n
 .EX
 struct io_uring_sqe {
-        __u8    opcode;         /* type of operation for this sqe */
-        __u8    flags;          /* IOSQE_ flags */
-        __u16   ioprio;         /* ioprio for the request */
-        __s32   fd;             /* file descriptor to do IO on */
-        union {
-                __u64   off;    /* offset into file */
-                __u64   addr2;
-        };
-        union {
-                __u64   addr;   /* pointer to buffer or iovecs */
-                __u64   splice_off_in;
-        };
-        __u32   len;            /* buffer size or number of iovecs */
-        union {
-                __kernel_rwf_t  rw_flags;
-                __u32           fsync_flags;
-                __u16           poll_events;    /* compatibility */
-                __u32           poll32_events;  /* word-reversed for BE */
-                __u32           sync_range_flags;
-                __u32           msg_flags;
-                __u32           timeout_flags;
-                __u32           accept_flags;
-                __u32           cancel_flags;
-                __u32           open_flags;
-                __u32           statx_flags;
-                __u32           fadvise_advice;
-                __u32           splice_flags;
-        };
-        __u64   user_data;      /* data to be passed back at completion time */
-        union {
-                struct {
-                        /* pack this to avoid bogus arm OABI complaints */
-                        union {
-                                /* index into fixed buffers, if used */
-                                __u16   buf_index;
-                                /* for grouped buffer selection */
-                                __u16   buf_group;
-                        } __attribute__((packed));
-                        /* personality to use, if used */
-                        __u16   personality;
-                        __s32   splice_fd_in;
-                };
-                __u64   __pad2[3];
-        };
+	__u8	opcode;		/* type of operation for this sqe */
+	__u8	flags;		/* IOSQE_ flags */
+	__u16	ioprio;		/* ioprio for the request */
+	__s32	fd;		/* file descriptor to do IO on */
+	union {
+		__u64	off;	/* offset into file */
+		__u64	addr2;
+		struct {
+			__u32	cmd_op;
+			__u32	__pad1;
+		};
+	};
+	union {
+		__u64	addr;	/* pointer to buffer or iovecs */
+		__u64	splice_off_in;
+		struct {
+			__u32	level;
+			__u32	optname;
+		};
+	};
+	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__kernel_rwf_t	rw_flags;
+		__u32		fsync_flags;
+		__u16		poll_events;	/* compatibility */
+		__u32		poll32_events;	/* word-reversed for BE */
+		__u32		sync_range_flags;
+		__u32		msg_flags;
+		__u32		timeout_flags;
+		__u32		accept_flags;
+		__u32		cancel_flags;
+		__u32		open_flags;
+		__u32		statx_flags;
+		__u32		fadvise_advice;
+		__u32		splice_flags;
+		__u32		rename_flags;
+		__u32		unlink_flags;
+		__u32		hardlink_flags;
+		__u32		xattr_flags;
+		__u32		msg_ring_flags;
+		__u32		uring_cmd_flags;
+		__u32		waitid_flags;
+		__u32		futex_flags;
+		__u32		install_fd_flags;
+		__u32		nop_flags;
+	};
+	__u64	user_data;	/* data to be passed back at completion time */
+	/* pack this to avoid bogus arm OABI complaints */
+	union {
+		/* index into fixed buffers, if used */
+		__u16	buf_index;
+		/* for grouped buffer selection */
+		__u16	buf_group;
+	} __attribute__((packed));
+	/* personality to use, if used */
+	__u16	personality;
+	union {
+		__s32	splice_fd_in;
+		__u32	file_index;
+		__u32	optlen;
+		struct {
+			__u16	addr_len;
+			__u16	__pad3[1];
+		};
+	};
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		__u64	optval;
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 .EE
 .in
@@ -299,7 +341,7 @@
 /* fill the sqe index into the SQ ring array */
 sqring->array[index] = index;
 tail++;
-atomic_store_release(sqring->tail, tail);
+atomic_store_explicit(sqring->tail, tail, memory_order_release);
 .EE
 .in
 .PP
@@ -349,6 +391,9 @@
 We use memory barriers to enforce this coherency.
 Being significantly large subjects on their own,
 memory barriers are out of scope for further discussion on this man page.
+For more information on modern memory models, the reader may refer to
+Documentation/memory-barriers.txt in the kernel tree, or to the documentation
+of the formal C11 or kernel memory models.
 .TP
 .B Letting the kernel know about I/O submissions
 Once you place one or more SQEs on to the SQ,
@@ -405,10 +450,46 @@
 is the result from the system call that was performed as part of the
 submission;
 its return value.
+
 The
 .I flags
-field could carry request-specific metadata in the future,
-but is currently unused.
+field carries request-specific information. As of the 6.0 kernel, the following
+flags are defined:
+
+.TP
+.B IORING_CQE_F_BUFFER
+If set, the upper 16 bits of the flags field carry the buffer ID that was
+chosen for this request. The request must have been issued with
+.B IOSQE_BUFFER_SELECT
+set, and used with a request type that supports buffer selection. Additionally,
+buffers must have been provided upfront either via the
+.B IORING_OP_PROVIDE_BUFFERS
+or the
+.B IORING_REGISTER_PBUF_RING
+methods.
+.TP
+.B IORING_CQE_F_MORE
+If set, the application should expect more completions from the request. This
+is used for requests that can generate multiple completions, such as
+multi-shot receive or accept requests.
+.TP
+.B IORING_CQE_F_SOCK_NONEMPTY
+If set, the socket still had data left to read upon completion of the current
+receive request.
+.TP
+.B IORING_CQE_F_NOTIF
+Set for notification CQEs, as seen with the zero-copy networking send and
+receive support.
+.TP
+.B IORING_CQE_F_BUF_MORE
+If set, more completions will be posted for the buffer ID set in this
+completion. This means that the provided buffer has been partially consumed
+and there's more buffer space left, and hence the application should expect
+more completions with this buffer ID. Each completion will continue where the
+previous one left off. This can only happen if the provided buffer ring has
+been set up with
+.B IORING_REGISTER_PBUF_RING
+to allow for incremental / partial consumption of buffers.
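+.PP
+As an illustration, a completion handler using provided buffers might pick
+out the buffer ID from the flags (a sketch; handle_buf() is a hypothetical
+application callback):
+.PP
+.in +4n
+.EX
+if (cqe->flags & IORING_CQE_F_BUFFER) {
+    /* upper 16 bits of the flags hold the chosen buffer ID */
+    unsigned bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+
+    handle_buf(bid, cqe->res);
+}
+.EE
+.in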
 .PP
 The general sequence to read completion events off the completion queue is
 as follows:
@@ -427,7 +508,7 @@
     /* CQE consumption complete */
     head++;
 }
-atomic_store_release(cqring->head, head);
+atomic_store_explicit(cqring->head, head, memory_order_release);
 .EE
 .in
 .PP
diff --git a/man/io_uring_buf_ring_add.3 b/man/io_uring_buf_ring_add.3
index 9d8283b..49e08e9 100644
--- a/man/io_uring_buf_ring_add.3
+++ b/man/io_uring_buf_ring_add.3
@@ -9,12 +9,12 @@
 .nf
 .B #include <liburing.h>
 .PP
-.BI "int io_uring_buf_ring_add(struct io_uring_buf_ring *" br ",
-.BI "                          void *" addr ",
-.BI "                          unsigned int " len ",
-.BI "                          unsigned short " bid ",
-.BI "                          int " mask ",
-.BI "                          int " buf_offset ");"
+.BI "void io_uring_buf_ring_add(struct io_uring_buf_ring *" br ",
+.BI "                           void *" addr ",
+.BI "                           unsigned int " len ",
+.BI "                           unsigned short " bid ",
+.BI "                           int " mask ",
+.BI "                           int " buf_offset ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -46,6 +46,14 @@
 
 .SH RETURN VALUE
 None
+.SH NOTES
+liburing (or the kernel, for that matter) doesn't care about what buffer ID maps
+to what buffer, and in fact when recycling buffers after use, the application is
+free to add a different buffer into the same buffer ID location. All that
+matters is that the application knows what memory a given buffer ID
+corresponds to at any given point in time. There's no liburing or kernel
+assumption that these mappings are persistent over time; they can very well be
+different every time a given buffer ID is added to the provided buffer ring.
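+.PP
+For instance, when recycling after a completion, an application might hand
+the consumed slot straight back (a sketch; bufs, br, mask and buf_size are
+application state):
+.PP
+.in +4n
+.EX
+unsigned bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+
+/* the same bid may point at the same or a different buffer */
+io_uring_buf_ring_add(br, bufs[bid], buf_size, bid, mask, 0);
+io_uring_buf_ring_advance(br, 1);
+.EE
+.in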
 .SH SEE ALSO
 .BR io_uring_register_buf_ring (3),
 .BR io_uring_buf_ring_mask (3),
diff --git a/man/io_uring_buf_ring_advance.3 b/man/io_uring_buf_ring_advance.3
index 29a3578..f2dc90b 100644
--- a/man/io_uring_buf_ring_advance.3
+++ b/man/io_uring_buf_ring_advance.3
@@ -9,8 +9,8 @@
 .nf
 .B #include <liburing.h>
 .PP
-.BI "int io_uring_buf_ring_advance(struct io_uring_buf_ring *" br ",
-.BI "                              int " count ");"
+.BI "void io_uring_buf_ring_advance(struct io_uring_buf_ring *" br ",
+.BI "                               int " count ");"
 .fi
 .SH DESCRIPTION
 .PP
diff --git a/man/io_uring_buf_ring_available.3 b/man/io_uring_buf_ring_available.3
new file mode 100644
index 0000000..c1dc8b8
--- /dev/null
+++ b/man/io_uring_buf_ring_available.3
@@ -0,0 +1,47 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_buf_ring_available 3 "Jan 11, 2024" "liburing-2.6" "liburing Manual"
+.SH NAME
+io_uring_buf_ring_available \- return number of unconsumed provided ring buffer entries
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_buf_ring_available(struct io_uring *" ring ",
+.BI "                                struct io_uring_buf_ring *" br ",
+.BI "                                unsigned short " bgid ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_buf_ring_available (3)
+helper returns the number of unconsumed (by the kernel) entries in the
+.IR br
+provided buffer group belonging to the io_uring
+.IR ring
+and identified by the buffer group ID
+.IR bgid .
+
+Since the head of the provided buffer ring is only visible to the kernel, it's
+impossible to otherwise know how many unconsumed entries exist in the given
+provided buffer ring. This function queries the kernel to return that number.
+Available since kernel 6.8.
+
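+.PP
+A minimal usage sketch (the ring, buffer ring
+.I br
+and group ID
+.I bgid
+are assumed to have been set up earlier, and replenish_buffers() is a
+hypothetical application helper):
+.PP
+.in +4n
+.EX
+int ret = io_uring_buf_ring_available(&ring, br, bgid);
+
+if (ret < 0)
+    fprintf(stderr, "query failed: %s\en", strerror(-ret));
+else if (ret == 0)
+    replenish_buffers(br); /* kernel consumed everything we provided */
+.EE
+.in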
+.SH NOTES
+The returned number of entries reflects the number of unconsumed entries at
+the time it was queried. If inflight IO exists that may consume provided
+buffers from this buffer group, then the returned value is inherently racy.
+.SH RETURN VALUE
+Returns the number of unconsumed entries on success, which may be 0. In case
+of error, may return
+.BR -ENOENT
+if the specified buffer group doesn't exist, or
+.BR -EINVAL
+if the buffer group isn't of the correct type, or if the kernel doesn't
+support this feature.
+.SH SEE ALSO
+.BR io_uring_register_buf_ring (3),
+.BR io_uring_buf_ring_add (3),
+.BR io_uring_buf_ring_cq_advance (3)
diff --git a/man/io_uring_buf_ring_cq_advance.3 b/man/io_uring_buf_ring_cq_advance.3
index caf882f..d7ada5d 100644
--- a/man/io_uring_buf_ring_cq_advance.3
+++ b/man/io_uring_buf_ring_cq_advance.3
@@ -9,9 +9,14 @@
 .nf
 .B #include <liburing.h>
 .PP
-.BI "int io_uring_buf_ring_cq_advance(struct io_uring *" ring ",
-.BI "                                 struct io_uring_buf_ring *" br ",
-.BI "                                 int " count ");"
+.BI "void io_uring_buf_ring_cq_advance(struct io_uring *" ring ",
+.BI "                                  struct io_uring_buf_ring *" br ",
+.BI "                                  int " count ");"
+.PP
+.BI "void __io_uring_buf_ring_cq_advance(struct io_uring *" ring ",
+.BI "                                    struct io_uring_buf_ring *" br ",
+.BI "                                    int " cq_count ",
+.BI "                                    int " buf_count ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -29,10 +34,19 @@
 amount. This effectively bundles both a
 .BR io_uring_buf_ring_advance (3)
 call and a
-.BR io_uring_cq_avance (3)
+.BR io_uring_cq_advance (3)
 into one operation. Since updating either ring index entails a store memory
 barrier, doing both at once is more efficient.
 
+The
+.BR __io_uring_buf_ring_cq_advance (3)
+function performs the same operation, except it splits the counts into two
+separate values. It advances the CQ ring by
+.I cq_count
+entries, and the buffer ring by
+.I buf_count
+entries rather than increment both by the same value.
+
 .SH RETURN VALUE
 None
 .SH SEE ALSO
diff --git a/man/io_uring_buf_ring_init.3 b/man/io_uring_buf_ring_init.3
index 50cf69a..beabed8 100644
--- a/man/io_uring_buf_ring_init.3
+++ b/man/io_uring_buf_ring_init.3
@@ -23,8 +23,14 @@
 .SH RETURN VALUE
 None
 
+.SH NOTES
+Unless manual setup is needed, it's recommended to use
+.BR io_uring_setup_buf_ring (3)
+as it provides a simpler way to set up a provided buffer ring.
 .SH SEE ALSO
 .BR io_uring_register_buf_ring (3),
+.BR io_uring_setup_buf_ring (3),
 .BR io_uring_buf_ring_add (3)
 .BR io_uring_buf_ring_advance (3),
 .BR io_uring_buf_ring_cq_advance (3)
diff --git a/man/io_uring_check_version.3 b/man/io_uring_check_version.3
new file mode 100644
index 0000000..b7d771a
--- /dev/null
+++ b/man/io_uring_check_version.3
@@ -0,0 +1,72 @@
+.\" Copyright (C) 2022 Christian Hergert <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_check_version 3 "December 1, 2022" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_check_version \- functions and macros to check the liburing version
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "bool io_uring_check_version(int " major ", int " minor ");"
+.BI "IO_URING_CHECK_VERSION(" major ", " minor ");"
+.PP
+.BI "int io_uring_major_version(void);"
+.BI "IO_URING_VERSION_MAJOR;"
+.PP
+.BI "int io_uring_minor_version(void);"
+.BI "IO_URING_VERSION_MINOR;"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_check_version (3)
+function returns
+.I false
+if the liburing library loaded by the dynamic linker is greater-than
+or equal-to the
+.I major
+and
+.I minor
+numbers provided.
+
+.PP
+The
+.BR IO_URING_CHECK_VERSION (3)
+macro returns
+.I 0
+if the liburing library being compiled against is greater-than or equal-to the
+.I major
+and
+.I minor
+numbers provided.
+
+.PP
+The
+.BR io_uring_major_version (3)
+function returns the
+.I major
+version number of the liburing library loaded by the dynamic linker.
+
+.PP
+The
+.BR IO_URING_VERSION_MAJOR (3)
+macro returns the
+.I major
+version number of the liburing library being compiled against.
+
+.PP
+The
+.BR io_uring_minor_version (3)
+function returns the
+.I minor
+version number of the liburing library loaded by the dynamic linker.
+
+.PP
+The
+.BR IO_URING_VERSION_MINOR (3)
+macro returns the
+.I minor
+version number of the liburing library being compiled against.
+
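+.PP
+As a sketch, a program requiring at least liburing 2.4 might combine the
+compile-time and run-time checks:
+.PP
+.in +4n
+.EX
+#include <liburing.h>
+
+#if IO_URING_CHECK_VERSION(2, 4)
+#error "must be built against liburing >= 2.4"
+#endif
+
+/* at runtime, bail out if the loaded library is older */
+if (io_uring_check_version(2, 4))
+    return 1;
+.EE
+.in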
diff --git a/man/io_uring_clone_buffers.3 b/man/io_uring_clone_buffers.3
new file mode 100644
index 0000000..bc1875d
--- /dev/null
+++ b/man/io_uring_clone_buffers.3
@@ -0,0 +1,64 @@
+.\" Copyright (C) 2024 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_clone_buffers 3 "September 12, 2024" "liburing-2.8" "liburing Manual"
+.SH NAME
+io_uring_clone_buffers \- Clones registered buffers between rings
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_clone_buffers(struct io_uring *" dst ","
+.BI "                           struct io_uring * " src ");"
+.PP
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_clone_buffers (3)
+function clones registered buffers from the ring indicated by
+.I src
+to the ring indicated by
+.I dst .
+Upon successful completion of this operation,
+.I src
+and
+.I dst
+will have the same set of registered buffers. This operation is identical to
+performing a
+.BR io_uring_register_buffers (3)
+operation on the
+.I dst
+ring, if the
+.I src
+ring previously had that same buffer registration operation done.
+
+The
+.I dst
+ring must not have any buffers currently registered. If buffers are currently
+registered on the destination ring, they must be unregistered with
+.BR io_uring_unregister_buffers (3)
+first.
+
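+.PP
+A typical use is sharing one registered buffer set across rings (a sketch;
+iovecs and nr_iovecs are application state, error handling elided):
+.PP
+.in +4n
+.EX
+struct io_uring src, dst;
+
+io_uring_queue_init(8, &src, 0);
+io_uring_queue_init(8, &dst, 0);
+io_uring_register_buffers(&src, iovecs, nr_iovecs);
+
+/* dst now has the same registered buffers as src */
+io_uring_clone_buffers(&dst, &src);
+.EE
+.in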
+On success
+.BR io_uring_clone_buffers (3)
+returns 0.
+On failure, it returns
+.BR -errno ,
+specifically
+.TP
+.B -EBUSY
+The destination ring already has buffers registered.
+.TP
+.B -ENOMEM
+The kernel ran out of memory.
+.TP
+.B -ENXIO
+The source ring doesn't have any buffers registered.
+.SH SEE ALSO
+.BR io_uring_register (2),
+.BR io_uring_unregister_buffers (3),
+.BR io_uring_register_buffers (3),
+.BR io_uring_prep_read_fixed (3),
+.BR io_uring_prep_write_fixed (3)
diff --git a/man/io_uring_close_ring_fd.3 b/man/io_uring_close_ring_fd.3
new file mode 100644
index 0000000..b5963c9
--- /dev/null
+++ b/man/io_uring_close_ring_fd.3
@@ -0,0 +1,43 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\" Copyright (C) 2022 Josh Triplett <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_close_ring_fd 3 "September 25, 2022" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_close_ring_fd \- close a ring file descriptor and use it only via registered index
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_close_ring_fd(struct io_uring *" ring ");"
+.fi
+.SH DESCRIPTION
+.PP
+.BR io_uring_close_ring_fd (3)
+closes the ring file descriptor, which must have been previously registered.
+The file will remain open, but accessible only via the registered index, not
+via any file descriptor. Subsequent liburing calls will continue to work, using
+the registered ring fd.
+
+The kernel must support
+.BR IORING_FEAT_REG_REG_RING .
+
+Libraries that must avoid disrupting their users' uses of file descriptors, and
+must continue working even in the face of
+.BR close_range (2)
+and similar, can use
+.BR io_uring_close_ring_fd (3)
+to work with liburing without having any open file descriptor.
+
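+.PP
+A sketch of a library hiding its ring's file descriptor entirely:
+.PP
+.in +4n
+.EX
+struct io_uring ring;
+
+io_uring_queue_init(8, &ring, 0);
+io_uring_register_ring_fd(&ring);
+
+/* from here on, only the registered index is used */
+io_uring_close_ring_fd(&ring);
+.EE
+.in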
+.SH NOTES
+Each thread that wants to make use of io_uring must register the fd. A library
+that may get called from arbitrary threads may need to detect when it gets
+called on a previously unseen thread and create and register a ring for that
+thread.
+.SH RETURN VALUE
+Returns 1 on success, or
+.BR -errno
+on error.
+.SH SEE ALSO
+.BR io_uring_register_ring_fd (3)
diff --git a/man/io_uring_cq_has_overflow.3 b/man/io_uring_cq_has_overflow.3
new file mode 100644
index 0000000..8ca0ba8
--- /dev/null
+++ b/man/io_uring_cq_has_overflow.3
@@ -0,0 +1,31 @@
+.\" Copyright (C) 2022 Dylan Yudaken <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_cq_has_overflow 3 "September 5, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_cq_has_overflow \- returns if there are overflow entries waiting to move to the CQ ring
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "bool io_uring_cq_has_overflow(const struct io_uring *" ring ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_cq_has_overflow (3)
+function informs the application if CQ entries have overflowed and are waiting
+to be flushed to the CQ ring, for example by using
+.BR io_uring_get_events (3).
+.SH NOTES
+Using this function is only valid if the ring has
+.B IORING_FEAT_NODROP
+set, as it's checking for a flag set by kernels supporting that feature. For
+really old kernels that don't support this feature, if CQE overflow is
+experienced the CQEs are lost. If that happens, the CQ ring overflow offset
+will get incremented.
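+.PP
+A sketch of flushing overflowed entries before reaping CQEs:
+.PP
+.in +4n
+.EX
+if (io_uring_cq_has_overflow(&ring))
+    /* moves overflowed completions into the CQ ring */
+    io_uring_get_events(&ring);
+.EE
+.in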
+.SH RETURN VALUE
+True if there are CQ entries waiting to be flushed to the CQ ring.
+.SH SEE ALSO
+.BR io_uring_get_events (3)
diff --git a/man/io_uring_cq_ready.3 b/man/io_uring_cq_ready.3
index e411a64..641828a 100644
--- a/man/io_uring_cq_ready.3
+++ b/man/io_uring_cq_ready.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_cq_ready "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_cq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_cq_ready \- returns number of unconsumed ready entries in the CQ ring
 .SH SYNOPSIS
@@ -15,7 +15,7 @@
 .PP
 The
 .BR io_uring_cq_ready (3)
-function retuns the number of unconsumed entries that are ready belonging to the
+function returns the number of unconsumed entries that are ready belonging to the
 .I ring
 param.
 
diff --git a/man/io_uring_cqe_get_data.3 b/man/io_uring_cqe_get_data.3
index 4cbb32c..a4d2988 100644
--- a/man/io_uring_cqe_get_data.3
+++ b/man/io_uring_cqe_get_data.3
@@ -46,7 +46,7 @@
 If the
 .I user_data
 value has been set before submitting the request, it will be returned.
-Otherwise the functions returns NULL.
+Otherwise, the return value is undefined.
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_sqe_set_data (3),
diff --git a/man/io_uring_cqe_seen.3 b/man/io_uring_cqe_seen.3
index d2f2984..5c9cd4e 100644
--- a/man/io_uring_cqe_seen.3
+++ b/man/io_uring_cqe_seen.3
@@ -35,8 +35,7 @@
 None
 .SH SEE ALSO
 .BR io_uring_submit (3),
-.BR io_uring_wait_cqe (3),
 .BR io_uring_peek_cqe (3),
+.BR io_uring_wait_cqe (3),
 .BR io_uring_wait_cqes (3),
-.BR io_uring_wait_cqe_timeout (3),
-.BR io_uring_cqe_seen (3)
+.BR io_uring_wait_cqe_timeout (3)
diff --git a/man/io_uring_enable_rings.3 b/man/io_uring_enable_rings.3
new file mode 100644
index 0000000..a01713b
--- /dev/null
+++ b/man/io_uring_enable_rings.3
@@ -0,0 +1,40 @@
+.\" Copyright (C) 2023 nick black <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_enable_rings 3 "July 26, 2024" "liburing-2.7" "liburing Manual"
+.SH NAME
+io_uring_enable_rings \- enable a disabled ring
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_enable_rings(struct io_uring *" ring ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_enable_rings (3)
+function enables a ring after having created it with the
+.B IORING_SETUP_R_DISABLED
+flag to
+.BR io_uring_queue_init (3).
+
+It is not possible to submit work to such a ring until this
+function has been successfully called.
+
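+.PP
+A sketch of the create-disabled/enable sequence:
+.PP
+.in +4n
+.EX
+struct io_uring ring;
+
+io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
+/* ... perform registrations while submissions are blocked ... */
+io_uring_enable_rings(&ring);
+.EE
+.in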
+.SH RETURN VALUE
+.BR io_uring_enable_rings (3)
+returns 0 on success. It otherwise returns a negative error code.
+It does not write to
+.BR errno .
+
+.SH ERRORS
+.TP
+.B EBADFD
+The ring was not disabled.
+
+.SH SEE ALSO
+.BR io_uring_queue_init (3),
+.BR io_uring_register (2),
+.BR io_uring_setup (2)
diff --git a/man/io_uring_enter.2 b/man/io_uring_enter.2
index 3c04541..6ccbeb4 100644
--- a/man/io_uring_enter.2
+++ b/man/io_uring_enter.2
@@ -3,27 +3,31 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual"
+.TH io_uring_enter 2 2019-01-22 "Linux" "Linux Programmer's Manual"
 .SH NAME
 io_uring_enter \- initiate and/or complete asynchronous I/O
 .SH SYNOPSIS
 .nf
-.BR "#include <linux/io_uring.h>"
+.BR "#include <liburing.h>"
 .PP
 .BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
 .BI "                   unsigned int " min_complete ", unsigned int " flags ,
 .BI "                   sigset_t *" sig );
+.PP
+.BI "int io_uring_enter2(unsigned int " fd ", unsigned int " to_submit ,
+.BI "                    unsigned int " min_complete ", unsigned int " flags ,
+.BI "                    sigset_t *" sig ", size_t " sz );
 .fi
 .PP
 .SH DESCRIPTION
 .PP
-.BR io_uring_enter ()
+.BR io_uring_enter (2)
 is used to initiate and complete I/O using the shared submission and
 completion queues setup by a call to
 .BR io_uring_setup (2).
 A single call can both submit new I/O and wait for completions of I/O
 initiated by this call or previous calls to
-.BR io_uring_enter ().
+.BR io_uring_enter (2).
 
 .I fd
 is the file descriptor returned by
@@ -34,12 +38,23 @@
 is a bitmask of the following values:
 .TP
 .B IORING_ENTER_GETEVENTS
-If this flag is set, then the system call will wait for the specificied
+If this flag is set, then the system call will wait for the specified
 number of events in
 .I min_complete
 before returning. This flag can be set along with
 .I to_submit
 to both submit and complete events in a single system call.
+If this flag is set, either the flag
+.B IORING_SETUP_DEFER_TASKRUN
+must not be set, or the thread issuing the syscall must be the thread that
+created the io_uring associated with
+.IR fd ,
+or be the thread that enabled the ring originally created with
+.B IORING_SETUP_R_DISABLED
+via
+.BR io_uring_register (2)
+or
+.BR io_uring_enable_rings (3).
 .TP
 .B IORING_ENTER_SQ_WAKEUP
 If the ring has been created with
@@ -52,7 +67,7 @@
 then the application has no real insight into when the SQ kernel thread has
 consumed entries from the SQ ring. This can lead to a situation where the
 application can no longer get a free SQE entry to submit, without knowing
-when it one becomes available as the SQ kernel thread consumes them. If
+when one will become available as the SQ kernel thread consumes them. If
 the system call is used with this flag set, then it will wait until at least
 one entry is free in the SQ ring.
 .TP
@@ -61,12 +76,12 @@
 the following:
 
 .nf
-.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
-.BI "                   unsigned int " min_complete ", unsigned int " flags ,
-.BI "                   const void *" arg ", size_t " argsz );
+.BI "int io_uring_enter2(unsigned int " fd ", unsigned int " to_submit ,
+.BI "                    unsigned int " min_complete ", unsigned int " flags ,
+.BI "                    const void *" arg ", size_t " argsz );
 .fi
 
-which is behaves just like the original definition by default. However, if
+which behaves just like the original definition by default. However, if
 .B IORING_ENTER_EXT_ARG
 is set, then instead of a
 .I sigset_t
@@ -77,7 +92,7 @@
 must be set to the size of this structure. The definition is as follows:
 
 .nf
-.BI "struct io_uring_getevents_args {
+.BI "struct io_uring_getevents_arg {
 .BI "        __u64   sigmask;
 .BI "        __u32   sigmask_sz;
 .BI "        __u32   pad;
@@ -96,11 +111,23 @@
 .TP
 .B IORING_ENTER_REGISTERED_RING
 If the ring file descriptor has been registered through use of
-.B IORING_REGISTER_RING_FDS,
+.BR IORING_REGISTER_RING_FDS ,
 then setting this flag will tell the kernel that the
 .I ring_fd
 passed in is the registered ring offset rather than a normal file descriptor.
 
+.TP
+.B IORING_ENTER_ABS_TIMER
+
+When this flag is set, the timeout argument passed in
+.I struct io_uring_getevents_arg
+will be interpreted as an absolute
+time of the registered clock (see
+.BR IORING_REGISTER_CLOCK )
+until which the waiting should end.
+
+Available since 6.12.
+
 .PP
 .PP
 If the io_uring instance was configured for polling, by specifying
@@ -137,12 +164,12 @@
 if
 .I sig
 is not NULL,
-.BR io_uring_enter ()
+.BR io_uring_enter (2)
 first replaces the current signal mask by the one pointed to by
 .IR sig ,
 then waits for events to become available in the completion queue, and
 then restores the original signal mask.  The following
-.BR io_uring_enter ()
+.BR io_uring_enter (2)
 call:
 .PP
 .in +4n
@@ -178,56 +205,83 @@
  * IO submission data structure (Submission Queue Entry)
  */
 struct io_uring_sqe {
-    __u8    opcode;         /* type of operation for this sqe */
-    __u8    flags;          /* IOSQE_ flags */
-    __u16   ioprio;         /* ioprio for the request */
-    __s32   fd;             /* file descriptor to do IO on */
-    union {
-        __u64   off;            /* offset into file */
-        __u64   addr2;
-    };
-    union {
-        __u64   addr;       /* pointer to buffer or iovecs */
-        __u64   splice_off_in;
-    }
-    __u32   len;            /* buffer size or number of iovecs */
-    union {
-        __kernel_rwf_t  rw_flags;
-        __u32    fsync_flags;
-        __u16    poll_events;   /* compatibility */
-        __u32    poll32_events; /* word-reversed for BE */
-        __u32    sync_range_flags;
-        __u32    msg_flags;
-        __u32    timeout_flags;
-        __u32    accept_flags;
-        __u32    cancel_flags;
-        __u32    open_flags;
-        __u32    statx_flags;
-        __u32    fadvise_advice;
-        __u32    splice_flags;
-        __u32    rename_flags;
-        __u32    unlink_flags;
-        __u32    hardlink_flags;
-    };
-    __u64    user_data;     /* data to be passed back at completion time */
-    union {
-    struct {
-        /* index into fixed buffers, if used */
-            union {
-                /* index into fixed buffers, if used */
-                __u16    buf_index;
-                /* for grouped buffer selection */
-                __u16    buf_group;
-            }
-        /* personality to use, if used */
-        __u16    personality;
-        union {
-            __s32    splice_fd_in;
-            __u32    file_index;
+	__u8	opcode;		/* type of operation for this sqe */
+	__u8	flags;		/* IOSQE_ flags */
+	__u16	ioprio;		/* ioprio for the request */
+	__s32	fd;		/* file descriptor to do IO on */
+	union {
+		__u64	off;	/* offset into file */
+		__u64	addr2;
+		struct {
+			__u32	cmd_op;
+			__u32	__pad1;
+		};
 	};
-    };
-    __u64    __pad2[3];
-    };
+	union {
+		__u64	addr;	/* pointer to buffer or iovecs */
+		__u64	splice_off_in;
+		struct {
+			__u32	level;
+			__u32	optname;
+		};
+	};
+	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__kernel_rwf_t	rw_flags;
+		__u32		fsync_flags;
+		__u16		poll_events;	/* compatibility */
+		__u32		poll32_events;	/* word-reversed for BE */
+		__u32		sync_range_flags;
+		__u32		msg_flags;
+		__u32		timeout_flags;
+		__u32		accept_flags;
+		__u32		cancel_flags;
+		__u32		open_flags;
+		__u32		statx_flags;
+		__u32		fadvise_advice;
+		__u32		splice_flags;
+		__u32		rename_flags;
+		__u32		unlink_flags;
+		__u32		hardlink_flags;
+		__u32		xattr_flags;
+		__u32		msg_ring_flags;
+		__u32		uring_cmd_flags;
+		__u32		waitid_flags;
+		__u32		futex_flags;
+		__u32		install_fd_flags;
+		__u32		nop_flags;
+	};
+	__u64	user_data;	/* data to be passed back at completion time */
+	/* pack this to avoid bogus arm OABI complaints */
+	union {
+		/* index into fixed buffers, if used */
+		__u16	buf_index;
+		/* for grouped buffer selection */
+		__u16	buf_group;
+	} __attribute__((packed));
+	/* personality to use, if used */
+	__u16	personality;
+	union {
+		__s32	splice_fd_in;
+		__u32	file_index;
+		__u32	optlen;
+		struct {
+			__u16	addr_len;
+			__u16	__pad3[1];
+		};
+	};
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		__u64	optval;
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 .EE
 .in
@@ -249,7 +303,7 @@
 .BR pwritev2 (2).
 If the file is not seekable,
 .I off
-must be set to zero.
+must be set to zero or -1.
 
 .TP
 .B IORING_OP_READ_FIXED
@@ -263,6 +317,12 @@
 .B IORING_OP_FSYNC
 File sync.  See also
 .BR fsync (2).
+Optionally
+.I off
+and
+.I len
+can be used to specify a range within the file to be synced rather than
+syncing the entire file, which is the default behavior.
 Note that, while I/O is initiated in the order in which it appears in
 the submission queue, completions are unordered.  For example, an
 application which places a write I/O followed by an fsync in the
@@ -270,6 +330,10 @@
 two operations execute in parallel, so the fsync may complete before
 the write is issued to the storage.  The same is also true for
 previously issued writes that have not completed prior to the fsync.
+To enforce ordering, one may utilize linked SQEs or
+.BR IOSQE_IO_DRAIN ,
+or wait for the CQEs of the requests that have to be ordered before a given
+request to arrive before submitting its SQE.
 
 .TP
 .B IORING_OP_POLL_ADD
@@ -298,41 +362,24 @@
 request has been terminated and no further events will be generated. This mode
 is available since 5.13.
 
-If
-.B IORING_POLL_UPDATE_EVENTS
-is set in the SQE
-.I len
-field, then the request will update an existing poll request with the mask of
-events passed in with this request. The lookup is based on the
-.I user_data
-field of the original SQE submitted, and this values is passed in the
-.I addr
-field of the SQE. This mode is available since 5.13.
-
-If
-.B IORING_POLL_UPDATE_USER_DATA
-is set in the SQE
-.I len
-field, then the request will update the
-.I user_data
-of an existing poll request based on the value passed in the
-.I off
-field. This mode is available since 5.13.
-
 This command works like
 an async
 .BR poll(2)
-and the completion event result is the returned mask of events. For the
-variants that update
-.I user_data
-or
-.I events
-, the completion result will be similar to
-.B IORING_OP_POLL_REMOVE.
+and the completion event result is the returned mask of events.
+
+Without
+.BR IORING_POLL_ADD_MULTI ,
+as well as for the initial poll operation with
+.BR IORING_POLL_ADD_MULTI ,
+the operation is level triggered, i.e. if there is data ready or events
+pending etc. at the time of submission, a corresponding CQE will be posted.
+Potential further completions beyond the first caused by
+.B IORING_POLL_ADD_MULTI
+are edge triggered.
 
 .TP
 .B IORING_OP_POLL_REMOVE
-Remove an existing poll request.  If found, the
+Remove or update an existing poll request.  If found, the
 .I res
 field of the
 .I "struct io_uring_cqe"
@@ -344,6 +391,26 @@
 .B -EALREADY
 if the poll request was in the process of completing already.
 
+If
+.B IORING_POLL_UPDATE_EVENTS
+is set in the SQE
+.I len
+field, then the request will update an existing poll request with the mask of
+events passed in with this request. The lookup is based on the
+.I user_data
+field of the original SQE submitted, and this value is passed in the
+.I addr
+field of the SQE.
+If
+.B IORING_POLL_UPDATE_USER_DATA
+is set in the SQE
+.I len
+field, then the request will update the
+.I user_data
+of an existing poll request based on the value passed in the
+.I off
+field. Updating an existing poll is available since 5.13.
+
 .TP
 .B IORING_OP_EPOLL_CTL
 Add, remove or modify entries in the interest list of
@@ -353,13 +420,17 @@
 for details of the system call.
 .I fd
 holds the file descriptor that represents the epoll instance,
-.I addr
+.I off
 holds the file descriptor to add, remove or modify,
 .I len
-holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform and,
-.I off
+holds the operation (
+.BR EPOLL_CTL_ADD ,
+.BR EPOLL_CTL_DEL ,
+.BR EPOLL_CTL_MOD )
+to perform and,
+.I addr
 holds a pointer to the
-.I epoll_events
+.I epoll_event
 structure. Available since 5.6.
 
 .TP
@@ -390,12 +461,46 @@
 .BR sendmsg (2)
 for the general description of the related system call. Available since 5.3.
 
+This command also supports the following modifiers in
+.I ioprio:
+
+.PP
+.in +12
+.B IORING_RECVSEND_POLL_FIRST
+If set, io_uring will assume the socket is currently full and attempting to
+send data will be unsuccessful. For this case, io_uring will arm internal
+poll and trigger a send of the data when there is enough space available.
+This initial send attempt can be wasteful for the case where the socket
+is expected to be full; setting this flag will bypass the initial send
+attempt and go straight to arming poll. If poll does indicate that data can
+be sent, the operation will proceed.
+.EE
+.in
+.PP
+
 .TP
 .B IORING_OP_RECVMSG
 Works just like IORING_OP_SENDMSG, except for
 .BR recvmsg(2)
 instead. See the description of IORING_OP_SENDMSG. Available since 5.3.
 
+This command also supports the following modifiers in
+.I ioprio:
+
+.PP
+.in +12
+.B IORING_RECVSEND_POLL_FIRST
+If set, io_uring will assume the socket is currently empty and attempting to
+receive data will be unsuccessful. For this case, io_uring will arm internal
+poll and trigger a receive of the data when the socket has data to be read.
+This initial receive attempt can be wasteful for the case where the socket
+is expected to be empty; setting this flag will bypass the initial receive
+attempt and go straight to arming poll. If poll does indicate that data is
+ready to be received, the operation will proceed.
+.EE
+.in
+.PP
+
 .TP
 .B IORING_OP_SEND
 Issue the equivalent of a
@@ -412,21 +517,56 @@
 .BR send(2)
 for the general description of the related system call. Available since 5.6.
 
+This command also supports the following modifiers in
+.I ioprio:
+
+.PP
+.in +12
+.B IORING_RECVSEND_POLL_FIRST
+If set, io_uring will assume the socket is currently full and attempting to
+send data will be unsuccessful. For this case, io_uring will arm internal
+poll and trigger a send of the data when there is enough space available.
+This initial send attempt can be wasteful for the case where the socket
+is expected to be full; setting this flag will bypass the initial send
+attempt and go straight to arming poll. If poll does indicate that data can
+be sent, the operation will proceed.
+.EE
+.in
+.PP
+
 .TP
 .B IORING_OP_RECV
 Works just like IORING_OP_SEND, except for
 .BR recv(2)
 instead. See the description of IORING_OP_SEND. Available since 5.6.
 
+This command also supports the following modifiers in
+.I ioprio:
+
+.PP
+.in +12
+.B IORING_RECVSEND_POLL_FIRST
+If set, io_uring will assume the socket is currently empty and attempting to
+receive data will be unsuccessful. For this case, io_uring will arm internal
+poll and trigger a receive of the data when the socket has data to be read.
+This initial receive attempt can be wasteful for the case where the socket
+is expected to be empty; setting this flag will bypass the initial receive
+attempt and go straight to arming poll. If poll does indicate that data is
+ready to be received, the operation will proceed.
+.EE
+.in
+.PP
+
 .TP
 .B IORING_OP_TIMEOUT
 This command will register a timeout operation. The
 .I addr
-field must contain a pointer to a struct timespec64 structure,
+field must contain a pointer to a struct __kernel_timespec structure,
 .I len
-must contain 1 to signify one timespec64 structure,
+must contain 1 to signify one __kernel_timespec structure,
 .I timeout_flags
-may contain IORING_TIMEOUT_ABS
+may contain
+.B IORING_TIMEOUT_ABS
 for an absolute timeout value, or 0 for a relative timeout.
 .I off
 may contain a completion event count. A timeout
@@ -436,13 +576,13 @@
 trigger the event. If set to 0, completed events are not counted, which
 effectively acts like a timer. io_uring timeouts use the
 .B CLOCK_MONOTONIC
-clock source. The request will complete with
-.I -ETIME
+as the default clock source. The request will complete with
+.B -ETIME
 if the timeout got completed through expiration of the timer, or
 .I 0
 if the timeout got completed through requests completing on their own. If
 the timeout was canceled before it expired, the request will complete with
-.I -ECANCELED.
+.BR -ECANCELED .
 Available since 5.4.
 
 Since 5.15, this command also supports the following modifiers in
@@ -452,26 +592,62 @@
 .in +12
 .B IORING_TIMEOUT_BOOTTIME
 If set, then the clocksource used is
-.I CLOCK_BOOTTIME
+.B CLOCK_BOOTTIME
 instead of
-.I CLOCK_MONOTONIC.
+.BR CLOCK_MONOTONIC .
 This clocksource differs in that it includes time elapsed if the system was
 suspended while having a timeout request in-flight.
 
 .B IORING_TIMEOUT_REALTIME
 If set, then the clocksource used is
-.I CLOCK_BOOTTIME
+.B CLOCK_REALTIME
 instead of
-.I CLOCK_MONOTONIC.
+.BR CLOCK_MONOTONIC .
 .EE
 .in
 .PP
 
+.PP
+.in +7
+Since 5.16,
+.B IORING_TIMEOUT_ETIME_SUCCESS
+can be set in
+.IR timeout_flags ,
+which will result in the expiration of the timer and subsequent completion
+with
+.B -ETIME
+not being interpreted as an error. This is mostly relevant for linked SQEs, as
+subsequent requests in the chain would not get canceled by the timeout, if
+this flag is set. See
+.B IOSQE_IO_LINK
+for more details on linked SQEs.
+.in
+.PP
+
+.PP
+.in +7
+Since 6.4,
+.B IORING_TIMEOUT_MULTISHOT
+can be set in
+.IR timeout_flags ,
+which will result in the timer producing multiple consecutive completions
+like other multishot operations, e.g.
+.B IORING_OP_READ_MULTISHOT
+or
+.BR IORING_POLL_ADD_MULTI .
+.I off
+must be set to the number of desired completions.
+.B IORING_TIMEOUT_MULTISHOT
+must not be used with
+.BR IORING_TIMEOUT_ABS .
+.in
+.PP
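+.PP
+.in +7
+For example, a one-second relative timeout that acts as a pure timer could
+be queued with the liburing helpers as follows (a minimal sketch assuming an
+initialized
+.IR ring ;
+error handling omitted):
+.PP
+.EX
+struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* count == 0: completed events are not counted, flags == 0: relative */
+io_uring_prep_timeout(sqe, &ts, 0, 0);
+io_uring_submit(&ring);
+.EE
+.in
+.PP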
+
 .TP
 .B IORING_OP_TIMEOUT_REMOVE
 If
-.I timeout_flags are zero, then it attempts to remove an existing timeout
-operation.
+.I timeout_flags
+are zero, then it attempts to remove an existing timeout operation.
 .I addr
 must contain the
 .I user_data
@@ -481,21 +657,21 @@
 .IR 0 .
 If the timeout request was found but expiration was already in progress,
 this request will terminate with a result value of
-.I -EBUSY
+.BR -EBUSY .
 If the timeout request wasn't found, the request will terminate with a result
 value of
-.I -ENOENT
+.BR -ENOENT .
 Available since 5.5.
 
 If
 .I timeout_flags
 contain
-.I IORING_TIMEOUT_UPDATE,
+.BR IORING_TIMEOUT_UPDATE ,
 instead of removing an existing operation, it updates it.
 .I addr
 and return values are same as before.
 .I addr2
-field must contain a pointer to a struct timespec64 structure.
+field must contain a pointer to a struct __kernel_timespec structure.
 .I timeout_flags
 may also contain IORING_TIMEOUT_ABS, in which case the value given is an
 absolute one, not a relative one.
@@ -504,7 +680,7 @@
 .TP
 .B IORING_OP_ACCEPT
 Issue the equivalent of an
-.BR accept4(2)
+.BR accept4 (2)
 system call.
 .I fd
 must be set to the socket file descriptor,
@@ -515,14 +691,15 @@
 the
 .I accept_flags
 field. See also
-.BR accept4(2)
+.BR accept4 (2)
 for the general description of the related system call. Available since 5.5.
 
 If the
 .I file_index
 field is set to a positive number, the file won't be installed into the
 normal file table as usual but will be placed into the fixed file table at index
-.I file_index - 1.
+.I file_index
+- 1.
 In this case, instead of returning a file descriptor, the result will contain
 either 0 on success or an error. If the index points to a valid empty slot, the
 installation is guaranteed to not fail. If there is already a file in the slot,
@@ -532,7 +709,7 @@
 can use them. See
 .B IOSQE_FIXED_FILE
 and
-.B IORING_REGISTER_FILES.
+.BR IORING_REGISTER_FILES .
 
 Available since 5.5.
 
@@ -547,9 +724,13 @@
 .I res
 field of the cqe will contain 0. If not found,
 .I res
-will contain -ENOENT. If found and attempted canceled, the
+will contain
+.BR -ENOENT .
+If found and attempted canceled, the
 .I res
-field will contain -EALREADY. In this case, the request may or may not
+field will contain
+.BR -EALREADY .
+In this case, the request may or may not
 terminate. In general, requests that are interruptible (like socket IO) will
 get canceled, while disk IO requests cannot be canceled if already started.
 Available since 5.5.
@@ -557,20 +738,20 @@
 .TP
 .B IORING_OP_LINK_TIMEOUT
 This request must be linked with another request through
-.I IOSQE_IO_LINK
+.B IOSQE_IO_LINK
 which is described below. Unlike
-.I IORING_OP_TIMEOUT,
-.I IORING_OP_LINK_TIMEOUT
+.BR IORING_OP_TIMEOUT ,
+.B IORING_OP_LINK_TIMEOUT
 acts on the linked request, not the completion queue. The format of the command
 is otherwise like
-.I IORING_OP_TIMEOUT,
+.BR IORING_OP_TIMEOUT ,
 except there's no completion event count as it's tied to a specific request.
 If used, the timeout specified in the command will cancel the linked command,
 unless the linked command completes before the timeout. The timeout will
 complete with
-.I -ETIME
+.B -ETIME
 if the timer expired and the linked request was attempted canceled, or
-.I -ECANCELED
+.B -ECANCELED
 if the timer got canceled because of completion of the linked request. Like
 .B IORING_OP_TIMEOUT
 the clock source used is
@@ -581,7 +762,7 @@
 .TP
 .B IORING_OP_CONNECT
 Issue the equivalent of a
-.BR connect(2)
+.BR connect (2)
 system call.
 .I fd
 must be set to the socket file descriptor,
@@ -589,13 +770,13 @@
 must contain the const pointer to the sockaddr structure, and
 .I off
 must contain the socklen_t addrlen field. See also
-.BR connect(2)
+.BR connect (2)
 for the general description of the related system call. Available since 5.5.
 
 .TP
 .B IORING_OP_FALLOCATE
 Issue the equivalent of a
-.BR fallocate(2)
+.BR fallocate (2)
 system call.
 .I fd
 must be set to the file descriptor,
@@ -605,13 +786,13 @@
 must contain the offset on which to operate, and
 .I addr
 must contain the length. See also
-.BR fallocate(2)
+.BR fallocate (2)
 for the general description of the related system call. Available since 5.6.
 
 .TP
 .B IORING_OP_FADVISE
 Issue the equivalent of a
-.BR posix_fadvise(2)
+.BR posix_fadvise (2)
 system call.
 .I fd
 must be set to the file descriptor,
@@ -621,13 +802,13 @@
 must contain the length, and
 .I fadvise_advice
 must contain the advice associated with the operation. See also
-.BR posix_fadvise(2)
+.BR posix_fadvise (2)
 for the general description of the related system call. Available since 5.6.
 
 .TP
 .B IORING_OP_MADVISE
 Issue the equivalent of a
-.BR madvise(2)
+.BR madvise (2)
 system call.
 .I addr
 must contain the address to operate on,
@@ -636,13 +817,13 @@
 and
 .I fadvise_advice
 must contain the advice associated with the operation. See also
-.BR madvise(2)
+.BR madvise (2)
 for the general description of the related system call. Available since 5.6.
 
 .TP
 .B IORING_OP_OPENAT
 Issue the equivalent of a
-.BR openat(2)
+.BR openat (2)
 system call.
 .I fd
 is the
@@ -656,7 +837,7 @@
 should contain any flags passed in, and
 .I len
 is access mode of the file. See also
-.BR openat(2)
+.BR openat (2)
 for the general description of the related system call. Available since 5.6.
 
 If the
@@ -673,14 +854,14 @@
 can use them. See
 .B IOSQE_FIXED_FILE
 and
-.B IORING_REGISTER_FILES.
+.BR IORING_REGISTER_FILES .
 
 Available since 5.15.
 
 .TP
 .B IORING_OP_OPENAT2
 Issue the equivalent of a
-.BR openat2(2)
+.BR openat2 (2)
 system call.
 .I fd
 is the
@@ -694,7 +875,7 @@
 should contain the size of the open_how structure, and
 .I off
 should be set to the address of the open_how structure. See also
-.BR openat2(2)
+.BR openat2 (2)
 for the general description of the related system call. Available since 5.6.
 
 If the
@@ -706,32 +887,31 @@
 either 0 on success or an error. If the index points to a valid empty slot, the
 installation is guaranteed to not fail. If there is already a file in the slot,
 it will be replaced, similar to
-.B IORING_OP_FILES_UPDATE.
+.BR IORING_OP_FILES_UPDATE .
 Please note that only io_uring has access to such files and no other syscall
 can use them. See
 .B IOSQE_FIXED_FILE
 and
-.B IORING_REGISTER_FILES.
+.BR IORING_REGISTER_FILES .
 
 Available since 5.15.
 
 .TP
 .B IORING_OP_CLOSE
 Issue the equivalent of a
-.BR close(2)
+.BR close (2)
 system call.
 .I fd
 is the file descriptor to be closed. See also
-.BR close(2)
+.BR close (2)
 for the general description of the related system call. Available since 5.6.
 If the
 .I file_index
 field is set to a positive number, this command can be used to close files
 that were direct opened through
-.B IORING_OP_OPENAT
-,
-.B IORING_OP_OPENAT2
-, or
+.BR IORING_OP_OPENAT ,
+.BR IORING_OP_OPENAT2 ,
+or
 .B IORING_OP_ACCEPT
 using the io_uring specific direct descriptors. Note that only one of the
 descriptor fields may be set. The direct close feature is available since
@@ -740,7 +920,7 @@
 .TP
 .B IORING_OP_STATX
 Issue the equivalent of a
-.BR statx(2)
+.BR statx (2)
 system call.
 .I fd
 is the
@@ -762,7 +942,7 @@
 must contain a pointer to the
 .I statxbuf
 to be filled in. See also
-.BR statx(2)
+.BR statx (2)
 for the general description of the related system call. Available since 5.6.
 
 .TP
@@ -770,9 +950,9 @@
 .TP
 .B IORING_OP_WRITE
 Issue the equivalent of a
-.BR pread(2)
+.BR pread (2)
 or
-.BR pwrite(2)
+.BR pwrite (2)
 system call.
 .I fd
 is the file descriptor to be operated on,
@@ -785,28 +965,28 @@
 .I fd
 does not refer to a seekable file,
 .I off
-must be set to zero. If
+must be set to zero or -1. If
 .I offs
 is set to
 .B -1
 , the offset will use (and advance) the file position, like the
-.BR read(2)
+.BR read (2)
 and
-.BR write(2)
+.BR write (2)
 system calls. These are non-vectored versions of the
 .B IORING_OP_READV
 and
 .B IORING_OP_WRITEV
 opcodes. See also
-.BR read(2)
+.BR read (2)
 and
-.BR write(2)
+.BR write (2)
 for the general description of the related system call. Available since 5.6.
 
 .TP
 .B IORING_OP_SPLICE
 Issue the equivalent of a
-.BR splice(2)
+.BR splice (2)
 system call.
 .I splice_fd_in
 is the file descriptor to read from,
@@ -818,20 +998,20 @@
 is an offset from which to start writing to. A sentinel value of
 .B -1
 is used to pass the equivalent of a NULL for the offsets to
-.BR splice(2).
+.BR splice (2).
 .I len
 contains the number of bytes to copy.
 .I splice_flags
 contains a bit mask for the flag field associated with the system call.
 Please note that one of the file descriptors must refer to a pipe.
 See also
-.BR splice(2)
+.BR splice (2)
 for the general description of the related system call. Available since 5.7.
 
 .TP
 .B IORING_OP_TEE
 Issue the equivalent of a
-.BR tee(2)
+.BR tee (2)
 system call.
 .I splice_fd_in
 is the file descriptor to read from,
@@ -843,7 +1023,7 @@
 contains a bit mask for the flag field associated with the system call.
 Please note that both of the file descriptors must refer to a pipe.
 See also
-.BR tee(2)
+.BR tee (2)
 for the general description of the related system call. Available since 5.8.
 
 .TP
@@ -905,7 +1085,7 @@
 .TP
 .B IORING_OP_REMOVE_BUFFERS
 Remove buffers previously registered with
-.B IORING_OP_PROVIDE_BUFFERS.
+.BR IORING_OP_PROVIDE_BUFFERS .
 .I fd
 must contain the number of buffers to remove, and
 .I buf_group
@@ -915,7 +1095,7 @@
 .TP
 .B IORING_OP_SHUTDOWN
 Issue the equivalent of a
-.BR shutdown(2)
+.BR shutdown (2)
 system call.
 .I fd
 is the file descriptor to the socket being shutdown, and
@@ -927,77 +1107,77 @@
 .TP
 .B IORING_OP_RENAMEAT
 Issue the equivalent of a
-.BR renameat2(2)
+.BR renameat2 (2)
 system call.
 .I fd
 should be set to the
-.I olddirfd,
+.IR olddirfd ,
 .I addr
 should be set to the
-.I oldpath,
+.IR oldpath ,
 .I len
 should be set to the
-.I newdirfd,
+.IR newdirfd ,
 .I addr
 should be set to the
-.I oldpath,
+.IR oldpath ,
 .I addr2
 should be set to the
-.I newpath,
+.IR newpath ,
 and finally
 .I rename_flags
 should be set to the
 .I flags
 passed in to
-.BR renameat2(2).
+.BR renameat2 (2).
 Available since 5.11.
 
 .TP
 .B IORING_OP_UNLINKAT
 Issue the equivalent of a
-.BR unlinkat2(2)
+.BR unlinkat (2)
 system call.
 .I fd
 should be set to the
-.I dirfd,
+.IR dirfd ,
 .I addr
 should be set to the
-.I pathname,
+.IR pathname ,
 and
 .I unlink_flags
 should be set to the
 .I flags
 being passed in to
-.BR unlinkat(2).
+.BR unlinkat (2).
 Available since 5.11.
 
 .TP
 .B IORING_OP_MKDIRAT
 Issue the equivalent of a
-.BR mkdirat2(2)
+.BR mkdirat (2)
 system call.
 .I fd
 should be set to the
-.I dirfd,
+.IR dirfd ,
 .I addr
 should be set to the
-.I pathname,
+.IR pathname ,
 and
 .I len
 should be set to the
 .I mode
 being passed in to
-.BR mkdirat(2).
+.BR mkdirat (2).
 Available since 5.15.
 
 .TP
 .B IORING_OP_SYMLINKAT
 Issue the equivalent of a
-.BR symlinkat2(2)
+.BR symlinkat (2)
 system call.
 .I fd
 should be set to the
-.I newdirfd,
+.IR newdirfd ,
 .I addr
 should be set to the
 .I target
@@ -1006,32 +1186,32 @@
 should be set to the
 .I linkpath
 being passed in to
-.BR symlinkat(2).
+.BR symlinkat (2).
 Available since 5.15.
 
 .TP
 .B IORING_OP_LINKAT
 Issue the equivalent of a
-.BR linkat2(2)
+.BR linkat (2)
 system call.
 .I fd
 should be set to the
-.I olddirfd,
+.IR olddirfd ,
 .I addr
 should be set to the
-.I oldpath,
+.IR oldpath ,
 .I len
 should be set to the
-.I newdirfd,
+.IR newdirfd ,
 .I addr2
 should be set to the
-.I newpath,
+.IR newpath ,
 and
 .I hardlink_flags
 should be set to the
 .I flags
 being passed in to
-.BR linkat(2).
+.BR linkat (2).
 Available since 5.15.
 
 .TP
@@ -1052,9 +1232,346 @@
 field matching the
 .I off
 value being passed in. This request type can be used to either just wake or
-interrupt anyone waiting for completions on the target ring, ot it can be used
+interrupt anyone waiting for completions on the target ring, or it can be used
 to pass messages via the two fields. Available since 5.18.
 
+.TP
+.B IORING_OP_SOCKET
+Issue the equivalent of a
+.BR socket (2)
+system call.
+.I fd
+must contain the communication domain,
+.I off
+must contain the communication type,
+.I len
+must contain the protocol, and
+.I rw_flags
+is currently unused and must be set to zero. See also
+.BR socket (2)
+for the general description of the related system call. Available since 5.19.
+
+If the
+.I file_index
+field is set to a positive number, the file won't be installed into the
+normal file table as usual but will be placed into the fixed file table at index
+.I file_index
+- 1.
+In this case, instead of returning a file descriptor, the result will contain
+either 0 on success or an error. If the index points to a valid empty slot, the
+installation is guaranteed to not fail. If there is already a file in the slot,
+it will be replaced, similar to
+.BR IORING_OP_FILES_UPDATE .
+Please note that only io_uring has access to such files and no other syscall
+can use them. See
+.B IOSQE_FIXED_FILE
+and
+.BR IORING_REGISTER_FILES .
+
+Available since 5.19.
+
+.TP
+.B IORING_OP_URING_CMD
+Issues an asynchronous, per-file private operation, similar to
+.BR ioctl (2).
+Further information may be found in the dedicated man page of
+.BR IORING_OP_URING_CMD .
+
+Available since 5.19.
+
+.TP
+.B IORING_OP_SEND_ZC
+Issue the zerocopy equivalent of a
+.BR send (2)
+system call. Similar to
+.BR IORING_OP_SEND ,
+but tries to avoid making intermediate
+copies of data. Zerocopy execution is not guaranteed and may fall back to
+copying. The request may also fail with
+.BR -EOPNOTSUPP ,
+when a protocol doesn't support zerocopy, in which case users are recommended
+to use copying sends instead.
+
+The
+.I flags
+field of the first
+.I "struct io_uring_cqe"
+will likely contain
+.BR IORING_CQE_F_MORE ,
+which means that there will be a second completion event / notification for
+the request, with the
+.I user_data
+field set to the same value. The user must not modify the data buffer until the
+notification is posted. The first cqe follows the usual rules and so its
+.I res
+field will contain the number of bytes sent or a negative error code. The
+notification's
+.I res
+field will be set to zero and the
+.I flags
+field will contain
+.BR IORING_CQE_F_NOTIF .
+The two-step model is needed because the kernel may hold on to buffers for a
+long time, e.g. waiting for a TCP ACK, and having a separate cqe for request
+completions allows userspace to push more data without extra delays. Note,
+notifications are only responsible for controlling the lifetime of the buffers,
+and as such don't mean anything about whether the data has actually been sent
+out or received by the other end. Even errored requests may generate a
+notification, and the user must check for
+.B IORING_CQE_F_MORE
+rather than relying on the result.
+
+.I fd
+must be set to the socket file descriptor,
+.I addr
+must contain a pointer to the buffer,
+.I len
+denotes the length of the buffer to send, and
+.I msg_flags
+holds the flags associated with the system call. When
+.I addr2
+is non-zero it points to the address of the target with
+.I addr_len
+specifying its size, turning the request into a
+.BR sendto (2)
+system call equivalent.
+
+Available since 6.0.
+
+This command also supports the following modifiers in
+.IR ioprio :
+
+.PP
+.in +12
+.B IORING_RECVSEND_POLL_FIRST
+If set, io_uring will assume the socket is currently full and attempting to
+send data will be unsuccessful. For this case, io_uring will arm internal
+poll and trigger a send of the data when there is enough space available.
+This initial send attempt can be wasteful for the case where the socket
+is expected to be full; setting this flag bypasses the initial send
+attempt and goes straight to arming poll. If poll does indicate that data can
+be sent, the operation will proceed.
+
+.B IORING_RECVSEND_FIXED_BUF
+If set, instructs io_uring to use a pre-mapped buffer. The
+.I buf_index
+field should contain an index into an array of fixed buffers. See
+.BR io_uring_register (2)
+for details on how to set up a context for fixed buffer I/O.
+.EE
+.in
+.PP
+
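+.PP
+.in +7
+A completion handler for zerocopy sends could distinguish the two CQEs as
+follows (a sketch assuming an initialized
+.IR ring ):
+.PP
+.EX
+struct io_uring_cqe *cqe;
+
+io_uring_wait_cqe(&ring, &cqe);
+if (cqe->flags & IORING_CQE_F_NOTIF) {
+	/* second CQE: the data buffer may now be reused or freed */
+} else if (cqe->flags & IORING_CQE_F_MORE) {
+	/* first CQE: cqe->res holds bytes sent, buffer still in use */
+} else {
+	/* lone CQE: no notification will follow for this request */
+}
+io_uring_cqe_seen(&ring, cqe);
+.EE
+.in
+.PP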
+.TP
+.B IORING_OP_SENDMSG_ZC
+Issue the zerocopy equivalent of a
+.BR sendmsg (2)
+system call.
+Works just like
+.BR IORING_OP_SENDMSG ,
+but like
+.B IORING_OP_SEND_ZC
+supports
+.BR IORING_RECVSEND_FIXED_BUF .
+For additional notes regarding zero copy see
+.BR IORING_OP_SEND_ZC .
+
+Available since 6.1.
+
+.TP
+.B IORING_OP_WAITID
+Issue the equivalent of a
+.BR waitid (2)
+system call.
+.I len
+must contain the idtype being queried/waited for and
+.I fd
+must contain the 'pid' (or id) being waited for.
+.I file_index
+is the 'options' being set (the child state changes to wait for).
+.I addr2
+is a pointer to siginfo_t, if any, being filled in. See also
+.BR waitid (2)
+for the general description of the related system call. Available since 6.5.
+
+.TP
+.B IORING_OP_SETXATTR
+.TP
+.B IORING_OP_GETXATTR
+.TP
+.B IORING_OP_FSETXATTR
+.TP
+.B IORING_OP_FGETXATTR
+Issue the equivalent of a
+.BR setxattr (2)
+or
+.BR getxattr (2)
+or
+.BR fsetxattr (2)
+or
+.BR fgetxattr (2)
+system call.
+.I addr
+must contain a pointer to a buffer containing the name of the extended
+attribute.
+.I addr2
+must contain a pointer to a buffer of maximum length
+.IR len ,
+in which the value of the extended attribute is to be placed or is read from.
+Additional flags may be provided in
+.IR xattr_flags .
+For
+.BR setxattr (2)
+or
+.BR getxattr (2)
+.I addr3
+must contain a pointer to the path of the file.
+For
+.BR fsetxattr (2)
+or
+.BR fgetxattr (2)
+.I fd
+must contain the file descriptor of the file.
+
+Available since 5.19.
+
+.TP
+.B IORING_OP_BIND
+Issues the equivalent of the
+.BR bind (2)
+system call.
+.I fd
+must contain the file descriptor of the socket,
+.I addr
+must contain a pointer to the sockaddr struct containing the address to assign
+and
+.I addr2
+must contain the length of the address.
+
+Available since 6.11.
+
+.TP
+.B IORING_OP_LISTEN
+Issues the equivalent of the
+.BR listen (2)
+system call.
+.I fd
+must contain the file descriptor of the socket and
+.I addr
+must contain the backlog parameter, i.e. the maximum number of pending
+queued connections.
+
+Available since 6.11.
+
+.TP
+.B IORING_OP_FTRUNCATE
+Issues the equivalent of the
+.BR ftruncate (2)
+system call.
+.I fd
+must contain the file descriptor of the file to truncate and
+.I off
+must contain the length to which the file will be truncated.
+
+Available since 6.9.
+
+.TP
+.B IORING_OP_READ_MULTISHOT
+Like
+.BR IORING_OP_READ ,
+but similar to requests prepared with
+.BR io_uring_prep_multishot_accept (3),
+additional reads will be performed, and thus CQEs generated, from this single
+SQE once more data becomes available.
+This request is restricted to pollable files and will fall back to single shot
+if the file does not support
+.BR NOWAIT .
+Like other multishot type requests, the application should look at the CQE
+flags and see if
+.B IORING_CQE_F_MORE
+is set on completion as an indication of whether or not the read request will
+generate further CQEs. Available since 6.7.
+
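+.PP
+.in +7
+Since each generated completion consumes a buffer, multishot reads are used
+together with provided buffers; a sketch assuming an initialized
+.I ring
+and a buffer ring already registered for group 0:
+.PP
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* len == 0: buffer sizes come from the registered buffer group */
+io_uring_prep_read_multishot(sqe, fd, 0, 0, 0);
+io_uring_submit(&ring);
+.EE
+.in
+.PP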
+.TP
+.B IORING_OP_FUTEX_WAIT
+Issues the equivalent of the
+.BR futex_wait (2)
+system call.
+.I addr
+must hold a pointer to the futex,
+.I addr2
+must hold the value to which the futex has to be changed so that this caller of
+.BR futex_wait (2)
+can be woken by a call to
+.BR futex_wake (2),
+.I addr3
+must hold the bitmask of this
+.BR futex_wait (2)
+caller.
+For a caller of
+.BR futex_wake (2)
+to wake a waiter, the bitmasks of the waiter and the waker must additionally
+have at least one set bit in common.
+.I fd
+must contain additional flags passed in.
+
+Available since 6.7.
+
+.TP
+.B IORING_OP_FUTEX_WAKE
+Issues the equivalent of the
+.BR futex_wake (2)
+system call.
+.I addr
+must hold a pointer to the futex,
+.I addr2
+must hold the maximum number of waiters waiting on this futex to wake,
+.I addr3
+must hold the bitmask of this
+.BR futex_wake (2)
+call.
+To wake a waiter, the bitmasks of the waiter and the waker must additionally
+have at least one set bit in common.
+.I fd
+must contain additional flags passed in.
+
+Available since 6.7.
+
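+.PP
+.in +7
+A matching wait/wake pair could be prepared with the liburing helpers as
+follows (a sketch assuming an initialized
+.IR ring ;
+.B FUTEX2_SIZE_U32
+selects a 32-bit futex word):
+.PP
+.EX
+uint32_t futex_word = 0;
+struct io_uring_sqe *sqe;
+
+/* block if futex_word still contains 0, until woken */
+sqe = io_uring_get_sqe(&ring);
+io_uring_prep_futex_wait(sqe, &futex_word, 0, FUTEX_BITSET_MATCH_ANY,
+			 FUTEX2_SIZE_U32, 0);
+
+/* elsewhere: wake at most one waiter on the same word */
+sqe = io_uring_get_sqe(&ring);
+io_uring_prep_futex_wake(sqe, &futex_word, 1, FUTEX_BITSET_MATCH_ANY,
+			 FUTEX2_SIZE_U32, 0);
+.EE
+.in
+.PP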
+.TP
+.B IORING_OP_FUTEX_WAITV
+Issues the equivalent of the
+.BR futex_waitv (2)
+system call.
+.I addr
+must hold a pointer to the futexv struct,
+.I len
+must hold the length of the futexv struct, which may not be 0 and must be
+smaller than
+.B FUTEX_WAITV_MAX
+(128 as of kernel 6.11).
+
+Available since 6.7.
+
+.TP
+.B IORING_OP_FIXED_FD_INSTALL
+This operation is used to insert a registered file into the regular process
+file table.
+Consequently
+.I fd
+must contain the file index and
+.B IOSQE_FIXED_FILE
+must be set.
+The resulting regular fd is returned via cqe->res.
+Additional flags may be passed in via
+.IR install_fd_flags .
+The only currently supported flag is
+.BR IORING_FIXED_FD_NO_CLOEXEC ,
+which overrides an
+.B O_CLOEXEC
+flag potentially set on the initial file.
+
+Available since 6.8.
+
 .PP
 The
 .I flags
@@ -1069,7 +1586,7 @@
 .BR io_uring_register (2)
 man page). Note that this isn't always available for all commands. If used on
 a command that doesn't support fixed files, the SQE will error with
-.B -EBADF.
+.BR -EBADF .
 Available since 5.1.
 .TP
 .B IOSQE_IO_DRAIN
@@ -1087,7 +1604,7 @@
 chain. This flag has no effect on previous SQE submissions, nor does it impact
 SQEs that are outside of the chain tail. This means that multiple chains can be
 executing in parallel, or chains and individual SQEs. Only members inside the
-chain are serialized. A chain of SQEs will be broken, if any request in that
+chain are serialized. A chain of SQEs will be broken if any request in that
 chain ends in error. io_uring considers any unexpected result an error. This
 means that, eg, a short read will also terminate the remainder of the chain.
 If a chain of SQE links is broken, the remaining unstarted part of the chain
@@ -1099,7 +1616,10 @@
 Like IOSQE_IO_LINK, but it doesn't sever regardless of the completion result.
 Note that the link will still sever if we fail submitting the parent request,
 hard links are only resilient in the presence of completion results for
-requests that did submit correctly. IOSQE_IO_HARDLINK implies IOSQE_IO_LINK.
+requests that did submit correctly.
+.B IOSQE_IO_HARDLINK
+implies
+.BR IOSQE_IO_LINK .
 Available since 5.5.
 .TP
 .B IOSQE_ASYNC
@@ -1134,7 +1654,7 @@
 .B IOSQE_IO_HARDLINK,
 CQEs for all linked requests will be omitted. The notion of failure/success is
 opcode specific and is the same as with breaking chains of
-.B IOSQE_IO_LINK.
+.BR IOSQE_IO_LINK .
 One special case is when the request has a linked timeout, then the CQE
 generation for the linked timeout is decided solely by whether it has
 .B IOSQE_CQE_SKIP_SUCCESS
@@ -1143,16 +1663,16 @@
 
 The semantics are chosen to accommodate several use cases. First, when all but
 the last request of a normal link without linked timeouts are marked with the
-flag, only one CQE per lin is posted. Additionally, it enables supression of
+flag, only one CQE per link is posted. Additionally, it enables suppression of
 CQEs in cases where the side effects of a successfully executed operation is
 enough for userspace to know the state of the system. One such example would
 be writing to a synchronisation file.
 
 This flag is incompatible with
-.B IOSQE_IO_DRAIN.
+.BR IOSQE_IO_DRAIN .
 Using both of them in a single ring is undefined behavior, even when they are
 not used together in a single request. Currently, after the first request with
-.B IOSQE_CQE_SKIP_SUCCESS,
+.BR IOSQE_CQE_SKIP_SUCCESS ,
 all subsequent requests marked with drain will be failed at submission time.
 Note that the error reporting is best effort only, and restrictions may change
 in the future.
@@ -1219,7 +1739,7 @@
 buffers were registered.
 .I personality
 is the credentials id to use for this operation. See
-.BR io_uring_register(2)
+.BR io_uring_register (2)
 for how to register personalities with io_uring. If set to 0, the current
 personality of the submitting task is used.
 .PP
@@ -1255,13 +1775,14 @@
 or in conjunction with
 .B IOSQE_BUFFER_SELECT
 or
-.B IORING_OP_MSG_RING,
-, see those entries for details.
+.BR IORING_OP_MSG_RING ,
+see those entries for details.
 .I res
 is the operation-specific result, but io_uring-specific errors
 (e.g. flags or opcode invalid) are returned through this field.
 They are described in section
-.B CQE ERRORS.
+.B CQE
+.BR ERRORS .
 .PP
 For read and write opcodes, the
 return values match
@@ -1282,7 +1803,7 @@
 io_uring-specific opcodes.
 .PP
 .SH RETURN VALUE
-.BR io_uring_enter ()
+.BR io_uring_enter (2)
 returns the number of I/Os successfully consumed.  This can be zero
 if
 .I to_submit
@@ -1295,19 +1816,19 @@
 
 The errors related to a submission queue entry will be returned through a
 completion queue entry (see section
-.B CQE ERRORS),
+.B CQE
+.BR ERRORS ),
 rather than through the system call itself.
 
 Errors that occur not on behalf of a submission queue entry are returned via the
-system call directly. On such an error,
-.B -1
-is returned and
+system call directly. On such an error, a negative error code is returned. The
+caller should not rely on the
 .I errno
-is set appropriately.
+variable.
 .PP
 .SH ERRORS
 These are the errors returned by
-.BR io_uring_enter ()
+.BR io_uring_enter (2)
 system call.
 .TP
 .B EAGAIN
@@ -1326,13 +1847,44 @@
 .BR io_uring_register (2)
 for details on how to enable the ring.
 .TP
+.B EBADR
+At least one CQE was dropped even with the
+.B IORING_FEAT_NODROP
+feature, and there are no otherwise available CQEs. This clears the error state
+and so with no other changes the next call to
+.BR io_uring_enter (2)
+will not have this error. This error should be extremely rare and indicates the
+machine is running critically low on memory. It may be reasonable for the
+application to terminate running unless it is able to safely handle any CQE
+being lost.
+.TP
 .B EBUSY
-The application is attempting to overcommit the number of requests it can have
+If the
+.B IORING_FEAT_NODROP
+feature flag is set, then
+.B EBUSY
+will be returned if there were overflow entries, the
+.B IORING_ENTER_GETEVENTS
+flag is set, and not all of the overflow entries could be flushed to
+the CQ ring.
+
+Without
+.B IORING_FEAT_NODROP
+the application is attempting to overcommit the number of requests it can have
 pending. The application should wait for some completions and try again. May
 occur if the application tries to queue more requests than we have room for in
 the CQ ring, or if the application attempts to wait for more events without
 having reaped the ones already present in the CQ ring.
 .TP
+.B EEXIST
+The thread submitting the work is invalid. This may occur if
+.B IORING_ENTER_GETEVENTS
+and
+.B IORING_SETUP_DEFER_TASKRUN
+are set, but the submitting thread is not the thread that initially created or
+enabled the io_uring associated with
+.IR fd .
+.TP
 .B EINVAL
 Some bits in the
 .I flags
@@ -1353,9 +1905,14 @@
 .B EINTR
 The operation was interrupted by a delivery of a signal before it could
 complete; see
-.BR signal(7).
+.BR signal (7).
 Can happen while waiting for events with
 .B IORING_ENTER_GETEVENTS.
+.TP
+.B EOWNERDEAD
+The ring has been set up with
+.B IORING_SETUP_SQPOLL
+and the SQ poll kernel thread has been killed.
 
 .SH CQE ERRORS
 These io_uring-specific errors are returned as a negative value in the
@@ -1490,3 +2047,12 @@
 field of the submission queue entry, but the
 .I opcode
 doesn't support buffer selection.
+.TP
+.B EINVAL
+.B IORING_OP_TIMEOUT
+was specified, but
+.I timeout_flags
+specified more than one clock source or
+.B IORING_TIMEOUT_MULTISHOT
+was set alongside
+.BR IORING_TIMEOUT_ABS .
diff --git a/man/io_uring_enter2.2 b/man/io_uring_enter2.2
new file mode 120000
index 0000000..5566c09
--- /dev/null
+++ b/man/io_uring_enter2.2
@@ -0,0 +1 @@
+io_uring_enter.2
\ No newline at end of file
diff --git a/man/io_uring_for_each_cqe.3 b/man/io_uring_for_each_cqe.3
new file mode 100644
index 0000000..78d8f6f
--- /dev/null
+++ b/man/io_uring_for_each_cqe.3
@@ -0,0 +1,63 @@
+.\" Copyright (C) 2023 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_for_each_cqe 3 "June 04, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_for_each_cqe \- iterate pending completion events
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "io_uring_for_each_cqe(struct io_uring *" ring ","
+.BI "                      unsigned " head ","
+.BI "                      struct io_uring_cqe *" cqe ") { }"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_for_each_cqe (3)
+is a macro helper that iterates completion events belonging to the
+.I ring
+using
+.I head
+as a temporary iterator, and points
+.I cqe
+to each pending event when iterating.
+
+This helper provides an efficient way to iterate all pending events in
+the ring and then advance the CQ ring by calling
+.BR io_uring_cq_advance (3)
+with the number of CQEs consumed when done. As updating the kernel-visible
+CQ ring state involves an ordered write, doing it once for a number of
+events is more efficient than handling each completion separately and
+calling
+.BR io_uring_cqe_seen (3)
+for each of them.
+
+.SH EXAMPLE
+.EX
+void handle_cqes(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	unsigned head;
+	unsigned i = 0;
+
+	io_uring_for_each_cqe(ring, head, cqe) {
+		/* handle completion */
+		printf("cqe: %d\\n", cqe->res);
+		i++;
+	}
+
+	io_uring_cq_advance(ring, i);
+}
+.EE
+
+.SH RETURN VALUE
+None
+.SH SEE ALSO
+.BR io_uring_wait_cqe_timeout (3),
+.BR io_uring_wait_cqe (3),
+.BR io_uring_wait_cqes (3),
+.BR io_uring_cqe_seen (3),
+.BR io_uring_buf_ring_cq_advance (3)
diff --git a/man/io_uring_free_buf_ring.3 b/man/io_uring_free_buf_ring.3
new file mode 100644
index 0000000..649cabb
--- /dev/null
+++ b/man/io_uring_free_buf_ring.3
@@ -0,0 +1,53 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_free_buf_ring 3 "Mar 07, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_free_buf_ring \- unregister and free a buffer ring used for provided buffers
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_free_buf_ring(struct io_uring *" ring ","
+.BI "                           struct io_uring_buf_ring *" br ","
+.BI "                           unsigned int " nentries ","
+.BI "                           int " bgid ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_free_buf_ring (3)
+function unregisters a previously registered shared buffer ring. The ring must
+have been previously returned from
+.BR io_uring_setup_buf_ring (3).
+
+The
+.I ring
+argument must point to the ring for which the provided buffer ring is being
+unregistered,
+.I br
+must point to a buffer ring previously returned by
+.BR io_uring_setup_buf_ring (3),
+.I nentries
+is the number of entries requested in the buffer ring, and
+.I bgid
+is the buffer group ID that
+.I br
+was set up with.
+
+Under the covers, this function uses
+.BR io_uring_unregister_buf_ring (3)
+to unregister the ring, and handles the freeing of the ring rather than
+letting the application open code it.
+
+Available since 5.19.
+
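+.SH EXAMPLE
+.PP
+A minimal sketch pairing setup and free (assumes an initialized
+.IR ring ;
+error handling omitted):
+.PP
+.EX
+int err;
+struct io_uring_buf_ring *br;
+
+/* register a 64-entry provided-buffer ring for buffer group 0 */
+br = io_uring_setup_buf_ring(&ring, 64, 0, 0, &err);
+
+/* ... add buffers and use them ... */
+
+/* unregister and free it again */
+io_uring_free_buf_ring(&ring, br, 64, 0);
+.EE
+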
+.SH RETURN VALUE
+On success
+.BR io_uring_free_buf_ring (3)
+returns zero. On failure it returns
+.BR -errno .
+.SH SEE ALSO
+.BR io_uring_setup_buf_ring (3)
diff --git a/man/io_uring_free_probe.3 b/man/io_uring_free_probe.3
index d2308fa..960fda3 100644
--- a/man/io_uring_free_probe.3
+++ b/man/io_uring_free_probe.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_free_probe "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_free_probe 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_free_probe \- free probe instance
 .SH SYNOPSIS
diff --git a/man/io_uring_get_events.3 b/man/io_uring_get_events.3
new file mode 100644
index 0000000..f241542
--- /dev/null
+++ b/man/io_uring_get_events.3
@@ -0,0 +1,33 @@
+.\" Copyright (C) 2022 Dylan Yudaken
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_get_events 3 "September 5, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_get_events \- Flush outstanding requests to CQE ring
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_get_events(struct io_uring *" ring ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_get_events (3)
+function runs outstanding work and flushes completion events to the CQE ring.
+
+There can be events needing to be flushed if the ring was full and had overflowed.
+Alternatively, if the ring was set up with the
+.B IORING_SETUP_DEFER_TASKRUN
+flag, then this will process outstanding task work, possibly resulting in more CQEs.
+
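+.SH EXAMPLE
+.PP
+A sketch of explicitly flushing completions before peeking at the CQ ring
+(assumes an initialized
+.IR ring ):
+.PP
+.EX
+struct io_uring_cqe *cqe;
+
+/* run outstanding work and flush any overflowed CQEs */
+io_uring_get_events(&ring);
+
+while (io_uring_peek_cqe(&ring, &cqe) == 0) {
+	/* handle completion */
+	io_uring_cqe_seen(&ring, cqe);
+}
+.EE
+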
+.SH RETURN VALUE
+On success
+.BR io_uring_get_events (3)
+returns 0. On failure it returns
+.BR -errno .
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit_and_get_events (3),
+.BR io_uring_cq_has_overflow (3)
diff --git a/man/io_uring_get_probe.3 b/man/io_uring_get_probe.3
index 94c1b21..353cc73 100644
--- a/man/io_uring_get_probe.3
+++ b/man/io_uring_get_probe.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_get_probe "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_get_probe 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_get_probe \- get probe instance
 .SH SYNOPSIS
diff --git a/man/io_uring_get_sqe.3 b/man/io_uring_get_sqe.3
index 58c8b96..b257ebb 100644
--- a/man/io_uring_get_sqe.3
+++ b/man/io_uring_get_sqe.3
@@ -32,10 +32,26 @@
 and submitted via
 .BR io_uring_submit (3).
 
+Note that neither
+.BR io_uring_get_sqe (3)
+nor the prep functions set (or clear) the
+.B user_data
+field of the SQE. If the caller expects
+.BR io_uring_cqe_get_data (3)
+or
+.BR io_uring_cqe_get_data64 (3)
+to return valid data when reaping IO completions, either
+.BR io_uring_sqe_set_data (3)
+or
+.BR io_uring_sqe_set_data64 (3)
+.B MUST
+have been called before submitting the request.
+
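+.PP
+A sketch of the tagging flow described above, using a no-op request
+(assumes an initialized
+.IR ring ;
+error handling omitted):
+.PP
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+struct io_uring_cqe *cqe;
+
+io_uring_prep_nop(sqe);
+/* tag the request so its completion can be matched up */
+io_uring_sqe_set_data64(sqe, 0x1234);
+io_uring_submit(&ring);
+
+io_uring_wait_cqe(&ring, &cqe);
+/* io_uring_cqe_get_data64(cqe) now returns 0x1234 */
+io_uring_cqe_seen(&ring, cqe);
+.EE
+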
 .SH RETURN VALUE
 .BR io_uring_get_sqe (3)
 returns a pointer to the next submission queue event on success and NULL on
 failure. If NULL is returned, the SQ ring is currently full and entries must
 be submitted for processing before new ones can get allocated.
 .SH SEE ALSO
-.BR io_uring_submit (3)
+.BR io_uring_submit (3),
+.BR io_uring_sqe_set_data (3)
diff --git a/man/io_uring_major_version.3 b/man/io_uring_major_version.3
new file mode 120000
index 0000000..21bbf45
--- /dev/null
+++ b/man/io_uring_major_version.3
@@ -0,0 +1 @@
+io_uring_check_version.3
\ No newline at end of file
diff --git a/man/io_uring_minor_version.3 b/man/io_uring_minor_version.3
new file mode 120000
index 0000000..21bbf45
--- /dev/null
+++ b/man/io_uring_minor_version.3
@@ -0,0 +1 @@
+io_uring_check_version.3
\ No newline at end of file
diff --git a/man/io_uring_opcode_supported.3 b/man/io_uring_opcode_supported.3
index b20b504..b981ed7 100644
--- a/man/io_uring_opcode_supported.3
+++ b/man/io_uring_opcode_supported.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_opcode_supported "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_opcode_supported 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_opcode_supported \- is op code supported?
 .SH SYNOPSIS
diff --git a/man/io_uring_peek_batch_cqe.3 b/man/io_uring_peek_batch_cqe.3
new file mode 120000
index 0000000..fbf4e4c
--- /dev/null
+++ b/man/io_uring_peek_batch_cqe.3
@@ -0,0 +1 @@
+io_uring_peek_cqe.3
\ No newline at end of file
diff --git a/man/io_uring_peek_cqe.3 b/man/io_uring_peek_cqe.3
index a4ac2da..deb41f9 100644
--- a/man/io_uring_peek_cqe.3
+++ b/man/io_uring_peek_cqe.3
@@ -11,6 +11,10 @@
 .PP
 .BI "int io_uring_peek_cqe(struct io_uring *" ring ","
 .BI "                      struct io_uring_cqe **" cqe_ptr ");"
+.PP
+.BI "int io_uring_peek_batch_cqe(struct io_uring *" ring ","
+.BI "                            struct io_uring_cqe **" cqe_ptrs ","
+.BI "                            unsigned " count ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -25,12 +29,27 @@
 This function does not enter the kernel to wait for an event, an event
 is only returned if it's already available in the CQ ring.
 
+The
+.BR io_uring_peek_batch_cqe (3)
+function returns up to
+.I count
+request completions in
+.I cqe_ptrs
+belonging to the
+.I ring
+parameter, if they are readily available. It will not enter the kernel unless the
+CQ ring is in an overflow condition. Upon successful return,
+.I cqe_ptrs
+are filled with the number of events indicated by the return value.
+
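+.PP
+For example, reaping up to 8 readily available completions in one call
+(a sketch assuming an initialized
+.IR ring ):
+.PP
+.EX
+struct io_uring_cqe *cqes[8];
+unsigned i, n;
+
+n = io_uring_peek_batch_cqe(&ring, cqes, 8);
+for (i = 0; i < n; i++) {
+	/* handle cqes[i]->res */
+}
+io_uring_cq_advance(&ring, n);
+.EE
+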
 .SH RETURN VALUE
 On success
 .BR io_uring_peek_cqe (3)
 returns
 .B 0
-and the cqe_ptr parameter is filled in. On failure it returns
+and the cqe_ptr parameter is filled in. On success
+.BR io_uring_peek_batch_cqe (3)
+returns the number of completions filled in. On failure, they may return
 .BR -EAGAIN .
 .SH SEE ALSO
 .BR io_uring_submit (3),
diff --git a/man/io_uring_prep_accept.3 b/man/io_uring_prep_accept.3
index 3800ccb..c8985d5 100644
--- a/man/io_uring_prep_accept.3
+++ b/man/io_uring_prep_accept.3
@@ -39,7 +39,9 @@
 .PP
 The
 .BR io_uring_prep_accept (3)
-function prepares an accept request. The submission queue entry
+function and its three variants prepare an accept request similar to
+.BR accept4 (2).
+The submission queue entry
 .I sqe
 is setup to use the file descriptor
 .I sockfd
@@ -50,51 +52,65 @@
 and using modifier flags in
 .IR flags .
 
-For a direct descriptor accept request, the offset is specified by the
-.I file_index
-argument. Direct descriptors are io_uring private file descriptors. They
+The three variants allow combining the direct file table and multishot features.
+
+Direct descriptors are io_uring private file descriptors. They
 avoid some of the overhead associated with thread shared file tables and
-can be used in any io_uring request that takes a file descriptor. To do so,
+can be used in any io_uring request that takes a file descriptor.
+The two direct variants here create such direct descriptors.
+Subsequent to their creation, they can be used by setting
 .B IOSQE_FIXED_FILE
-must be set in the SQE
+in the SQE
 .I flags
-member, and the SQE
+member, and setting the SQE
 .I fd
-field should use the direct descriptor value rather than the regular file
+field to the direct descriptor value rather than the regular file
 descriptor. Direct descriptors are managed like registered files.
 
-If the direct variant is used, the application must first have registered
-a file table using
+To use an accept direct variant, the application must first have registered
+a file table of a desired size using
 .BR io_uring_register_files (3)
-of the appropriate size. Once registered, a direct accept request may use any
-entry in that table, as long as it is within the size of the registered table.
-If a specified entry already contains a file, the file will first be removed
-from the table and closed. It's consistent with the behavior of updating an
+or
+.BR io_uring_register_files_sparse (3).
+Once registered,
+.BR io_uring_prep_accept_direct (3)
+allows an entry in that table to be specifically selected through the
+.I file_index
+argument.
+If the specified entry already contains a file, the file will first be removed
+from the table and closed, consistent with the behavior of updating an
 existing file with
 .BR io_uring_register_files_update (3).
+.I file_index
+can also be set to
+.B IORING_FILE_INDEX_ALLOC
+for this variant and
+an unused table index will be dynamically chosen and returned.
+Likewise,
+.BR io_uring_prep_multishot_accept_direct (3)
+will have an unused table index dynamically chosen and returned for each
+accepted connection.
+If both forms of direct selection will be employed, specific and dynamic, see
+.BR io_uring_register_file_alloc_range (3)
+for setting up the table so dynamically chosen entries are made against
+a different range than that targeted by specific requests.
+
 Note that old kernels don't check the SQE
 .I file_index
-field, which is not a problem for liburing helpers, but users of the raw
-io_uring interface need to zero SQEs to avoid unexpected behavior. This also
-means that applications should check for availability of
-.B IORING_OP_ACCEPT_DIRECT
-before using it, they cannot rely on a
+field, meaning
+applications cannot rely on a
 .B -EINVAL
 CQE
 .I res
-return.
+being returned when the kernel is too old, because older kernels
+may not recognize they are being asked to use a direct table slot.
 
-For a direct descriptor accept request, the
-.I file_index
-argument can be set to
-.BR IORING_FILE_INDEX_ALLOC ,
-In this case a free entry in io_uring file table will
-be used automatically and the file index will be returned as CQE
-.IR res .
+When a direct descriptor accept request asks for a table slot to be
+dynamically chosen but there are no free entries,
 .B -ENFILE
-is otherwise returned if there is no free entries in the io_uring file table.
+is returned as the CQE
+.IR res .
 
-The multishot version accept and accept_direct allow an application to issue
+The multishot variants allow an application to issue
 a single accept request, which will repeatedly trigger a CQE when a connection
 request comes in. Like other multishot type requests, the application should
 look at the CQE
@@ -102,42 +118,61 @@
 and see if
 .B IORING_CQE_F_MORE
 is set on completion as an indication of whether or not the accept request
-will generate further CQEs. The multishot variants are available since 5.19.
+will generate further CQEs. Note that for the multishot variants, setting
+.I addr
+and
+.I addrlen
+may not make a lot of sense, as the same values would be used for every
+accepted connection. This means that the data written to
+.I addr
+may be overwritten by a new connection before the application has had time
+to process a past connection. If the application knows that a new connection
+cannot come in before a previous one has been processed, these fields may be
+used as expected. The multishot variants are available since 5.19.
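+.PP
+As a sketch, a multishot accept without address collection could be armed as
+follows (assumes an initialized
+.I ring
+and a listening socket
+.IR listen_fd ):
+.PP
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* NULL addr/addrlen: peer addresses are not collected, see above */
+io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
+io_uring_submit(&ring);
+.EE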
 
-For multishot with direct descriptors,
-.B IORING_FILE_INDEX_ALLOC
-must be used as the file descriptor. This tells io_uring to allocate a free
-direct descriptor from our table, rather than the application passing one in.
-Failure to do so will result in the accept request being terminated with
-.BR -EINVAL .
-The allocated descriptor will be returned in the CQE
-.I res
-field, like a non-direct accept request.
-
-These functions prepare an async
+See the man page
 .BR accept4 (2)
-request. See that man page for details.
+for details of the accept function itself.
 
 .SH RETURN VALUE
 None
 .SH ERRORS
 The CQE
 .I res
-field will contain the result of the operation. For singleshot accept, the
-non-direct accept returns the installed file descriptor as its value, the
-direct accept returns
+field will contain the result of the operation.
+
+.BR io_uring_prep_accept (3)
+generates the installed file descriptor as its result.
+
+.BR io_uring_prep_accept_direct (3)
+with
+.I file_index
+set to a specific direct descriptor
+generates
 .B 0
-on success. The caller must know which direct descriptor was picked for this
-request. For multishot accept, the non-direct accept returns the installed
-file descriptor as its value, the direct accept returns the file index used on
-success. See the related man page for details on possible values for the
-non-direct accept. Note that where synchronous system calls will return
+on success.
+The caller must remember which direct descriptor was picked for this request.
+
+.BR io_uring_prep_accept_direct (3)
+with
+.I file_index
+set to
+.B IORING_FILE_INDEX_ALLOC
+generates the dynamically chosen direct descriptor.
+
+.BR io_uring_prep_multishot_accept (3)
+generates the installed file descriptor in each result.
+
+.BR io_uring_prep_multishot_accept_direct (3)
+generates the dynamically chosen direct descriptor in each result.
+
+Note that where synchronous system calls will return
 .B -1
 on failure and set
 .I errno
 to the actual error value, io_uring never uses
 .IR errno .
-Instead it returns the negated
+Instead it generates the negated
 .I errno
 directly in the CQE
 .I res
@@ -155,5 +190,8 @@
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
+.BR io_uring_register_files (3),
+.BR io_uring_register_files_sparse (3),
+.BR io_uring_register_file_alloc_range (3),
 .BR io_uring_register (2),
 .BR accept4 (2)
diff --git a/man/io_uring_prep_bind.3 b/man/io_uring_prep_bind.3
new file mode 100644
index 0000000..e2a1cf9
--- /dev/null
+++ b/man/io_uring_prep_bind.3
@@ -0,0 +1,54 @@
+.\" Copyright (C) 2024 SUSE LLC
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_bind 3 "Jun 3, 2024" "liburing-2.7" "liburing Manual"
+.SH NAME
+io_uring_prep_bind \- prepare a bind request
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_bind(struct io_uring_sqe *" sqe ","
+.BI "                          int " sockfd ","
+.BI "                          struct sockaddr *" addr ","
+.BI "                          socklen_t " addrlen ");"
+.fi
+.SH DESCRIPTION
+The
+.BR io_uring_prep_bind (3)
+function prepares a bind request. The submission queue entry
+.I sqe
+is setup to assign the network address at
+.IR addr ,
+of length
+.IR addrlen ,
+to the socket descriptor
+.IR sockfd .
+
+This function prepares an async
+.BR bind (2)
+request. See that man page for details.
+
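+.PP
+For example, binding a socket to any local IPv4 address on port 8080 (a
+sketch assuming an initialized
+.I ring
+and a socket
+.IR sockfd ;
+error handling omitted):
+.PP
+.EX
+struct sockaddr_in sa = {
+	.sin_family = AF_INET,
+	.sin_port = htons(8080),
+	.sin_addr.s_addr = htonl(INADDR_ANY),
+};
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_bind(sqe, sockfd, (struct sockaddr *) &sa, sizeof(sa));
+io_uring_submit(&ring);
+.EE
+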
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR bind (2)
diff --git a/man/io_uring_prep_cancel.3 b/man/io_uring_prep_cancel.3
index 3c9f2df..0240ce9 100644
--- a/man/io_uring_prep_cancel.3
+++ b/man/io_uring_prep_cancel.3
@@ -74,6 +74,12 @@
 .BR io_uring_prep_cancel_fd (3)
 sets up. Available since 5.19.
 .TP
+.B IORING_ASYNC_CANCEL_FD_FIXED
+Set in conjunction with
+.B IORING_ASYNC_CANCEL_FD ,
+indicating that the file descriptor given is a direct descriptor rather than
+a normal file descriptor. Available since 6.0.
+.TP
 .B IORING_ASYNC_CANCEL_ANY
 Match any request in the ring, regardless of user_data or file descriptor.
 Can be used to cancel any pending request in the ring. Available since 5.19.
diff --git a/man/io_uring_prep_cancel_fd.3 b/man/io_uring_prep_cancel_fd.3
new file mode 120000
index 0000000..347db09
--- /dev/null
+++ b/man/io_uring_prep_cancel_fd.3
@@ -0,0 +1 @@
+io_uring_prep_cancel.3
\ No newline at end of file
diff --git a/man/io_uring_prep_close.3 b/man/io_uring_prep_close.3
index 94780f2..9358cd5 100644
--- a/man/io_uring_prep_close.3
+++ b/man/io_uring_prep_close.3
@@ -42,7 +42,10 @@
 The CQE
 .I res
 field will contain the result of the operation. See the related man page for
-details on possible values. Note that where synchronous system calls will return
+details on possible values. For closing of a direct descriptor, the only
+failure cases are the kernel running completely out of memory or the
+application having specified an invalid direct descriptor. Note that where
+synchronous system calls will return
 .B -1
 on failure and set
 .I errno
diff --git a/man/io_uring_prep_cmd.3 b/man/io_uring_prep_cmd.3
new file mode 100644
index 0000000..76f00b7
--- /dev/null
+++ b/man/io_uring_prep_cmd.3
@@ -0,0 +1,126 @@
+.\" Copyright (C) 2023 Breno Leitao <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_cmd 3 "July 27, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_cmd_sock \- prepare a command request for a socket
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_cmd_sock(struct io_uring_sqe *" sqe ","
+.BI "                            int " cmd_op ","
+.BI "                            int " fd ","
+.BI "                            int " level ","
+.BI "                            int " optname ","
+.BI "                            void " *optval ","
+.BI "                            int " optlen ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_cmd_sock (3)
+function prepares a command request for a socket. The submission queue entry
+.I sqe
+is setup to use the socket file descriptor
+.I fd
+to start a command operation defined by
+.IR cmd_op .
+
+This is a generic function, and each command has its own individual
+.IR level ", " optname ", and " optval
+values. The
+.I optlen
+argument defines the size of the memory pointed to by
+.IR optval .
+
+.SH Available commands
+
+.TP
+.B SOCKET_URING_OP_SIOCINQ
+Returns the amount of queued unread data in the receive buffer.
+The socket must not be in LISTEN state, otherwise an error
+.B -EINVAL
+is returned in the CQE
+.I res
+field.
+The following arguments are not used for this command:
+.IR level ", " optname ", " optval
+and
+.IR optlen .
+
+A negative return value indicates an error.
+
+For more information about this command, please check
+.BR unix (7).
+
+Available since 6.7.
+
+.TP
+.B SOCKET_URING_OP_SIOCOUTQ
+Returns the amount of unsent data in the socket send queue.
+The socket must not be in LISTEN state, otherwise an error
+.B -EINVAL
+is returned in the CQE
+.I res
+field.
+The following arguments are not used for this command:
+.IR level ", " optname ", " optval
+and
+.IR optlen .
+
+Negative return value means an error.
+
+For more information about this command, please check
+.BR unix (7).
+
+.TP
+.B SOCKET_URING_OP_GETSOCKOPT
+Command to get options for the socket referred to by the socket file descriptor
+.IR fd .
+The arguments are similar to the
+.BR getsockopt (2)
+system call.
+
+The
+.B SOCKET_URING_OP_GETSOCKOPT
+command is limited to the
+.B SOL_SOCKET
+level.
+
+Unlike the
+.BR getsockopt (2)
+system call, the updated
+.I optlen
+value is returned in the CQE
+.I res
+field, on success. On failure, the CQE
+.I res
+contains a negative error number.
+
+.TP
+.B SOCKET_URING_OP_SETSOCKOPT
+Command to set options for the socket referred to by the socket file descriptor
+.IR fd .
+The arguments are similar to the
+.BR setsockopt (2)
+system call.
+
+Available since 6.7.
+
+.SH NOTES
+The memory block pointed to by
+.I optval
+must remain valid until the corresponding CQE has been posted.
+
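+.SH EXAMPLE
+.PP
+Querying the amount of unread data on a connected socket (a sketch assuming
+an initialized
+.I ring
+and a socket
+.IR sockfd ;
+error handling omitted):
+.PP
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+struct io_uring_cqe *cqe;
+
+io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SIOCINQ, sockfd, 0, 0, NULL, 0);
+io_uring_submit(&ring);
+
+io_uring_wait_cqe(&ring, &cqe);
+/* cqe->res: bytes queued for reading, or a negative error */
+io_uring_cqe_seen(&ring, cqe);
+.EE
+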
+.SH RETURN VALUE
+Dependent on the command.
+
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_register (2),
+.BR unix (7)
diff --git a/man/io_uring_prep_cmd_discard.3 b/man/io_uring_prep_cmd_discard.3
new file mode 100644
index 0000000..9778672
--- /dev/null
+++ b/man/io_uring_prep_cmd_discard.3
@@ -0,0 +1,64 @@
+.\" Copyright (C) 2024 Pavel Begunkov <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_cmd_discard 3 "Oct 13, 2024" "liburing-2.8" "liburing Manual"
+.SH NAME
+io_uring_prep_cmd_discard \- prepare a discard command
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_cmd_discard(struct io_uring_sqe *" sqe ","
+.BI "                          int " fd ","
+.BI "                          uint64_t " offset ","
+.BI "                          uint64_t " nbytes ");"
+.fi
+.SH DESCRIPTION
+The
+.BR io_uring_prep_cmd_discard (3)
+function prepares a discard command request. The submission queue entry
+.I sqe
+is setup to use the file descriptor
+.IR fd
+to start discarding
+.I nbytes
+at the specified
+.IR offset .
+
+The command is an asynchronous equivalent of the
+.B BLOCK_URING_CMD_DISCARD
+ioctl, with a few differences. It allows multiple parallel discards, and it does
+not exclude concurrent writes and reads. As a result, it may lead to races for
+the data on the disk if the application has IO inflight for the same ranges
+that the discard operates on. It's the user's responsibility to account for that.
+Furthermore, only a best-effort attempt is made to invalidate page caches. The
+user has to make sure that no other inflight requests are modifying or reading
+the range(s); otherwise, the result may be stale page cache contents and data
+inconsistencies.
+
+Available since 6.12.
+
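+.SH EXAMPLE
+.PP
+Discarding the first mebibyte of a block device (a sketch assuming an
+initialized
+.I ring
+and an open block device descriptor
+.IR bdev_fd ,
+with no other IO inflight for the range):
+.PP
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_cmd_discard(sqe, bdev_fd, 0, 1024 * 1024);
+io_uring_submit(&ring);
+.EE
+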
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. On success, this field will be
+set to
+.BR 0 .
+On error, a negative error value is returned. Note that where synchronous
+system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
diff --git a/man/io_uring_prep_fadvise.3 b/man/io_uring_prep_fadvise.3
index a53ab25..7b508b5 100644
--- a/man/io_uring_prep_fadvise.3
+++ b/man/io_uring_prep_fadvise.3
@@ -13,8 +13,14 @@
 .BI "void io_uring_prep_fadvise(struct io_uring_sqe *" sqe ","
 .BI "                           int " fd ","
 .BI "                           __u64 " offset ","
-.BI "                           off_t " len ","
+.BI "                           __u32 " len ","
 .BI "                           int " advice ");"
+.PP
+.BI "void io_uring_prep_fadvise64(struct io_uring_sqe *" sqe ","
+.BI "                             int " fd ","
+.BI "                             __u64 " offset ","
+.BI "                             __u64 " len ","
+.BI "                             int " advice ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -31,6 +37,17 @@
 length in bytes, giving it the advice located in
 .IR advice .
 
+The
+.BR io_uring_prep_fadvise64 (3)
+function works like
+.BR io_uring_prep_fadvise (3)
+except that it takes a 64-bit length rather than just a 32-bit one. Older
+kernels may not support the 64-bit length variant. If this variant is used on a
+kernel that doesn't support 64-bit lengths, the request will fail with
+.B -EINVAL
+in the
+.I res
+field of the CQE.
+
 This function prepares an async
 .BR posix_fadvise (2)
 request. See that man page for details.
diff --git a/man/io_uring_prep_fadvise64.3 b/man/io_uring_prep_fadvise64.3
new file mode 120000
index 0000000..cfd6828
--- /dev/null
+++ b/man/io_uring_prep_fadvise64.3
@@ -0,0 +1 @@
+io_uring_prep_fadvise.3
\ No newline at end of file
diff --git a/man/io_uring_prep_fallocate.3 b/man/io_uring_prep_fallocate.3
index 86e1d39..426e1d5 100644
--- a/man/io_uring_prep_fallocate.3
+++ b/man/io_uring_prep_fallocate.3
@@ -13,8 +13,8 @@
 .BI "void io_uring_prep_fallocate(struct io_uring_sqe *" sqe ","
 .BI "                             int " fd ","
 .BI "                             int " mode ","
-.BI "                             off_t " offset ","
-.BI "                             off_t " len ");"
+.BI "                             __u64 " offset ","
+.BI "                             __u64 " len ");"
 .fi
 .SH DESCRIPTION
 .PP
diff --git a/man/io_uring_prep_fgetxattr.3 b/man/io_uring_prep_fgetxattr.3
new file mode 120000
index 0000000..fd0634a
--- /dev/null
+++ b/man/io_uring_prep_fgetxattr.3
@@ -0,0 +1 @@
+io_uring_prep_getxattr.3
\ No newline at end of file
diff --git a/man/io_uring_prep_fixed_fd_install.3 b/man/io_uring_prep_fixed_fd_install.3
new file mode 100644
index 0000000..3300c45
--- /dev/null
+++ b/man/io_uring_prep_fixed_fd_install.3
@@ -0,0 +1,70 @@
+.\" Copyright (C) 2023 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_fixed_fd_install 3 "December 8, 2023" "liburing-2.6" "liburing Manual"
+.SH NAME
+io_uring_prep_fixed_fd_install \- prepare fixed file fd installation request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_fixed_fd_install(struct io_uring_sqe *" sqe ","
+.BI "                                    int " fd ","
+.BI "                                    unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_fixed_fd_install (3)
+helper prepares a fixed file descriptor installation. The submission queue entry
+.I sqe
+is setup to install the direct/fixed file descriptor
+.I fd
+with the specified
+.I flags
+file installation flags.
+
+One use case of direct/fixed file descriptors is to turn a regular file
+descriptor into a direct one, reducing the overhead of any request that
+needs to access this file. This helper provides the reverse operation,
+turning a direct descriptor into a regular file descriptor that can
+subsequently be used by regular system calls taking a normal file descriptor.
+This can be handy if no regular file descriptor exists for this direct
+descriptor, either because it was instantiated directly as a fixed descriptor,
+or because the regular file was closed with
+.BR close (2)
+after being turned into a direct descriptor.
+
+Upon successful completion of this request, both a normal and a fixed file
+descriptor exist for the same file. Either one of them may be used to access
+the file, and either one of them may be closed without affecting the other.
+
+.I flags
+may be either zero, or set to
+.B IORING_FIXED_FD_NO_CLOEXEC
+to indicate that the new regular file descriptor should not be closed during
+exec. By default,
+.B O_CLOEXEC
+is set on the new descriptor. Setting this field to anything but
+those two values will result in the request failing with
+.B -EINVAL
+in the CQE
+.I res
+field.
+
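+A minimal sketch of turning a direct descriptor back into a regular one
+(error handling omitted; assumes
+.I ring
+is an initialized ring and direct descriptor slot 0 holds a file):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+struct io_uring_cqe *cqe;
+
+io_uring_prep_fixed_fd_install(sqe, 0, 0);
+io_uring_submit(&ring);
+io_uring_wait_cqe(&ring, &cqe);
+/* on success, cqe->res is the new regular file descriptor */
+io_uring_cqe_seen(&ring, cqe);
+.EE
+.in
+.PP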
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation, which in this case will be the
+value of the new regular file descriptor. In case of failure, a negative value
+is returned.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_register_files (3),
+.BR io_uring_unregister_files (3),
+.BR io_uring_prep_close_direct (3),
+.BR io_uring_prep_openat_direct (3)
diff --git a/man/io_uring_prep_fsetxattr.3 b/man/io_uring_prep_fsetxattr.3
new file mode 120000
index 0000000..724254c
--- /dev/null
+++ b/man/io_uring_prep_fsetxattr.3
@@ -0,0 +1 @@
+io_uring_prep_setxattr.3
\ No newline at end of file
diff --git a/man/io_uring_prep_ftruncate.3 b/man/io_uring_prep_ftruncate.3
new file mode 100644
index 0000000..e3f1f7e
--- /dev/null
+++ b/man/io_uring_prep_ftruncate.3
@@ -0,0 +1,54 @@
+.\" Copyright (C) 2024 Tony Solomonik <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_ftruncate 3 "January 23, 2024" "liburing-2.6" "liburing Manual"
+.SH NAME
+io_uring_prep_ftruncate \- prepare an ftruncate request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_ftruncate(struct io_uring_sqe *" sqe ","
+.BI "                             int " fd ","
+.BI "                             loff_t " len ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_ftruncate (3)
+function prepares an ftruncate request. The submission queue entry
+.I sqe
+is setup to truncate the file referred to by the file descriptor
+.I fd
+to the length indicated by the
+.I len
+argument.
+
+Applications must define
+.B _GNU_SOURCE
+to obtain the definition of this helper, as
+.I loff_t
+will not be defined without it.
+
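+A minimal sketch (error handling omitted; assumes
+.I ring
+is an initialized ring and
+.I fd
+is an open, writable file):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* truncate (or extend) the file to 4096 bytes */
+io_uring_prep_ftruncate(sqe, fd, 4096);
+io_uring_submit(&ring);
+.EE
+.in
+.PP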
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR ftruncate (2)
diff --git a/man/io_uring_prep_futex_wait.3 b/man/io_uring_prep_futex_wait.3
new file mode 100644
index 0000000..ab89112
--- /dev/null
+++ b/man/io_uring_prep_futex_wait.3
@@ -0,0 +1,94 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_futex_wait 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_futex_wait \- prepare a futex wait request
+.SH SYNOPSIS
+.nf
+.B #include <linux/futex.h>
+.B #include <unistd.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_futex_wait(struct io_uring_sqe *" sqe ","
+.BI "                              uint32_t *" futex ","
+.BI "                              uint64_t " val ","
+.BI "                              uint64_t " mask ","
+.BI "                              uint32_t " futex_flags ","
+.BI "                              unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_futex_wait (3)
+function prepares a futex wait request. The submission queue entry
+.I sqe
+is setup to wait on a futex at address
+.IR futex ,
+provided it still has the value
+.IR val ,
+with the
+.BR futex2 (2)
+flags given in
+.I futex_flags
+and the io_uring futex flags given in
+.IR flags .
+
+.I mask
+can be set to a specific bitset mask, which will be matched by the waking
+side to decide who to wake up. To always get woken, an application may use
+.BR FUTEX_BITSET_MATCH_ANY .
+
+.I futex_flags
+follows the
+.BR futex2 (2)
+flags, not the
+.BR futex (2)
+v1 interface flags.
+
+.I flags
+are currently unused and hence
+.B 0
+must be passed.
+
+This function prepares an async
+.BR futex (2)
+wait request. See that man page for details. Note that the io_uring futex
+wait request is similar to the
+.B FUTEX_WAIT_BITSET
+operation, as
+.B FUTEX_WAIT
+is a strict subset of that.
+
+Available since kernel 6.7.
+
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH NOTES
+Unlike the sync futex syscalls that wait on a futex, io_uring does not support
+passing in a timeout for the request. Instead, applications are encouraged
+to use a linked timeout to abort the futex request at a given time, if desired.
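+.PP
+A minimal sketch of a wait with a one second linked timeout (error handling
+omitted; assumes
+.I ring
+is an initialized ring and
+.I futex_word
+is a
+.B uint32_t
+shared with the waking side):
+.PP
+.in +4n
+.EX
+struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* block as long as futex_word still contains the value 0 */
+io_uring_prep_futex_wait(sqe, &futex_word, 0,
+                         FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0);
+sqe->flags |= IOSQE_IO_LINK;
+
+sqe = io_uring_get_sqe(&ring);
+io_uring_prep_link_timeout(sqe, &ts, 0);
+io_uring_submit(&ring);
+.EE
+.in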
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_prep_futex_waitv (3),
+.BR io_uring_prep_futex_wake (3),
+.BR io_uring_prep_link_timeout (3),
+.BR futex (2),
+.BR futex2 (2)
diff --git a/man/io_uring_prep_futex_waitv.3 b/man/io_uring_prep_futex_waitv.3
new file mode 100644
index 0000000..c4baf5a
--- /dev/null
+++ b/man/io_uring_prep_futex_waitv.3
@@ -0,0 +1,78 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_futex_waitv 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_futex_waitv \- prepare a futex waitv request
+.SH SYNOPSIS
+.nf
+.B #include <linux/futex.h>
+.B #include <unistd.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_futex_waitv(struct io_uring_sqe *" sqe ","
+.BI "                               struct futex_waitv *" futexv ","
+.BI "                               uint32_t " nr_futex ","
+.BI "                               unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_futex_waitv (3)
+function prepares a futex wait request for multiple futexes at the same time.
+The submission queue entry
+.I sqe
+is setup for waiting on all futexes given by
+.I futexv
+and
+.I nr_futex
+is the number of futexes in that array.
+.I flags
+must be set to the io_uring specific futex flags.
+
+Unlike
+.BR io_uring_prep_futex_wait (3),
+the desired bitset mask and values are passed in
+.IR futexv .
+
+.I flags
+are currently unused and hence
+.B 0
+must be passed.
+
+This function prepares an async
+.BR futex (2)
+waitv request. See that man page for details.
+
+Available since kernel 6.7.
+
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH NOTES
+Unlike the sync futex syscalls that wait on a futex, io_uring does not support
+passing in a timeout for the request. Instead, applications are encouraged
+to use a linked timeout to abort the futex request at a given time, if desired.
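+.PP
+A minimal sketch of waiting on two futex words at once (error handling
+omitted; assumes
+.I ring
+is an initialized ring):
+.PP
+.in +4n
+.EX
+struct futex_waitv fw[2] = {
+    { .val = 0, .uaddr = (uintptr_t) &word1, .flags = FUTEX2_SIZE_U32 },
+    { .val = 0, .uaddr = (uintptr_t) &word2, .flags = FUTEX2_SIZE_U32 },
+};
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_futex_waitv(sqe, fw, 2, 0);
+io_uring_submit(&ring);
+/* on wakeup, cqe->res holds the index of the woken futex */
+.EE
+.in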
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_prep_futex_wait (3),
+.BR io_uring_prep_futex_wake (3),
+.BR io_uring_prep_link_timeout (3),
+.BR futex (2),
+.BR futex2 (2)
diff --git a/man/io_uring_prep_futex_wake.3 b/man/io_uring_prep_futex_wake.3
new file mode 100644
index 0000000..1f52b11
--- /dev/null
+++ b/man/io_uring_prep_futex_wake.3
@@ -0,0 +1,86 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_futex_wake 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_futex_wake \- prepare a futex wake request
+.SH SYNOPSIS
+.nf
+.B #include <linux/futex.h>
+.B #include <unistd.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_futex_wake(struct io_uring_sqe *" sqe ","
+.BI "                              uint32_t *" futex ","
+.BI "                              uint64_t " val ","
+.BI "                              uint64_t " mask ","
+.BI "                              uint32_t " futex_flags ","
+.BI "                              unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_futex_wake (3)
+function prepares a futex wake request. The submission queue entry
+.I sqe
+is setup to wake at most
+.I val
+waiters on the futex indicated by
+.IR futex .
+.I futex_flags
+indicates the
+.BR futex2 (2)
+modifier flags, and
+.I flags
+the io_uring futex flags.
+
+If waking only waiters matching a given bitset is desired, then that bitset
+must be set in
+.IR mask .
+Use
+.B FUTEX_BITSET_MATCH_ANY
+to match any waiter on the given futex.
+
+.I flags
+are currently unused and hence
+.B 0
+must be passed.
+
+This function prepares an async
+.BR futex (2)
+wake request. See that man page for details. Note that the io_uring futex
+wake request is similar to the
+.B FUTEX_WAKE_BITSET
+operation, as
+.B FUTEX_WAKE
+is a strict subset of that.
+
+Available since kernel 6.7.
+
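+A minimal sketch of waking a single waiter (error handling omitted; assumes
+.I ring
+is an initialized ring and
+.I futex_word
+is the futex being waited on):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* wake at most one waiter, matching any bitset */
+io_uring_prep_futex_wake(sqe, &futex_word, 1,
+                         FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0);
+io_uring_submit(&ring);
+.EE
+.in
+.PP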
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. On success, the value will be
+the number of waiters that were woken up. See the related man page for details
+on possible error values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_prep_futex_wait (3),
+.BR io_uring_prep_futex_waitv (3),
+.BR futex (2),
+.BR futex2 (2)
diff --git a/man/io_uring_prep_getxattr.3 b/man/io_uring_prep_getxattr.3
new file mode 100644
index 0000000..6127405
--- /dev/null
+++ b/man/io_uring_prep_getxattr.3
@@ -0,0 +1,61 @@
+.\" Copyright (C) 2023 Rutvik Patel <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_getxattr 3 "January 23, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_prep_getxattr, io_uring_prep_fgetxattr \- prepare a request to get an 
+extended attribute value
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_getxattr(struct io_uring_sqe *" sqe ","
+.BI "                            const char *" name ","
+.BI "                            char *" value ","
+.BI "                            const char *" path ","
+.BI "                            unsigned int " len ");"
+.PP
+.BI "void io_uring_prep_fgetxattr(struct io_uring_sqe *" sqe ","
+.BI "                             int " fd ","
+.BI "                             const char *" name ","
+.BI "                             char *" value ","
+.BI "                             unsigned int " len ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_getxattr (3)
+function prepares a request to get an extended attribute value. The submission 
+queue entry
+.I sqe
+is setup to get the
+.I value
+of the extended attribute identified by
+.I name
+and associated with the given
+.I path
+in the filesystem.
+The
+.I len
+argument specifies the size (in bytes) of
+.IR value .
+
+.BR io_uring_prep_fgetxattr (3)
+is identical to 
+.BR io_uring_prep_getxattr (3),
+only the open file referred to by
+.I fd
+is interrogated in place of
+.IR path .
+
+This function prepares an async 
+.BR getxattr (2)
+request. See that man page for details.
+
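+A minimal sketch of fetching the hypothetical
+.B user.comment
+attribute (error handling omitted; assumes
+.I ring
+is an initialized ring):
+.PP
+.in +4n
+.EX
+char value[256];
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_getxattr(sqe, "user.comment", value,
+                       "/tmp/testfile", sizeof(value));
+io_uring_submit(&ring);
+/* on success, cqe->res is the size of the attribute value */
+.EE
+.in
+.PP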
+.SH RETURN VALUE
+None
+
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR getxattr (2)
diff --git a/man/io_uring_prep_link_timeout.3 b/man/io_uring_prep_link_timeout.3
new file mode 100644
index 0000000..a4c6c08
--- /dev/null
+++ b/man/io_uring_prep_link_timeout.3
@@ -0,0 +1,98 @@
+.\" Copyright (C) 2023 Rutvik Patel <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_link_timeout 3 "January 23, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_prep_link_timeout \- prepare a timeout request for linked sqes
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_link_timeout(struct io_uring_sqe *" sqe ","
+.BI "                                struct __kernel_timespec *" ts ","
+.BI "                                unsigned " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_link_timeout (3)
+function prepares a timeout request for linked sqes. The submission queue entry
+.I sqe
+is setup with the timeout specified by
+.IR ts .
+The
+.I flags
+argument holds modifier flags for the timeout behaviour of the request.
+
+The
+.I ts
+argument must be filled in with the appropriate information for the timeout. It
+looks as follows:
+.PP
+.in +4n
+.EX
+struct __kernel_timespec {
+    __kernel_time64_t tv_sec;
+    long long tv_nsec;
+};
+.EE
+.in
+.PP
+
+The
+.I flags
+argument may contain:
+.TP
+.B IORING_TIMEOUT_ABS
+The value specified in
+.I ts
+is an absolute value rather than a relative one.
+.TP
+.B IORING_TIMEOUT_BOOTTIME
+The boottime clock source should be used.
+.TP
+.B IORING_TIMEOUT_REALTIME
+The realtime clock source should be used.
+.TP
+.B IORING_TIMEOUT_ETIME_SUCCESS
+Consider an expired timeout a success in terms of the posted completion.
+.PP
+
+It is invalid to create a chain (linked sqes) consisting only of a link timeout
+request. If all the requests in the chain are completed before the timeout,
+then the
+link timeout request gets canceled. Upon timeout, all the uncompleted requests
+in the chain get canceled.
+
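+As a brief sketch, a read that is aborted if it takes longer than one second
+(error handling omitted; assumes
+.I ring
+is an initialized ring,
+.I fd
+is a readable descriptor and
+.I buf
+is a suitably sized buffer):
+.PP
+.in +4n
+.EX
+struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
+sqe->flags |= IOSQE_IO_LINK;
+
+/* the timeout applies to the read it is linked to */
+sqe = io_uring_get_sqe(&ring);
+io_uring_prep_link_timeout(sqe, &ts, 0);
+io_uring_submit(&ring);
+.EE
+.in
+.PP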
+.SH RETURN VALUE
+None
+
+.SH ERRORS
+.PP
+These are the errors that are reported in the CQE
+.I res
+field. On success,
+.B 0
+is returned.
+.TP
+.B -ETIME
+The specified timeout occurred and triggered the completion event.
+.TP
+.B -EINVAL
+One of the fields set in the SQE was invalid. For example, two clock sources
+were given, or the specified timeout seconds or nanoseconds were < 0.
+.TP
+.B -EFAULT
+io_uring was unable to access the data specified by
+.IR ts .
+.TP
+.B -ECANCELED
+The timeout was canceled because all submitted requests were completed successfully
+or one of the requests resulted in failure.
+.TP
+.B -ENOENT
+The request to which the linked timeout was linked already completed and could
+not be found when the timer expired.
+
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_prep_timeout (3)
diff --git a/man/io_uring_prep_listen.3 b/man/io_uring_prep_listen.3
new file mode 100644
index 0000000..b765298
--- /dev/null
+++ b/man/io_uring_prep_listen.3
@@ -0,0 +1,52 @@
+.\" Copyright (C) 2024 SUSE LLC.
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_listen 3 "Jun 3, 2024" "liburing-2.7" "liburing Manual"
+.SH NAME
+io_uring_prep_listen \- prepare a listen request
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_listen(struct io_uring_sqe *" sqe ","
+.BI "                          int " sockfd ","
+.BI "                          int" backlog ");"
+.fi
+.SH DESCRIPTION
+The
+.BR io_uring_prep_listen (3)
+function prepares a listen request. The submission queue entry
+.I sqe
+is setup to place the socket file descriptor
+.I sockfd
+into a state that accepts incoming connections. The
+.I backlog
+parameter defines the maximum length of the queue of pending connections.
+
+This function prepares an async
+.BR listen (2)
+request. See that man page for details.
+
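+A minimal sketch (error handling omitted; assumes
+.I ring
+is an initialized ring and
+.I sockfd
+is a bound socket):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+/* allow up to 32 pending connections */
+io_uring_prep_listen(sqe, sockfd, 32);
+io_uring_submit(&ring);
+.EE
+.in
+.PP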
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR listen (2)
diff --git a/man/io_uring_prep_madvise.3 b/man/io_uring_prep_madvise.3
index 6c5f16b..664a020 100644
--- a/man/io_uring_prep_madvise.3
+++ b/man/io_uring_prep_madvise.3
@@ -12,8 +12,13 @@
 .PP
 .BI "void io_uring_prep_madvise(struct io_uring_sqe *" sqe ","
 .BI "                           void *" addr ","
-.BI "                           off_t " len ","
+.BI "                           __u32 " len ","
 .BI "                           int " advice ");"
+.BI "
+.BI "void io_uring_prep_madvise64(struct io_uring_sqe *" sqe ","
+.BI "                             void *" addr ","
+.BI "                             __u64 " len ","
+.BI "                             int " advice ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -28,6 +33,17 @@
 length in bytes, giving it the advice located in
 .IR advice .
 
+The
+.BR io_uring_prep_madvise64 (3)
+function works like
+.BR io_uring_prep_madvise (3)
+except that it takes a 64-bit length rather than a 32-bit one. Older
+kernels may not support the 64-bit length variant. If this variant is used
+on a kernel that doesn't support 64-bit lengths, then the request will fail
+with
+.B -EINVAL
+in the
+.I res
+field of the CQE.
+
 This function prepares an async
 .BR madvise (2)
 request. See that man page for details.
diff --git a/man/io_uring_prep_madvise64.3 b/man/io_uring_prep_madvise64.3
new file mode 120000
index 0000000..1a368ee
--- /dev/null
+++ b/man/io_uring_prep_madvise64.3
@@ -0,0 +1 @@
+io_uring_prep_madvise.3
\ No newline at end of file
diff --git a/man/io_uring_prep_msg_ring.3 b/man/io_uring_prep_msg_ring.3
index 9cf3444..70caebe 100644
--- a/man/io_uring_prep_msg_ring.3
+++ b/man/io_uring_prep_msg_ring.3
@@ -14,11 +14,18 @@
 .BI "                        unsigned int " len ","
 .BI "                        __u64 " data ","
 .BI "                        unsigned int " flags ");"
+.PP
+.BI "void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *" sqe ","
+.BI "                        int " fd ","
+.BI "                        unsigned int " len ","
+.BI "                        __u64 " data ","
+.BI "                        unsigned int " flags ","
+.BI "                        unsigned int " cqe_flags ");"
 .fi
 .SH DESCRIPTION
 .PP
 .BR io_uring_prep_msg_ring (3)
-prepares a to send a CQE to an io_uring file descriptor. The submission queue
+prepares to send a CQE to an io_uring file descriptor. The submission queue
 entry
 .I sqe
 is setup to use the file descriptor
@@ -47,6 +54,18 @@
 on the targeted ring, or it can be used to pass messages between the two
 rings.
 
+.BR io_uring_prep_msg_ring_cqe_flags (3)
+is similar to
+.BR io_uring_prep_msg_ring (3),
+but has an additional
+.I cqe_flags
+parameter, which is used to set the
+.I flags
+field of the posted CQE. That way, the application can set the CQE flags field
+.I cqe->flags
+when sending a message. Be aware that io_uring could potentially set additional
+bits in this field.
+
 .SH RETURN VALUE
 None
 
@@ -64,7 +83,8 @@
 .B -EBADFD
 The descriptor passed in
 .I fd
-does not refer to an io_uring file descriptor.
+does not refer to an io_uring file descriptor, or the ring is in a disabled
+state.
 .TP
 .B -EOVERFLOW
 The kernel was unable to fill a CQE on the target ring. This can happen if
diff --git a/man/io_uring_prep_msg_ring_cqe_flags.3 b/man/io_uring_prep_msg_ring_cqe_flags.3
new file mode 120000
index 0000000..c96663b
--- /dev/null
+++ b/man/io_uring_prep_msg_ring_cqe_flags.3
@@ -0,0 +1 @@
+io_uring_prep_msg_ring.3
\ No newline at end of file
diff --git a/man/io_uring_prep_msg_ring_fd.3 b/man/io_uring_prep_msg_ring_fd.3
new file mode 100644
index 0000000..c3a32fd
--- /dev/null
+++ b/man/io_uring_prep_msg_ring_fd.3
@@ -0,0 +1,83 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_msg_ring_fd 3 "Mar 16, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_prep_msg_ring_fd \- send a direct descriptor to another ring
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_msg_ring_fd(struct io_uring_sqe *" sqe ","
+.BI "                        int " fd ","
+.BI "                        int " source_fd ","
+.BI "                        int " target_fd ","
+.BI "                        __u64 " data ","
+.BI "                        unsigned int " flags ");"
+.PP
+.BI "void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *" sqe ","
+.BI "                        int " fd ","
+.BI "                        int " source_fd ","
+.BI "                        __u64 " data ","
+.BI "                        unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+.BR io_uring_prep_msg_ring_fd (3)
+prepares an SQE to send a direct file descriptor to another ring. The submission
+queue entry
+.I sqe
+is setup to use the file descriptor
+.IR fd ,
+which must identify a target io_uring context, to send the locally registered
+file descriptor with value
+.I source_fd
+to the destination ring into index
+.I target_fd
+and passing
+.I data
+as the user data in the target CQE with the request modifier flags set by
+.IR flags .
+Currently there are no valid flag modifiers; this field must contain
+.BR 0 .
+
+.BR io_uring_prep_msg_ring_fd_alloc (3)
+is similar to
+.BR io_uring_prep_msg_ring_fd (3) ,
+but doesn't specify a target index for the direct descriptor. Instead, this
+index is allocated in the target ring and returned in the CQE
+.IR res
+field.
+
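+A minimal sketch of passing a registered file between rings (error handling
+omitted; assumes both rings are initialized and
+.I src_ring
+has a file registered at index 0):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&src_ring);
+
+/* send registered file 0, letting the target allocate a slot */
+io_uring_prep_msg_ring_fd_alloc(sqe, dst_ring.ring_fd, 0, 0x1234, 0);
+io_uring_submit(&src_ring);
+.EE
+.in
+.PP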
+.SH RETURN VALUE
+None
+
+.SH ERRORS
+These are the errors that are reported in the CQE
+.I res
+field.
+.TP
+.B -ENOMEM
+The kernel was unable to allocate memory for the request.
+.TP
+.B -EINVAL
+One of the fields set in the SQE was invalid.
+.TP
+.B -EINVAL
+Target ring is identical to the source ring.
+.TP
+.B -EBADFD
+The descriptor passed in
+.I fd
+does not refer to an io_uring file descriptor, or the ring is in a disabled
+state.
+.TP
+.B -EOVERFLOW
+The kernel was unable to fill a CQE on the target ring. This can happen if
+the target CQ ring is in an overflow state and the kernel wasn't able to
+allocate memory for a new CQE entry.
+.TP
+.B -ENFILE
+The direct descriptor table in the target ring was full, no new descriptors
+could be successfully allocated.
diff --git a/man/io_uring_prep_msg_ring_fd_alloc.3 b/man/io_uring_prep_msg_ring_fd_alloc.3
new file mode 120000
index 0000000..a3a7731
--- /dev/null
+++ b/man/io_uring_prep_msg_ring_fd_alloc.3
@@ -0,0 +1 @@
+io_uring_prep_msg_ring_fd.3
\ No newline at end of file
diff --git a/man/io_uring_prep_nop.3 b/man/io_uring_prep_nop.3
new file mode 100644
index 0000000..81853d7
--- /dev/null
+++ b/man/io_uring_prep_nop.3
@@ -0,0 +1,28 @@
+.\" Copyright (C) 2022 Samuel Williams
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_nop 3 "October 20, 2022" "liburing-2.2" "liburing Manual"
+.SH NAME
+io_uring_prep_nop \- prepare a nop request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_nop(struct io_uring_sqe *" sqe ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_nop (3)
+function prepares a nop (no operation) request. The submission queue entry
+.I sqe
+does not require any additional setup.
+
+.SH RETURN VALUE
+None
+.SH ERRORS
+None
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3)
diff --git a/man/io_uring_prep_open.3 b/man/io_uring_prep_open.3
new file mode 120000
index 0000000..67f501e
--- /dev/null
+++ b/man/io_uring_prep_open.3
@@ -0,0 +1 @@
+io_uring_prep_openat.3
\ No newline at end of file
diff --git a/man/io_uring_prep_open_direct.3 b/man/io_uring_prep_open_direct.3
new file mode 120000
index 0000000..67f501e
--- /dev/null
+++ b/man/io_uring_prep_open_direct.3
@@ -0,0 +1 @@
+io_uring_prep_openat.3
\ No newline at end of file
diff --git a/man/io_uring_prep_openat.3 b/man/io_uring_prep_openat.3
index e8b4217..071a9f8 100644
--- a/man/io_uring_prep_openat.3
+++ b/man/io_uring_prep_openat.3
@@ -12,6 +12,17 @@
 .B #include <fcntl.h>
 .B #include <liburing.h>
 .PP
+.BI "void io_uring_prep_open(struct io_uring_sqe *" sqe ","
+.BI "                          const char *" path ","
+.BI "                          int " flags ","
+.BI "                          mode_t " mode ");"
+.PP
+.BI "void io_uring_prep_open_direct(struct io_uring_sqe *" sqe ","
+.BI "                                 const char *" path ","
+.BI "                                 int " flags ","
+.BI "                                 mode_t " mode ","
+.BI "                                 unsigned " file_index ");"
+.PP
 .BI "void io_uring_prep_openat(struct io_uring_sqe *" sqe ","
 .BI "                          int " dfd ","
 .BI "                          const char *" path ","
@@ -39,33 +50,21 @@
 .I flags
 and using the file mode bits specified in
 .IR mode .
-
-For a direct descriptor open request, the offset is specified by the
-.I file_index
-argument. Direct descriptors are io_uring private file descriptors. They
-avoid some of the overhead associated with thread shared file tables, and
-can be used in any io_uring request that takes a file descriptor. To do so,
-.B IOSQE_FIXED_FILE
-must be set in the SQE
-.I flags
-member, and the SQE
-.I fd
-field should use the direct descriptor value rather than the regular file
-descriptor. Direct descriptors are managed like registered files.
+Similarly,
+.BR io_uring_prep_open (3)
+prepares an open request.
 
 If the direct variant is used, the application must first have registered
 a file table using
 .BR io_uring_register_files (3)
 of the appropriate size. Once registered, a direct open request may use any
-entry in that table, as long as it is within the size of the registered table.
+entry in that table, specified in
+.IR file_index ,
+as long as it is within the size of the registered table.
 If a specified entry already contains a file, the file will first be removed
 from the table and closed. It's consistent with the behavior of updating an
 existing file with
 .BR io_uring_register_files_update (3).
-Note that old kernels don't check the SQE
-.I file_index
-field, which is not a problem for liburing helpers, but users of the raw
-io_uring interface need to zero SQEs to avoid unexpected behavior.
 
 If
 .B IORING_FILE_INDEX_ALLOC
@@ -79,8 +78,30 @@
 .B -ENFILE
 is returned instead.
 
+Direct descriptors are io_uring private file descriptors. They
+avoid some of the overhead associated with thread shared file tables, and
+can be used in any subsequent io_uring request that takes a file descriptor. To do so,
+.B IOSQE_FIXED_FILE
+must be set in the SQE
+.I flags
+member, and the SQE
+.I fd
+field should use the direct descriptor value rather than the regular file
+descriptor. Direct descriptors are managed like registered files.
+
+The directory file descriptor
+.I dfd
+is always a regular file descriptor.
+
+Note that old kernels don't check the SQE
+.I file_index
+field, which is not a problem for liburing helpers, but users of the raw
+io_uring interface need to zero SQEs to avoid unexpected behavior.
+
 These functions prepare an async
 .BR openat (2)
+or
+.BR open (2)
 request. See that man page for details.
 
 .SH RETURN VALUE
diff --git a/man/io_uring_prep_openat2.3 b/man/io_uring_prep_openat2.3
index 338cf7e..109c381 100644
--- a/man/io_uring_prep_openat2.3
+++ b/man/io_uring_prep_openat2.3
@@ -41,32 +41,18 @@
 and using the instructions on how to open the file given in
 .IR how .
 
-For a direct descriptor open request, the offset is specified by the
-.I file_index
-argument. Direct descriptors are io_uring private file descriptors. They
-avoid some of the overhead associated with thread shared file tables, and
-can be used in any io_uring request that takes a file descriptor. To do so,
-.B IOSQE_FIXED_FILE
-must be set in the SQE
-.I flags
-member, and the SQE
-.I fd
-field should use the direct descriptor value rather than the regular file
-descriptor. Direct descriptors are managed like registered files.
-
 If the direct variant is used, the application must first have registered
 a file table using
 .BR io_uring_register_files (3)
-of the appropriate size. Once registered, a direct accept request may use any
-entry in that table, as long as it is within the size of the registered table.
-If a specified entry already contains a file, the file will first be removed
+of the appropriate size. Once registered, a direct request may use any
+entry in that table, specified in
+.IR file_index ,
+as long as it is within the size of the registered table.
+If the specified entry already contains a file, the file will first be removed
 from the table and closed. It's consistent with the behavior of updating an
 existing file with
 .BR io_uring_register_files_update (3).
-Note that old kernels don't check the SQE
-.I file_index
-field, which is not a problem for liburing helpers, but users of the raw
-io_uring interface need to zero SQEs to avoid unexpected behavior.
+
 If
 .B IORING_FILE_INDEX_ALLOC
 is used as the
@@ -79,6 +65,26 @@
 .B -ENFILE
 is returned instead.
 
+Direct descriptors are io_uring private file descriptors. They
+avoid some of the overhead associated with thread shared file tables, and
+can be used in any subsequent io_uring request that takes a file descriptor. To do so,
+.B IOSQE_FIXED_FILE
+must be set in the SQE
+.I flags
+member, and the SQE
+.I fd
+field should use the direct descriptor value rather than the regular file
+descriptor. Direct descriptors are managed like registered files.
+
+The directory file descriptor
+.I dfd
+is always a regular file descriptor.
+
+Note that old kernels don't check the SQE
+.I file_index
+field, which is not a problem for liburing helpers, but users of the raw
+io_uring interface need to zero SQEs to avoid unexpected behavior.
+
 These functions prepare an async
 .BR openat2 (2)
 request. See that man page for details.
diff --git a/man/io_uring_prep_poll_add.3 b/man/io_uring_prep_poll_add.3
index cb60878..82539b9 100644
--- a/man/io_uring_prep_poll_add.3
+++ b/man/io_uring_prep_poll_add.3
@@ -33,8 +33,8 @@
 The default behavior is a single-shot poll request. When the specified event
 has triggered, a completion CQE is posted and no more events will be generated
 by the poll request.
-.BR io_uring_prep_multishot (3)
-behaves identically in terms of events, but it persist across notifications
+.BR io_uring_prep_poll_multishot (3)
+behaves identically in terms of events, but it persists across notifications
 and will repeatedly post notifications for the same registration. A CQE
 posted from a multishot poll request will have
 .B IORING_CQE_F_MORE
diff --git a/man/io_uring_prep_poll_update.3 b/man/io_uring_prep_poll_update.3
index 11f6346..41cf776 100644
--- a/man/io_uring_prep_poll_update.3
+++ b/man/io_uring_prep_poll_update.3
@@ -40,7 +40,19 @@
 If set, the poll update request will replace the existing events being waited
 for with the ones specified in the
 .I poll_mask
-argument to the function.
+argument to the function. Note that only the lower 16 bits of events can
+be updated. This includes things like
+.B EPOLLIN
+and
+.B EPOLLOUT .
+Higher order masks/settings are included as internal state, and cannot be
+modified. That includes settings like
+.B EPOLLONESHOT ,
+.B EPOLLEXCLUSIVE ,
+and
+.B EPOLLET .
+If an application wishes to modify these, it must cancel/remove the existing
+poll request and arm a new one.
 .TP
 .B IORING_POLL_UPDATE_USER_DATA
 If set, the poll update request will update the existing user_data of the
diff --git a/man/io_uring_prep_provide_buffers.3 b/man/io_uring_prep_provide_buffers.3
index f3dded9..ae51c1d 100644
--- a/man/io_uring_prep_provide_buffers.3
+++ b/man/io_uring_prep_provide_buffers.3
@@ -24,8 +24,10 @@
 submission queue entry
 .I sqe
 is setup to consume
+.I nr
+buffers of
 .I len
-number of buffers starting at
+bytes each, starting at
 .I addr
 and identified by the buffer group ID of
 .I bgid
@@ -68,6 +70,18 @@
 The application must be aware of this to make sense of the buffer ID passed
 back in the CQE.
 
+Buffer IDs always range from
+.B 0
+to
+.BR 65535 ,
+as there are only 16 bits available in the CQE to pass them back. This range
+is independent of how the buffer group initially got created. Attempting to
+add buffer IDs larger than that, or buffer IDs that will wrap when cast to
+a 16-bit value, will cause the request to fail with
+.B -E2BIG
+or
+.B -EINVAL .
+
 Not all requests support buffer selection, as it only really makes sense for
 requests that receive data from the kernel rather than write or provide data.
 Currently, this mode of operation is supported for any file read or socket
@@ -88,8 +102,9 @@
 .I res
 field. On success,
 .I res
-will contain the number of successfully provided buffers. On error,
-the following errors can occur.
+will contain
+.B 0
+or the number of successfully provided buffers.
 .TP
 .B -ENOMEM
 The kernel was unable to allocate memory for the request.
@@ -107,12 +122,6 @@
 .B -EFAULT
 Some of the user memory given was invalid for the application.
 .TP
-.B -EBADF
-On of the descriptors located in
-.I fds
-didn't refer to a valid file descriptor, or one of the file descriptors in
-the array referred to an io_uring instance.
-.TP
 .B -EOVERFLOW
 The product of
 .I len
diff --git a/man/io_uring_prep_read.3 b/man/io_uring_prep_read.3
index faec35f..a763608 100644
--- a/man/io_uring_prep_read.3
+++ b/man/io_uring_prep_read.3
@@ -40,7 +40,7 @@
 to the file is serialized. It is not encouraged to use this feature, if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
 After the read has been prepared it can be submitted with one of the submit
 functions.
diff --git a/man/io_uring_prep_read_fixed.3 b/man/io_uring_prep_read_fixed.3
index d3726f2..523685d 100644
--- a/man/io_uring_prep_read_fixed.3
+++ b/man/io_uring_prep_read_fixed.3
@@ -42,7 +42,7 @@
 .I buf
 and
 .I nbytes
-arguments must fall within a region specificed by
+arguments must fall within a region specified by
 .I buf_index
 in the previously registered buffer. The buffer need not be aligned with
 the start of the registered buffer.
diff --git a/man/io_uring_prep_read_multishot.3 b/man/io_uring_prep_read_multishot.3
new file mode 100644
index 0000000..2995357
--- /dev/null
+++ b/man/io_uring_prep_read_multishot.3
@@ -0,0 +1,100 @@
+.\" Copyright (C) 2023 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_read_multishot 3 "September 12, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_read_multishot \- prepare I/O read multishot request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_read_multishot(struct io_uring_sqe *" sqe ","
+.BI "                                  int " fd ","
+.BI "                                  unsigned " nbytes ","
+.BI "                                  __u64 " offset ","
+.BI "                                  int " buf_group ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_read_multishot (3)
+helper prepares an IO read multishot request. The submission queue entry
+.I sqe
+is setup to use the file descriptor
+.I fd
+to start reading
+into a buffer from the provided buffer group with ID
+.I buf_group
+at the specified
+.IR offset .
+
+.I nbytes
+must be set to zero, as the size read will be given by the size of the
+buffers in the indicated buffer group.
+
+On files that are not capable of seeking, the offset must be 0 or -1.
+
+If
+.I nbytes
+exceeds the size of the buffers in the specified buffer group, or if
+.I nbytes
+is
+.BR 0 ,
+then the size of the buffer in that group will be used for the transfer.
+
+A multishot read request will repeatedly trigger a completion event
+whenever data is available to read from the file. Because of that,
+this type of request can only be used with a file type that is pollable.
+Examples of that include pipes, tun devices, etc. If used with a regular
+file, or a wrong file type in general, the request will fail with
+.B -EBADFD
+in the CQE
+.I res
+field.
+
+Since multishot requests repeatedly trigger completion events as data
+arrives, it must be used with provided buffers. With provided buffers, the
+application provides buffers to io_uring upfront, and then the kernel picks
+a buffer from the specified group in
+.I buf_group
+when the request is ready to transfer data.
+
+A multishot request will persist as long as no errors are encountered
+while handling the request. For each CQE posted on behalf of this request,
+the CQE
+.I flags
+will have
+.B IORING_CQE_F_MORE
+set if the application should expect more completions from this request.
+If this flag isn't set, then that signifies termination of the multishot
+read request.
+
+After the read has been prepared it can be submitted with one of the submit
+functions.
+
+Available since kernel 6.7.
+
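+A minimal sketch of arming a multishot read on a pipe (error handling
+omitted; assumes
+.I ring
+is an initialized ring,
+.I pipefd
+is the read end of a pipe and
+.I bufs
+is an array of eight 4096-byte buffers):
+.PP
+.in +4n
+.EX
+struct io_uring_buf_ring *br;
+struct io_uring_sqe *sqe;
+int ret;
+
+/* register 8 provided buffers under buffer group ID 0 */
+br = io_uring_setup_buf_ring(&ring, 8, 0, 0, &ret);
+for (int i = 0; i < 8; i++)
+    io_uring_buf_ring_add(br, bufs[i], 4096, i,
+                          io_uring_buf_ring_mask(8), i);
+io_uring_buf_ring_advance(br, 8);
+
+sqe = io_uring_get_sqe(&ring);
+io_uring_prep_read_multishot(sqe, pipefd, 0, 0, 0);
+io_uring_submit(&ring);
+.EE
+.in
+.PP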
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_prep_read (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
+.BR io_uring_submit (3)
diff --git a/man/io_uring_prep_readv.3 b/man/io_uring_prep_readv.3
index ea7afd5..81d8fad 100644
--- a/man/io_uring_prep_readv.3
+++ b/man/io_uring_prep_readv.3
@@ -41,9 +41,9 @@
 to the file is serialized. It is not encouraged to use this feature, if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
-After the write has been prepared it can be submitted with one of the submit
+After the read has been prepared it can be submitted with one of the submit
 functions.
 
 .SH RETURN VALUE
@@ -64,8 +64,8 @@
 .I res
 field.
 .SH NOTES
-Unless an application explicitly needs to pass in more than iovec, it is more
-efficient to use
+Unless an application explicitly needs to pass in more than one iovec, it
+is more efficient to use
 .BR io_uring_prep_read (3)
 rather than this function, as no state has to be maintained for a
 non-vectored IO request.
diff --git a/man/io_uring_prep_readv2.3 b/man/io_uring_prep_readv2.3
index 171a699..842b787 100644
--- a/man/io_uring_prep_readv2.3
+++ b/man/io_uring_prep_readv2.3
@@ -67,9 +67,9 @@
 to the file is serialized. It is not encouraged to use this feature, if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
-After the write has been prepared, it can be submitted with one of the submit
+After the read has been prepared, it can be submitted with one of the submit
 functions.
 
 .SH RETURN VALUE
@@ -90,8 +90,8 @@
 .I res
 field.
 .SH NOTES
-Unless an application explicitly needs to pass in more than iovec, it is more
-efficient to use
+Unless an application explicitly needs to pass in more than one iovec, it
+is more efficient to use
 .BR io_uring_prep_read (3)
 rather than this function, as no state has to be maintained for a
 non-vectored IO request.
diff --git a/man/io_uring_prep_recv.3 b/man/io_uring_prep_recv.3
index 993e331..0fd74b2 100644
--- a/man/io_uring_prep_recv.3
+++ b/man/io_uring_prep_recv.3
@@ -14,6 +14,12 @@
 .BI "                        void *" buf ","
 .BI "                        size_t " len ","
 .BI "                        int " flags ");"
+.PP
+.BI "void io_uring_prep_recv_multishot(struct io_uring_sqe *" sqe ","
+.BI "                                  int " sockfd ","
+.BI "                                  void *" buf ","
+.BI "                                  size_t " len ","
+.BI "                                  int " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -24,10 +30,10 @@
 .I sqe
 is setup to use the file descriptor
 .I sockfd
-to start receiving the data into the buffer destination
+to start receiving the data into the destination buffer
 .I buf
 of size
-.I size
+.I len
 and with modifier flags
 .IR flags .
 
@@ -36,9 +42,25 @@
 request. See that man page for details on the arguments specified to this
 prep helper.
 
+The multishot version allows the application to issue a single receive request,
+which repeatedly posts a CQE when data is available. It requires the length to
+be 0, the
+.B IOSQE_BUFFER_SELECT
+flag to be set, and the
+.B MSG_WAITALL
+flag to not be set.
+Each CQE will then take a buffer out of a provided buffer pool for receiving.
+The application should check the flags of each CQE, regardless of its result.
+If a posted CQE does not have the
+.B IORING_CQE_F_MORE
+flag set, then the multishot receive is done and the application should issue a
+new request.
+Multishot variants are available since kernel 6.0.
+
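+A minimal sketch of arming a multishot receive (error handling omitted;
+assumes
+.I ring
+is an initialized ring and buffer group 0 has been populated with provided
+buffers):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
+sqe->flags |= IOSQE_BUFFER_SELECT;
+sqe->buf_group = 0;
+io_uring_submit(&ring);
+.EE
+.in
+.PP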
 After calling this function, additional io_uring internal modifier flags
 may be set in the SQE
-.I off
+.I ioprio
 field. The following flags are supported:
 .TP
 .B IORING_RECVSEND_POLL_FIRST
@@ -58,6 +80,32 @@
 .BR recvmsg (2)
 operation. If set, the socket still had data to be read after the operation
 completed. Both these flags are available since 5.19.
+
+.TP
+.B IORING_RECVSEND_BUNDLE
+If set and provided buffers are used with
+.BR IOSQE_BUFFER_SELECT ,
+the receive operation will attempt to fill multiple buffers rather than
+just pick a single buffer to fill. To receive multiple buffers in a single
+receive, the buffer group ID set in the SQE must be of the ring provided type.
+If set, the CQE
+.I res
+field indicates the total number of bytes received, and the buffer ID returned
+in the CQE
+.I flags
+field indicates the first buffer in the receive operation. The application must
+iterate from the indicated initial buffer ID until all
+.I res
+bytes have been seen to know which is the last buffer in the receive operation.
+The buffer IDs consumed will be contiguous from the starting ID, in the order
+in which they were added to the buffer ring used. Receiving in bundles can
+improve performance when more than one chunk of data is available to receive,
+by eliminating redundant round trips through the networking stack. Receive
+bundles may be used by both single shot and multishot receive operations. Note
+that, internally, bundles rely on the networking stack passing back how much
+data is left in the socket after the initial receive. This means that the
+initial receive may contain fewer buffers than what is available, with the
+followup receive(s) containing more buffers. Available since 6.10.
 .P
 
 .SH RETURN VALUE
@@ -80,4 +128,6 @@
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
 .BR recv (2)
diff --git a/man/io_uring_prep_recv_multishot.3 b/man/io_uring_prep_recv_multishot.3
new file mode 120000
index 0000000..71fe277
--- /dev/null
+++ b/man/io_uring_prep_recv_multishot.3
@@ -0,0 +1 @@
+io_uring_prep_recv.3
\ No newline at end of file
diff --git a/man/io_uring_prep_recvmsg.3 b/man/io_uring_prep_recvmsg.3
index 8c49411..a54e3fd 100644
--- a/man/io_uring_prep_recvmsg.3
+++ b/man/io_uring_prep_recvmsg.3
@@ -15,6 +15,11 @@
 .BI "                           int " fd ","
 .BI "                           struct msghdr *" msg ","
 .BI "                           unsigned " flags ");"
+.PP
+.BI "void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *" sqe ","
+.BI "                                     int " fd ","
+.BI "                                     struct msghdr *" msg ","
+.BI "                                     unsigned " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -37,9 +42,34 @@
 request. See that man page for details on the arguments specified to this
 prep helper.
 
+The multishot version allows the application to issue a single receive request,
+which repeatedly posts a CQE when data is available. It requires the
+.B IOSQE_BUFFER_SELECT
+flag to be set, and the
+.B MSG_WAITALL
+flag to not be set.
+Each CQE will then take a buffer out of a provided buffer pool for receiving.
+The application should check the flags of each CQE, regardless of its result.
+If a posted CQE does not have the
+.B IORING_CQE_F_MORE
+flag set, then the multishot receive is done and the application should issue a
+new request.
+
+Unlike
+.BR recvmsg (2),
+multishot recvmsg will prepend a
+.I struct io_uring_recvmsg_out
+which describes the layout of the rest of the buffer in combination with the initial
+.I struct msghdr
+submitted with the request. See
+.BR io_uring_recvmsg_out (3)
+for more information on accessing the data.
+
+Multishot variants are available since kernel 6.0.
+
 After calling this function, additional io_uring internal modifier flags
 may be set in the SQE
-.I off
+.I ioprio
 field. The following flags are supported:
 .TP
 .B IORING_RECVSEND_POLL_FIRST
@@ -91,4 +121,6 @@
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
 .BR recvmsg (2)
diff --git a/man/io_uring_prep_recvmsg_multishot.3 b/man/io_uring_prep_recvmsg_multishot.3
new file mode 120000
index 0000000..cd9566f
--- /dev/null
+++ b/man/io_uring_prep_recvmsg_multishot.3
@@ -0,0 +1 @@
+io_uring_prep_recvmsg.3
\ No newline at end of file
diff --git a/man/io_uring_prep_renameat.3 b/man/io_uring_prep_renameat.3
index 1fc9e01..08d4a46 100644
--- a/man/io_uring_prep_renameat.3
+++ b/man/io_uring_prep_renameat.3
@@ -16,12 +16,12 @@
 .BI "                            const char *" oldpath ","
 .BI "                            int " newdirfd ","
 .BI "                            const char *" newpath ","
-.BI "                            int " flags ");"
+.BI "                            unsigned int " flags ");"
 .PP
 .BI "void io_uring_prep_rename(struct io_uring_sqe *" sqe ","
 .BI "                          const char *" oldpath ","
 .BI "                          const char *" newpath ","
-.BI "                          int " flags ");"
+.BI "                          unsigned int " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
diff --git a/man/io_uring_prep_send.3 b/man/io_uring_prep_send.3
index 10c86ba..e61ea43 100644
--- a/man/io_uring_prep_send.3
+++ b/man/io_uring_prep_send.3
@@ -14,6 +14,19 @@
 .BI "                        const void *" buf ","
 .BI "                        size_t " len ","
 .BI "                        int " flags ");"
+.PP
+.BI "void io_uring_prep_sendto(struct io_uring_sqe *" sqe ","
+.BI "                          int " sockfd ","
+.BI "                          const void *" buf ","
+.BI "                          size_t " len ","
+.BI "                          int " flags ","
+.BI "                          const struct sockaddr *" addr ","
+.BI "                          socklen_t " addrlen ");"
+.PP
+.BI "void io_uring_prep_send_bundle(struct io_uring_sqe *" sqe ","
+.BI "                               int " sockfd ","
+.BI "                               size_t " len ","
+.BI "                               int " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -26,14 +39,87 @@
 to start sending the data from
 .I buf
 of size
-.I size
-and with modifier flags
+.I len
+bytes and with modifier flags
 .IR flags .
 
+Note that using
+.B IOSQE_IO_LINK
+with this request type requires the setting of
+.B MSG_WAITALL
+in the
+.I flags
+argument, as a short send isn't considered an error condition without
+that being set.
+
 This function prepares an async
 .BR send (2)
 request. See that man page for details.
 
+The
+.BR io_uring_prep_sendto (3)
+function prepares a sendto request. The submission queue entry
+.I sqe
+is setup to use the file descriptor
+.I sockfd
+to start sending the data from
+.I buf
+of size
+.I len
+bytes and with modifier flags
+.IR flags .
+The destination address is specified by
+.I addr
+and
+.I addrlen
+and must be a valid address for the socket type.
+
+This function prepares an async
+.BR sendto (2)
+request. See that man page for details.
+
+Both of the above send variants may be used with provided buffers, where rather
+than pass a buffer in directly with the request,
+.B IOSQE_BUFFER_SELECT
+is set in the SQE
+.I flags
+field, and additionally a buffer group ID is set in the SQE
+.I buf_group
+field. By using provided buffers with send requests, the application can
+prevent any kind of reordering of the outgoing data which can otherwise
+occur if the application has more than one send request inflight for a single
+socket. This provides better pipelining of data, where previously the app
+needed to manually serialize sends.
+
+The bundle version allows the application to issue a single send request,
+with a buffer group ID given in the SQE
+.I buf_group
+field, which keeps sending from that buffer group until it runs out of buffers.
+As with any other request using provided buffers,
+.B IOSQE_BUFFER_SELECT
+must also be set in the SQE
+.I flags
+before submission. Currently
+.I len
+must be given as
+.B 0
+otherwise the request will fail with
+.B -EINVAL
+as the result code. Future versions may allow setting
+.I len
+to limit the transfer size. A single CQE is posted for the send, with the
+result, on success, being how many bytes were sent. When used with provided
+buffers, send or send bundle will contain the starting buffer ID in the CQE
+.I flags
+field. The number of bytes sent starts from there, and will be in contiguous
+buffer IDs after that. Send bundle, and send with provided buffers in general,
+are available since kernel 6.10, and can be further identified by checking for
+the
+.B IORING_FEAT_SEND_BUF_SELECT
+flag returned when using
+.BR io_uring_queue_init_params (3)
+to set up the ring.
+
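+A minimal sketch of a send bundle (error handling omitted; assumes
+.I ring
+is an initialized ring and buffer group 0 has been populated with the data
+to send):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_send_bundle(sqe, sockfd, 0, 0);
+sqe->flags |= IOSQE_BUFFER_SELECT;
+sqe->buf_group = 0;
+io_uring_submit(&ring);
+.EE
+.in
+.PP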
 .SH RETURN VALUE
 None
 .SH ERRORS
@@ -54,4 +140,7 @@
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
 .BR send (2),
+.BR sendto (2)
diff --git a/man/io_uring_prep_send_bundle.3 b/man/io_uring_prep_send_bundle.3
new file mode 120000
index 0000000..ba85e68
--- /dev/null
+++ b/man/io_uring_prep_send_bundle.3
@@ -0,0 +1 @@
+io_uring_prep_send.3
\ No newline at end of file
diff --git a/man/io_uring_prep_send_set_addr.3 b/man/io_uring_prep_send_set_addr.3
new file mode 100644
index 0000000..5adcdbc
--- /dev/null
+++ b/man/io_uring_prep_send_set_addr.3
@@ -0,0 +1,38 @@
+.\" Copyright (C) 2023 Rutvik Patel <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_send_set_addr 3 "January 23, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_prep_send_set_addr \- set address details for send requests
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_send_set_addr(struct io_uring_sqe *" sqe ","
+.BI "                                 const struct sockaddr *" dest_addr ","
+.BI "                                 __u16 " addr_len ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_send_set_addr (3)
+function sets a socket destination address specified by
+.I dest_addr
+and its length using
+.I addr_len
+parameters. It can be used once 
+.I sqe
+is prepared using any of the
+.BR send (2)
+io_uring helpers. See man pages of
+.BR io_uring_prep_send (3)
+or
+.BR io_uring_prep_send_zc (3).
+.SH RETURN VALUE
+None
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_prep_send (3),
+.BR io_uring_prep_send_zc (3),
+.BR send (2)
diff --git a/man/io_uring_prep_send_zc.3 b/man/io_uring_prep_send_zc.3
new file mode 100644
index 0000000..f5ec5d2
--- /dev/null
+++ b/man/io_uring_prep_send_zc.3
@@ -0,0 +1,96 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_send_zc 3 "September 6, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_prep_send_zc \- prepare a zerocopy send request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_send_zc(struct io_uring_sqe *" sqe ","
+.BI "                           int " sockfd ","
+.BI "                           const void *" buf ","
+.BI "                           size_t " len ","
+.BI "                           int " flags ","
+.BI "                           unsigned " zc_flags ");"
+.PP
+.BI "void io_uring_prep_send_zc_fixed(struct io_uring_sqe *" sqe ","
+.BI "                                 int " sockfd ","
+.BI "                                 const void *" buf ","
+.BI "                                 size_t " len ","
+.BI "                                 int " flags ","
+.BI "                                 unsigned " zc_flags ");"
+.BI "                                 unsigned " buf_index ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_send_zc (3)
+function prepares a zerocopy send request. The submission queue entry
+.I sqe
+is setup to use the file descriptor
+.I sockfd
+to start sending the data from
+.I buf
+of size
+.I len
+bytes with send modifier flags
+.I flags
+and zerocopy modifier flags
+.IR zc_flags .
+
+The 
+.BR io_uring_prep_send_zc_fixed (3)
+works just like
+.BR io_uring_prep_send_zc (3)
+except it requires the use of buffers that have been registered with 
+.BR io_uring_register_buffers (3).
+The
+.I buf
+and
+.I len
+arguments must fall within a region specified by
+.I buf_index
+in the previously registered buffer. The buffer need not be aligned with the 
+start of the registered buffer.
+
+Note that using
+.B IOSQE_IO_LINK
+with this request type requires the setting of
+.B MSG_WAITALL
+in the
+.I flags
+argument, as a short send isn't considered an error condition without
+that being set.
+
+These functions prepare an async zerocopy
+.BR send (2)
+request. See that man page for details. For details on the zerocopy nature
+of it, see
+.BR io_uring_enter (2).
+
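+As an illustration, a minimal sketch of a zerocopy send, assuming a set up
+.I ring
+and a connected socket
+.IR sockfd .
+A zerocopy send posts a second completion once the kernel no longer needs
+the buffer:
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
+io_uring_submit(&ring);
+/* expect two CQEs: the send result (IORING_CQE_F_MORE set),
+ * then the buffer-release notification (IORING_CQE_F_NOTIF set) */
+.EE
+.in
+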
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_prep_send (3),
+.BR io_uring_enter (2),
+.BR send (2)
diff --git a/man/io_uring_prep_send_zc_fixed.3 b/man/io_uring_prep_send_zc_fixed.3
new file mode 120000
index 0000000..c66c84d
--- /dev/null
+++ b/man/io_uring_prep_send_zc_fixed.3
@@ -0,0 +1 @@
+io_uring_prep_send_zc.3
\ No newline at end of file
diff --git a/man/io_uring_prep_sendmsg.3 b/man/io_uring_prep_sendmsg.3
index bc81d91..7744a4c 100644
--- a/man/io_uring_prep_sendmsg.3
+++ b/man/io_uring_prep_sendmsg.3
@@ -15,6 +15,11 @@
 .BI "                           int " fd ","
 .BI "                           const struct msghdr *" msg ","
 .BI "                           unsigned " flags ");"
+.PP
+.BI "void io_uring_prep_sendmsg_zc(struct io_uring_sqe *" sqe ","
+.BI "                              int " fd ","
+.BI "                              const struct msghdr *" msg ","
+.BI "                              unsigned " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -32,6 +37,21 @@
 .I flags
 argument.
 
+The
+.BR io_uring_prep_sendmsg_zc (3)
+function accepts the same parameters as
+.BR io_uring_prep_sendmsg (3)
+but prepares a zerocopy sendmsg request.
+
+Note that using
+.B IOSQE_IO_LINK
+with this request type requires the setting of
+.B MSG_WAITALL
+in the
+.I flags
+argument, as a short send isn't considered an error condition without
+that being set.
+
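+As an illustration, a minimal sketch, assuming a set up
+.I ring
+and a connected socket
+.IR sockfd :
+.PP
+.in +4n
+.EX
+struct iovec iov = { .iov_base = buf, .iov_len = len };
+struct msghdr msg = { 0 };
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+msg.msg_iov = &iov;
+msg.msg_iovlen = 1;
+io_uring_prep_sendmsg_zc(sqe, sockfd, &msg, 0);
+io_uring_submit(&ring);
+.EE
+.in
+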
 This function prepares an async
 .BR sendmsg (2)
 request. See that man page for details.
@@ -66,4 +86,6 @@
 .SH SEE ALSO
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
 .BR sendmsg (2)
diff --git a/man/io_uring_prep_sendmsg_zc.3 b/man/io_uring_prep_sendmsg_zc.3
new file mode 120000
index 0000000..47599fb
--- /dev/null
+++ b/man/io_uring_prep_sendmsg_zc.3
@@ -0,0 +1 @@
+io_uring_prep_sendmsg.3
\ No newline at end of file
diff --git a/man/io_uring_prep_sendto.3 b/man/io_uring_prep_sendto.3
new file mode 120000
index 0000000..ba85e68
--- /dev/null
+++ b/man/io_uring_prep_sendto.3
@@ -0,0 +1 @@
+io_uring_prep_send.3
\ No newline at end of file
diff --git a/man/io_uring_prep_setxattr.3 b/man/io_uring_prep_setxattr.3
new file mode 100644
index 0000000..5ccff75
--- /dev/null
+++ b/man/io_uring_prep_setxattr.3
@@ -0,0 +1,64 @@
+.\" Copyright (C) 2023 Rutvik Patel <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_setxattr 3 "January 23, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_prep_setxattr, io_uring_prep_fsetxattr \- prepare a request to set an 
+extended attribute value
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_setxattr(struct io_uring_sqe *" sqe ","
+.BI "                            const char *" name ","
+.BI "                            const char *" value ","
+.BI "                            const char *" path ","
+.BI "                            int " flags ","
+.BI "                            unsigned int " len ");"
+.PP
+.BI "void io_uring_prep_fsetxattr(struct io_uring_sqe *" sqe ","
+.BI "                             int " fd ","
+.BI "                             const char *" name ","
+.BI "                             const char *" value ","
+.BI "                             int " flags ","
+.BI "                             unsigned int " len ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_setxattr (3)
+function prepares a request to set an extended attribute value. The submission 
+queue entry 
+.I sqe
+is setup to set the
+.I value
+of the extended attribute identified by
+.I name
+and associated with the given
+.I path
+in the filesystem with modifier flags
+.IR flags .
+The
+.I len
+argument specifies the size (in bytes) of
+.IR value .
+
+.BR io_uring_prep_fsetxattr (3)
+is identical to 
+.BR io_uring_prep_setxattr (3),
+only the extended attribute is set on the open file referred to by
+.I fd
+in place of
+.IR path .
+
+These functions prepare an async
+.BR setxattr (2)
+request. See that man page for details.
+
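+As an illustration, a minimal sketch setting a hypothetical
+.I user.color
+attribute (the path and attribute name here are examples only):
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+const char value[] = "red";
+
+io_uring_prep_setxattr(sqe, "user.color", value, "/tmp/file",
+                       0, sizeof(value));
+io_uring_submit(&ring);
+.EE
+.in
+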
+.SH RETURN VALUE
+None
+
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR setxattr (2)
diff --git a/man/io_uring_prep_socket.3 b/man/io_uring_prep_socket.3
index 473f225..77dba66 100644
--- a/man/io_uring_prep_socket.3
+++ b/man/io_uring_prep_socket.3
@@ -22,6 +22,12 @@
 .BI "                                 int " protocol ","
 .BI "                                 unsigned int " file_index ","
 .BI "                                 unsigned int " flags ");"
+.PP
+.BI "void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *" sqe ","
+.BI "                                 int " domain ","
+.BI "                                 int " type ","
+.BI "                                 int " protocol ","
+.BI "                                 unsigned int " flags ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -41,16 +47,31 @@
 
 The
 .BR io_uring_prep_socket_direct (3)
-works just like
+helper works just like
 .BR io_uring_prep_socket (3),
 except it maps the socket to a direct descriptor rather than return a normal
 file descriptor. The
 .I file_index
-argument should be set to the slot that should be used for this socket, or
-.B IORING_FILE_INDEX_ALLOC
-if io_uring should allocate a free one.
+argument should be set to the slot that should be used for this socket.
 
-If the direct variant is used, the application must first have registered
+The
+.BR io_uring_prep_socket_direct_alloc (3)
+helper works just like
+.BR io_uring_prep_socket_direct (3),
+except it allocates a new direct descriptor rather than pass a free slot in. It
+is equivalent to using
+.BR io_uring_prep_socket_direct (3)
+with
+.B IORING_FILE_INDEX_ALLOC
+as the
+.IR file_index .
+Upon completion, the
+.I res
+field of the CQE will return the direct slot that was allocated for the
+socket.
+
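+As an illustration, a minimal sketch, assuming a set up
+.I ring
+with a registered file table:
+.PP
+.in +4n
+.EX
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_socket_direct_alloc(sqe, AF_INET, SOCK_STREAM, 0, 0);
+io_uring_submit(&ring);
+/* on completion, cqe->res holds the allocated direct slot */
+.EE
+.in
+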
+If the direct variants are used, the application must first have registered
 a file table using
 .BR io_uring_register_files (3)
 of the appropriate size. Once registered, a direct socket request may use any
diff --git a/man/io_uring_prep_socket_direct_alloc.3 b/man/io_uring_prep_socket_direct_alloc.3
new file mode 120000
index 0000000..15d7b7f
--- /dev/null
+++ b/man/io_uring_prep_socket_direct_alloc.3
@@ -0,0 +1 @@
+io_uring_prep_socket.3
\ No newline at end of file
diff --git a/man/io_uring_prep_splice.3 b/man/io_uring_prep_splice.3
index cb82ad0..3d6e38f 100644
--- a/man/io_uring_prep_splice.3
+++ b/man/io_uring_prep_splice.3
@@ -52,6 +52,36 @@
 .I fd_in
 given as a registered file descriptor offset.
 
+If
+.I fd_in
+refers to a pipe,
+.I off_in
+is ignored and must be set to -1.
+
+If
+.I fd_in
+does not refer to a pipe and
+.I off_in
+is -1, then
+.I nbytes
+are read from
+.I fd_in
+starting from the file offset, which is incremented by the number of bytes read.
+
+If
+.I fd_in
+does not refer to a pipe and
+.I off_in
+is not -1, then the starting offset of
+.I fd_in
+will be
+.IR off_in .
+
+The same rules apply to
+.I fd_out
+and
+.IR off_out .
+
 This function prepares an async
 .BR splice (2)
 request. See that man page for details.
@@ -78,3 +108,13 @@
 .BR io_uring_submit (3),
 .BR io_uring_register (2),
 .BR splice (2)
+
+.SH NOTES
+Note that even if
+.I fd_in
+or
+.I fd_out
+refers to a pipe, the splice operation can still fail with
+.B EINVAL
+if one of the file descriptors doesn't explicitly support the splice operation,
+e.g. reading from a terminal is unsupported from kernel 5.7 to 5.11.
diff --git a/man/io_uring_prep_symlinkat.3 b/man/io_uring_prep_symlinkat.3
index 0fa7056..b809f93 100644
--- a/man/io_uring_prep_symlinkat.3
+++ b/man/io_uring_prep_symlinkat.3
@@ -41,7 +41,7 @@
 .I target
 to the new destination indicated by
 .I linkpath
-relative to the the current working directory. This function prepares an async
+relative to the current working directory. This function prepares an async
 .BR symlink (2)
 request. See that man page for details.
 
diff --git a/man/io_uring_prep_timeout.3 b/man/io_uring_prep_timeout.3
index bfb8791..e7a8fca 100644
--- a/man/io_uring_prep_timeout.3
+++ b/man/io_uring_prep_timeout.3
@@ -4,7 +4,7 @@
 .\"
 .TH io_uring_prep_poll_timeout 3 "March 12, 2022" "liburing-2.2" "liburing Manual"
 .SH NAME
-io_uring_prep_timeoute \- prepare a timeout request
+io_uring_prep_timeout \- prepare a timeout request
 .SH SYNOPSIS
 .nf
 .B #include <liburing.h>
@@ -45,12 +45,20 @@
 The realtime clock source should be used.
 .TP
 .B IORING_TIMEOUT_ETIME_SUCCESS
-Consider an expired timeout a success in terms of the posted completion.
-Normally a timeout that triggers would return in a
+Consider an expired timeout a success in terms of the posted completion. This
+means it will not sever dependent links, as a failed request normally would. The
+posted CQE result code will still contain
 .B -ETIME
-CQE
+in the
 .I res
 value.
+.TP
+.B IORING_TIMEOUT_MULTISHOT
+The request will return multiple timeout completions. The completion flag
+.B IORING_CQE_F_MORE
+is set if more timeouts are expected. The value specified in
+.I count
+is the number of repeats. A value of 0 means the timeout is indefinite and can
+only be stopped by a removal request. Available since the 6.4 kernel.
 .PP
 The timeout completion event will trigger if either the specified timeout
 has occurred, or the specified number of events to wait for have been posted
@@ -70,7 +78,7 @@
 .TP
 .B -EINVAL
 One of the fields set in the SQE was invalid. For example, two clocksources
-where given, or the specified timeout seconds or nanoseconds where < 0.
+were given, or the specified timeout seconds or nanoseconds were < 0.
 .TP
 .B -EFAULT
 io_uring was unable to access the data specified by
diff --git a/man/io_uring_prep_timeout_update.3 b/man/io_uring_prep_timeout_update.3
index cb9ed12..8bf4f18 100644
--- a/man/io_uring_prep_timeout_update.3
+++ b/man/io_uring_prep_timeout_update.3
@@ -4,7 +4,7 @@
 .\"
 .TH io_uring_prep_poll_timeout_update 3 "March 12, 2022" "liburing-2.2" "liburing Manual"
 .SH NAME
-io_uring_prep_timeoute_update \- prepare a request to update an existing timeout
+io_uring_prep_timeout_update \- prepare a request to update an existing timeout
 .SH SYNOPSIS
 .nf
 .B #include <liburing.h>
@@ -54,6 +54,7 @@
 .I res
 value.
 .PP
+The timeout remove command does not currently accept any flags.
 
 .SH RETURN VALUE
 None
@@ -77,7 +78,7 @@
 .TP
 .B -EINVAL
 One of the fields set in the SQE was invalid. For example, two clocksources
-where given, or the specified timeout seconds or nanoseconds where < 0.
+were given, or the specified timeout seconds or nanoseconds were < 0.
 .TP
 .B -EFAULT
 io_uring was unable to access the data specified by
diff --git a/man/io_uring_prep_waitid.3 b/man/io_uring_prep_waitid.3
new file mode 100644
index 0000000..06bda74
--- /dev/null
+++ b/man/io_uring_prep_waitid.3
@@ -0,0 +1,67 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_waitid 3 "July 14, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_waitid \- prepare a waitid request
+.SH SYNOPSIS
+.nf
+.B #include <sys/wait.h>
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_waitid(struct io_uring_sqe *" sqe ","
+.BI "                          idtype_t " idtype ","
+.BI "                          id_t " id ","
+.BI "                          siginfo_t *" infop ","
+.BI "                          int " options ","
+.BI "                          unsigned int " flags ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_waitid (3)
+function prepares a waitid request. The submission queue entry
+.I sqe
+is setup to use the
+.I idtype
+and
+.I id
+arguments to select the child(ren), and
+.I options
+to specify the child state changes to wait for. Upon successful
+return, it fills
+.I infop
+with information of the child process, if any.
+.I flags
+holds io_uring specific modifier flags. These are currently unused, and hence
+.B 0
+should be passed.
+
+This function prepares an async
+.BR waitid (2)
+request. See that man page for details.
+
+Available since kernel 6.7.
+
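+As an illustration, a minimal sketch waiting for a child to exit, assuming a
+set up
+.I ring
+and a previously forked
+.IR child_pid :
+.PP
+.in +4n
+.EX
+siginfo_t info;
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+io_uring_prep_waitid(sqe, P_PID, child_pid, &info, WEXITED, 0);
+io_uring_submit(&ring);
+.EE
+.in
+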
+.SH RETURN VALUE
+None
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation. See the related man page for
+details on possible values. Note that where synchronous system calls will return
+.B -1
+on failure and set
+.I errno
+to the actual error value, io_uring never uses
+.IR errno .
+Instead it returns the negated
+.I errno
+directly in the CQE
+.I res
+field.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR waitid (2)
diff --git a/man/io_uring_prep_write.3 b/man/io_uring_prep_write.3
index 791a5f1..794361f 100644
--- a/man/io_uring_prep_write.3
+++ b/man/io_uring_prep_write.3
@@ -40,7 +40,7 @@
 to the file is serialized. It is not encouraged to use this feature if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
 After the write has been prepared, it can be submitted with one of the submit
 functions.
diff --git a/man/io_uring_prep_write_fixed.3 b/man/io_uring_prep_write_fixed.3
index 5dab4a6..39521b7 100644
--- a/man/io_uring_prep_write_fixed.3
+++ b/man/io_uring_prep_write_fixed.3
@@ -42,12 +42,12 @@
 .I buf
 and
 .I nbytes
-arguments must fall within a region specificed by
+arguments must fall within a region specified by
 .I buf_index
 in the previously registered buffer. The buffer need not be aligned with
 the start of the registered buffer.
 
-After the read has been prepared it can be submitted with one of the submit
+After the write has been prepared it can be submitted with one of the submit
 functions.
 
 .SH RETURN VALUE
diff --git a/man/io_uring_prep_writev.3 b/man/io_uring_prep_writev.3
index 9fb83d9..b325b35 100644
--- a/man/io_uring_prep_writev.3
+++ b/man/io_uring_prep_writev.3
@@ -41,7 +41,7 @@
 to the file is serialized. It is not encouraged to use this feature if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
 After the write has been prepared it can be submitted with one of the submit
 functions.
@@ -64,8 +64,8 @@
 .I res
 field.
 .SH NOTES
-Unless an application explicitly needs to pass in more than iovec, it is more
-efficient to use
+Unless an application explicitly needs to pass in more than one iovec, it
+is more efficient to use
 .BR io_uring_prep_write (3)
 rather than this function, as no state has to be maintained for a
 non-vectored IO request.
diff --git a/man/io_uring_prep_writev2.3 b/man/io_uring_prep_writev2.3
index 5093596..d7cc46e 100644
--- a/man/io_uring_prep_writev2.3
+++ b/man/io_uring_prep_writev2.3
@@ -67,7 +67,7 @@
 to the file is serialized. It is not encouraged to use this feature if it's
 possible to provide the desired IO offset from the application or library.
 
-On files that are not capable of seeking, the offset is ignored.
+On files that are not capable of seeking, the offset must be 0 or -1.
 
 After the write has been prepared, it can be submitted with one of the submit
 functions.
@@ -90,8 +90,8 @@
 .I res
 field.
 .SH NOTES
-Unless an application explicitly needs to pass in more than iovec, it is more
-efficient to use
+Unless an application explicitly needs to pass in more than one iovec, it
+is more efficient to use
 .BR io_uring_prep_write (3)
 rather than this function, as no state has to be maintained for a
 non-vectored IO request.
diff --git a/man/io_uring_queue_init.3 b/man/io_uring_queue_init.3
index 086b70f..53b2e14 100644
--- a/man/io_uring_queue_init.3
+++ b/man/io_uring_queue_init.3
@@ -17,6 +17,11 @@
 .BI "int io_uring_queue_init_params(unsigned " entries ","
 .BI "                               struct io_uring *" ring ","
 .BI "                               struct io_uring_params *" params ");"
+.PP
+.BI "int io_uring_queue_init_mem(unsigned " entries ","
+.BI "                            struct io_uring *" ring ","
+.BI "                            struct io_uring_params *" params ","
+.BI "                            void *" buf ", size_t " buf_size ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -33,12 +38,18 @@
 By default, the CQ ring will have twice the number of entries as specified by
 .I entries
 for the SQ ring. This is adequate for regular file or storage workloads, but
-may be too small networked workloads. The SQ ring entries do not impose a limit
-on the number of in-flight requests that the ring can support, it merely limits
-the number that can be submitted to the kernel in one go (batch). if the CQ
-ring overflows, e.g. more entries are generated than fits in the ring before the
-application can reap them, then the ring enters a CQ ring overflow state. This
-is indicated by
+may be too small for networked workloads. The SQ ring entries do not impose a
+limit on the number of in-flight requests that the ring can support, it merely
+limits the number that can be submitted to the kernel in one go (batch). If the
+CQ ring overflows, e.g. more entries are generated than fit in the ring before
+the application can reap them, then if the kernel supports
+.B IORING_FEAT_NODROP
+the ring enters a CQ ring overflow state. Otherwise it drops the CQEs and
+increments
+.I cq.koverflow
+in
+.I struct io_uring
+with the number of CQEs dropped. The overflow state is indicated by
 .B IORING_SQ_CQ_OVERFLOW
 being set in the SQ ring flags. Unless the kernel runs out of available memory,
 entries are not dropped, but it is a much slower completion path and will slow
@@ -65,21 +76,51 @@
 will be passed through to the io_uring_setup syscall (see
 .BR io_uring_setup (2)).
 
-If the
+The
 .BR io_uring_queue_init_params (3)
-variant is used, then the parameters indicated by
+and
+.BR io_uring_queue_init_mem (3)
+variants will pass the parameters indicated by
 .I params
-will be passed straight through to the
+straight through to the
 .BR io_uring_setup (2)
 system call.
 
+The
+.BR io_uring_queue_init_mem (3)
+variant uses the provided
+.I buf
+with associated size
+.I buf_size
+as the memory for the ring, using the
+.B IORING_SETUP_NO_MMAP
+flag to
+.BR io_uring_setup (2).
+The buffer passed to
+.BR io_uring_queue_init_mem (3)
+must already be zeroed.
+Typically, the caller should allocate a huge page and pass that in to
+.BR io_uring_queue_init_mem (3).
+Pages allocated by mmap are already zeroed.
+.BR io_uring_queue_init_mem (3)
+returns the number of bytes used from the provided buffer, so that the app can
+reuse the buffer with the returned offset to put more rings in the same huge
+page.
+
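+As an illustration, a minimal sketch backing a ring with a 2MB huge page
+(error handling omitted; assumes huge pages are available on the system):
+.PP
+.in +4n
+.EX
+size_t size = 2 * 1024 * 1024;
+struct io_uring_params p = { 0 };
+struct io_uring ring;
+/* anonymous mmap'ed memory is already zeroed */
+void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+int ret = io_uring_queue_init_mem(64, &ring, &p, mem, size);
+/* ret > 0: number of bytes consumed from mem */
+.EE
+.in
+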
 On success, the resources held by
 .I ring
 should be released via a corresponding call to
 .BR io_uring_queue_exit (3).
 .SH RETURN VALUE
 .BR io_uring_queue_init (3)
-returns 0 on success and
+and
+.BR io_uring_queue_init_params (3)
+return 0 on success and
+.BR -errno
+on failure.
+
+.BR io_uring_queue_init_mem (3)
+returns the number of bytes used from the provided buffer on success, and
 .BR -errno
 on failure.
 .SH SEE ALSO
diff --git a/man/io_uring_queue_init_mem.3 b/man/io_uring_queue_init_mem.3
new file mode 120000
index 0000000..c91609e
--- /dev/null
+++ b/man/io_uring_queue_init_mem.3
@@ -0,0 +1 @@
+io_uring_queue_init.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_cmsg_firsthdr.3 b/man/io_uring_recvmsg_cmsg_firsthdr.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_cmsg_firsthdr.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_cmsg_nexthdr.3 b/man/io_uring_recvmsg_cmsg_nexthdr.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_cmsg_nexthdr.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_name.3 b/man/io_uring_recvmsg_name.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_name.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_out.3 b/man/io_uring_recvmsg_out.3
new file mode 100644
index 0000000..8d605d8
--- /dev/null
+++ b/man/io_uring_recvmsg_out.3
@@ -0,0 +1,82 @@
+.\" Copyright (C), 2022  Dylan Yudaken <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_recvmsg_out 3 "July 26, 2022" "liburing-2.2" "liburing Manual"
+.SH NAME
+io_uring_recvmsg_out \- access data from multishot recvmsg
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "struct io_uring_recvmsg_out *io_uring_recvmsg_validate(void *" buf ","
+.BI "                                                       int " buf_len ","
+.BI "                                                       struct msghdr *" msgh ");"
+.PP
+.BI "void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *" o ");"
+.PP
+.BI "struct cmsghdr *io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *" o ","
+.BI "                                               struct msghdr *" msgh ");"
+.BI "struct cmsghdr *io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *" o ","
+.BI "                                              struct msghdr *" msgh ","
+.BI "                                              struct cmsghdr *" cmsg ");"
+.PP
+.BI "void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *" o ","
+.BI "                               struct msghdr *" msgh ");"
+.BI "unsigned int io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *" o ","
+.BI "                                             int " buf_len ","
+.BI "                                             struct msghdr *" msgh ");"
+.PP
+.fi
+
+.SH DESCRIPTION
+
+These functions are used to access data in the payload delivered by
+.BR io_uring_prep_recvmsg_multishot (3).
+.PP
+.I msgh
+should point to the
+.I struct msghdr
+submitted with the request.
+.PP
+.BR io_uring_recvmsg_validate (3)
+will validate a buffer delivered by
+.BR io_uring_prep_recvmsg_multishot (3)
+and extract the
+.I io_uring_recvmsg_out
+if it is valid, returning a pointer to it or else NULL.
+.PP
+The structure is defined as follows:
+.PP
+.in +4n
+.EX
+
+struct io_uring_recvmsg_out {
+        __u32 namelen;    /* Name byte count as would have been populated
+                           * by recvmsg(2) */
+        __u32 controllen; /* Control byte count */
+        __u32 payloadlen; /* Payload byte count as would have been returned
+                           * by recvmsg(2) */
+        __u32 flags;      /* Flags result as would have been populated
+                           * by recvmsg(2) */
+};
+.EE
+.in
+
+.IP * 3
+.BR io_uring_recvmsg_name (3)
+returns a pointer to the name in the buffer.
+.IP *
+.BR io_uring_recvmsg_cmsg_firsthdr (3)
+returns a pointer to the first cmsg in the buffer, or NULL.
+.IP *
+.BR io_uring_recvmsg_cmsg_nexthdr (3)
+returns a pointer to the next cmsg in the buffer, or NULL.
+.IP *
+.BR io_uring_recvmsg_payload (3)
+returns a pointer to the payload in the buffer.
+.IP *
+.BR io_uring_recvmsg_payload_length (3)
+calculates the usable payload length in bytes.
+
+
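+.PP
+As an illustration, a minimal sketch of unpacking a multishot recvmsg
+completion, assuming
+.I buf
+is the selected buffer for the CQE and
+.I msgh
+is the msghdr submitted with the request:
+.PP
+.in +4n
+.EX
+struct io_uring_recvmsg_out *o;
+
+o = io_uring_recvmsg_validate(buf, cqe->res, &msgh);
+if (o) {
+        void *data = io_uring_recvmsg_payload(o, &msgh);
+        unsigned int len;
+
+        len = io_uring_recvmsg_payload_length(o, cqe->res, &msgh);
+        /* data points to len bytes of received payload */
+}
+.EE
+.in
+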
+.SH "SEE ALSO"
+.BR io_uring_prep_recvmsg_multishot (3)
diff --git a/man/io_uring_recvmsg_payload.3 b/man/io_uring_recvmsg_payload.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_payload.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_payload_length.3 b/man/io_uring_recvmsg_payload_length.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_payload_length.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_recvmsg_validate.3 b/man/io_uring_recvmsg_validate.3
new file mode 120000
index 0000000..8eb1743
--- /dev/null
+++ b/man/io_uring_recvmsg_validate.3
@@ -0,0 +1 @@
+io_uring_recvmsg_out.3
\ No newline at end of file
diff --git a/man/io_uring_register.2 b/man/io_uring_register.2
index 1e91caf..bfa4c80 100644
--- a/man/io_uring_register.2
+++ b/man/io_uring_register.2
@@ -3,12 +3,12 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH IO_URING_REGISTER 2 2019-01-17 "Linux" "Linux Programmer's Manual"
+.TH io_uring_register 2 2019-01-17 "Linux" "Linux Programmer's Manual"
 .SH NAME
 io_uring_register \- register files or user buffers for asynchronous I/O 
 .SH SYNOPSIS
 .nf
-.BR "#include <linux/io_uring.h>"
+.BR "#include <liburing.h>"
 .PP
 .BI "int io_uring_register(unsigned int " fd ", unsigned int " opcode ,
 .BI "                      void *" arg ", unsigned int " nr_args );
@@ -18,7 +18,7 @@
 .PP
 
 The
-.BR io_uring_register ()
+.BR io_uring_register (2)
 system call registers resources (e.g. user buffers, files, eventfd,
 personality, restrictions) for use in an
 .BR io_uring (7)
@@ -31,6 +31,14 @@
 .I fd
 is the file descriptor returned by a call to
 .BR io_uring_setup (2).
+If
+.I opcode
+has the flag
+.B IORING_REGISTER_USE_REGISTERED_RING
+ored into it,
+.I fd
+is instead the index of a registered ring fd.
+
 .I opcode
 can be one of:
 
@@ -85,7 +93,7 @@
 An application can increase or decrease the size or number of
 registered buffers by first unregistering the existing buffers, and
 then issuing a new call to
-.BR io_uring_register ()
+.BR io_uring_register (2)
 with the new buffers.
 
 Note that before 5.13 registering buffers would wait for the ring to idle.
@@ -93,7 +101,9 @@
 wait for those to finish before proceeding.
 
 An application need not unregister buffers explicitly before shutting
-down the io_uring instance. Available since 5.1.
+down the io_uring instance. Note, however, that shutdown processing may run
+asynchronously within the kernel. As a result, it is not guaranteed that
+pages are immediately unpinned in this case. Available since 5.1.
 
 .TP
 .B IORING_REGISTER_BUFFERS2
@@ -103,7 +113,7 @@
 
 .I arg
 points to a
-.I struct io_uring_rsrc_register,
+.IR struct io_uring_rsrc_register ,
 and
 .I nr_args
 should be set to the number of bytes in the structure.
@@ -145,7 +155,7 @@
 set to the specified tag and all other fields zeroed.
 
 Note that resource updates, e.g.
-.B IORING_REGISTER_BUFFERS_UPDATE,
+.BR IORING_REGISTER_BUFFERS_UPDATE ,
 don't necessarily deallocate resources by the time it returns, but they might
 be held alive until all requests using it complete.
 
@@ -157,9 +167,13 @@
 a real one, or replacing an existing entry.
 
 .I arg
-must contain a pointer to a struct io_uring_rsrc_update2, which contains
+must contain a pointer to a
+.I struct
+.IR io_uring_rsrc_update2 ,
+which contains
 an offset on which to start the update, and an array of
-.I struct iovec.
+.I struct
+.IR iovec .
 .I tags
 points to an array of tags.
 .I nr
@@ -193,7 +207,7 @@
 This operation takes no argument, and
 .I arg
 must be passed as NULL.  All previously registered buffers associated
-with the io_uring instance will be released. Available since 5.1.
+with the io_uring instance will be released synchronously. Available since 5.1.
 
 .TP
 .B IORING_REGISTER_FILES
@@ -216,7 +230,7 @@
 The file set may be sparse, meaning that the
 .B fd
 field in the array may be set to
-.B -1.
+.BR -1 .
 See
 .B IORING_REGISTER_FILES_UPDATE
 for how to update files in place.
@@ -234,11 +248,12 @@
 .TP
 .B IORING_REGISTER_FILES2
 Register files for I/O. Similar to
-.B IORING_REGISTER_FILES.
+.BR IORING_REGISTER_FILES .
 
 .I arg
 points to a
-.I struct io_uring_rsrc_register,
+.I struct
+.IR io_uring_rsrc_register ,
 and
 .I nr_args
 should be set to the number of bytes in the structure.
@@ -256,7 +271,7 @@
 for more info on resource tagging.
 
 Note that resource updates, e.g.
-.B IORING_REGISTER_FILES_UPDATE,
+.BR IORING_REGISTER_FILES_UPDATE ,
 don't necessarily deallocate resources, they might be held until all requests
 using that resource complete.
 
@@ -266,14 +281,15 @@
 .B IORING_REGISTER_FILES_UPDATE
 This operation replaces existing files in the registered file set with new
 ones, either turning a sparse entry (one where fd is equal to
-.B -1
-) into a real one, removing an existing entry (new one is set to
-.B -1
-), or replacing an existing entry with a new existing entry.
+.BR -1 )
+into a real one, removing an existing entry (new one is set to
+.BR -1 ),
+or replacing an existing entry with a new existing entry.
 
 .I arg
 must contain a pointer to a
-.I struct io_uring_files_update,
+.I struct
+.IR io_uring_files_update ,
 which contains
 an offset on which to start the update, and an array of file descriptors to
 use for the update.
@@ -282,27 +298,30 @@
 since 5.5.
 
 File descriptors can be skipped if they are set to
-.B IORING_REGISTER_FILES_SKIP.
+.BR IORING_REGISTER_FILES_SKIP .
 Skipping an fd will not touch the file associated with the previous
 fd at that index. Available since 5.12.
 
 .TP
 .B IORING_REGISTER_FILES_UPDATE2
-Similar to IORING_REGISTER_FILES_UPDATE, replaces existing files in the
+Similar to
+.BR IORING_REGISTER_FILES_UPDATE ,
+replaces existing files in the
 registered file set with new ones, either turning a sparse entry (one where
 fd is equal to
-.B -1
-) into a real one, removing an existing entry (new one is set to
-.B -1
-), or replacing an existing entry with a new existing entry.
+.BR -1 )
+into a real one, removing an existing entry (new one is set to
+.BR -1 ),
+or replacing an existing entry with a new existing entry.
 
 .I arg
 must contain a pointer to a
-.I struct io_uring_rsrc_update2,
+.I struct
+.IR io_uring_rsrc_update2 ,
 which contains
 an offset on which to start the update, and an array of file descriptors to
 use for the update stored in
-.I data.
+.IR data .
 .I tags
 points to an array of tags.
 .I nr
@@ -322,7 +341,9 @@
 
 .TP
 .B IORING_REGISTER_EVENTFD
-It's possible to use eventfd(2) to get notified of completion events on an
+It's possible to use
+.BR eventfd (2)
+to get notified of completion events on an
 io_uring instance. If this is desired, an eventfd file descriptor can be
 registered through this operation.
 .I arg
@@ -347,12 +368,12 @@
 .TP
 .B IORING_REGISTER_EVENTFD_ASYNC
 This works just like
-.B IORING_REGISTER_EVENTFD
-, except notifications are only posted for events that complete in an async
+.BR IORING_REGISTER_EVENTFD ,
+except notifications are only posted for events that complete in an async
 manner. This means that events that complete inline while being submitted
 do not trigger a notification event. The arguments supplied are the same as
 for
-.B IORING_REGISTER_EVENTFD.
+.BR IORING_REGISTER_EVENTFD .
 Available since 5.6.
 
 .TP
@@ -403,7 +424,7 @@
 .TP
 .B IORING_REGISTER_ENABLE_RINGS
 This operation enables an io_uring ring started in a disabled state
-.RB (IORING_SETUP_R_DISABLED
+.RB ( IORING_SETUP_R_DISABLED
 was specified in the call to
 .BR io_uring_setup (2)).
 While the io_uring ring is disabled, submissions are not allowed and
@@ -428,8 +449,8 @@
 entries.
 
 With an entry it is possible to allow an
-.BR io_uring_register ()
-.I opcode,
+.BR io_uring_register (2)
+.IR opcode ,
 or specify which
 .I opcode
 and
@@ -440,13 +461,13 @@
 to be specified (these flags must be set on each submission queue entry).
 
 All the restrictions must be submitted with a single
-.BR io_uring_register ()
+.BR io_uring_register (2)
 call and they are handled as an allowlist (opcodes and flags not registered,
 are not allowed).
 
 Restrictions can be registered only if the io_uring ring started in a disabled
 state
-.RB (IORING_SETUP_R_DISABLED
+.RB ( IORING_SETUP_R_DISABLED
 must be specified in the call to
 .BR io_uring_setup (2)).
 
@@ -470,7 +491,7 @@
 .TP
 .B IORING_UNREGISTER_IOWQ_AFF
 Undoes a CPU mask previously set with
-.B IORING_REGISTER_IOWQ_AFF.
+.BR IORING_REGISTER_IOWQ_AFF .
 Must not have
 .I arg
 or
@@ -495,7 +516,7 @@
 pointer to an array of two values, with the values in the array being set to
 the maximum count of workers per NUMA node. Index 0 holds the bounded worker
 count, and index 1 holds the unbounded worker count. On successful return, the
-passed in array will contain the previous maximum valyes for each type. If the
+passed in array will contain the previous maximum values for each type. If the
 count being passed in is 0, then this command returns the current maximum values
 and doesn't modify the current setting.
 .I nr_args
@@ -518,8 +539,8 @@
 system call.
 
 .I arg
-must be set to an unsigned int pointer to an array of type
-.I struct io_uring_rsrc_register
+must be set to a pointer to an array of type
+.I struct io_uring_rsrc_update
 of
 .I nr_args
 number of entries. The
@@ -546,7 +567,7 @@
 is used rather than a real file descriptor.
 
 Each thread or process using a ring must register the file descriptor directly
-by issuing this request.o
+by issuing this request.
 
 The maximum number of supported registered ring descriptors is currently
 limited to
@@ -557,11 +578,11 @@
 .TP
 .B IORING_UNREGISTER_RING_FDS
 Unregister descriptors previously registered with
-.B IORING_REGISTER_RING_FDS.
+.BR IORING_REGISTER_RING_FDS .
 
 .I arg
-must be set to an unsigned int pointer to an array of type
-.I struct io_uring_rsrc_register
+must be set to a pointer to an array of type
+.I struct io_uring_rsrc_update
 of
 .I nr_args
 number of entries. Only the
@@ -579,15 +600,255 @@
 
 Available since 5.18.
 
+.TP
+.B IORING_REGISTER_PBUF_RING
+Registers a shared buffer ring to be used with provided buffers. This is a
+newer alternative to using
+.B IORING_OP_PROVIDE_BUFFERS
+which is more efficient, to be used with request types that support the
+.B IOSQE_BUFFER_SELECT
+flag.
+
+The
+.I arg
+argument must be filled in with the appropriate information. It looks as
+follows:
+.PP
+.in +12n
+.EX
+struct io_uring_buf_reg {
+    __u64 ring_addr;
+    __u32 ring_entries;
+    __u16 bgid;
+    __u16 pad;
+    __u64 resv[3];
+};
+.EE
+.in
+.PP
+.in +8n
+The
+.I ring_addr
+field must contain the address to the memory allocated to fit this ring.
+The memory must be page aligned and hence allocated appropriately using e.g.
+.BR posix_memalign (3)
+or similar. The size of the ring is the product of
+.I ring_entries
+and the size of
+.IR "struct io_uring_buf" .
+.I ring_entries
+is the desired size of the ring, and must be a power-of-2 in size. The maximum
+size allowed is 2^15 (32768).
+.I bgid
+is the buffer group ID associated with this ring. SQEs that select a buffer
+have a buffer group associated with them in their
+.I buf_group
+field, and the associated CQEs will have
+.B IORING_CQE_F_BUFFER
+set in their
+.I flags
+member, which will also contain the specific ID of the buffer selected. The rest
+of the fields are reserved and must be cleared to zero.
+
+.I nr_args
+must be set to 1.
+
+Also see
+.BR io_uring_register_buf_ring (3)
+for more details. Available since 5.19.
+
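+As an illustration, a minimal sketch, assuming
+.I ring_fd
+is the ring descriptor and
+.I ring_mem
+is page aligned memory sized for the entries:
+.PP
+.in +8n
+.EX
+struct io_uring_buf_reg reg = { 0 };
+int ret;
+
+reg.ring_addr = (unsigned long) ring_mem;
+reg.ring_entries = 8;    /* must be a power of 2 */
+reg.bgid = 1;            /* buffer group ID */
+ret = io_uring_register(ring_fd, IORING_REGISTER_PBUF_RING, &reg, 1);
+.EE
+.in
+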
+.TP
+.B IORING_UNREGISTER_PBUF_RING
+Unregister a previously registered provided buffer ring.
+.I arg
+must be set to the address of a struct io_uring_buf_reg, with just the
+.I bgid
+field set to the buffer group ID of the previously registered provided buffer
+group.
+.I nr_args
+must be set to 1. Also see
+.BR IORING_REGISTER_PBUF_RING .
+
+Available since 5.19.
+
+.TP
+.B IORING_REGISTER_SYNC_CANCEL
+Performs a synchronous cancelation request, which works in a similar fashion to
+.B IORING_OP_ASYNC_CANCEL
+except it completes inline. This can be useful for scenarios where cancelations
+should happen synchronously, rather than needing to issue an SQE and wait for
+completion of that specific CQE.
+
+.I arg
+must be set to a pointer to a struct io_uring_sync_cancel_reg structure, with
+the details filled in for what request(s) to target for cancelation. See
+.BR io_uring_register_sync_cancel (3)
+for details on that. The return values are the same, except they are passed
+back synchronously rather than through the CQE
+.I res
+field.
+.I nr_args
+must be set to 1.
+
+Available since 6.0.
+
+.TP
+.B IORING_REGISTER_FILE_ALLOC_RANGE
+sets the allowable range for fixed file index allocations within the
+kernel. When requests that can instantiate a new fixed file are used with
+.BR IORING_FILE_INDEX_ALLOC ,
+the application is asking the kernel to allocate a new fixed file descriptor
+rather than pass in a specific value for one. By default, the kernel will
+pick any available fixed file descriptor within the range available.
+This effectively allows the application to set aside a range just for dynamic
+allocations, with the remainder being used for specific values.
+
+.I nr_args
+must be set to 1 and
+.I arg
+must be set to a pointer to a struct io_uring_file_index_range:
+.PP
+.in +12n
+.EX
+struct io_uring_file_index_range {
+    __u32 off;
+    __u32 len;
+    __u64 resv;
+};
+.EE
+.in
+.PP
+.in +8n
+with
+.I off
+being set to the starting value for the range, and
+.I len
+being set to the number of descriptors. The reserved
+.I resv
+field must be cleared to zero.
+
+The application must have registered a file table first.
+
+Available since 6.0.
+
+.TP
+.B IORING_REGISTER_PBUF_STATUS
+Can be used to retrieve the current head of a ringbuffer provided earlier via
+.BR IORING_REGISTER_PBUF_RING .
+.I arg
+must point to a
+.PP
+.in +12
+.EX
+struct io_uring_buf_status {
+	__u32	buf_group;	/* input */
+	__u32	head;		/* output */
+	__u32	resv[8];
+};
+.EE
+.in
+.PP
+.in +8
+of which
+.I arg->buf_group
+should contain the buffer group ID for the buffer ring in question,
+.I nr_args
+should be set to 1 and
+.I arg->resv
+should be zeroed out.
+The current head of the ringbuffer will be returned in
+.IR arg->head .
+
+Available since 6.8.
+
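+As an illustration, a minimal sketch querying the head of buffer group 1:
+.PP
+.in +8
+.EX
+struct io_uring_buf_status st = { 0 };
+int ret;
+
+st.buf_group = 1;
+ret = io_uring_register(ring_fd, IORING_REGISTER_PBUF_STATUS, &st, 1);
+/* on success, st.head holds the current ring head */
+.EE
+.in
+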
+.TP
+.B IORING_REGISTER_NAPI
+Registers a napi instance with the io_uring instance of
+.IR fd .
+.I arg
+should point to a
+.PP
+.in +12
+.EX
+struct io_uring_napi {
+	__u32	busy_poll_to;
+	__u8	prefer_busy_poll;
+	__u8	pad[3];
+	__u64	resv;
+};
+.EE
+.in
+.PP
+.in +8
+in which
+.I arg->busy_poll_to
+should contain the busy poll timeout in microseconds and
+.I arg->prefer_busy_poll
+should specify whether busy polling should be used rather than IRQs.
+.I nr_args
+should be set to 1 and
+.I arg->pad
+and
+.I arg->resv
+should be zeroed out.
+On successful return the
+.I io_uring_napi
+struct pointed to by
+.I arg
+will contain the previously used settings.
+Must be used with a ring initially created with
+.BR IORING_SETUP_IOPOLL .
+
+Available since 6.9.
+
+.TP
+.B IORING_UNREGISTER_NAPI
+Unregisters a napi instance previously registered via
+.B IORING_REGISTER_NAPI
+to the io_uring instance of
+.IR fd .
+.I arg
+should point to a
+.I struct
+.IR io_uring_napi .
+On successful return the
+.I io_uring_napi
+struct pointed to by
+.I arg
+will contain the previously used settings.
+
+Available since 6.9.
+
+.TP
+.B IORING_REGISTER_CLOCK
+Specifies which clock id io_uring will use for timers while waiting for
+completion events with
+.BR IORING_ENTER_GETEVENTS .
+It's only effective if the timeout argument in
+.I struct io_uring_getevents_arg
+is passed, ignored otherwise.
+When used in conjunction with
+.BR IORING_ENTER_ABS_TIMER ,
+interprets the timeout argument as absolute time of the specified clock.
+
+The default clock is
+.BR CLOCK_MONOTONIC .
+
+Available since 6.12 and supports
+.B CLOCK_MONOTONIC
+and
+.BR CLOCK_BOOTTIME .
+
 .SH RETURN VALUE
 
 On success,
-.BR io_uring_register ()
-returns 0.  On error,
-.B -1
-is returned, and
+.BR io_uring_register (2)
+returns either 0 or a positive value, depending on the
+.I opcode
+used.  On error, a negative error value is returned. The caller should not rely
+on the
 .I errno
-is set accordingly.
+variable.
 
 .SH ERRORS
 .TP
@@ -616,6 +877,9 @@
 was specified, but there were already buffers, files, or restrictions
 registered.
 .TP
+.B EEXIST
+The thread performing the registration is invalid.
+.TP
 .B EFAULT
 buffer is outside of the process' accessible address space, or
 .I iov_len
@@ -666,8 +930,7 @@
 .I nr_args
 file references would exceed the maximum allowed number of files the user
 is allowed to have according to the
-.B
-RLIMIT_NOFILE
+.B RLIMIT_NOFILE
 resource limit and the caller does not have
 .B CAP_SYS_RESOURCE
 capability. Note that this is a per user limit, not per process.
@@ -692,3 +955,26 @@
 .TP
 .B EOPNOTSUPP
 User buffers point to file-backed memory.
+.TP
+.B EFAULT
+User buffers point to file-backed memory (newer kernels).
+.TP
+.B ENOENT
+.B IORING_REGISTER_PBUF_STATUS
+was specified, but
+.I buf_group
+did not refer to a currently valid buffer group.
+.TP
+.B EINVAL
+.B IORING_REGISTER_PBUF_STATUS
+was specified, but the valid buffer group specified by
+.I buf_group
+did not refer to a buffer group registered via
+.BR IORING_REGISTER_PBUF_RING .
+.TP
+.B EINVAL
+.B IORING_REGISTER_NAPI
+was specified, but the ring associated with
+.I fd
+has not been created with
+.BR IORING_SETUP_IOPOLL .
diff --git a/man/io_uring_register_buf_ring.3 b/man/io_uring_register_buf_ring.3
index 9e520bf..213f648 100644
--- a/man/io_uring_register_buf_ring.3
+++ b/man/io_uring_register_buf_ring.3
@@ -55,21 +55,34 @@
 and the size of
 .IR "struct io_uring_buf" .
 .I ring_entries
-is the desired size of the ring, and must be a power-of-2 in size.
+is the desired size of the ring, and must be a power-of-2 in size. The maximum
+size allowed is 2^15 (32768).
 .I bgid
 is the buffer group ID associated with this ring. SQEs that select a buffer
-has a buffer group associated with them in their
+have a buffer group associated with them in their
 .I buf_group
-field, and the associated CQE will have
+field, and the associated CQEs will have
 .B IORING_CQE_F_BUFFER
 set in their
 .I flags
-member, which will also contain the specific ID of the buffer seleted. The rest
+member, which will also contain the specific ID of the buffer selected. The rest
 of the fields are reserved and must be cleared to zero.
 
 The
 .I flags
-argument is currently unused and must be set to zero.
+argument can be set to one of the following values:
+.TP
+.B IOU_PBUF_RING_INC
+The buffers in this ring can be incrementally consumed. With partial
+consumption, each completion of a given buffer ID will continue where the
+previous one left off, or from the start if no completions have been seen yet.
+When more completions should be expected for a given buffer ID, the CQE will
+have
+.B IORING_CQE_F_BUF_MORE
+set in the
+.I flags
+member.
+.PP
 
 A shared buffer ring looks as follows:
 .PP
@@ -117,12 +130,12 @@
 
 Reserved fields must not be touched. Applications must use
 .BR io_uring_buf_ring_init (3)
-to initialise the buffer ring. Applications may use
+to initialise the buffer ring before use. Applications may use
 .BR io_uring_buf_ring_add (3)
 and
 .BR io_uring_buf_ring_advance (3)
 or
-.BR io_uring_buf_ring_advance (3)
+.BR io_uring_buf_ring_cq_advance (3)
 to provide buffers, which will set these fields and update the tail.
 
 Available since 5.19.
@@ -132,8 +145,13 @@
 .BR io_uring_register_buf_ring (3)
 returns 0. On failure it returns
 .BR -errno .
+.SH NOTES
+Unless manual setup is needed, it's recommended to use
+.BR io_uring_setup_buf_ring (3)
+as it provides a simpler way to setup a provided buffer ring.
 .SH SEE ALSO
 .BR io_uring_buf_ring_init (3),
 .BR io_uring_buf_ring_add (3),
+.BR io_uring_setup_buf_ring (3),
 .BR io_uring_buf_ring_advance (3),
 .BR io_uring_buf_ring_cq_advance (3)
diff --git a/man/io_uring_register_buffers.3 b/man/io_uring_register_buffers.3
index 656ac42..dfb292d 100644
--- a/man/io_uring_register_buffers.3
+++ b/man/io_uring_register_buffers.3
@@ -9,12 +9,23 @@
 .nf
 .B #include <liburing.h>
 .PP
-.BI "int io_uring_register_buffers(struct io_uring *" ring ",
-.BI "                              const struct iovec *" iovecs ",
+.BI "int io_uring_register_buffers(struct io_uring *" ring ","
+.BI "                              const struct iovec *" iovecs ","
 .BI "                              unsigned " nr_iovecs ");"
 .PP
-.BI "int io_uring_register_buffers_sparse(struct io_uring *" ring ",
-.BI "                              unsigned " nr_iovecs ");"
+.BI "int io_uring_register_buffers_tags(struct io_uring *" ring ","
+.BI "                                   const struct iovec *" iovecs ","
+.BI "                                   const __u64 *" tags ","
+.BI "                                   unsigned " nr ");"
+.PP
+.BI "int io_uring_register_buffers_sparse(struct io_uring *" ring ","
+.BI "                                     unsigned " nr_iovecs ");"
+.PP
+.BI "int io_uring_register_buffers_update_tag(struct io_uring *" ring ","
+.BI "                                         unsigned " off ","
+.BI "                                         const struct iovec *" iovecs ","
+.BI "                                         const __u64 *" tags ","
+.BI "                                         unsigned " nr ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -28,6 +39,16 @@
 .IR ring .
 
 The
+.BR io_uring_register_buffers_tags (3)
+function behaves the same as
+.BR io_uring_register_buffers (3)
+function but additionally takes
+.I tags
+parameter. See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
+The
 .BR io_uring_register_buffers_sparse (3)
 function registers
 .I nr_iovecs
@@ -46,16 +67,39 @@
 every time IO is performed to that region. Additionally, it also avoids
 manipulating the page reference counts for each IO.
 
+The
+.BR io_uring_register_buffers_update_tag (3)
+function updates registered buffers with new ones, either turning a sparse
+entry into a real one, or replacing an existing entry. The
+.I off
+argument is the offset at which to start the update, updating the
+.I nr
+buffers defined by the array
+.I iovecs
+belonging to the
+.IR ring .
+The
+.I tags
+argument points to an array of tags. See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
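+As an illustration, a minimal sketch registering a single 4KB buffer, assuming
+a set up
+.IR ring :
+.PP
+.in +4n
+.EX
+void *buf;
+int ret;
+
+if (posix_memalign(&buf, 4096, 4096))
+        return -1;
+struct iovec iov = { .iov_base = buf, .iov_len = 4096 };
+ret = io_uring_register_buffers(&ring, &iov, 1);
+.EE
+.in
+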
 .SH RETURN VALUE
 On success
-.BR io_uring_register_buffers (3)
+.BR io_uring_register_buffers (3),
+.BR io_uring_register_buffers_tags (3)
 and
 .BR io_uring_register_buffers_sparse (3)
-return 0. On failure they return
+return 0.
+.BR io_uring_register_buffers_update_tag (3)
+returns the number of buffers updated.
+On failure they return
 .BR -errno .
 .SH SEE ALSO
+.BR io_uring_register (2),
 .BR io_uring_get_sqe (3),
 .BR io_uring_unregister_buffers (3),
+.BR io_uring_clone_buffers (3),
 .BR io_uring_register_buf_ring (3),
 .BR io_uring_prep_read_fixed (3),
 .BR io_uring_prep_write_fixed (3)
diff --git a/man/io_uring_register_buffers_sparse.3 b/man/io_uring_register_buffers_sparse.3
new file mode 120000
index 0000000..1019ce4
--- /dev/null
+++ b/man/io_uring_register_buffers_sparse.3
@@ -0,0 +1 @@
+io_uring_register_buffers.3
\ No newline at end of file
diff --git a/man/io_uring_register_buffers_tags.3 b/man/io_uring_register_buffers_tags.3
new file mode 120000
index 0000000..1019ce4
--- /dev/null
+++ b/man/io_uring_register_buffers_tags.3
@@ -0,0 +1 @@
+io_uring_register_buffers.3
\ No newline at end of file
diff --git a/man/io_uring_register_buffers_update_tag.3 b/man/io_uring_register_buffers_update_tag.3
new file mode 120000
index 0000000..1019ce4
--- /dev/null
+++ b/man/io_uring_register_buffers_update_tag.3
@@ -0,0 +1 @@
+io_uring_register_buffers.3
\ No newline at end of file
diff --git a/man/io_uring_register_clock.3 b/man/io_uring_register_clock.3
new file mode 100644
index 0000000..56ed1ac
--- /dev/null
+++ b/man/io_uring_register_clock.3
@@ -0,0 +1,72 @@
+.\" Copyright (C) 2024 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_register_clock 3 "Aug 18, 2024" "liburing-2.8" "liburing Manual"
+.SH NAME
+io_uring_register_clock \- set clock source for event waiting
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_register_clock(struct io_uring *" ring ",
+.BI "                            struct io_uring_clock_register *" arg ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_register_clock (3)
+function registers which clock source should be used by io_uring, when an
+application waits for event completions. The
+.IR ring
+argument should point to the ring in question, and the
+.IR arg
+argument should be a pointer to a
+.B struct io_uring_clock_register .
+
+The
+.IR arg
+argument must be filled in with the appropriate information. It looks as
+follows:
+.PP
+.in +4n
+.EX
+struct io_uring_clock_register {
+    __u32 clockid;
+    __u32 __resv[3];
+};
+.EE
+.in
+.PP
+The
+.I clockid
+field must contain the clock source, with valid sources being:
+.TP
+.B CLOCK_MONOTONIC
+A nonsettable system-wide clock that represents monotonic time.
+.TP
+.B CLOCK_BOOTTIME
+A nonsettable system-wide clock that is identical to
+.BR CLOCK_MONOTONIC ,
+except that it also includes any time that the system is suspended.
+.PP
+See
+.BR clock_gettime (3)
+for more details.
+
+The
+.I __resv
+fields must be filled with zeroes.
+
+Available since 6.12.
+
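+As an illustration, a minimal sketch switching wait timeouts to
+.BR CLOCK_BOOTTIME :
+.PP
+.in +4n
+.EX
+struct io_uring_clock_register arg = { 0 };
+int ret;
+
+arg.clockid = CLOCK_BOOTTIME;
+ret = io_uring_register_clock(&ring, &arg);
+.EE
+.in
+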
+.SH RETURN VALUE
+On success
+.BR io_uring_register_clock (3)
+returns 0. On failure it returns
+.BR -errno .
+.SH SEE ALSO
+.BR clock_gettime (3),
+.BR io_uring_register (2),
+.BR io_uring_wait_cqe (3),
+.BR io_uring_wait_cqe_timeout (3)
diff --git a/man/io_uring_register_eventfd.3 b/man/io_uring_register_eventfd.3
index 5cbe72a..d7b23ee 100644
--- a/man/io_uring_register_eventfd.3
+++ b/man/io_uring_register_eventfd.3
@@ -44,7 +44,6 @@
 as a hint to check the CQ ring for completions.
 .SH RETURN VALUE
 Returns 0 on success, or
-or
 .BR -errno
 on error.
 .SH SEE ALSO
diff --git a/man/io_uring_register_file_alloc_range.3 b/man/io_uring_register_file_alloc_range.3
new file mode 100644
index 0000000..1afd41b
--- /dev/null
+++ b/man/io_uring_register_file_alloc_range.3
@@ -0,0 +1,52 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_register_file_alloc_range 3 "Oct 21, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_register_file_alloc_range \- set range for fixed file allocations
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_register_file_alloc_range(struct io_uring *" ring ",
+.BI "                                       unsigned " off ","
+.BI "                                       unsigned " len ");"
+.BI "
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_register_file_alloc_range (3)
+function sets the allowable range for fixed file index allocations within the
+kernel. When requests that can instantiate a new fixed file are used with
+.B IORING_FILE_INDEX_ALLOC ,
+the application is asking the kernel to allocate a new fixed file descriptor
+rather than pass in a specific value for one. By default, the kernel will
+pick any available fixed file descriptor within the range available. Calling
+this function with
+.I off
+set to the starting offset and
+.I len
+set to the number of descriptors, the application can limit the allocated
+descriptors to that particular range. This effectively allows the application
+to set aside a range just for dynamic allocations, with the remainder being
+used for specific values.
+
+The application must have registered a fixed file table upfront, eg through
+.BR io_uring_register_files (3)
+or
+.BR io_uring_register_files_sparse (3).
+
+Available since 6.0.
+
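+As an illustration, a minimal sketch that reserves slots 0-63 for explicit use
+and leaves slots 64-127 for kernel allocation, assuming a 128 entry file table
+has been registered:
+.PP
+.in +4n
+.EX
+int ret = io_uring_register_file_alloc_range(&ring, 64, 64);
+.EE
+.in
+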
+.SH RETURN VALUE
+On success
+.BR io_uring_register_file_alloc_range (3)
+returns 0. On failure it returns
+.BR -errno .
+.SH SEE ALSO
+.BR io_uring_register_files (3),
+.BR io_uring_prep_accept_direct (3),
+.BR io_uring_prep_openat_direct (3),
+.BR io_uring_prep_socket_direct (3)
diff --git a/man/io_uring_register_files.3 b/man/io_uring_register_files.3
index 0a9ccc3..c7f73e9 100644
--- a/man/io_uring_register_files.3
+++ b/man/io_uring_register_files.3
@@ -13,8 +13,24 @@
 .BI "                            const int *" files ","
 .BI "                            unsigned " nr_files ");"
 .PP
+.BI "int io_uring_register_files_tags(struct io_uring *" ring ","
+.BI "                                 const int *" files ","
+.BI "                                 const __u64 *" tags ","
+.BI "                                 unsigned " nr ");"
+.PP
 .BI "int io_uring_register_files_sparse(struct io_uring *" ring ","
-.BI "                            unsigned " nr_files ");"
+.BI "                                   unsigned " nr_files ");"
+.PP
+.BI "int io_uring_register_files_update(struct io_uring *" ring ","
+.BI "                                   unsigned " off ","
+.BI "                                   const int *" files ","
+.BI "                                   unsigned " nr_files ");"
+.PP
+.BI "int io_uring_register_files_update_tag(struct io_uring *" ring ","
+.BI "                                   unsigned " off ","
+.BI "                                   const int *" files ","
+.BI "                                   const __u64 *" tags ","
+.BI "                                   unsigned " nr_files ");"
 .fi
 .SH DESCRIPTION
 .PP
@@ -29,22 +45,76 @@
 for subsequent operations.
 
 The
+.BR io_uring_register_files_tags (3)
+function behaves the same as
+.BR io_uring_register_files (3)
+function but additionally takes
+.I tags
+parameter. See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
+The
 .BR io_uring_register_files_sparse (3)
 function registers an empty file table of
 .I nr_files
-number of file descriptors. The sparse variant is available in kernels 5.19
-and later.
+number of file descriptors. These files must be updated before use, e.g. using
+.BR io_uring_register_files_update_tag (3).
+Note that if the size of the sparse table exceeds what
+.B RLIMIT_NOFILE
+allows, then
+.BR io_uring_register_files_sparse (3)
+will attempt to raise the limit using
+.BR setrlimit (2)
+and retry the operation. If the registration fails after doing that, then an
+error will be returned.
+The sparse variant is available in kernels 5.19 and later.
 
 Registering a file table is a prerequisite for using any request that uses
 direct descriptors.
 
+Registered files have less overhead per operation than normal files. This
+is due to the kernel grabbing a reference count on a file when an operation
+begins, and dropping it when it's done. When the process file table is
+shared, for example if the process has ever created any threads, then this
+cost goes up even more. Using registered files reduces the overhead of
+file reference management across requests that operate on a file.
+
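+As an illustration, a minimal sketch registering a partly sparse table of four
+files, assuming a set up
+.I ring
+and two open descriptors
+.I fd0
+and
+.IR fd1 :
+.PP
+.in +4n
+.EX
+int fds[4] = { fd0, fd1, -1, -1 };  /* last two entries are sparse */
+
+int ret = io_uring_register_files(&ring, fds, 4);
+.EE
+.in
+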
+The
+.BR io_uring_register_files_update (3)
+function updates existing registered files. The
+.I off
+argument is the offset at which to start the update, updating the
+.I nr_files
+files defined by the array
+.I files
+belonging to the
+.IR ring .
+
+The
+.BR io_uring_register_files_update_tag (3)
+function behaves the same as
+.BR io_uring_register_files_update (3)
+function but additionally takes
+.I tags
+parameter. See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
 .SH RETURN VALUE
 On success
-.BR io_uring_register_files (3)
+.BR io_uring_register_files (3),
+.BR io_uring_register_files_tags (3)
 and
 .BR io_uring_register_files_sparse (3)
-return 0. On failure they return
+return 0.
+.BR io_uring_register_files_update (3)
+and
+.BR io_uring_register_files_update_tag (3)
+return the number of files updated.
+On failure they return
 .BR -errno .
 .SH SEE ALSO
+.BR io_uring_register (2),
 .BR io_uring_get_sqe (3),
 .BR io_uring_unregister_files (3)
diff --git a/man/io_uring_register_files_sparse.3 b/man/io_uring_register_files_sparse.3
new file mode 120000
index 0000000..db38b93
--- /dev/null
+++ b/man/io_uring_register_files_sparse.3
@@ -0,0 +1 @@
+io_uring_register_files.3
\ No newline at end of file
diff --git a/man/io_uring_register_files_tags.3 b/man/io_uring_register_files_tags.3
new file mode 120000
index 0000000..db38b93
--- /dev/null
+++ b/man/io_uring_register_files_tags.3
@@ -0,0 +1 @@
+io_uring_register_files.3
\ No newline at end of file
diff --git a/man/io_uring_register_files_update.3 b/man/io_uring_register_files_update.3
new file mode 120000
index 0000000..db38b93
--- /dev/null
+++ b/man/io_uring_register_files_update.3
@@ -0,0 +1 @@
+io_uring_register_files.3
\ No newline at end of file
diff --git a/man/io_uring_register_files_update_tag.3 b/man/io_uring_register_files_update_tag.3
new file mode 120000
index 0000000..db38b93
--- /dev/null
+++ b/man/io_uring_register_files_update_tag.3
@@ -0,0 +1 @@
+io_uring_register_files.3
\ No newline at end of file
diff --git a/man/io_uring_register_iowq_aff.3 b/man/io_uring_register_iowq_aff.3
index e782914..686222c 100644
--- a/man/io_uring_register_iowq_aff.3
+++ b/man/io_uring_register_iowq_aff.3
@@ -38,6 +38,12 @@
 .BR io_uring_unregister_iowq_aff (3)
 may be called to restore CPU affinities to the default.
 
+Applications must define
+.B _GNU_SOURCE
+to obtain the definition of this helper, as
+.I cpu_set_t
+will not be defined without it.
+
 .SH RETURN VALUE
 Returns
 .B 0
diff --git a/man/io_uring_register_napi.3 b/man/io_uring_register_napi.3
new file mode 100644
index 0000000..6ce8cff
--- /dev/null
+++ b/man/io_uring_register_napi.3
@@ -0,0 +1,40 @@
+.\" Copyright (C) 2022 Stefan Roesch <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_register_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_register_napi \- register NAPI busy poll settings
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_register_napi(struct io_uring *" ring ","
+.BI "                           struct io_uring_napi *" napi)
+.PP
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_register_napi (3)
+function registers the NAPI settings for subsequent operations. The NAPI
+settings are specified in the structure that is passed in the
+.I napi
+parameter. The structure consists of the NAPI timeout
+.I busy_poll_to
+(the NAPI busy poll timeout in microseconds) and
+.IR prefer_busy_poll .
+
+Registering NAPI settings sets the mode used when the kernel calls the
+napi_busy_loop function and corresponds to the
+.B SO_PREFER_BUSY_POLL
+socket option.
+
+NAPI busy poll can reduce the network roundtrip time.
+
+
+.SH RETURN VALUE
+On success
+.BR io_uring_register_napi (3)
+returns 0. On failure it returns
+.BR -errno .
+It also updates the napi structure with the current values.
diff --git a/man/io_uring_register_ring_fd.3 b/man/io_uring_register_ring_fd.3
index e70c551..0284fe4 100644
--- a/man/io_uring_register_ring_fd.3
+++ b/man/io_uring_register_ring_fd.3
@@ -32,6 +32,8 @@
 this function to register the ring descriptor when a ring is set up. See NOTES
 for restrictions when a ring is shared.
 
+Available since kernel 5.18.
+
 .SH NOTES
 When the ring descriptor is registered, it is stored internally in the
 .I struct io_uring
diff --git a/man/io_uring_register_sync_cancel.3 b/man/io_uring_register_sync_cancel.3
new file mode 100644
index 0000000..18fcf99
--- /dev/null
+++ b/man/io_uring_register_sync_cancel.3
@@ -0,0 +1,71 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_register_sync_cancel 3 "September 21, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_register_sync_cancel \- issue a synchronous cancelation request
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_register_sync_cancel(struct io_uring *" ring ",
+.BI "                                  struct io_uring_sync_cancel_reg *" reg ");
+.PP
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_register_sync_cancel (3)
+function performs a synchronous cancelation request based on the parameters
+specified in
+.IR reg .
+
+The
+.I reg
+argument must be filled in with the appropriate information for the
+cancelation request. It looks as follows:
+.PP
+.in +4n
+.EX
+struct io_uring_sync_cancel_reg {
+    __u64 addr;
+    __s32 fd;
+    __u32 flags;
+    struct __kernel_timespec timeout;
+    __u64 pad[4];
+};
+.EE
+.in
+.PP
+
+The arguments largely mirror what the async prep functions support, see
+.BR io_uring_prep_cancel (3)
+for details. Similarly, the return value is the same. The exception is the
+.I timeout
+argument, which can be used to limit the time that the kernel will wait for
+cancelations to be successful. If the
+.I tv_sec
+and
+.I tv_nsec
+values are set to anything but
+.BR -1UL ,
+then they indicate a relative timeout within which cancelations must be
+completed.
+
+The
+.I pad
+values must be zero filled.
+
+.SH RETURN VALUE
+See
+.BR io_uring_prep_cancel (3)
+for details on the return value. If
+.I timeout
+is set to indicate a timeout, then
+.B -ETIME
+will be returned if exceeded. If an unknown value is set in the request,
+or if the pad values are not cleared to zero, then
+.B -EINVAL
+is returned.
+.SH SEE ALSO
+.BR io_uring_prep_cancel (3)
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 75c69ff..2f87783 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -4,20 +4,22 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH IO_URING_SETUP 2 2019-01-29 "Linux" "Linux Programmer's Manual"
+.TH io_uring_setup 2 2019-01-29 "Linux" "Linux Programmer's Manual"
 .SH NAME
 io_uring_setup \- setup a context for performing asynchronous I/O
 .SH SYNOPSIS
 .nf
-.BR "#include <linux/io_uring.h>"
+.BR "#include <liburing.h>"
 .PP
 .BI "int io_uring_setup(u32 " entries ", struct io_uring_params *" p );
 .fi
 .PP
 .SH DESCRIPTION
 .PP
-The io_uring_setup() system call sets up a submission queue (SQ) and
-completion queue (CQ) with at least
+The
+.BR io_uring_setup (2)
+system call sets up a submission queue (SQ) and completion queue (CQ) with at
+least
 .I entries
 entries, and returns a file descriptor which can be used to perform
 subsequent operations on the io_uring instance.  The submission and
@@ -69,6 +71,14 @@
 It is illegal to mix and match polled and non-polled I/O on an io_uring
 instance.
 
+This is only applicable for storage devices for now, and the storage device
+must be configured for polling. How to do that depends on the device type
+in question. For NVMe devices, the nvme driver must be loaded with the
+.I poll_queues
+parameter set to the desired number of polling queues. The polling queues
+will be shared appropriately between the CPUs in the system, if the number
+is less than the number of online CPU threads.
+
 .TP
 .B IORING_SETUP_SQPOLL
 When this flag is specified, a kernel thread is created to perform
@@ -117,20 +127,34 @@
 described below.
 .TP
 .BR
+Note that, when using a ring setup with
+.BR IORING_SETUP_SQPOLL ,
+you never directly call the
+.BR io_uring_enter (2)
+system call. That is usually taken care of by liburing's
+.BR io_uring_submit (3)
+function. It automatically determines if you are using
+polling mode or not and deals with when your program needs to call
+.BR io_uring_enter (2)
+without you having to bother about it.
+.TP
+.BR
 Before version 5.11 of the Linux kernel, to successfully use this feature, the
 application must register a set of files to be used for IO through
 .BR io_uring_register (2)
 using the
 .B IORING_REGISTER_FILES
 opcode. Failure to do so will result in submitted IO being errored with
-.B EBADF.
+.BR EBADF .
 The presence of this feature can be detected by the
 .B IORING_FEAT_SQPOLL_NONFIXED
 feature flag.
 In version 5.11 and later, it is no longer necessary to register files to use
 this feature. 5.11 also allows using this as non-root, if the user has the
 .B CAP_SYS_NICE
-capability.
+capability. In 5.13 this requirement was also relaxed, and no special privileges
+are needed for SQPOLL in newer kernels. Certain stable kernels older than 5.13
+may also support unprivileged SQPOLL.
 .TP
 .B IORING_SETUP_SQ_AFF
 If this flag is specified, then the poll thread will be bound to the
@@ -147,7 +171,7 @@
 .TP
 .B IORING_SETUP_CQSIZE
 Create the completion queue with
-.IR "struct io_uring_params.cq_entries"
+.I "struct io_uring_params.cq_entries"
 entries.  The value must be greater than
 .IR entries ,
 and may be rounded up to the next power-of-two.
@@ -156,27 +180,30 @@
 If this flag is specified, and if
 .IR entries
 exceeds
-.B IORING_MAX_ENTRIES ,
+.BR IORING_MAX_ENTRIES ,
 then
-.IR entries
+.I entries
 will be clamped at
-.B IORING_MAX_ENTRIES .
+.BR IORING_MAX_ENTRIES .
 If the flag
-.BR IORING_SETUP_SQPOLL
+.B IORING_SETUP_CQSIZE
 is set, and if the value of
-.IR "struct io_uring_params.cq_entries"
+.I "struct io_uring_params.cq_entries"
 exceeds
-.B IORING_MAX_CQ_ENTRIES ,
+.BR IORING_MAX_CQ_ENTRIES ,
 then it will be clamped at
-.B IORING_MAX_CQ_ENTRIES .
+.BR IORING_MAX_CQ_ENTRIES .
 .TP
 .B IORING_SETUP_ATTACH_WQ
 This flag should be set in conjunction with
-.IR "struct io_uring_params.wq_fd"
+.I "struct io_uring_params.wq_fd"
 being set to an existing io_uring ring file descriptor. When set, the
 io_uring instance being created will share the asynchronous worker
 thread backend of the specified io_uring ring, rather than create a new
-separate thread pool.
+separate thread pool. Additionally the sq polling thread will be shared,
+if
+.B IORING_SETUP_SQPOLL
+is set.
 .TP
 .B IORING_SETUP_R_DISABLED
 If this flag is specified, the io_uring ring starts in a disabled state.
@@ -186,7 +213,7 @@
 for details on how to enable the ring. Available since 5.10.
 .TP
 .B IORING_SETUP_SUBMIT_ALL
-Normally io_uring stops submitting a batch of request, if one of these requests
+Normally io_uring stops submitting a batch of requests if one of these requests
 results in an error. This can cause submission of less than what is expected,
 if a request ends in error while being submitted. If the ring is created with
 this flag,
@@ -212,9 +239,9 @@
 .TP
 .B IORING_SETUP_TASKRUN_FLAG
 Used in conjunction with
-.B IORING_SETUP_COOP_TASKRUN,
+.BR IORING_SETUP_COOP_TASKRUN ,
 this provides a flag,
-.B IORING_SQ_TASKRUN,
+.BR IORING_SQ_TASKRUN ,
 which is set in the SQ ring
 .I flags
 whenever completions are pending that should be processed. liburing will check
@@ -234,11 +261,90 @@
 passthrough command for NVMe passthrough needs this. Available since 5.19.
 .TP
 .B IORING_SETUP_CQE32
-If set, io_uring will use 32-byte CQEs rather than the normal 32-byte sized
+If set, io_uring will use 32-byte CQEs rather than the normal 16-byte sized
 variant. This is a requirement for using certain request types, as of 5.19
 only the
 .B IORING_OP_URING_CMD
 passthrough command for NVMe passthrough needs this. Available since 5.19.
+.TP
+.B IORING_SETUP_SINGLE_ISSUER
+A hint to the kernel that only a single task (or thread) will submit requests, which is
+used for internal optimisations. The submission task is either the task that created the
+ring, or if
+.B IORING_SETUP_R_DISABLED
+is specified then it is the task that enables the ring through
+.BR io_uring_register (2) .
+The kernel enforces this rule, failing requests with
+.B -EEXIST
+if the restriction is violated.
+Note that when
+.B IORING_SETUP_SQPOLL
+is set, the polling task is considered to be doing all submissions on behalf
+of userspace, so the rule is always satisfied regardless of how many
+userspace tasks call
+.BR io_uring_enter (2).
+Available since 6.0.
+.TP
+.B IORING_SETUP_DEFER_TASKRUN
+By default, io_uring will process all outstanding work at the end of any system
+call or thread interrupt. This can delay the application from making other progress.
+Setting this flag will hint to io_uring that it should defer work until an
+.BR io_uring_enter (2)
+call with the 
+.B IORING_ENTER_GETEVENTS
+flag set. This allows the application to request work to run just before it wants to
+process completions.
+This flag requires the
+.B IORING_SETUP_SINGLE_ISSUER
+flag to be set, and also enforces that the call to
+.BR io_uring_enter (2)
+is called from the same thread that submitted requests.
+Note that if this flag is set then it is the application's responsibility to periodically
+trigger work (for example via any of the CQE waiting functions) or else completions may
+not be delivered.
+Available since 6.1.
+.TP
+.B IORING_SETUP_NO_MMAP
+By default, io_uring allocates kernel memory that callers must subsequently
+.BR mmap (2).
+If this flag is set, io_uring instead uses caller-allocated buffers;
+.I p->cq_off.user_addr
+must point to the memory for the sq/cq rings, and
+.I p->sq_off.user_addr
+must point to the memory for the sqes.
+Each allocation must be contiguous memory.
+Typically, callers should allocate this memory by using
+.BR mmap (2)
+to allocate a huge page.
+If this flag is set, a subsequent attempt to
+.BR mmap (2)
+the io_uring file descriptor will fail.
+Available since 6.5.
+.TP
+.B IORING_SETUP_REGISTERED_FD_ONLY
+If this flag is set, io_uring will register the ring file descriptor, and
+return the registered descriptor index, without ever allocating an unregistered
+file descriptor. The caller will need to use
+.B IORING_REGISTER_USE_REGISTERED_RING
+when calling
+.BR io_uring_register (2).
+This flag only makes sense when used together with
+.BR IORING_SETUP_NO_MMAP ,
+which also needs to be set.
+Available since 6.5.
+
+.TP
+.B IORING_SETUP_NO_SQARRAY
+If this flag is set, entries in the submission queue will be submitted in order,
+wrapping around to the first entry after reaching the end of the queue. In other
+words, there will be no more indirection via the array of submission entries,
+and the queue will be indexed directly by the submission queue tail and the
+range of indexes represented by it, modulo the queue size. Consequently, the user
+should not map the array of submission queue entries, and the corresponding
+offset in
+.I struct io_sqring_offsets
+will be set to zero. Available since 6.6.
+
 .PP
 If no flags are specified, the io_uring instance is setup for
 interrupt driven I/O.  I/O may be submitted using
@@ -255,21 +361,30 @@
 .TP
 .B IORING_FEAT_SINGLE_MMAP
 If this flag is set, the two SQ and CQ rings can be mapped with a single
-.I mmap(2)
+.BR mmap (2)
 call. The SQEs must still be allocated separately. This brings the necessary
-.I mmap(2)
+.BR mmap (2)
 calls down from three to two. Available since kernel 5.4.
 .TP
 .B IORING_FEAT_NODROP
-If this flag is set, io_uring supports never dropping completion events.
-If a completion event occurs and the CQ ring is full, the kernel stores
-the event internally until such a time that the CQ ring has room for more
-entries. If this overflow condition is entered, attempting to submit more
-IO will fail with the
+If this flag is set, io_uring supports almost never dropping completion events.
+A dropped event can only occur if the kernel runs out of memory, in which case
+you have worse problems than a lost event. Your application and others will
+likely get OOM killed anyway. If a completion event occurs and the CQ ring is
+full, the kernel stores the event internally until such a time that the CQ ring
+has room for more entries. In earlier kernels, if this overflow condition is
+entered, attempting to submit more IO would fail with the
 .B -EBUSY
 error value, if it can't flush the overflown events to the CQ ring. If this
 happens, the application must reap events from the CQ ring and attempt the
-submit again. Available since kernel 5.5.
+submit again. If the kernel has no free memory to store the event internally,
+this will be visible as an increase in the overflow value on the CQ ring.
+Available since kernel 5.5. Additionally
+.BR io_uring_enter (2)
+will return
+.B -EBADR
+the next time it would otherwise sleep waiting for completions (since kernel 5.19).
+
 .TP
 .B IORING_FEAT_SUBMIT_STABLE
 If this flag is set, applications can be certain that any data for
@@ -282,19 +397,18 @@
 ==
 .B -1
 with
-.B IORING_OP_{READV,WRITEV}
-,
-.B IORING_OP_{READ,WRITE}_FIXED
-, and
+.BR IORING_OP_{READV,WRITEV} ,
+.BR IORING_OP_{READ,WRITE}_FIXED ,
+and
 .B IORING_OP_{READ,WRITE}
 to mean current file position, which behaves like
-.I preadv2(2)
+.BR preadv2 (2)
 and
-.I pwritev2(2)
+.BR pwritev2 (2)
 with
 .I offset
 ==
-.B -1.
+.BR -1 .
 It'll use (and update) the current file position. This obviously comes
 with the caveat that if the application has multiple reads or writes in flight,
 then the end result will not be as expected. This is similar to threads sharing
@@ -304,15 +418,13 @@
 .B IORING_FEAT_CUR_PERSONALITY
 If this flag is set, then io_uring guarantees that both sync and async
 execution of a request assumes the credentials of the task that called
-.I
-io_uring_enter(2)
+.BR io_uring_enter (2)
 to queue the requests. If this flag isn't set, then requests are issued with
 the credentials of the task that originally registered the io_uring. If only
 one task is using a ring, then this flag doesn't matter as the credentials
 will always be the same. Note that this is the default behavior, tasks can
 still register different personalities through
-.I
-io_uring_register(2)
+.BR io_uring_register (2)
 with
 .B IORING_REGISTER_PERSONALITY
 and specify the personality to use in the sqe. Available since kernel 5.6.
@@ -342,16 +454,16 @@
 can be used for IO commands without needing registration. Available since
 kernel 5.11.
 .TP
-.B IORING_FEAT_ENTER_EXT_ARG
+.B IORING_FEAT_EXT_ARG
 If this flag is set, then the
 .BR io_uring_enter (2)
 system call supports passing in an extended argument instead of just the
-.IR "sigset_t"
+.I "sigset_t"
 of earlier kernels. This
 extended argument is of type
-.IR "struct io_uring_getevents_arg"
+.I "struct io_uring_getevents_arg"
 and allows the caller to pass in both a
-.IR "sigset_t"
+.I sigset_t
 and a timeout argument for waiting on events. The struct layout is as follows:
 .TP
 .in +8n
@@ -392,14 +504,34 @@
 .B IORING_FEAT_LINKED_FILE
 If this flag is set, then io_uring supports sane assignment of files for SQEs
 that have dependencies. For example, if a chain of SQEs are submitted with
-.B IOSQE_IO_LINK,
+.BR IOSQE_IO_LINK ,
 then kernels without this flag will prepare the file for each link upfront.
 If a previous link opens a file with a known index, eg if direct descriptors
 are used with open or accept, then file assignment needs to happen post
 execution of that SQE. If this flag is set, then the kernel will defer
 file assignment until execution of a given request is started. Available since
 kernel 5.17.
+.TP
+.B IORING_FEAT_REG_REG_RING
+If this flag is set, then io_uring supports calling
+.BR io_uring_register (2)
+using a registered ring fd, via
+.BR IORING_REGISTER_USE_REGISTERED_RING .
+Available since kernel 6.3.
+.TP
+.B IORING_FEAT_MIN_TIMEOUT
+If this flag is set, then io_uring supports passing in a minimum batch wait
+timeout. See
+.BR io_uring_submit_and_wait_min_timeout (3)
+for more details.
 
+.TP
+.B IORING_FEAT_RECVSEND_BUNDLE
+If this flag is set, then io_uring supports bundled send and recv operations.
+See
+.BR io_uring_prep_send_bundle (3)
+for more information. Also implies support for provided buffers in send
+operations.
 .PP
 The rest of the fields in the
 .I struct io_uring_params
@@ -421,7 +553,8 @@
     __u32 flags;
     __u32 dropped;
     __u32 array;
-    __u32 resv[3];
+    __u32 resv1;
+    __u64 user_addr;
 };
 .EE
 .in
@@ -452,7 +585,7 @@
 .BR io_uring_setup (2).
 The addition of
 .I sq_off.array
-to the length of the region accounts for the fact that the ring
+to the length of the region accounts for the fact that the ring is
 located at the end of the data structure.  As an example, the ring
 buffer head pointer can be accessed by adding
 .I sq_off.head
@@ -517,7 +650,8 @@
     __u32 overflow;
     __u32 cqes;
     __u32 flags;
-    __u32 resv[3];
+    __u32 resv1;
+    __u64 user_addr;
 };
 .EE
 .in
@@ -535,7 +669,9 @@
 .PP
 Closing the file descriptor returned by
 .BR io_uring_setup (2)
-will free all resources associated with the io_uring context.
+will free all resources associated with the io_uring context. Note that this
+may happen asynchronously within the kernel, so it is not guaranteed that
+resources are freed immediately.
 .PP
 .SH RETURN VALUE
 .BR io_uring_setup (2)
@@ -548,16 +684,15 @@
 .BR io_uring_enter (2)
 system calls.
 
-On error,
-.B -1
-is returned and
+On error, a negative error code is returned. The caller should not rely on the
 .I errno
-is set appropriately.
+variable.
 .PP
 .SH ERRORS
 .TP
 .B EFAULT
-params is outside your accessible address space.
+.I params
+is outside your accessible address space.
 .TP
 .B EINVAL
 The resv array contains non-zero data, p.flags contains an unsupported
@@ -572,6 +707,10 @@
 was specified, but
 .I io_uring_params.cq_entries
 was invalid.
+.B IORING_SETUP_REGISTERED_FD_ONLY
+was specified, but
+.B IORING_SETUP_NO_MMAP
+was not.
 .TP
 .B EMFILE
 The per-process limit on the number of open file descriptors has been
@@ -591,6 +730,20 @@
 .B IORING_SETUP_SQPOLL
 was specified, but the effective user ID of the caller did not have sufficient
 privileges.
+.TP
+.B EPERM
+.I /proc/sys/kernel/io_uring_disabled
+has the value 2, or it has the value 1 and the calling process does not hold the
+.B CAP_SYS_ADMIN
+capability or is not a member of
+.IR /proc/sys/kernel/io_uring_group .
+.TP
+.B ENXIO
+.B IORING_SETUP_ATTACH_WQ
+was set, but
+.I params.wq_fd
+did not refer to an io_uring instance, or referred to an instance that is in the
+process of shutting down.
 .SH SEE ALSO
 .BR io_uring_register (2),
 .BR io_uring_enter (2)
diff --git a/man/io_uring_setup_buf_ring.3 b/man/io_uring_setup_buf_ring.3
new file mode 100644
index 0000000..df3b3af
--- /dev/null
+++ b/man/io_uring_setup_buf_ring.3
@@ -0,0 +1,94 @@
+.\" Copyright (C) 2022 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_setup_buf_ring 3 "Mar 07, 2023" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_setup_buf_ring \- setup and register buffer ring for provided buffers
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *" ring ",
+.BI "                            unsigned int " nentries ",
+.BI "                            int " bgid ",
+.BI "                            unsigned int " flags ",
+.BI "                            int *" err ");"
+.BI "
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_setup_buf_ring (3)
+function registers a shared buffer ring to be used with provided buffers. For
+the request types that support it, provided buffers are given to the ring and
+one is selected by a request if it has
+.B IOSQE_BUFFER_SELECT
+set in the SQE
+.IR flags ,
+when the request is ready to receive data. This allows both clear ownership
+of the buffer lifetime, and a way to have more read/receive type of operations
+in flight than buffers available.
+
+The
+.I ring
+argument must point to the ring for which the provided buffer ring is being
+registered,
+.I nentries
+is the number of entries requested in the buffer ring. This argument must be
+a power of 2.
+.I bgid
+is the chosen buffer group ID,
+.I flags
+are modifier flags for the operation, and
+.I *err
+is a pointer to an integer for the error value if any part of the ring
+allocation and registration fails.
+
+The
+.I flags
+argument can be set to one of the following values:
+.TP
+.B IOU_PBUF_RING_INC
+The buffers in this ring can be incrementally consumed. With partial
+consumption, each completion of a given buffer ID will continue where the
+previous one left off, or from the start if no completions have been seen yet.
+When more completions should be expected for a given buffer ID, the CQE will
+have
+.B IORING_CQE_F_BUF_MORE
+set in the
+.I flags
+member.
+.PP
+
+Under the covers, this function uses
+.BR io_uring_register_buf_ring (3)
+to register the ring, and handles the allocation of the ring rather than
+letting the application open code it.
+
+To unregister and free a buffer group ID setup with this function, the
+application must call
+.BR io_uring_free_buf_ring (3) .
+
+Available since 5.19.
+
+.SH RETURN VALUE
+On success
+.BR io_uring_setup_buf_ring (3)
+returns a pointer to the buffer ring. On failure it returns
+.BR NULL
+and sets
+.I *err
+to -errno.
+.SH NOTES
+Note that even if the kernel supports this feature, registering a provided
+buffer ring may still fail with
+.B -EINVAL
+if the host is a 32-bit architecture and the memory being passed in resides in
+high memory.
+.SH SEE ALSO
+.BR io_uring_register_buf_ring (3),
+.BR io_uring_buf_ring_init (3),
+.BR io_uring_buf_ring_add (3),
+.BR io_uring_buf_ring_advance (3),
+.BR io_uring_buf_ring_cq_advance (3)
diff --git a/man/io_uring_sq_ready.3 b/man/io_uring_sq_ready.3
index 9927388..ba155b3 100644
--- a/man/io_uring_sq_ready.3
+++ b/man/io_uring_sq_ready.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_sq_ready "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_sq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_sq_ready \- number of unconsumed or unsubmitted entries in the SQ ring
 .SH SYNOPSIS
@@ -15,7 +15,7 @@
 .PP
 The
 .BR io_uring_sq_ready (3)
-function retuns the number of unconsumed (if SQPOLL) or unsubmitted entries
+function returns the number of unconsumed (if SQPOLL) or unsubmitted entries
 that exist in the SQ ring belonging to the
 .I ring
 param.
diff --git a/man/io_uring_sq_space_left.3 b/man/io_uring_sq_space_left.3
index b5b2e21..6fd04c4 100644
--- a/man/io_uring_sq_space_left.3
+++ b/man/io_uring_sq_space_left.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_sq_space-left "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_sq_space-left 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_sq_space_left \- free space in the SQ ring
 .SH SYNOPSIS
@@ -15,7 +15,7 @@
 .PP
 The
 .BR io_uring_sq_space_left (3)
-function retuns how much space is left in the SQ ring belonging to the
+function returns how much space is left in the SQ ring belonging to the
 .I ring
 param.
 
diff --git a/man/io_uring_sqe_set_data.3 b/man/io_uring_sqe_set_data.3
index 274a892..0a91f57 100644
--- a/man/io_uring_sqe_set_data.3
+++ b/man/io_uring_sqe_set_data.3
@@ -41,6 +41,15 @@
 can be called to retrieve the data pointer or value associated with the
 submitted request.
 
+Note that if neither of these functions is called, or the
+.I user_data
+field in the
+.IR sqe
+isn't set manually either, then the field may contain a value from a previous
+use of this sqe. If an application relies on always having a valid
+.I user_data
+value present, it must always assign one to each sqe.
+
 .SH RETURN VALUE
 None
 .SH SEE ALSO
diff --git a/man/io_uring_sqe_set_flags.3 b/man/io_uring_sqe_set_flags.3
index 75e836b..ab0bb8e 100644
--- a/man/io_uring_sqe_set_flags.3
+++ b/man/io_uring_sqe_set_flags.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_sqe_set_flags "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_sqe_set_flags 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_sqe_set_flags \- set flags for submission queue entry
 .SH SYNOPSIS
@@ -37,7 +37,8 @@
 always (or most of the time) block, the application can ask for an sqe to be
 issued async from the start. Note that this flag immediately causes the SQE
 to be offloaded to an async helper thread with no initial non-blocking attempt.
-This may be less efficient and should not be used sporadically.
+This may be less efficient and should not be used liberally or without
+understanding the performance and efficiency tradeoffs.
 .TP
 .B IOSQE_IO_LINK
 When this flag is specified, the SQE forms a link with the next SQE in the
@@ -65,7 +66,7 @@
 .B IOSQE_CQE_SKIP_SUCCESS
 Request that no CQE be generated for this request, if it completes successfully.
 This can be useful in cases where the application doesn't need to know when
-a specific request completed, if it completed succesfully.
+a specific request completed, if it completed successfully.
 .TP
 .B IOSQE_BUFFER_SELECT
 If set, and if the request types supports it, select an IO buffer from the
diff --git a/man/io_uring_sqring_wait.3 b/man/io_uring_sqring_wait.3
index d70cf40..4d3a567 100644
--- a/man/io_uring_sqring_wait.3
+++ b/man/io_uring_sqring_wait.3
@@ -2,7 +2,7 @@
 .\"
 .\" SPDX-License-Identifier: LGPL-2.0-or-later
 .\"
-.TH io_uring_sqring_wait "January 25, 2022" "liburing-2.1" "liburing Manual"
+.TH io_uring_sqring_wait 3 "January 25, 2022" "liburing-2.1" "liburing Manual"
 .SH NAME
 io_uring_sqring_wait \- wait for free space in the SQ ring
 .SH SYNOPSIS
diff --git a/man/io_uring_submit.3 b/man/io_uring_submit.3
index f871b89..c7dbbed 100644
--- a/man/io_uring_submit.3
+++ b/man/io_uring_submit.3
@@ -26,7 +26,12 @@
 .SH RETURN VALUE
 On success
 .BR io_uring_submit (3)
-returns the number of submitted submission queue entries. On failure it returns
+returns the number of submitted submission queue entries, if SQPOLL is not used.
+If SQPOLL is used, the return value may report a higher number of submitted
+entries than actually submitted. If the user requires accurate information
+about how many submission queue entries have been successfully submitted, while
+using SQPOLL, the user must fall back to repeatedly submitting a single submission
+queue entry. On failure it returns
 .BR -errno .
 .SH NOTES
 For any request that passes in data in a struct, that data must remain
diff --git a/man/io_uring_submit_and_get_events.3 b/man/io_uring_submit_and_get_events.3
new file mode 100644
index 0000000..9e143d1
--- /dev/null
+++ b/man/io_uring_submit_and_get_events.3
@@ -0,0 +1,31 @@
+.\" Copyright (C), 2022  dylany
+.\" You may distribute this file under the terms of the GNU Free
+.\" Documentation License.
+.TH io_uring_submit_and_get_events 3 "September 5, 2022" "liburing-2.3" "liburing Manual"
+.SH NAME
+io_uring_submit_and_get_events \- submit requests to the submission queue and flush completions
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_submit_and_get_events(struct io_uring *" ring ");"
+.fi
+
+.SH DESCRIPTION
+The
+.BR io_uring_submit_and_get_events (3)
+function submits the next requests from the submission queue as with
+.BR io_uring_submit (3) .
+After submission it will flush CQEs as with
+.BR io_uring_get_events (3) .
+
+The benefit of this function is that it does both with only one system call.
+
+.SH RETURN VALUE
+On success
+.BR io_uring_submit_and_get_events (3)
+returns the number of submitted submission queue entries. On failure it returns
+.BR -errno .
+.SH SEE ALSO
+.BR io_uring_submit (3),
+.BR io_uring_get_events (3)
diff --git a/man/io_uring_submit_and_wait.3 b/man/io_uring_submit_and_wait.3
index 1c9eb62..2351f33 100644
--- a/man/io_uring_submit_and_wait.3
+++ b/man/io_uring_submit_and_wait.3
@@ -16,7 +16,7 @@
 .PP
 The
 .BR io_uring_submit_and_wait (3)
-function submits the next events to the submission queue belonging to the
+function submits the next requests from the submission queue belonging to the
 .I ring
 and waits for
 .I wait_nr
@@ -27,12 +27,18 @@
 and prepares the SQE, it can be submitted with
 .BR io_uring_submit_and_wait (3) .
 
+Ideally used with a ring setup with
+.BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN
+as that will greatly reduce the number of context switches that an application
+will see waiting on multiple requests.
+
 .SH RETURN VALUE
 On success
 .BR io_uring_submit_and_wait (3)
 returns the number of submitted submission queue entries. On failure it returns
 .BR -errno .
 .SH SEE ALSO
+.BR io_uring_queue_init_params (3),
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
 .BR io_uring_submit_and_wait_timeout (3)
diff --git a/man/io_uring_submit_and_wait_min_timeout.3 b/man/io_uring_submit_and_wait_min_timeout.3
new file mode 100644
index 0000000..6a52806
--- /dev/null
+++ b/man/io_uring_submit_and_wait_min_timeout.3
@@ -0,0 +1,119 @@
+.\" Copyright (C) 2024 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_submit_and_wait_min_timeout 3 "Jan 11, 2024" "liburing-2.8" "liburing Manual"
+.SH NAME
+io_uring_submit_and_wait_min_timeout \- submit requests to the submission queue and
+wait for completions with both batch and normal timeout
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_submit_and_wait_min_timeout(struct io_uring *" ring ","
+.BI "                                         struct io_uring_cqe **" cqe_ptr ","
+.BI "                                         unsigned " wait_nr ","
+.BI "                                         struct __kernel_timespec *" ts ","
+.BI "                                         unsigned int " min_wait_usec ",
+.BI "                                         sigset_t *" sigmask ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_submit_and_wait_min_timeout (3)
+function submits the next requests from the submission queue belonging to the
+.I ring
+and waits for
+.I wait_nr
+completion events, or until the timeout
+.I ts
+expires. The completion events are stored in the
+.I cqe_ptr
+array. If non-zero,
+.I min_wait_usec
+denotes a timeout for the
+.I wait_nr
+batch.
+
+The
+.I sigmask
+specifies the set of signals to block. If set, it is equivalent to atomically
+executing the following calls:
+.PP
+.in +4n
+.EX
+sigset_t origmask;
+
+pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
+ret = io_uring_submit_and_wait_min_timeout(ring, cqe, wait_nr, ts, min_wait, NULL);
+pthread_sigmask(SIG_SETMASK, &origmask, NULL);
+.EE
+.in
+.PP
+This works like
+.BR io_uring_submit_and_wait_timeout (3)
+with the twist that it applies a minimum timeout for the requested batch size
+of requests to wait for. While
+.BR io_uring_submit_and_wait_timeout (3)
+waits for as long as
+.IR ts
+specifies, or until
+.IR wait_nr
+of request completions have been received, if
+.IR min_wait_usec
+is set, then this is the timeout for the
+.IR wait_nr
+number of requests. If the requested number of completions have been received
+within
+.IR min_wait_usec
+number of microseconds, then the function returns successfully. If that isn't
+the case, once
+.IR min_wait_usec
+time has passed, control is returned if any completions have been posted. If
+no completions have been posted, the kernel switches to a normal wait of up
+to
+.IR ts
+specified amount of time, subtracting the time already waited. If any
+completions are posted after this happens, control is returned immediately to
+the application.
+
+This differs from the normal timeout waiting in that waiting continues past
+the initial timeout, if and only if no completions have been posted. It's meant
+to be used to optimize batch waiting for requests, where the application
+allots a budget of
+.IR min_wait_usec
+amount of time to receive
+.IR wait_nr
+number of completions, but if none are received, then waiting can continue
+without incurring extra context switches or extra kernel/user transitions.
+
+Can be used with any ring, as long as the kernel supports it. Support is
+indicated by checking the
+.BR IORING_FEAT_MIN_TIMEOUT
+feature flag after the ring has been set up. Ideally used with a ring setup
+with
+.BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN
+as that will greatly reduce the number of context switches that an application
+will see waiting on multiple requests.
+
+Available since 6.12.
+
+.SH RETURN VALUE
+On success
+.BR io_uring_submit_and_wait_min_timeout (3)
+returns the number of submitted submission queue entries. On failure it returns
+.BR -errno .
+If the kernel doesn't support this functionality,
+.B -EINVAL
+will be returned. See the note above on the
+.B IORING_FEAT_MIN_TIMEOUT
+feature flag.
+The most common failure case is not receiving a completion within the specified
+timeout,
+.B -ETIME
+is returned in this case.
+.SH SEE ALSO
+.BR io_uring_queue_init_params (3),
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_submit_and_wait (3),
+.BR io_uring_submit_and_wait_timeout (3),
+.BR io_uring_wait_cqe (3)
diff --git a/man/io_uring_submit_and_wait_timeout.3 b/man/io_uring_submit_and_wait_timeout.3
index 80fe889..7461176 100644
--- a/man/io_uring_submit_and_wait_timeout.3
+++ b/man/io_uring_submit_and_wait_timeout.3
@@ -20,34 +20,54 @@
 .PP
 The
 .BR io_uring_submit_and_wait_timeout (3)
-function submits the next events to the submission queue belonging to the
+function submits the next requests from the submission queue belonging to the
 .I ring
 and waits for
 .I wait_nr
-completion events or until the timeout
+completion events, or until the timeout
 .I ts
 expires. The completion events are stored in the
 .I cqe_ptr
-array. The
+array.
+.PP
+The
 .I sigmask
-specifies the set of signals to block. The prevailing signal mask is restored
-before returning.
+specifies the set of signals to block. If set, it is equivalent to atomically
+executing the following calls:
+.PP
+.in +4n
+.EX
+sigset_t origmask;
 
+pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
+ret = io_uring_submit_and_wait_timeout(ring, cqe, wait_nr, ts, NULL);
+pthread_sigmask(SIG_SETMASK, &origmask, NULL);
+.EE
+.in
+.PP
 After the caller retrieves a submission queue entry (SQE) with
 .BR io_uring_get_sqe (3)
 and prepares the SQE, it can be submitted with
 .BR io_uring_submit_and_wait_timeout (3) .
 
+Ideally used with a ring setup with
+.BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN
+as that will greatly reduce the number of context switches that an application
+will see waiting on multiple requests.
+
 .SH RETURN VALUE
 On success
 .BR io_uring_submit_and_wait_timeout (3)
 returns the number of submitted submission queue entries. On failure it returns
 .BR -errno .
+Note that in earlier versions of the liburing library, the return value was 0
+on success.
 The most common failure case is not receiving a completion within the specified
 timeout,
 .B -ETIME
 is returned in this case.
 .SH SEE ALSO
+.BR io_uring_queue_init_params (3),
 .BR io_uring_get_sqe (3),
 .BR io_uring_submit (3),
 .BR io_uring_submit_and_wait (3),
diff --git a/man/io_uring_unregister_napi.3 b/man/io_uring_unregister_napi.3
new file mode 100644
index 0000000..f7087ef
--- /dev/null
+++ b/man/io_uring_unregister_napi.3
@@ -0,0 +1,27 @@
+.\" Copyright (C) 2022 Stefan Roesch <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_unregister_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual"
+.SH NAME
+io_uring_unregister_napi \- unregister NAPI busy poll settings
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_unregister_napi(struct io_uring *" ring ","
+.BI "                             struct io_uring_napi *" napi)
+.PP
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_unregister_napi (3)
+function unregisters the NAPI busy poll settings for subsequent operations.
+
+.SH RETURN VALUE
+On success
+.BR io_uring_unregister_napi (3)
+returns 0. On failure it returns
+.BR -errno .
+It also updates the napi structure with the current values.
diff --git a/man/io_uring_wait_cqe.3 b/man/io_uring_wait_cqe.3
index c115f6f..c2fffed 100644
--- a/man/io_uring_wait_cqe.3
+++ b/man/io_uring_wait_cqe.3
@@ -31,10 +31,11 @@
 .SH RETURN VALUE
 On success
 .BR io_uring_wait_cqe (3)
-returns 0 and the cqe_ptr parm is filled in. On failure it returns
+returns 0 and the cqe_ptr param is filled in. On failure it returns
 .BR -errno .
 The return value indicates the result of waiting for a CQE, and it has no
 relation to the CQE result itself.
 .SH SEE ALSO
 .BR io_uring_submit (3),
+.BR io_uring_wait_cqe_timeout (3),
 .BR io_uring_wait_cqes (3)
diff --git a/man/io_uring_wait_cqe_nr.3 b/man/io_uring_wait_cqe_nr.3
index 5a4a5d5..9e19098 100644
--- a/man/io_uring_wait_cqe_nr.3
+++ b/man/io_uring_wait_cqe_nr.3
@@ -31,13 +31,19 @@
 the application can retrieve the completion with
 .BR io_uring_wait_cqe (3).
 
+Ideally used with a ring setup with
+.BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN
+as that will greatly reduce the number of context switches that an application
+will see waiting on multiple requests.
+
 .SH RETURN VALUE
 On success
 .BR io_uring_wait_cqe_nr (3)
-returns 0 and the cqe_ptr parm is filled in. On failure it returns
+returns 0 and the cqe_ptr param is filled in. On failure it returns
 .BR -errno .
 The return value indicates the result of waiting for a CQE, and it has no
 relation to the CQE result itself.
 .SH SEE ALSO
+.BR io_uring_queue_init_params (3),
 .BR io_uring_submit (3),
 .BR io_uring_wait_cqes (3)
diff --git a/man/io_uring_wait_cqe_timeout.3 b/man/io_uring_wait_cqe_timeout.3
index 965fc32..901772b 100644
--- a/man/io_uring_wait_cqe_timeout.3
+++ b/man/io_uring_wait_cqe_timeout.3
@@ -40,14 +40,22 @@
 For newer kernels with that feature flag set, there is no implied submit
 when waiting for a request.
 
+If
+.I ts
+is
+.BR NULL ,
+then this behaves like
+.BR io_uring_wait_cqe (3)
+in that it will wait forever for an event.
+
 .SH RETURN VALUE
 On success
-.BR io_uring_wait_cqes (3)
-returns 0 and the cqe_ptr parm is filled in. On failure it returns
+.BR io_uring_wait_cqe_timeout (3)
+returns 0 and the cqe_ptr param is filled in. On failure it returns
 .BR -errno .
 The return value indicates the result of waiting for a CQE, and it has no
 relation to the CQE result itself.
 .SH SEE ALSO
 .BR io_uring_submit (3),
-.BR io_uring_wait_cqe_timeout (3),
+.BR io_uring_wait_cqes (3),
 .BR io_uring_wait_cqe (3)
diff --git a/man/io_uring_wait_cqes.3 b/man/io_uring_wait_cqes.3
index b771ebe..816cd73 100644
--- a/man/io_uring_wait_cqes.3
+++ b/man/io_uring_wait_cqes.3
@@ -25,14 +25,29 @@
 .I ring
 param, waiting for them if necessary or until the timeout
 .I ts
-expires. The
+expires.
+.PP
+The
 .I sigmask
-specifies the set of signals to block. The prevailing signal mask is restored
-before returning.
+specifies the set of signals to block. If set, it is equivalent to atomically
+executing the following calls:
+.PP
+.in +4n
+.EX
+sigset_t origmask;
 
+pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
+ret = io_uring_wait_cqes(ring, cqe, wait_nr, ts, NULL);
+pthread_sigmask(SIG_SETMASK, &origmask, NULL);
+.EE
+.in
+.PP
 The
 .I cqe_ptr
-param is filled in on success.
+param is filled in on success with the first CQE. Callers of this function
+should use
+.BR io_uring_for_each_cqe (3)
+to iterate all available CQEs.
 
 If
 .I ts
@@ -45,12 +60,21 @@
 For newer kernels with that feature flag set, there is no implied submit
 when waiting for a request.
 
+If
+.I ts
+is
+.BR NULL ,
+then this behaves like
+.BR io_uring_wait_cqe (3)
+in that it will wait forever for an event.
+
 .SH RETURN VALUE
 On success
 .BR io_uring_wait_cqes (3)
-returns 0 and the cqe_ptr parm is filled in. On failure it returns
+returns 0 and the cqe_ptr param is filled in. On failure it returns
 .BR -errno .
 .SH SEE ALSO
 .BR io_uring_submit (3),
+.BR io_uring_for_each_cqe (3),
 .BR io_uring_wait_cqe_timeout (3),
 .BR io_uring_wait_cqe (3)
diff --git a/man/io_uring_wait_cqes_min_timeout.3 b/man/io_uring_wait_cqes_min_timeout.3
new file mode 100644
index 0000000..e3d9849
--- /dev/null
+++ b/man/io_uring_wait_cqes_min_timeout.3
@@ -0,0 +1,76 @@
+.\" Copyright (C) 2024 Jens Axboe <[email protected]>
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_wait_cqes_min_timeout 3 "Feb 13, 2024" "liburing-2.8" "liburing Manual"
+.SH NAME
+io_uring_wait_cqes_min_timeout \- wait for completions with both batch and normal timeout
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "int io_uring_wait_cqes_min_timeout(struct io_uring *" ring ","
+.BI "                                   struct io_uring_cqe **" cqe_ptr ","
+.BI "                                   unsigned " wait_nr ","
+.BI "                                   struct __kernel_timespec *" ts ","
+.BI "                                   unsigned int " min_wait_usec ",
+.BI "                                   sigset_t *" sigmask ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_wait_cqes_min_timeout (3)
+function waits on the completion queue belonging to the
+.I ring
+for
+.I wait_nr
+completion events, or until the timeout
+.I ts
+expires. The completion events are stored in the
+.I cqe_ptr
+array. If non-zero,
+.I min_wait_usec
+denotes a timeout for the
+.I wait_nr
+batch.
+
+The
+.I sigmask
+specifies the set of signals to block. If set, it is equivalent to atomically
+executing the following calls:
+.PP
+.in +4n
+.EX
+sigset_t origmask;
+
+pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
+ret = io_uring_wait_cqes_min_timeout(ring, cqe, wait_nr, ts, min_wait, NULL);
+pthread_sigmask(SIG_SETMASK, &origmask, NULL);
+.EE
+.in
+.PP
+This works like
+.BR io_uring_submit_and_wait_min_timeout (3)
+except that it doesn't submit requests. See that man page for a description
+of how the min timeout waiting works.
+
+Available since 6.12.
+
+.SH RETURN VALUE
+On success
+.BR io_uring_wait_cqes_min_timeout (3)
+returns 0. On failure it returns
+.BR -errno .
+If the kernel doesn't support this functionality,
+.B -EINVAL
+will be returned. See the note on the
+.B IORING_FEAT_MIN_TIMEOUT
+feature flag in
+.BR io_uring_submit_and_wait_min_timeout (3).
+The most common failure case is not receiving a completion within the specified
+timeout,
+.B -ETIME
+is returned in this case.
+.SH SEE ALSO
+.BR io_uring_wait_cqe (3),
+.BR io_uring_wait_cqes (3),
+.BR io_uring_wait_cqe_timeout (3),
+.BR io_uring_submit_and_wait_min_timeout (3)
diff --git a/src/Makefile b/src/Makefile
index 12cf49f..30e4ffc 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -5,23 +5,30 @@
 libdir ?= $(prefix)/lib
 libdevdir ?= $(prefix)/lib
 
+LIBURING_CFLAGS ?=
 CPPFLAGS ?=
 override CPPFLAGS += -D_GNU_SOURCE \
-	-Iinclude/ -include ../config-host.h
-CFLAGS ?= -g -O2 -Wall -Wextra -fno-stack-protector
-override CFLAGS += -Wno-unused-parameter -Wno-sign-compare -DLIBURING_INTERNAL
+	-Iinclude/ -include ../config-host.h \
+	-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS ?= -O3 -Wall -Wextra -fno-stack-protector
+override CFLAGS += -Wno-unused-parameter \
+	-DLIBURING_INTERNAL \
+	$(LIBURING_CFLAGS)
 SO_CFLAGS=-fPIC $(CFLAGS)
 L_CFLAGS=$(CFLAGS)
-LINK_FLAGS=
-LINK_FLAGS+=$(LDFLAGS)
 ENABLE_SHARED ?= 1
 
 soname=liburing.so.$(VERSION_MAJOR)
 libname=liburing.so.$(VERSION)
+ffi_soname=liburing-ffi.so.$(VERSION_MAJOR)
+ffi_libname=liburing-ffi.so.$(VERSION)
+
 all_targets += liburing.a
+all_targets += liburing-ffi.a
 
 ifeq ($(ENABLE_SHARED),1)
 all_targets += $(libname)
+all_targets += $(ffi_libname)
 endif
 
 include ../Makefile.quiet
@@ -30,22 +37,36 @@
 include ../config-host.mak
 endif
 
+ifeq ($(CONFIG_USE_SANITIZER),y)
+	LINK_FLAGS=
+else
+	LINK_FLAGS=-Wl,-z,defs
+endif
+LINK_FLAGS+=$(LDFLAGS)
+
 all: $(all_targets)
 
-liburing_srcs := setup.c queue.c register.c
+liburing_srcs := setup.c queue.c register.c syscall.c version.c
 
 ifeq ($(CONFIG_NOLIBC),y)
 	liburing_srcs += nolibc.c
-	override CFLAGS += -nostdlib -nodefaultlibs -ffreestanding
-	override CPPFLAGS += -nostdlib -nodefaultlibs -ffreestanding
-	override LINK_FLAGS += -nostdlib -nodefaultlibs
-else
-	liburing_srcs += syscall.c
+	override CFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector
+	override CPPFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector
+	override LINK_FLAGS += -nostdlib -nodefaultlibs $(libgcc_link_flag)
+endif
+
+ifeq ($(CONFIG_USE_SANITIZER),y)
+	override CFLAGS += -fsanitize=address,undefined -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
+	override CPPFLAGS += -fsanitize=address,undefined -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
+	override LINK_FLAGS += -fsanitize=address,undefined
+	liburing_srcs += sanitize.c
 endif
 
 override CPPFLAGS += -MT "$@" -MMD -MP -MF "[email protected]"
 liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs))
 liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs))
+liburing_ffi_objs := ffi.ol
+liburing_ffi_sobjs := ffi.os
 
 %.os: %.c
 	$(QUIET_CC)$(CC) $(CPPFLAGS) $(SO_CFLAGS) -c -o $@ $<
@@ -64,25 +85,58 @@
 	$(QUIET_AR)$(AR) r liburing.a $^
 	$(QUIET_RANLIB)$(RANLIB) liburing.a
 
+liburing-ffi.a: $(liburing_objs) $(liburing_ffi_objs)
+	@rm -f liburing-ffi.a
+	$(QUIET_AR)$(AR) r liburing-ffi.a $^
+	$(QUIET_RANLIB)$(RANLIB) liburing-ffi.a
+
 $(libname): $(liburing_sobjs) liburing.map
 	$(QUIET_CC)$(CC) $(SO_CFLAGS) -shared -Wl,--version-script=liburing.map -Wl,-soname=$(soname) -o $@ $(liburing_sobjs) $(LINK_FLAGS)
 
+$(ffi_libname): $(liburing_ffi_objs) $(liburing_ffi_sobjs) $(liburing_sobjs) liburing-ffi.map
+	$(QUIET_CC)$(CC) $(SO_CFLAGS) -shared -Wl,--version-script=liburing-ffi.map -Wl,-soname=$(ffi_soname) -o $@ $(liburing_sobjs) $(liburing_ffi_sobjs) $(LINK_FLAGS)
+
 install: $(all_targets)
 	install -D -m 644 include/liburing/io_uring.h $(includedir)/liburing/io_uring.h
 	install -D -m 644 include/liburing.h $(includedir)/liburing.h
 	install -D -m 644 include/liburing/compat.h $(includedir)/liburing/compat.h
 	install -D -m 644 include/liburing/barrier.h $(includedir)/liburing/barrier.h
+	install -D -m 644 include/liburing/sanitize.h $(includedir)/liburing/sanitize.h
+	install -D -m 644 include/liburing/io_uring_version.h $(includedir)/liburing/io_uring_version.h
 	install -D -m 644 liburing.a $(libdevdir)/liburing.a
+	install -D -m 644 liburing-ffi.a $(libdevdir)/liburing-ffi.a
 ifeq ($(ENABLE_SHARED),1)
 	install -D -m 755 $(libname) $(libdir)/$(libname)
+	install -D -m 755 $(ffi_libname) $(libdir)/$(ffi_libname)
 	ln -sf $(libname) $(libdir)/$(soname)
 	ln -sf $(relativelibdir)$(libname) $(libdevdir)/liburing.so
+	ln -sf $(ffi_libname) $(libdir)/$(ffi_soname)
+	ln -sf $(relativelibdir)$(ffi_libname) $(libdevdir)/liburing-ffi.so
+endif
+
+uninstall:
+	@rm -f $(includedir)/liburing/io_uring.h
+	@rm -f $(includedir)/liburing.h
+	@rm -f $(includedir)/liburing/compat.h
+	@rm -f $(includedir)/liburing/barrier.h
+	@rm -f $(includedir)/liburing/sanitize.h
+	@rm -f $(includedir)/liburing/io_uring_version.h
+	@rm -f $(libdevdir)/liburing.a
+	@rm -f $(libdevdir)/liburing-ffi.a
+ifeq ($(ENABLE_SHARED),1)
+	@rm -f $(libdir)/$(libname)
+	@rm -f $(libdir)/$(ffi_libname)
+	@rm -f $(libdir)/$(soname)
+	@rm -f $(libdevdir)/liburing.so
+	@rm -f $(libdir)/$(ffi_soname)
+	@rm -f $(libdevdir)/liburing-ffi.so
 endif
 
 clean:
-	@rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(soname).new
+	@rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(liburing_ffi_objs) $(liburing_ffi_sobjs) $(soname).new
 	@rm -f *.so* *.a *.o *.d
 	@rm -f include/liburing/compat.h
+	@rm -f include/liburing/io_uring_version.h
 
 	@# When cleaning, we don't include ../config-host.mak,
 	@# so the nolibc objects are always skipped, clean them up!
diff --git a/src/arch/aarch64/lib.h b/src/arch/aarch64/lib.h
new file mode 100644
index 0000000..41bcfc9
--- /dev/null
+++ b/src/arch/aarch64/lib.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_AARCH64_LIB_H
+#define LIBURING_ARCH_AARCH64_LIB_H
+
+#include <elf.h>
+#include "../../syscall.h"
+
+static inline long __get_page_size(void)
+{
+	Elf64_Off buf[2];
+	long ret = 4096;
+	int fd;
+
+	fd = __sys_open("/proc/self/auxv", O_RDONLY, 0);
+	if (fd < 0)
+		return ret;
+
+	while (1) {
+		ssize_t x;
+
+		x = __sys_read(fd, buf, sizeof(buf));
+		if (x < (long) sizeof(buf))
+			break;
+
+		if (buf[0] == AT_PAGESZ) {
+			ret = buf[1];
+			break;
+		}
+	}
+
+	__sys_close(fd);
+	return ret;
+}
+
+static inline long get_page_size(void)
+{
+	static long cache_val;
+
+	if (cache_val)
+		return cache_val;
+
+	cache_val = __get_page_size();
+	return cache_val;
+}
+
+#endif /* #ifndef LIBURING_ARCH_AARCH64_LIB_H */
diff --git a/src/arch/aarch64/syscall.h b/src/arch/aarch64/syscall.h
index c0ab7e2..b00e90b 100644
--- a/src/arch/aarch64/syscall.h
+++ b/src/arch/aarch64/syscall.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#ifndef __INTERNAL__LIBURING_SYSCALL_H
-	#error "This file should be included from src/syscall.h (liburing)"
-#endif
-
 #ifndef LIBURING_ARCH_AARCH64_SYSCALL_H
 #define LIBURING_ARCH_AARCH64_SYSCALL_H
 
diff --git a/src/arch/generic/lib.h b/src/arch/generic/lib.h
index 737e795..6b006c6 100644
--- a/src/arch/generic/lib.h
+++ b/src/arch/generic/lib.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#ifndef __INTERNAL__LIBURING_LIB_H
-	#error "This file should be included from src/lib.h (liburing)"
-#endif
-
 #ifndef LIBURING_ARCH_GENERIC_LIB_H
 #define LIBURING_ARCH_GENERIC_LIB_H
 
diff --git a/src/arch/generic/syscall.h b/src/arch/generic/syscall.h
index fa93064..00730e0 100644
--- a/src/arch/generic/syscall.h
+++ b/src/arch/generic/syscall.h
@@ -1,31 +1,30 @@
 /* SPDX-License-Identifier: MIT */
 
-#ifndef __INTERNAL__LIBURING_SYSCALL_H
-	#error "This file should be included from src/syscall.h (liburing)"
-#endif
-
 #ifndef LIBURING_ARCH_GENERIC_SYSCALL_H
 #define LIBURING_ARCH_GENERIC_SYSCALL_H
 
-static inline int ____sys_io_uring_register(int fd, unsigned opcode,
-					    const void *arg, unsigned nr_args)
+#include <fcntl.h>
+
+static inline int __sys_io_uring_register(unsigned int fd, unsigned int opcode,
+					  const void *arg, unsigned int nr_args)
 {
 	int ret;
 	ret = syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
 	return (ret < 0) ? -errno : ret;
 }
 
-static inline int ____sys_io_uring_setup(unsigned entries,
-					 struct io_uring_params *p)
+static inline int __sys_io_uring_setup(unsigned int entries,
+				       struct io_uring_params *p)
 {
 	int ret;
 	ret = syscall(__NR_io_uring_setup, entries, p);
 	return (ret < 0) ? -errno : ret;
 }
 
-static inline int ____sys_io_uring_enter2(int fd, unsigned to_submit,
-					  unsigned min_complete, unsigned flags,
-					  sigset_t *sig, int sz)
+static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit,
+					unsigned int min_complete,
+					unsigned int flags, sigset_t *sig,
+					size_t sz)
 {
 	int ret;
 	ret = syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags,
@@ -33,12 +32,26 @@
 	return (ret < 0) ? -errno : ret;
 }
 
-static inline int ____sys_io_uring_enter(int fd, unsigned to_submit,
-					 unsigned min_complete, unsigned flags,
-					 sigset_t *sig)
+static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit,
+				       unsigned int min_complete,
+				       unsigned int flags, sigset_t *sig)
 {
-	return ____sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
-				       _NSIG / 8);
+	return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
+				     _NSIG / 8);
+}
+
+static inline int __sys_open(const char *pathname, int flags, mode_t mode)
+{
+	int ret;
+	ret = open(pathname, flags, mode);
+	return (ret < 0) ? -errno : ret;
+}
+
+static inline ssize_t __sys_read(int fd, void *buffer, size_t size)
+{
+	ssize_t ret;
+	ret = read(fd, buffer, size);
+	return (ret < 0) ? -errno : ret;
 }
 
 static inline void *__sys_mmap(void *addr, size_t length, int prot, int flags,
diff --git a/src/arch/riscv64/lib.h b/src/arch/riscv64/lib.h
new file mode 100644
index 0000000..ef66d76
--- /dev/null
+++ b/src/arch/riscv64/lib.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_RISCV64_LIB_H
+#define LIBURING_ARCH_RISCV64_LIB_H
+
+#include <elf.h>
+#include <sys/auxv.h>
+#include "../../syscall.h"
+
+static inline long __get_page_size(void)
+{
+	Elf64_Off buf[2];
+	long ret = 4096;
+	int fd;
+
+	fd = __sys_open("/proc/self/auxv", O_RDONLY, 0);
+	if (fd < 0)
+		return ret;
+
+	while (1) {
+		ssize_t x;
+
+		x = __sys_read(fd, buf, sizeof(buf));
+		if (x < (long) sizeof(buf))
+			break;
+
+		if (buf[0] == AT_PAGESZ) {
+			ret = buf[1];
+			break;
+		}
+	}
+
+	__sys_close(fd);
+	return ret;
+}
+
+static inline long get_page_size(void)
+{
+	static long cache_val;
+
+	if (cache_val)
+		return cache_val;
+
+	cache_val = __get_page_size();
+	return cache_val;
+}
+
+#endif /* #ifndef LIBURING_ARCH_RISCV64_LIB_H */
diff --git a/src/arch/riscv64/syscall.h b/src/arch/riscv64/syscall.h
new file mode 100644
index 0000000..42cc6be
--- /dev/null
+++ b/src/arch/riscv64/syscall.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_RISCV64_SYSCALL_H
+#define LIBURING_ARCH_RISCV64_SYSCALL_H
+
+#if defined(__riscv) && __riscv_xlen == 64
+
+#define __do_syscallM(...) ({						\
+	__asm__ volatile (						\
+		"ecall"							\
+		: "=r"(a0)						\
+		: __VA_ARGS__						\
+		: "memory", "a1");					\
+	(long) a0;							\
+})
+
+#define __do_syscallN(...) ({						\
+	__asm__ volatile (						\
+		"ecall"							\
+		: "=r"(a0)						\
+		: __VA_ARGS__						\
+		: "memory");					\
+	(long) a0;							\
+})
+
+#define __do_syscall0(__n) ({						\
+	register long a7 __asm__("a7") = __n;				\
+	register long a0 __asm__("a0");					\
+									\
+	__do_syscallM("r" (a7));					\
+})
+
+#define __do_syscall1(__n, __a) ({					\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+									\
+	__do_syscallM("r" (a7), "0" (a0));				\
+})
+
+#define __do_syscall2(__n, __a, __b) ({					\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+	register __typeof__(__b) a1 __asm__("a1") = __b;		\
+									\
+	__do_syscallN("r" (a7), "0" (a0), "r" (a1));			\
+})
+
+#define __do_syscall3(__n, __a, __b, __c) ({				\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+	register __typeof__(__b) a1 __asm__("a1") = __b;		\
+	register __typeof__(__c) a2 __asm__("a2") = __c;		\
+									\
+	__do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2));		\
+})
+
+#define __do_syscall4(__n, __a, __b, __c, __d) ({			\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+	register __typeof__(__b) a1 __asm__("a1") = __b;		\
+	register __typeof__(__c) a2 __asm__("a2") = __c;		\
+	register __typeof__(__d) a3 __asm__("a3") = __d;		\
+									\
+	__do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3));\
+})
+
+#define __do_syscall5(__n, __a, __b, __c, __d, __e) ({			\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+	register __typeof__(__b) a1 __asm__("a1") = __b;		\
+	register __typeof__(__c) a2 __asm__("a2") = __c;		\
+	register __typeof__(__d) a3 __asm__("a3") = __d;		\
+	register __typeof__(__e) a4 __asm__("a4") = __e;		\
+									\
+	__do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3),	\
+			"r"(a4));					\
+})
+
+#define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({		\
+	register long a7 __asm__("a7") = __n;				\
+	register __typeof__(__a) a0 __asm__("a0") = __a;		\
+	register __typeof__(__b) a1 __asm__("a1") = __b;		\
+	register __typeof__(__c) a2 __asm__("a2") = __c;		\
+	register __typeof__(__d) a3 __asm__("a3") = __d;		\
+	register __typeof__(__e) a4 __asm__("a4") = __e;		\
+	register __typeof__(__f) a5 __asm__("a5") = __f;		\
+									\
+	__do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3),	\
+			"r" (a4), "r"(a5));				\
+})
+
+#include "../syscall-defs.h"
+
+#else /* #if defined(__riscv) && __riscv_xlen == 64 */
+
+#include "../generic/syscall.h"
+
+#endif /*  #if defined(__riscv) && __riscv_xlen == 64 */
+
+#endif /* #ifndef LIBURING_ARCH_RISCV64_SYSCALL_H */
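An illustrative sketch of what these macros encode, using a hypothetical raw_close() helper: on riscv64 the syscall number goes in a7, arguments in a0-a5, and the return value (a negative errno on failure) comes back in a0. This is not the literal macro expansion, just the calling convention behind it:

	#include <asm/unistd.h>

	static inline long raw_close(int fd)
	{
		register long a7 __asm__("a7") = __NR_close;
		register long a0 __asm__("a0") = fd;

		/* "+r"(a0) plays the role of the "=r"/"0" pair above */
		__asm__ volatile ("ecall"
				  : "+r"(a0)
				  : "r"(a7)
				  : "memory");
		return a0;
	}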
diff --git a/src/arch/syscall-defs.h b/src/arch/syscall-defs.h
index 1e8ae1b..d351f8b 100644
--- a/src/arch/syscall-defs.h
+++ b/src/arch/syscall-defs.h
@@ -3,12 +3,31 @@
 #ifndef LIBURING_ARCH_SYSCALL_DEFS_H
 #define LIBURING_ARCH_SYSCALL_DEFS_H
 
+#include <fcntl.h>
+
+static inline int __sys_open(const char *pathname, int flags, mode_t mode)
+{
+	/*
+	 * Some architectures don't have __NR_open, but do have __NR_openat.
+	 */
+#ifdef __NR_open
+	return (int) __do_syscall3(__NR_open, pathname, flags, mode);
+#else
+	return (int) __do_syscall4(__NR_openat, AT_FDCWD, pathname, flags, mode);
+#endif
+}
+
+static inline ssize_t __sys_read(int fd, void *buffer, size_t size)
+{
+	return (ssize_t) __do_syscall3(__NR_read, fd, buffer, size);
+}
+
 static inline void *__sys_mmap(void *addr, size_t length, int prot, int flags,
 			       int fd, off_t offset)
 {
 	int nr;
 
-#if defined(__i386__)
+#if defined(__NR_mmap2)
 	nr = __NR_mmap2;
 	offset >>= 12;
 #else
@@ -42,33 +61,34 @@
 	return (int) __do_syscall1(__NR_close, fd);
 }
 
-static inline int ____sys_io_uring_register(int fd, unsigned opcode,
-					    const void *arg, unsigned nr_args)
+static inline int __sys_io_uring_register(unsigned int fd, unsigned int opcode,
+					  const void *arg, unsigned int nr_args)
 {
 	return (int) __do_syscall4(__NR_io_uring_register, fd, opcode, arg,
 				   nr_args);
 }
 
-static inline int ____sys_io_uring_setup(unsigned entries,
-					 struct io_uring_params *p)
+static inline int __sys_io_uring_setup(unsigned int entries,
+				       struct io_uring_params *p)
 {
 	return (int) __do_syscall2(__NR_io_uring_setup, entries, p);
 }
 
-static inline int ____sys_io_uring_enter2(int fd, unsigned to_submit,
-					  unsigned min_complete, unsigned flags,
-					  sigset_t *sig, int sz)
+static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit,
+					unsigned int min_complete,
+					unsigned int flags, sigset_t *sig,
+					size_t sz)
 {
 	return (int) __do_syscall6(__NR_io_uring_enter, fd, to_submit,
 				   min_complete, flags, sig, sz);
 }
 
-static inline int ____sys_io_uring_enter(int fd, unsigned to_submit,
-					 unsigned min_complete, unsigned flags,
-					 sigset_t *sig)
+static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit,
+				       unsigned int min_complete,
+				       unsigned int flags, sigset_t *sig)
 {
-	return ____sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
-				       _NSIG / 8);
+	return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
+				     _NSIG / 8);
 }
 
 #endif
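The __sys_open() fallback above works because openat(2) anchored at AT_FDCWD is defined to behave exactly like open(2), which is why newer architectures only ship __NR_openat. A libc-level sketch of the equivalence (open_compat is a hypothetical name):

	#include <fcntl.h>

	/* open(path, flags, mode) == openat(AT_FDCWD, path, flags, mode) */
	static int open_compat(const char *pathname, int flags, mode_t mode)
	{
		return openat(AT_FDCWD, pathname, flags, mode);
	}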
diff --git a/src/arch/x86/lib.h b/src/arch/x86/lib.h
index e6a74f3..6ece2d4 100644
--- a/src/arch/x86/lib.h
+++ b/src/arch/x86/lib.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#ifndef __INTERNAL__LIBURING_LIB_H
-	#error "This file should be included from src/lib.h (liburing)"
-#endif
-
 #ifndef LIBURING_ARCH_X86_LIB_H
 #define LIBURING_ARCH_X86_LIB_H
 
diff --git a/src/arch/x86/syscall.h b/src/arch/x86/syscall.h
index 43c576b..cb8fb91 100644
--- a/src/arch/x86/syscall.h
+++ b/src/arch/x86/syscall.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#ifndef __INTERNAL__LIBURING_SYSCALL_H
-	#error "This file should be included from src/syscall.h (liburing)"
-#endif
-
 #ifndef LIBURING_ARCH_X86_SYSCALL_H
 #define LIBURING_ARCH_X86_SYSCALL_H
 
diff --git a/src/ffi.c b/src/ffi.c
new file mode 100644
index 0000000..03e382e
--- /dev/null
+++ b/src/ffi.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+#define IOURINGINLINE
+
+#ifdef __clang__
+// clang warns when we include a header that deliberately contains function
+// definitions, so we explicitly silence -Wmissing-prototypes around it
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#endif
+
+#include "liburing.h"
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
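The trick in this new file: liburing.h defaults IOURINGINLINE to static inline (see the header diff below), so defining it to nothing before the include turns every inline helper into an ordinary external definition that FFI bindings can link against. A simplified sketch of the mechanism with a hypothetical header and helper, not the actual liburing contents:

	/* header.h (simplified): */
	#ifndef IOURINGINLINE
	#define IOURINGINLINE static inline
	#endif
	IOURINGINLINE int add_one(int x) { return x + 1; }

	/* consumer equivalent of ffi.c: with IOURINGINLINE empty, add_one()
	 * becomes an externally visible symbol rather than static inline */
	#define IOURINGINLINE
	#include "header.h"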
diff --git a/src/include/liburing.h b/src/include/liburing.h
index 1c1b03e..6eae138 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -2,14 +2,6 @@
 #ifndef LIB_URING_H
 #define LIB_URING_H
 
-#ifndef _XOPEN_SOURCE
-#define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE /* Required for musl to expose cpu_set_t */
-#endif
-
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -21,10 +13,13 @@
 #include <fcntl.h>
 #include <sched.h>
 #include <linux/swab.h>
+#include <sys/wait.h>
 #include "liburing/compat.h"
 #include "liburing/io_uring.h"
+#include "liburing/io_uring_version.h"
 #include "liburing/barrier.h"
 
 #ifndef uring_unlikely
 #define uring_unlikely(cond)	__builtin_expect(!!(cond), 0)
 #endif
@@ -33,6 +28,46 @@
 #define uring_likely(cond)	__builtin_expect(!!(cond), 1)
 #endif
 
+#ifndef IOURINGINLINE
+#define IOURINGINLINE static inline
+#endif
+
+#ifdef __alpha__
+/*
+ * alpha and mips are the exceptions; all other architectures have
+ * common numbers for new system calls.
+ */
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup		535
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter		536
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register		537
+#endif
+#elif defined __mips__
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup		(__NR_Linux + 425)
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter		(__NR_Linux + 426)
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register		(__NR_Linux + 427)
+#endif
+#else /* !__alpha__ and !__mips__ */
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup		425
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter		426
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register		427
+#endif
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -43,7 +78,9 @@
 struct io_uring_sq {
 	unsigned *khead;
 	unsigned *ktail;
+	// Deprecated: use `ring_mask` instead of `*kring_mask`
 	unsigned *kring_mask;
+	// Deprecated: use `ring_entries` instead of `*kring_entries`
 	unsigned *kring_entries;
 	unsigned *kflags;
 	unsigned *kdropped;
@@ -56,13 +93,18 @@
 	size_t ring_sz;
 	void *ring_ptr;
 
-	unsigned pad[4];
+	unsigned ring_mask;
+	unsigned ring_entries;
+
+	unsigned pad[2];
 };
 
 struct io_uring_cq {
 	unsigned *khead;
 	unsigned *ktail;
+	// Deprecated: use `ring_mask` instead of `*kring_mask`
 	unsigned *kring_mask;
+	// Deprecated: use `ring_entries` instead of `*kring_entries`
 	unsigned *kring_entries;
 	unsigned *kflags;
 	unsigned *koverflow;
@@ -71,7 +113,10 @@
 	size_t ring_sz;
 	void *ring_ptr;
 
-	unsigned pad[4];
+	unsigned ring_mask;
+	unsigned ring_entries;
+
+	unsigned pad[2];
 };
 
 struct io_uring {
@@ -105,7 +150,7 @@
  */
 void io_uring_free_probe(struct io_uring_probe *probe);
 
-static inline int io_uring_opcode_supported(const struct io_uring_probe *p,
+IOURINGINLINE int io_uring_opcode_supported(const struct io_uring_probe *p,
 					    int op)
 {
 	if (op > p->last_op)
@@ -113,6 +158,9 @@
 	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
 }
 
+int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
+				struct io_uring_params *p,
+				void *buf, size_t buf_size);
 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
 				struct io_uring_params *p);
 int io_uring_queue_init(unsigned entries, struct io_uring *ring,
@@ -126,6 +174,12 @@
 int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
 		       unsigned wait_nr, struct __kernel_timespec *ts,
 		       sigset_t *sigmask);
+int io_uring_wait_cqes_min_timeout(struct io_uring *ring,
+				   struct io_uring_cqe **cqe_ptr,
+				   unsigned wait_nr,
+				   struct __kernel_timespec *ts,
+				   unsigned int min_ts_usec,
+				   sigset_t *sigmask);
 int io_uring_wait_cqe_timeout(struct io_uring *ring,
 			      struct io_uring_cqe **cqe_ptr,
 			      struct __kernel_timespec *ts);
@@ -136,7 +190,14 @@
 				     unsigned wait_nr,
 				     struct __kernel_timespec *ts,
 				     sigset_t *sigmask);
+int io_uring_submit_and_wait_min_timeout(struct io_uring *ring,
+					 struct io_uring_cqe **cqe_ptr,
+					 unsigned wait_nr,
+					 struct __kernel_timespec *ts,
+					 unsigned min_wait,
+					 sigset_t *sigmask);
 
+int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src);
 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
 			      unsigned nr_iovecs);
 int io_uring_register_buffers_tags(struct io_uring *ring,
@@ -160,7 +221,7 @@
 
 int io_uring_unregister_files(struct io_uring *ring);
 int io_uring_register_files_update(struct io_uring *ring, unsigned off,
-				   int *files, unsigned nr_files);
+				   const int *files, unsigned nr_files);
 int io_uring_register_eventfd(struct io_uring *ring, int fd);
 int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
 int io_uring_unregister_eventfd(struct io_uring *ring);
@@ -173,16 +234,56 @@
 				   unsigned int nr_res);
 int io_uring_enable_rings(struct io_uring *ring);
 int __io_uring_sqring_wait(struct io_uring *ring);
+#ifdef _GNU_SOURCE
 int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
 				const cpu_set_t *mask);
+#endif
 int io_uring_unregister_iowq_aff(struct io_uring *ring);
 int io_uring_register_iowq_max_workers(struct io_uring *ring,
 				       unsigned int *values);
 int io_uring_register_ring_fd(struct io_uring *ring);
 int io_uring_unregister_ring_fd(struct io_uring *ring);
+int io_uring_close_ring_fd(struct io_uring *ring);
 int io_uring_register_buf_ring(struct io_uring *ring,
 			       struct io_uring_buf_reg *reg, unsigned int flags);
 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);
+int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head);
+int io_uring_register_sync_cancel(struct io_uring *ring,
+				 struct io_uring_sync_cancel_reg *reg);
+
+int io_uring_register_file_alloc_range(struct io_uring *ring,
+					unsigned off, unsigned len);
+
+int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
+int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
+
+int io_uring_register_clock(struct io_uring *ring,
+			    struct io_uring_clock_register *arg);
+
+int io_uring_get_events(struct io_uring *ring);
+int io_uring_submit_and_get_events(struct io_uring *ring);
+
+/*
+ * io_uring syscalls.
+ */
+int io_uring_enter(unsigned int fd, unsigned int to_submit,
+		   unsigned int min_complete, unsigned int flags, sigset_t *sig);
+int io_uring_enter2(unsigned int fd, unsigned int to_submit,
+		    unsigned int min_complete, unsigned int flags,
+		    sigset_t *sig, size_t sz);
+int io_uring_setup(unsigned int entries, struct io_uring_params *p);
+int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
+		      unsigned int nr_args);
+
+/*
+ * Mapped buffer ring alloc/register + unregister/free helpers
+ */
+struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
+						  unsigned int nentries,
+						  int bgid, unsigned int flags,
+						  int *err);
+int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
+			   unsigned int nentries, int bgid);
 
 /*
  * Helper for the peek/wait single cqe functions. Exported because of that,
@@ -204,21 +305,27 @@
 #define io_uring_cqe_index(ring,ptr,mask)				\
 	(((ptr) & (mask)) << io_uring_cqe_shift(ring))
 
+/*
+ * NOTE: we should just get rid of the 'head' being passed in here, it doesn't
+ * serve a purpose anymore. The below is a bit of a work-around to ensure that
+ * the compiler doesn't complain about 'head' being unused (or only written,
+ * never read), as we use a local iterator for both the head and tail tracking.
+ */
 #define io_uring_for_each_cqe(ring, head, cqe)				\
 	/*								\
 	 * io_uring_smp_load_acquire() enforces the order of tail	\
 	 * and CQE reads.						\
 	 */								\
-	for (head = *(ring)->cq.khead;					\
-	     (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
-		&(ring)->cq.cqes[io_uring_cqe_index(ring, head, *(ring)->cq.kring_mask)] : NULL)); \
-	     head++)							\
+	for (__u32 __HEAD__ = (head) = *(ring)->cq.khead,		\
+	     __TAIL__ = io_uring_smp_load_acquire((ring)->cq.ktail);	\
+	     (cqe = ((head) != __TAIL__ ?				\
+	     &(ring)->cq.cqes[io_uring_cqe_index(ring, __HEAD__, (ring)->cq.ring_mask)] : NULL)); \
+	     (head) = ++__HEAD__)
 
 /*
  * Must be called after io_uring_for_each_cqe()
  */
-static inline void io_uring_cq_advance(struct io_uring *ring,
-				       unsigned nr)
+IOURINGINLINE void io_uring_cq_advance(struct io_uring *ring, unsigned nr)
 {
 	if (nr) {
 		struct io_uring_cq *cq = &ring->cq;
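A minimal usage sketch of the rewritten iterator paired with io_uring_cq_advance(), assuming `ring` is an initialized struct io_uring and handle_cqe() is application code:

	struct io_uring_cqe *cqe;
	unsigned head, seen = 0;

	io_uring_for_each_cqe(&ring, head, cqe) {
		handle_cqe(cqe);
		seen++;
	}
	/* mark every CQE visited above as consumed in one store */
	io_uring_cq_advance(&ring, seen);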
@@ -235,7 +342,7 @@
  * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
  * been processed by the application.
  */
-static inline void io_uring_cqe_seen(struct io_uring *ring,
+IOURINGINLINE void io_uring_cqe_seen(struct io_uring *ring,
 				     struct io_uring_cqe *cqe)
 {
 	if (cqe)
@@ -250,12 +357,12 @@
  * Associate pointer @data with the sqe, for later retrieval from the cqe
  * at command completion time with io_uring_cqe_get_data().
  */
-static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+IOURINGINLINE void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
 {
 	sqe->user_data = (unsigned long) data;
 }
 
-static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
+IOURINGINLINE void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
 {
 	return (void *) (uintptr_t) cqe->user_data;
 }
@@ -265,13 +372,13 @@
  * time with io_uring_cqe_get_data64. Just like the non-64 variants, except
  * these store a 64-bit type rather than a data pointer.
  */
-static inline void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
 					   __u64 data)
 {
 	sqe->user_data = data;
 }
 
-static inline __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
+IOURINGINLINE __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
 {
 	return cqe->user_data;
 }
@@ -281,30 +388,23 @@
  */
 #define LIBURING_HAVE_DATA64
 
-static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
 					  unsigned flags)
 {
 	sqe->flags = (__u8) flags;
 }
 
-static inline void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
+IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
 						    unsigned int file_index)
 {
 	/* 0 means no fixed files, indexes should be encoded as "index + 1" */
 	sqe->file_index = file_index + 1;
 }
 
-static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
-				    const void *addr, unsigned len,
-				    __u64 offset)
+IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe)
 {
-	sqe->opcode = (__u8) op;
 	sqe->flags = 0;
 	sqe->ioprio = 0;
-	sqe->fd = fd;
-	sqe->off = offset;
-	sqe->addr = (unsigned long) addr;
-	sqe->len = len;
 	sqe->rw_flags = 0;
 	sqe->buf_index = 0;
 	sqe->personality = 0;
@@ -313,29 +413,39 @@
 	sqe->__pad2[0] = 0;
 }
 
-/**
- * @pre Either fd_in or fd_out must be a pipe.
- * @param off_in If fd_in refers to a pipe, off_in must be (int64_t) -1;
- *		 If fd_in does not refer to a pipe and off_in is (int64_t) -1,
- *		 then bytes are read from fd_in starting from the file offset
- *		 and it is adjust appropriately;
- *               If fd_in does not refer to a pipe and off_in is not
- *		 (int64_t) -1, then the  starting offset of fd_in will be
- *		 off_in.
- * @param off_out The description of off_in also applied to off_out.
- * @param splice_flags see man splice(2) for description of flags.
+IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+				    const void *addr, unsigned len,
+				    __u64 offset)
+{
+	sqe->opcode = (__u8) op;
+	sqe->fd = fd;
+	sqe->off = offset;
+	sqe->addr = (unsigned long) addr;
+	sqe->len = len;
+}
+
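With the split above, io_uring_initialize_sqe() zeroes the common fields once when the SQE is handed out (see the _io_uring_get_sqe() hunk further down), leaving io_uring_prep_rw() to fill only the op-specific ones. The practical effect for callers, sketched with an assumed initialized `ring`, open `fd`, and buffer:

	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

	if (sqe) {
		/* no manual clearing needed: get_sqe already initialized it */
		io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
		io_uring_sqe_set_data64(sqe, 0xcafe);
	}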
+/*
+ * io_uring_prep_splice() - Either @fd_in or @fd_out must be a pipe.
+ *
+ * - If @fd_in refers to a pipe, @off_in is ignored and must be set to -1.
+ *
+ * - If @fd_in does not refer to a pipe and @off_in is -1, then @nbytes are read
+ *   from @fd_in starting from the file offset, which is incremented by the
+ *   number of bytes read.
+ *
+ * - If @fd_in does not refer to a pipe and @off_in is not -1, then the starting
+ *   offset of @fd_in will be @off_in.
  *
  * This splice operation can be used to implement sendfile by splicing to an
  * intermediate pipe first, then splice to the final destination.
  * In fact, the implementation of sendfile in kernel uses splice internally.
  *
  * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation
- * can still failed with EINVAL if one of the fd doesn't explicitly support
- * splice operation, e.g. reading from terminal is unsupported from kernel 5.7
- * to 5.11.
+ * can still fail with EINVAL if one of the fds doesn't explicitly support the
+ * splice operation, e.g. reading from a terminal is unsupported from kernel
+ * 5.7 to 5.11.
  * Check issue #291 for more information.
  */
-static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_splice(struct io_uring_sqe *sqe,
 					int fd_in, int64_t off_in,
 					int fd_out, int64_t off_out,
 					unsigned int nbytes,
@@ -348,7 +458,7 @@
 	sqe->splice_flags = splice_flags;
 }
 
-static inline void io_uring_prep_tee(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_tee(struct io_uring_sqe *sqe,
 				     int fd_in, int fd_out,
 				     unsigned int nbytes,
 				     unsigned int splice_flags)
@@ -359,14 +469,14 @@
 	sqe->splice_flags = splice_flags;
 }
 
-static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
 				       const struct iovec *iovecs,
 				       unsigned nr_vecs, __u64 offset)
 {
 	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
 }
 
-static inline void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
 				       const struct iovec *iovecs,
 				       unsigned nr_vecs, __u64 offset,
 				       int flags)
@@ -375,7 +485,7 @@
 	sqe->rw_flags = flags;
 }
 
-static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
 					    void *buf, unsigned nbytes,
 					    __u64 offset, int buf_index)
 {
@@ -383,14 +493,14 @@
 	sqe->buf_index = (__u16) buf_index;
 }
 
-static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
 					const struct iovec *iovecs,
 					unsigned nr_vecs, __u64 offset)
 {
 	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
 }
 
-static inline void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
 				       const struct iovec *iovecs,
 				       unsigned nr_vecs, __u64 offset,
 				       int flags)
@@ -399,7 +509,7 @@
 	sqe->rw_flags = flags;
 }
 
-static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
 					     const void *buf, unsigned nbytes,
 					     __u64 offset, int buf_index)
 {
@@ -407,14 +517,22 @@
 	sqe->buf_index = (__u16) buf_index;
 }
 
-static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
 					 struct msghdr *msg, unsigned flags)
 {
 	io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
 	sqe->msg_flags = flags;
 }
 
-static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *sqe,
+						   int fd, struct msghdr *msg,
+						   unsigned flags)
+{
+	io_uring_prep_recvmsg(sqe, fd, msg, flags);
+	sqe->ioprio |= IORING_RECV_MULTISHOT;
+}
+
+IOURINGINLINE void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
 					 const struct msghdr *msg,
 					 unsigned flags)
 {
@@ -422,7 +540,7 @@
 	sqe->msg_flags = flags;
 }
 
-static inline unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
+IOURINGINLINE unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
 {
 #if __BYTE_ORDER == __BIG_ENDIAN
 	poll_mask = __swahw32(poll_mask);
@@ -430,28 +548,28 @@
 	return poll_mask;
 }
 
-static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
 					  unsigned poll_mask)
 {
 	io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
 }
 
-static inline void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
 						int fd, unsigned poll_mask)
 {
 	io_uring_prep_poll_add(sqe, fd, poll_mask);
 	sqe->len = IORING_POLL_ADD_MULTI;
 }
 
-static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
 					     __u64 user_data)
 {
 	io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0);
 	sqe->addr = user_data;
 }
 
-static inline void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
 					     __u64 old_user_data,
 					     __u64 new_user_data,
 					     unsigned poll_mask, unsigned flags)
@@ -462,19 +580,19 @@
 	sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
 }
 
-static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
 				       unsigned fsync_flags)
 {
 	io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
 	sqe->fsync_flags = fsync_flags;
 }
 
-static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+IOURINGINLINE void io_uring_prep_nop(struct io_uring_sqe *sqe)
 {
 	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
 }
 
-static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_timeout(struct io_uring_sqe *sqe,
 					 struct __kernel_timespec *ts,
 					 unsigned count, unsigned flags)
 {
@@ -482,7 +600,7 @@
 	sqe->timeout_flags = flags;
 }
 
-static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
 						__u64 user_data, unsigned flags)
 {
 	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0);
@@ -490,7 +608,7 @@
 	sqe->timeout_flags = flags;
 }
 
-static inline void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
 						struct __kernel_timespec *ts,
 						__u64 user_data, unsigned flags)
 {
@@ -500,7 +618,7 @@
 	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
 }
 
-static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
 					struct sockaddr *addr,
 					socklen_t *addrlen, int flags)
 {
@@ -510,16 +628,19 @@
 }
 
 /* accept directly into the fixed file table */
-static inline void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
 					       struct sockaddr *addr,
 					       socklen_t *addrlen, int flags,
 					       unsigned int file_index)
 {
 	io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
+	/* offset by 1 for allocation */
+	if (file_index == IORING_FILE_INDEX_ALLOC)
+		file_index--;
 	__io_uring_set_target_fixed_file(sqe, file_index);
 }
 
-static inline void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
 						  int fd, struct sockaddr *addr,
 						  socklen_t *addrlen, int flags)
 {
@@ -528,7 +649,7 @@
 }
 
 /* multishot accept directly into the fixed file table */
-static inline void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
 							 int fd,
 							 struct sockaddr *addr,
 							 socklen_t *addrlen,
@@ -538,7 +659,7 @@
 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
 }
 
-static inline void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
 					  __u64 user_data, int flags)
 {
 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0);
@@ -546,20 +667,20 @@
 	sqe->cancel_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_cancel(struct io_uring_sqe *sqe,
 					void *user_data, int flags)
 {
 	io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags);
 }
 
-static inline void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
 					   unsigned int flags)
 {
 	io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0);
 	sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD;
 }
 
-static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
 					      struct __kernel_timespec *ts,
 					      unsigned flags)
 {
@@ -567,14 +688,27 @@
 	sqe->timeout_flags = flags;
 }
 
-static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
 					 const struct sockaddr *addr,
 					 socklen_t addrlen)
 {
 	io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
 }
 
-static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_bind(struct io_uring_sqe *sqe, int fd,
+				      struct sockaddr *addr,
+				      socklen_t addrlen)
+{
+	io_uring_prep_rw(IORING_OP_BIND, sqe, fd, addr, 0, addrlen);
+}
+
+IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd,
+					int backlog)
+{
+	io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0);
+}
+
+IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe,
 					      int *fds, unsigned nr_fds,
 					      int offset)
 {
@@ -582,16 +716,15 @@
 				(__u64) offset);
 }
 
-static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
-					   int mode, off_t offset, off_t len)
+IOURINGINLINE void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
+					   int mode, __u64 offset, __u64 len)
 {
-
 	io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
-			(const uintptr_t *) (unsigned long) len,
-			(unsigned int) mode, (__u64) offset);
+			0, (unsigned int) mode, (__u64) offset);
+	sqe->addr = (__u64) len;
 }
 
-static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
+IOURINGINLINE void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
 					const char *path, int flags,
 					mode_t mode)
 {
@@ -600,34 +733,61 @@
 }
 
 /* open directly into the fixed file table */
-static inline void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
 					       int dfd, const char *path,
 					       int flags, mode_t mode,
 					       unsigned file_index)
 {
 	io_uring_prep_openat(sqe, dfd, path, flags, mode);
+	/* offset by 1 for allocation */
+	if (file_index == IORING_FILE_INDEX_ALLOC)
+		file_index--;
 	__io_uring_set_target_fixed_file(sqe, file_index);
 }
 
-static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
+IOURINGINLINE void io_uring_prep_open(struct io_uring_sqe *sqe,
+					const char *path, int flags, mode_t mode)
+{
+	io_uring_prep_openat(sqe, AT_FDCWD, path, flags, mode);
+}
+
+/* open directly into the fixed file table */
+IOURINGINLINE void io_uring_prep_open_direct(struct io_uring_sqe *sqe,
+					     const char *path, int flags,
+					     mode_t mode, unsigned file_index)
+{
+	io_uring_prep_openat_direct(sqe, AT_FDCWD, path, flags, mode, file_index);
+}
+
+IOURINGINLINE void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
 {
 	io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
 }
 
-static inline void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
 					      unsigned file_index)
 {
 	io_uring_prep_close(sqe, 0);
 	__io_uring_set_target_fixed_file(sqe, file_index);
 }
 
-static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
 				      void *buf, unsigned nbytes, __u64 offset)
 {
 	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
 }
 
-static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe,
+						int fd, unsigned nbytes,
+						__u64 offset, int buf_group)
+{
+	io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes,
+			 offset);
+	sqe->buf_group = buf_group;
+	sqe->flags = IOSQE_BUFFER_SELECT;
+}
+
+IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
 				       const void *buf, unsigned nbytes,
 				       __u64 offset)
 {
@@ -635,44 +795,184 @@
 }
 
 struct statx;
-static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
-				const char *path, int flags, unsigned mask,
-				struct statx *statxbuf)
+IOURINGINLINE void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
+				       const char *path, int flags,
+				       unsigned mask, struct statx *statxbuf)
 {
 	io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
 				(__u64) (unsigned long) statxbuf);
 	sqe->statx_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
-					 __u64 offset, off_t len, int advice)
+IOURINGINLINE void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
+					 __u64 offset, __u32 len, int advice)
 {
 	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
 	sqe->fadvise_advice = (__u32) advice;
 }
 
-static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
-					 off_t length, int advice)
+IOURINGINLINE void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
+					 __u32 length, int advice)
 {
 	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
 	sqe->fadvise_advice = (__u32) advice;
 }
 
-static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+IOURINGINLINE void io_uring_prep_fadvise64(struct io_uring_sqe *sqe, int fd,
+					 __u64 offset, off_t len, int advice)
+{
+	io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, 0, offset);
+	sqe->addr = len;
+	sqe->fadvise_advice = (__u32) advice;
+}
+
+IOURINGINLINE void io_uring_prep_madvise64(struct io_uring_sqe *sqe, void *addr,
+					 off_t length, int advice)
+{
+	io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, 0, length);
+	sqe->fadvise_advice = (__u32) advice;
+}
+
+IOURINGINLINE void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
 				      const void *buf, size_t len, int flags)
 {
 	io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
 	sqe->msg_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
+IOURINGINLINE void io_uring_prep_send_bundle(struct io_uring_sqe *sqe,
+					     int sockfd, size_t len, int flags)
+{
+	io_uring_prep_send(sqe, sockfd, NULL, len, flags);
+	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
+}
+
+IOURINGINLINE void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe,
+						const struct sockaddr *dest_addr,
+						__u16 addr_len)
+{
+	sqe->addr2 = (unsigned long)(const void *)dest_addr;
+	sqe->addr_len = addr_len;
+}
+
+IOURINGINLINE void io_uring_prep_sendto(struct io_uring_sqe *sqe, int sockfd,
+					const void *buf, size_t len, int flags,
+					const struct sockaddr *addr,
+					socklen_t addrlen)
+{
+	io_uring_prep_send(sqe, sockfd, buf, len, flags);
+	io_uring_prep_send_set_addr(sqe, addr, addrlen);
+}
+
+IOURINGINLINE void io_uring_prep_send_zc(struct io_uring_sqe *sqe, int sockfd,
+					 const void *buf, size_t len, int flags,
+					 unsigned zc_flags)
+{
+	io_uring_prep_rw(IORING_OP_SEND_ZC, sqe, sockfd, buf, (__u32) len, 0);
+	sqe->msg_flags = (__u32) flags;
+	sqe->ioprio = zc_flags;
+}
+
+IOURINGINLINE void io_uring_prep_send_zc_fixed(struct io_uring_sqe *sqe,
+						int sockfd, const void *buf,
+						size_t len, int flags,
+						unsigned zc_flags,
+						unsigned buf_index)
+{
+	io_uring_prep_send_zc(sqe, sockfd, buf, len, flags, zc_flags);
+	sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+	sqe->buf_index = buf_index;
+}
+
+IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
+					    const struct msghdr *msg,
+					    unsigned flags)
+{
+	io_uring_prep_sendmsg(sqe, fd, msg, flags);
+	sqe->opcode = IORING_OP_SENDMSG_ZC;
+}
+
+IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
 				      void *buf, size_t len, int flags)
 {
 	io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
 	sqe->msg_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
+IOURINGINLINE void io_uring_prep_recv_multishot(struct io_uring_sqe *sqe,
+						int sockfd, void *buf,
+						size_t len, int flags)
+{
+	io_uring_prep_recv(sqe, sockfd, buf, len, flags);
+	sqe->ioprio |= IORING_RECV_MULTISHOT;
+}
+
+IOURINGINLINE struct io_uring_recvmsg_out *
+io_uring_recvmsg_validate(void *buf, int buf_len, struct msghdr *msgh)
+{
+	unsigned long header = msgh->msg_controllen + msgh->msg_namelen +
+				sizeof(struct io_uring_recvmsg_out);
+	if (buf_len < 0 || (unsigned long)buf_len < header)
+		return NULL;
+	return (struct io_uring_recvmsg_out *)buf;
+}
+
+IOURINGINLINE void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *o)
+{
+	return (void *) &o[1];
+}
+
+IOURINGINLINE struct cmsghdr *
+io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *o,
+			       struct msghdr *msgh)
+{
+	if (o->controllen < sizeof(struct cmsghdr))
+		return NULL;
+
+	return (struct cmsghdr *)((unsigned char *) io_uring_recvmsg_name(o) +
+			msgh->msg_namelen);
+}
+
+IOURINGINLINE struct cmsghdr *
+io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh,
+			      struct cmsghdr *cmsg)
+{
+	unsigned char *end;
+
+	if (cmsg->cmsg_len < sizeof(struct cmsghdr))
+		return NULL;
+	end = (unsigned char *) io_uring_recvmsg_cmsg_firsthdr(o, msgh) +
+		o->controllen;
+	cmsg = (struct cmsghdr *)((unsigned char *) cmsg +
+			CMSG_ALIGN(cmsg->cmsg_len));
+
+	if ((unsigned char *) (cmsg + 1) > end)
+		return NULL;
+	if (((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len) > end)
+		return NULL;
+
+	return cmsg;
+}
+
+IOURINGINLINE void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *o,
+					     struct msghdr *msgh)
+{
+	return (void *)((unsigned char *)io_uring_recvmsg_name(o) +
+			msgh->msg_namelen + msgh->msg_controllen);
+}
+
+IOURINGINLINE unsigned int
+io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *o,
+				int buf_len, struct msghdr *msgh)
+{
+	unsigned long payload_start, payload_end;
+
+	payload_start = (unsigned long) io_uring_recvmsg_payload(o, msgh);
+	payload_end = (unsigned long) o + buf_len;
+	return (unsigned int) (payload_end - payload_start);
+}
+
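Putting the recvmsg-out helpers together: a sketch of parsing one multishot-recvmsg completion, assuming `buf` points at the provided buffer selected for this CQE, `msg` is the struct msghdr passed at prep time, and consume_datagram() is application code:

	struct io_uring_recvmsg_out *out;

	out = io_uring_recvmsg_validate(buf, cqe->res, &msg);
	if (out) {
		void *name = io_uring_recvmsg_name(out); /* msg_namelen bytes */
		void *payload = io_uring_recvmsg_payload(out, &msg);
		unsigned int len = io_uring_recvmsg_payload_length(out,
								   cqe->res,
								   &msg);

		consume_datagram(name, payload, len);
	}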
+IOURINGINLINE void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
 					const char *path, struct open_how *how)
 {
 	io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
@@ -680,17 +980,20 @@
 }
 
 /* open directly into the fixed file table */
-static inline void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
 						int dfd, const char *path,
 						struct open_how *how,
 						unsigned file_index)
 {
 	io_uring_prep_openat2(sqe, dfd, path, how);
+	/* offset by 1 for allocation */
+	if (file_index == IORING_FILE_INDEX_ALLOC)
+		file_index--;
 	__io_uring_set_target_fixed_file(sqe, file_index);
 }
 
 struct epoll_event;
-static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
+IOURINGINLINE void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
 					   int fd, int op,
 					   struct epoll_event *ev)
 {
@@ -698,7 +1001,7 @@
 				(__u32) op, (__u32) fd);
 }
 
-static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
 						 void *addr, int len, int nr,
 						 int bgid, int bid)
 {
@@ -707,35 +1010,35 @@
 	sqe->buf_group = (__u16) bgid;
 }
 
-static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
 						int nr, int bgid)
 {
 	io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
 	sqe->buf_group = (__u16) bgid;
 }
 
-static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
 					  int how)
 {
 	io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
 }
 
-static inline void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
+IOURINGINLINE void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
 					  const char *path, int flags)
 {
 	io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
 	sqe->unlink_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_unlink(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_unlink(struct io_uring_sqe *sqe,
 					  const char *path, int flags)
 {
 	io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags);
 }
 
-static inline void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
+IOURINGINLINE void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
 					  const char *oldpath, int newdfd,
-					  const char *newpath, int flags)
+					  const char *newpath, unsigned int flags)
 {
 	io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath,
 				(__u32) newdfd,
@@ -743,13 +1046,14 @@
 	sqe->rename_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_rename(struct io_uring_sqe *sqe,
-					  const char *oldpath, const char *newpath)
+IOURINGINLINE void io_uring_prep_rename(struct io_uring_sqe *sqe,
+					const char *oldpath,
+					const char *newpath)
 {
 	io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
 }
 
-static inline void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
 						 int fd, unsigned len,
 						 __u64 offset, int flags)
 {
@@ -757,19 +1061,19 @@
 	sqe->sync_range_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
+IOURINGINLINE void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
 					const char *path, mode_t mode)
 {
 	io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
 }
 
-static inline void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
 					const char *path, mode_t mode)
 {
 	io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode);
 }
 
-static inline void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
 					   const char *target, int newdirfd,
 					   const char *linkpath)
 {
@@ -777,13 +1081,14 @@
 				(uint64_t) (uintptr_t) linkpath);
 }
 
-static inline void io_uring_prep_symlink(struct io_uring_sqe *sqe,
-					   const char *target, const char *linkpath)
+IOURINGINLINE void io_uring_prep_symlink(struct io_uring_sqe *sqe,
+					 const char *target,
+					 const char *linkpath)
 {
 	io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath);
 }
 
-static inline void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
+IOURINGINLINE void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
 					const char *oldpath, int newdfd,
 					const char *newpath, int flags)
 {
@@ -792,25 +1097,55 @@
 	sqe->hardlink_flags = (__u32) flags;
 }
 
-static inline void io_uring_prep_link(struct io_uring_sqe *sqe,
-					const char *oldpath, const char *newpath, int flags)
+IOURINGINLINE void io_uring_prep_link(struct io_uring_sqe *sqe,
+				      const char *oldpath, const char *newpath,
+				      int flags)
 {
 	io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags);
 }
 
-static inline void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
+IOURINGINLINE void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *sqe,
+					  int fd, unsigned int len, __u64 data,
+					  unsigned int flags, unsigned int cqe_flags)
+{
+	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
+	sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS | flags;
+	sqe->file_index = cqe_flags;
+}
+
+IOURINGINLINE void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
 					  unsigned int len, __u64 data,
 					  unsigned int flags)
 {
 	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
-	sqe->rw_flags = flags;
+	sqe->msg_ring_flags = flags;
 }
 
-static inline void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
-					  const char *name,
-					  const char *value,
-					  const char *path,
-					  size_t len)
+IOURINGINLINE void io_uring_prep_msg_ring_fd(struct io_uring_sqe *sqe, int fd,
+					     int source_fd, int target_fd,
+					     __u64 data, unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd,
+			 (void *) (uintptr_t) IORING_MSG_SEND_FD, 0, data);
+	sqe->addr3 = source_fd;
+	/* offset by 1 for allocation */
+	if ((unsigned int) target_fd == IORING_FILE_INDEX_ALLOC)
+		target_fd--;
+	__io_uring_set_target_fixed_file(sqe, target_fd);
+	sqe->msg_ring_flags = flags;
+}
+
+IOURINGINLINE void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *sqe,
+						   int fd, int source_fd,
+						   __u64 data, unsigned int flags)
+{
+	io_uring_prep_msg_ring_fd(sqe, fd, source_fd, IORING_FILE_INDEX_ALLOC,
+				  data, flags);
+}
+
+IOURINGINLINE void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
+					  const char *name, char *value,
+					  const char *path, unsigned int len)
 {
 	io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len,
 				(__u64) (uintptr_t) value);
@@ -818,12 +1153,10 @@
 	sqe->xattr_flags = 0;
 }
 
-static inline void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
-					  const char *name,
-					  const char *value,
-					  const char *path,
-					  int flags,
-					  size_t len)
+IOURINGINLINE void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
+					  const char *name, const char *value,
+					  const char *path, int flags,
+					  unsigned int len)
 {
 	io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len,
 				(__u64) (uintptr_t) value);
@@ -831,30 +1164,25 @@
 	sqe->xattr_flags = flags;
 }
 
-static inline void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
-		                           int         fd,
-					   const char *name,
-					   const char *value,
-					   size_t      len)
+IOURINGINLINE void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
+					   int fd, const char *name,
+					   char *value, unsigned int len)
 {
 	io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len,
 				(__u64) (uintptr_t) value);
 	sqe->xattr_flags = 0;
 }
 
-static inline void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe,
-					   int         fd,
-					   const char *name,
-					   const char *value,
-					   int         flags,
-					   size_t      len)
+IOURINGINLINE void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe, int fd,
+					   const char *name, const char	*value,
+					   int flags, unsigned int len)
 {
 	io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len,
 				(__u64) (uintptr_t) value);
 	sqe->xattr_flags = flags;
 }
 
-static inline void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
+IOURINGINLINE void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
 					int type, int protocol,
 					unsigned int flags)
 {
@@ -862,7 +1190,7 @@
 	sqe->rw_flags = flags;
 }
 
-static inline void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
+IOURINGINLINE void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
 					       int domain, int type,
 					       int protocol,
 					       unsigned file_index,
@@ -870,12 +1198,16 @@
 {
 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
 	sqe->rw_flags = flags;
+	/* offset by 1 for allocation */
+	if (file_index == IORING_FILE_INDEX_ALLOC)
+		file_index--;
 	__io_uring_set_target_fixed_file(sqe, file_index);
 }
 
-static inline void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
-				int domain, int type, int protocol,
-				unsigned int flags)
+IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
+						     int domain, int type,
+						     int protocol,
+						     unsigned int flags)
 {
 	io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
 	sqe->rw_flags = flags;
@@ -883,12 +1215,100 @@
 }
 
 /*
+ * Prepare commands for sockets
+ */
+IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe,
+					  int cmd_op,
+					  int fd,
+					  int level,
+					  int optname,
+					  void *optval,
+					  int optlen)
+{
+	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
+	sqe->optval = (unsigned long) (uintptr_t) optval;
+	sqe->optname = optname;
+	sqe->optlen = optlen;
+	sqe->cmd_op = cmd_op;
+	sqe->level = level;
+}
+
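A sketch of issuing a setsockopt through the ring with the helper above, assuming `sqe` came from io_uring_get_sqe() and SOCKET_URING_OP_SETSOCKOPT is available from the kernel's io_uring uapi (6.7+):

	int one = 1;

	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SETSOCKOPT, sockfd,
			       SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));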
+IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe,
+					idtype_t idtype,
+					id_t id,
+					siginfo_t *infop,
+					int options, unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0);
+	sqe->waitid_flags = flags;
+	sqe->file_index = options;
+	sqe->addr2 = (unsigned long) infop;
+}
+
+IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe,
+					    uint32_t *futex, uint64_t val,
+					    uint64_t mask, uint32_t futex_flags,
+					    unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val);
+	sqe->futex_flags = flags;
+	sqe->addr3 = mask;
+}
+
+IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe,
+					    uint32_t *futex, uint64_t val,
+					    uint64_t mask, uint32_t futex_flags,
+					    unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val);
+	sqe->futex_flags = flags;
+	sqe->addr3 = mask;
+}
+
+struct futex_waitv;
+IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe,
+					     struct futex_waitv *futex,
+					     uint32_t nr_futex,
+					     unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0);
+	sqe->futex_flags = flags;
+}
+
+IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe,
+						  int fd,
+						  unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0);
+	sqe->flags = IOSQE_FIXED_FILE;
+	sqe->install_fd_flags = flags;
+}
+
+#ifdef _GNU_SOURCE
+IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
+					   int fd, loff_t len)
+{
+	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len);
+}
+#endif
+
+IOURINGINLINE void io_uring_prep_cmd_discard(struct io_uring_sqe *sqe,
+					     int fd,
+					     uint64_t offset, uint64_t nbytes)
+{
+	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, 0, 0, 0);
+	sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
+	sqe->addr = offset;
+	sqe->addr3 = nbytes;
+}
+
+/*
  * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries in
  * the SQ ring
  */
-static inline unsigned io_uring_sq_ready(const struct io_uring *ring)
+IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
 {
-	unsigned khead = *ring->sq.khead;
+	unsigned khead;
 
 	/*
 	 * Without a barrier, we could miss an update and think the SQ wasn't
@@ -897,6 +1317,8 @@
 	 */
 	if (ring->flags & IORING_SETUP_SQPOLL)
 		khead = io_uring_smp_load_acquire(ring->sq.khead);
+	else
+		khead = *ring->sq.khead;
 
 	/* always use real head, to avoid losing sync for short submit */
 	return ring->sq.sqe_tail - khead;
@@ -905,9 +1327,9 @@
 /*
  * Returns how much space is left in the SQ ring.
  */
-static inline unsigned io_uring_sq_space_left(const struct io_uring *ring)
+IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring)
 {
-	return *ring->sq.kring_entries - io_uring_sq_ready(ring);
+	return ring->sq.ring_entries - io_uring_sq_ready(ring);
 }
 
 /*
@@ -917,7 +1339,7 @@
  * action is taken. Note: may return -EINVAL if the kernel doesn't support
  * this feature.
  */
-static inline int io_uring_sqring_wait(struct io_uring *ring)
+IOURINGINLINE int io_uring_sqring_wait(struct io_uring *ring)
 {
 	if (!(ring->flags & IORING_SETUP_SQPOLL))
 		return 0;
@@ -930,15 +1352,24 @@
 /*
  * Returns how many unconsumed entries are ready in the CQ ring
  */
-static inline unsigned io_uring_cq_ready(const struct io_uring *ring)
+IOURINGINLINE unsigned io_uring_cq_ready(const struct io_uring *ring)
 {
 	return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
 }
 
 /*
+ * Returns true if there are overflow entries waiting to be flushed onto
+ * the CQ ring
+ */
+IOURINGINLINE bool io_uring_cq_has_overflow(const struct io_uring *ring)
+{
+	return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
+}
+
+/*
  * Returns true if the eventfd notification is currently enabled
  */
-static inline bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
+IOURINGINLINE bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
 {
 	if (!ring->cq.kflags)
 		return true;
@@ -950,7 +1381,7 @@
  * Toggle eventfd notification on or off, if an eventfd is registered with
  * the ring.
  */
-static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
+IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring,
 					     bool enabled)
 {
 	uint32_t flags;
@@ -978,7 +1409,7 @@
  * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
  * failure.
  */
-static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
+IOURINGINLINE int io_uring_wait_cqe_nr(struct io_uring *ring,
 				      struct io_uring_cqe **cqe_ptr,
 				      unsigned wait_nr)
 {
@@ -990,14 +1421,14 @@
  * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(),
  * or io_uring_wait_cqes*().
  */
-static inline int __io_uring_peek_cqe(struct io_uring *ring,
+IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring,
 				      struct io_uring_cqe **cqe_ptr,
 				      unsigned *nr_available)
 {
 	struct io_uring_cqe *cqe;
 	int err = 0;
 	unsigned available;
-	unsigned mask = *ring->cq.kring_mask;
+	unsigned mask = ring->cq.ring_mask;
 	int shift = 0;
 
 	if (ring->flags & IORING_SETUP_CQE32)
@@ -1036,7 +1467,7 @@
  * Return an IO completion, if one is readily available. Returns 0 with
  * cqe_ptr filled in on success, -errno on failure.
  */
-static inline int io_uring_peek_cqe(struct io_uring *ring,
+IOURINGINLINE int io_uring_peek_cqe(struct io_uring *ring,
 				    struct io_uring_cqe **cqe_ptr)
 {
 	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
@@ -1049,7 +1480,7 @@
  * Return an IO completion, waiting for it if necessary. Returns 0 with
  * cqe_ptr filled in on success, -errno on failure.
  */
-static inline int io_uring_wait_cqe(struct io_uring *ring,
+IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring,
 				    struct io_uring_cqe **cqe_ptr)
 {
 	if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
@@ -1065,21 +1496,25 @@
  *
  * Returns a vacant sqe, or NULL if we're full.
  */
-static inline struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
+IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 {
 	struct io_uring_sq *sq = &ring->sq;
-	unsigned int head = io_uring_smp_load_acquire(sq->khead);
-	unsigned int next = sq->sqe_tail + 1;
+	unsigned int head, next = sq->sqe_tail + 1;
 	int shift = 0;
 
 	if (ring->flags & IORING_SETUP_SQE128)
 		shift = 1;
+	if (!(ring->flags & IORING_SETUP_SQPOLL))
+		head = *sq->khead;
+	else
+		head = io_uring_smp_load_acquire(sq->khead);
 
-	if (next - head <= *sq->kring_entries) {
+	if (next - head <= sq->ring_entries) {
 		struct io_uring_sqe *sqe;
 
-		sqe = &sq->sqes[(sq->sqe_tail & *sq->kring_mask) << shift];
+		sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
 		sq->sqe_tail = next;
+		io_uring_initialize_sqe(sqe);
 		return sqe;
 	}
 
@@ -1089,12 +1524,12 @@
 /*
  * Return the appropriate mask for a buffer ring of size 'ring_entries'
  */
-static inline int io_uring_buf_ring_mask(__u32 ring_entries)
+IOURINGINLINE int io_uring_buf_ring_mask(__u32 ring_entries)
 {
 	return ring_entries - 1;
 }
 
-static inline void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
+IOURINGINLINE void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
 {
 	br->tail = 0;
 }
@@ -1102,7 +1537,7 @@
 /*
  * Assign 'buf' with the addr/len/buffer ID supplied
  */
-static inline void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
+IOURINGINLINE void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
 					 void *addr, unsigned int len,
 					 unsigned short bid, int mask,
 					 int buf_offset)
@@ -1119,7 +1554,7 @@
  * io_uring_buf_ring_add() has been called 'count' times to fill in new
  * buffers.
  */
-static inline void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
+IOURINGINLINE void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
 					     int count)
 {
 	unsigned short new_tail = br->tail + count;
@@ -1127,6 +1562,14 @@
 	io_uring_smp_store_release(&br->tail, new_tail);
 }
 
+IOURINGINLINE void __io_uring_buf_ring_cq_advance(struct io_uring *ring,
+						  struct io_uring_buf_ring *br,
+						  int cq_count, int buf_count)
+{
+	io_uring_buf_ring_advance(br, buf_count);
+	io_uring_cq_advance(ring, cq_count);
+}
+
 /*
  * Make 'count' new buffers visible to the kernel while at the same time
  * advancing the CQ ring seen entries. This can be used when the application
@@ -1134,16 +1577,29 @@
  * avoiding an extra atomic when needing to increment both the CQ ring and
  * the ring buffer index at the same time.
  */
-static inline void io_uring_buf_ring_cq_advance(struct io_uring *ring,
+IOURINGINLINE void io_uring_buf_ring_cq_advance(struct io_uring *ring,
 						struct io_uring_buf_ring *br,
 						int count)
 {
-	br->tail += count;
-	io_uring_cq_advance(ring, count);
+	__io_uring_buf_ring_cq_advance(ring, br, count, count);
+}
+
+IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
+					      struct io_uring_buf_ring *br,
+					      unsigned short bgid)
+{
+	uint16_t head;
+	int ret;
+
+	ret = io_uring_buf_ring_head(ring, bgid, &head);
+	if (ret)
+		return ret;
+
+	return (uint16_t) (br->tail - head);
 }
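One plausible use of io_uring_buf_ring_available() is a low-watermark refill check; 'low' and the policy below are assumptions of this sketch, not liburing API:

```c
#include <liburing.h>

static int ring_needs_refill(struct io_uring *ring,
			     struct io_uring_buf_ring *br,
			     unsigned short bgid, int low)
{
	int avail = io_uring_buf_ring_available(ring, br, bgid);

	if (avail < 0)
		return avail;	/* -errno from IORING_REGISTER_PBUF_STATUS */
	return avail < low;	/* nonzero: time to add buffers */
}
```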
 
 #ifndef LIBURING_INTERNAL
-static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 {
 	return _io_uring_get_sqe(ring);
 }
@@ -1154,8 +1610,30 @@
 ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
 ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);
 
+/*
+ * Versioning information for liburing.
+ *
+ * Use IO_URING_CHECK_VERSION() for compile time checks including from
+ * preprocessor directives.
+ *
+ * Use io_uring_check_version() for runtime checks of the version of
+ * liburing that was loaded by the dynamic linker.
+ */
+int io_uring_major_version(void);
+int io_uring_minor_version(void);
+bool io_uring_check_version(int major, int minor);
+
+#define IO_URING_CHECK_VERSION(major,minor) \
+  (major > IO_URING_VERSION_MAJOR ||        \
+   (major == IO_URING_VERSION_MAJOR &&      \
+    minor > IO_URING_VERSION_MINOR))
+
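A small sketch of both checks; note that the macro and the function are true when the available liburing is *older* than the version asked for:

```c
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	/* Compile-time: true if the build headers are older than 2.8. */
#if IO_URING_CHECK_VERSION(2, 8)
	printf("built against liburing < 2.8\n");
#endif
	/* Runtime: the dynamically loaded library may differ from the
	 * headers the binary was built against. */
	if (io_uring_check_version(2, 8))
		printf("running liburing %d.%d (< 2.8)\n",
		       io_uring_major_version(), io_uring_minor_version());
	return 0;
}
```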
 #ifdef __cplusplus
 }
 #endif
 
+#ifdef IOURINGINLINE
+#undef IOURINGINLINE
+#endif
+
 #endif
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 2f391c9..4ac13f7 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -10,6 +10,15 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
+/*
+ * This file is shared with liburing, which has to autodetect whether
+ * linux/time_types.h is available. If it is not, liburing can define
+ * UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H to skip the include.
+ */
+#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
+#include <linux/time_types.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -26,10 +35,18 @@
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		struct {
+			__u32	cmd_op;
+			__u32	__pad1;
+		};
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
 		__u64	splice_off_in;
+		struct {
+			__u32	level;
+			__u32	optname;
+		};
 	};
 	__u32	len;		/* buffer size or number of iovecs */
 	union {
@@ -50,6 +67,12 @@
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
+		__u32		msg_ring_flags;
+		__u32		uring_cmd_flags;
+		__u32		waitid_flags;
+		__u32		futex_flags;
+		__u32		install_fd_flags;
+		__u32		nop_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -64,9 +87,24 @@
 	union {
 		__s32	splice_fd_in;
 		__u32	file_index;
+		__u32	optlen;
+		struct {
+			__u16	addr_len;
+			__u16	__pad3[1];
+		};
 	};
-	__u64	addr3;
-	__u64	__pad2[1];
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		__u64	optval;
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 
 /*
@@ -78,7 +116,7 @@
  */
 #define IORING_FILE_INDEX_ALLOC		(~0U)
 
-enum {
+enum io_uring_sqe_flags_bit {
 	IOSQE_FIXED_FILE_BIT,
 	IOSQE_IO_DRAIN_BIT,
 	IOSQE_IO_LINK_BIT,
@@ -131,9 +169,36 @@
  * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
  */
 #define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
-
 #define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
 #define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */
+/*
+ * Only one task is allowed to submit requests
+ */
+#define IORING_SETUP_SINGLE_ISSUER	(1U << 12)
+
+/*
+ * Defer running task work to get events.
+ * Rather than running bits of task work whenever the task transitions
+ * try to do it just before it is needed.
+ */
+#define IORING_SETUP_DEFER_TASKRUN	(1U << 13)
+
+/*
+ * Application provides the memory for the rings
+ */
+#define IORING_SETUP_NO_MMAP		(1U << 14)
+
+/*
+ * Register the ring fd in itself for use with
+ * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
+ * than an fd.
+ */
+#define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)
+
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY		(1U << 16)
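As a usage sketch (not part of this patch), the newer setup flags are typically requested together and need kernel support, so a fallback keeps the code portable; the entry count of 8 is arbitrary:

```c
#include <liburing.h>

static int make_ring(struct io_uring *ring)
{
	struct io_uring_params p = { 0 };
	int ret;

	p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	ret = io_uring_queue_init_params(8, ring, &p);
	if (ret == -EINVAL)	/* older kernel: retry without the flags */
		ret = io_uring_queue_init(8, ring, 0);
	return ret;
}
```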
 
 enum io_uring_op {
 	IORING_OP_NOP,
@@ -183,12 +248,32 @@
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
 	IORING_OP_URING_CMD,
+	IORING_OP_SEND_ZC,
+	IORING_OP_SENDMSG_ZC,
+	IORING_OP_READ_MULTISHOT,
+	IORING_OP_WAITID,
+	IORING_OP_FUTEX_WAIT,
+	IORING_OP_FUTEX_WAKE,
+	IORING_OP_FUTEX_WAITV,
+	IORING_OP_FIXED_FD_INSTALL,
+	IORING_OP_FTRUNCATE,
+	IORING_OP_BIND,
+	IORING_OP_LISTEN,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
 };
 
 /*
+ * sqe->uring_cmd_flags		the top 8 bits aren't available to userspace
+ * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
+ *				along with setting sqe->buf_index.
+ */
+#define IORING_URING_CMD_FIXED	(1U << 0)
+#define IORING_URING_CMD_MASK	IORING_URING_CMD_FIXED
+
+
+/*
  * sqe->fsync_flags
  */
 #define IORING_FSYNC_DATASYNC	(1U << 0)
@@ -202,6 +287,7 @@
 #define IORING_TIMEOUT_REALTIME		(1U << 3)
 #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
 #define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
+#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
 #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
 #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
@@ -220,10 +306,13 @@
  *
  * IORING_POLL_UPDATE		Update existing poll request, matching
  *				sqe->addr as the old user_data field.
+ *
+ * IORING_POLL_ADD_LEVEL	Level triggered poll.
  */
 #define IORING_POLL_ADD_MULTI	(1U << 0)
 #define IORING_POLL_UPDATE_EVENTS	(1U << 1)
 #define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
+#define IORING_POLL_ADD_LEVEL		(1U << 3)
 
 /*
  * ASYNC_CANCEL flags.
@@ -232,31 +321,107 @@
  * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
  *				request 'user_data'
  * IORING_ASYNC_CANCEL_ANY	Match any request
+ * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
+ * IORING_ASYNC_CANCEL_USERDATA	Match on user_data, default for no other key
+ * IORING_ASYNC_CANCEL_OP	Match request based on opcode
  */
 #define IORING_ASYNC_CANCEL_ALL	(1U << 0)
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
-#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+#define IORING_ASYNC_CANCEL_ANY	(1U << 2)
+#define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)
+#define IORING_ASYNC_CANCEL_USERDATA	(1U << 4)
+#define IORING_ASYNC_CANCEL_OP	(1U << 5)
 
 /*
- * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
  *
  * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
  *				or receive and arm poll if that yields an
  *				-EAGAIN result, arm poll upfront and skip
  *				the initial transfer attempt.
+ *
+ * IORING_RECV_MULTISHOT	Multishot recv. Sets IORING_CQE_F_MORE if
+ *				the handler will continue to report
+ *				CQEs on behalf of the same SQE.
+ *
+ * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
+ *				the buf_index field.
+ *
+ * IORING_SEND_ZC_REPORT_USAGE
+ *				If set, SEND[MSG]_ZC should report
+ *				the zerocopy usage in cqe.res
+ *				for the IORING_CQE_F_NOTIF cqe.
+ *				0 is reported if zerocopy was actually possible.
+ *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ *				(at least partially).
+ *
+ * IORING_RECVSEND_BUNDLE	Used with IOSQE_BUFFER_SELECT. If set, send or
+ *				recv will grab as many buffers as it can from
+ *				the given buffer group ID and send them all.
+ *				The completion result will be the number of
+ *				buffers sent, with the starting buffer ID in
+ *				cqe->flags as per usual for provided buffer
+ *				usage. The buffers will be contiguous from the
+ *				starting buffer ID.
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
+#define IORING_RECV_MULTISHOT		(1U << 1)
+#define IORING_RECVSEND_FIXED_BUF	(1U << 2)
+#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
+#define IORING_RECVSEND_BUNDLE		(1U << 4)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_ZC_REPORT_USAGE was requested
+ *
+ * It should be treated as a flag, all other
+ * bits of cqe.res should be treated as reserved!
+ */
+#define IORING_NOTIF_USAGE_ZC_COPIED    (1U << 31)
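A hedged sketch of reaping a zero-copy send under these flags: it assumes a single outstanding send and that IORING_SEND_ZC_REPORT_USAGE was set on the SQE. The first CQE carries the send result; a second one flagged IORING_CQE_F_NOTIF signals the buffer may be reused.

```c
#include <liburing.h>
#include <stdio.h>

static void reap_send_zc(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	unsigned flags = 0;

	do {
		if (io_uring_wait_cqe(ring, &cqe))
			break;
		flags = cqe->flags;
		if (flags & IORING_CQE_F_NOTIF) {
			/* the buffer may be reused from this point on */
			if (cqe->res & IORING_NOTIF_USAGE_ZC_COPIED)
				fprintf(stderr, "fell back to copying\n");
		} else if (cqe->res < 0) {
			fprintf(stderr, "send failed: %d\n", cqe->res);
		}
		io_uring_cqe_seen(ring, cqe);
		/* F_MORE on the completion CQE means a notif CQE follows */
	} while (flags & IORING_CQE_F_MORE);
}
```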
 
 /*
  * accept flags stored in sqe->ioprio
  */
 #define IORING_ACCEPT_MULTISHOT	(1U << 0)
+#define IORING_ACCEPT_DONTWAIT	(1U << 1)
+#define IORING_ACCEPT_POLL_FIRST	(1U << 2)
+
+/*
+ * IORING_OP_MSG_RING command types, stored in sqe->addr
+ */
+enum io_uring_msg_ring_flags {
+	IORING_MSG_DATA,	/* pass sqe->len as 'res' and off as user_data */
+	IORING_MSG_SEND_FD,	/* send a registered fd to another ring */
+};
+
+/*
+ * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
+ *
+ * IORING_MSG_RING_CQE_SKIP	Don't post a CQE to the target ring. Not
+ *				applicable for IORING_MSG_DATA, obviously.
+ */
+#define IORING_MSG_RING_CQE_SKIP	(1U << 0)
+/* Pass through the flags from sqe->file_index to cqe->flags */
+#define IORING_MSG_RING_FLAGS_PASS	(1U << 1)
+
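For context, a sketch of posting a CQE into another thread's ring with io_uring_prep_msg_ring(); 'target_fd' (the receiving ring's descriptor) and the 0x10/0xcafe values are assumptions of this example:

```c
#include <liburing.h>

static int poke_ring(struct io_uring *self, int target_fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(self);

	if (!sqe)
		return -EBUSY;
	/* the target observes a CQE with res == 0x10, user_data == 0xcafe */
	io_uring_prep_msg_ring(sqe, target_fd, 0x10, 0xcafe, 0);
	return io_uring_submit(self);
}
```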
+/*
+ * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
+ *
+ * IORING_FIXED_FD_NO_CLOEXEC	Don't mark the fd as O_CLOEXEC
+ */
+#define IORING_FIXED_FD_NO_CLOEXEC	(1U << 0)
+
+/*
+ * IORING_OP_NOP flags (sqe->nop_flags)
+ *
+ * IORING_NOP_INJECT_RESULT	Inject result from sqe->result
+ */
+#define IORING_NOP_INJECT_RESULT	(1U << 0)
 
 /*
  * IO completion data structure (Completion Queue Entry)
  */
 struct io_uring_cqe {
-	__u64	user_data;	/* sqe->data submission passed back */
+	__u64	user_data;	/* sqe->user_data submission passed back */
 	__s32	res;		/* result code for this event */
 	__u32	flags;
 
@@ -273,14 +438,25 @@
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
  * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
+ * IORING_CQE_F_NOTIF	Set for notification CQEs. Can be used to distinguish
+ *			them from sends.
+ * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get
+ *			more completions. In other words, the buffer is being
+ *			partially consumed, and will be used by the kernel for
+ *			more completions. This is only set for buffers used via
+ *			incremental buffer consumption, as provided by a buffer
+ *			ring set up with IOU_PBUF_RING_INC. For any other
+ *			provided buffer type, a buffer passed back in a
+ *			completion is automatically returned to the
+ *			application.
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
 #define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
+#define IORING_CQE_F_NOTIF		(1U << 3)
+#define IORING_CQE_F_BUF_MORE		(1U << 4)
 
-enum {
-	IORING_CQE_BUFFER_SHIFT		= 16,
-};
+#define IORING_CQE_BUFFER_SHIFT		16
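A small sketch of recovering the provided-buffer ID from a completion using this shift; it is only meaningful when IORING_CQE_F_BUFFER is set:

```c
#include <liburing.h>

static int cqe_buffer_id(const struct io_uring_cqe *cqe)
{
	if (!(cqe->flags & IORING_CQE_F_BUFFER))
		return -1;	/* no buffer was selected for this CQE */
	return cqe->flags >> IORING_CQE_BUFFER_SHIFT;
}
```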
 
 /*
  * Magic offsets for the application to mmap the data it needs
@@ -288,6 +464,9 @@
 #define IORING_OFF_SQ_RING		0ULL
 #define IORING_OFF_CQ_RING		0x8000000ULL
 #define IORING_OFF_SQES			0x10000000ULL
+#define IORING_OFF_PBUF_RING		0x80000000ULL
+#define IORING_OFF_PBUF_SHIFT		16
+#define IORING_OFF_MMAP_MASK		0xf8000000ULL
 
 /*
  * Filled with the offset for mmap(2)
@@ -301,7 +480,7 @@
 	__u32 dropped;
 	__u32 array;
 	__u32 resv1;
-	__u64 resv2;
+	__u64 user_addr;
 };
 
 /*
@@ -320,7 +499,7 @@
 	__u32 cqes;
 	__u32 flags;
 	__u32 resv1;
-	__u64 resv2;
+	__u64 user_addr;
 };
 
 /*
@@ -338,6 +517,7 @@
 #define IORING_ENTER_SQ_WAIT		(1U << 2)
 #define IORING_ENTER_EXT_ARG		(1U << 3)
 #define IORING_ENTER_REGISTERED_RING	(1U << 4)
+#define IORING_ENTER_ABS_TIMER		(1U << 5)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -371,11 +551,14 @@
 #define IORING_FEAT_RSRC_TAGS		(1U << 10)
 #define IORING_FEAT_CQE_SKIP		(1U << 11)
 #define IORING_FEAT_LINKED_FILE		(1U << 12)
+#define IORING_FEAT_REG_REG_RING	(1U << 13)
+#define IORING_FEAT_RECVSEND_BUNDLE	(1U << 14)
+#define IORING_FEAT_MIN_TIMEOUT		(1U << 15)
 
 /*
  * io_uring_register(2) opcodes and arguments
  */
-enum {
+enum io_uring_register_op {
 	IORING_REGISTER_BUFFERS			= 0,
 	IORING_UNREGISTER_BUFFERS		= 1,
 	IORING_REGISTER_FILES			= 2,
@@ -411,12 +594,33 @@
 	IORING_REGISTER_PBUF_RING		= 22,
 	IORING_UNREGISTER_PBUF_RING		= 23,
 
+	/* sync cancelation API */
+	IORING_REGISTER_SYNC_CANCEL		= 24,
+
+	/* register a range of fixed file slots for automatic slot allocation */
+	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
+
+	/* return status information for a buffer group */
+	IORING_REGISTER_PBUF_STATUS		= 26,
+
+	/* set/clear busy poll settings */
+	IORING_REGISTER_NAPI			= 27,
+	IORING_UNREGISTER_NAPI			= 28,
+
+	IORING_REGISTER_CLOCK			= 29,
+
+	/* clone registered buffers from source ring to current ring */
+	IORING_REGISTER_CLONE_BUFFERS		= 30,
+
 	/* this goes last */
-	IORING_REGISTER_LAST
+	IORING_REGISTER_LAST,
+
+	/* flag added to the opcode to use a registered ring fd */
+	IORING_REGISTER_USE_REGISTERED_RING	= 1U << 31
 };
 
 /* io-wq worker categories */
-enum {
+enum io_wq_type {
 	IO_WQ_BOUND,
 	IO_WQ_UNBOUND,
 };
@@ -474,7 +678,7 @@
 	__u8 ops_len;	/* length of ops[] array below */
 	__u16 resv;
 	__u32 resv2[3];
-	struct io_uring_probe_op ops[0];
+	struct io_uring_probe_op ops[];
 };
 
 struct io_uring_restriction {
@@ -488,6 +692,21 @@
 	__u32 resv2[3];
 };
 
+struct io_uring_clock_register {
+	__u32	clockid;
+	__u32	__resv[3];
+};
+
+enum {
+	IORING_REGISTER_SRC_REGISTERED = 1,
+};
+
+struct io_uring_clone_buffers {
+	__u32	src_fd;
+	__u32	flags;
+	__u32	pad[6];
+};
+
 struct io_uring_buf {
 	__u64	addr;
 	__u32	len;
@@ -511,19 +730,56 @@
 	};
 };
 
+/*
+ * Flags for IORING_REGISTER_PBUF_RING.
+ *
+ * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
+ *			The application must not set a ring_addr in struct
+ *			io_uring_buf_reg; instead, it must subsequently call
+ *			mmap(2) with the offset set as:
+ *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
+ *			to get a virtual mapping for the ring.
+ * IOU_PBUF_RING_INC:	If set, buffers consumed from this buffer ring can be
+ *			consumed incrementally. Normally one (or more) buffers
+ *			are fully consumed. With incremental consumption, it's
+ *			feasible to register big ranges of buffers, and each
+ *			use will consume only as much as it needs. This
+ *			requires that both the kernel and the application keep
+ *			track of the current read/recv index.
+ */
+enum io_uring_register_pbuf_ring_flags {
+	IOU_PBUF_RING_MMAP	= 1,
+	IOU_PBUF_RING_INC	= 2,
+};
+
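A sketch of bringing up a provided-buffer ring; NBUFS, BUF_SIZE, and the caller-supplied 'bufs' array are assumptions of this example, and io_uring_setup_buf_ring() (liburing 2.4+) hides the io_uring_buf_reg/mmap details described above:

```c
#include <liburing.h>

#define NBUFS		64
#define BUF_SIZE	4096	/* illustrative buffer size */

static struct io_uring_buf_ring *setup_buffers(struct io_uring *ring,
					       int bgid, void **bufs)
{
	struct io_uring_buf_ring *br;
	int i, ret;

	/* registers the ring for group 'bgid' and maps it in one call */
	br = io_uring_setup_buf_ring(ring, NBUFS, bgid, 0, &ret);
	if (!br)
		return NULL;	/* 'ret' holds -errno */
	for (i = 0; i < NBUFS; i++)
		io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i,
				      io_uring_buf_ring_mask(NBUFS), i);
	io_uring_buf_ring_advance(br, NBUFS);	/* publish all buffers */
	return br;
}
```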
 /* argument for IORING_(UN)REGISTER_PBUF_RING */
 struct io_uring_buf_reg {
 	__u64	ring_addr;
 	__u32	ring_entries;
 	__u16	bgid;
-	__u16	pad;
+	__u16	flags;
 	__u64	resv[3];
 };
 
+/* argument for IORING_REGISTER_PBUF_STATUS */
+struct io_uring_buf_status {
+	__u32	buf_group;	/* input */
+	__u32	head;		/* output */
+	__u32	resv[8];
+};
+
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+	__u32	busy_poll_to;
+	__u8	prefer_busy_poll;
+	__u8	pad[3];
+	__u64	resv;
+};
+
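A minimal sketch of enabling busy polling through this struct (liburing 2.6+); the 50us timeout is an arbitrary choice for illustration:

```c
#include <liburing.h>

static int enable_busy_poll(struct io_uring *ring)
{
	struct io_uring_napi napi = {
		.busy_poll_to = 50,	/* microseconds */
		.prefer_busy_poll = 1,
	};

	return io_uring_register_napi(ring, &napi);
}
```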
 /*
  * io_uring_restriction->opcode values
  */
-enum {
+enum io_uring_register_restriction_op {
 	/* Allow an io_uring_register(2) opcode */
 	IORING_RESTRICTION_REGISTER_OP		= 0,
 
@@ -542,14 +798,49 @@
 struct io_uring_getevents_arg {
 	__u64	sigmask;
 	__u32	sigmask_sz;
-	__u32	pad;
+	__u32	min_wait_usec;
 	__u64	ts;
 };
 
 /*
- * accept flags stored in sqe->ioprio
+ * Argument for IORING_REGISTER_SYNC_CANCEL
  */
-#define IORING_ACCEPT_MULTISHOT	(1U << 0)
+struct io_uring_sync_cancel_reg {
+	__u64				addr;
+	__s32				fd;
+	__u32				flags;
+	struct __kernel_timespec	timeout;
+	__u8				opcode;
+	__u8				pad[7];
+	__u64				pad2[3];
+};
+
+/*
+ * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
+ * The range is specified as [off, off + len)
+ */
+struct io_uring_file_index_range {
+	__u32	off;
+	__u32	len;
+	__u64	resv;
+};
+
+struct io_uring_recvmsg_out {
+	__u32 namelen;
+	__u32 controllen;
+	__u32 payloadlen;
+	__u32 flags;
+};
+
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum io_uring_socket_op {
+	SOCKET_URING_OP_SIOCINQ		= 0,
+	SOCKET_URING_OP_SIOCOUTQ,
+	SOCKET_URING_OP_GETSOCKOPT,
+	SOCKET_URING_OP_SETSOCKOPT,
+};
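A sketch of issuing one of these socket ops via io_uring_prep_cmd_sock() (liburing 2.5+); 'sockfd' is an assumption of the example:

```c
#include <liburing.h>

static int queue_siocinq(struct io_uring *ring, int sockfd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -EBUSY;
	/* bytes pending in the receive queue come back in cqe->res */
	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SIOCINQ, sockfd,
			       0, 0, NULL, 0);
	return io_uring_submit(ring);
}
```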
 
 #ifdef __cplusplus
 }
diff --git a/src/include/liburing/sanitize.h b/src/include/liburing/sanitize.h
new file mode 100644
index 0000000..9a53961
--- /dev/null
+++ b/src/include/liburing/sanitize.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_SANITIZE_H
+#define LIBURING_SANITIZE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct io_uring;
+struct iovec;
+
+#if defined(CONFIG_USE_SANITIZER)
+void liburing_sanitize_ring(struct io_uring *ring);
+void liburing_sanitize_address(const void *addr);
+void liburing_sanitize_region(const void *addr, unsigned int len);
+void liburing_sanitize_iovecs(const struct iovec *iovecs, unsigned nr);
+#else
+#define __maybe_unused	__attribute__((__unused__))
+static inline void liburing_sanitize_ring(struct io_uring __maybe_unused *ring)
+{
+}
+static inline void liburing_sanitize_address(const void __maybe_unused *addr)
+{
+}
+static inline void liburing_sanitize_region(const void __maybe_unused *addr,
+					    unsigned int __maybe_unused len)
+{
+}
+static inline void liburing_sanitize_iovecs(const struct iovec __maybe_unused *iovecs,
+					    unsigned __maybe_unused nr)
+{
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/int_flags.h b/src/int_flags.h
index 90505ec..7cfdd3c 100644
--- a/src/int_flags.h
+++ b/src/int_flags.h
@@ -2,8 +2,17 @@
 #ifndef LIBURING_INT_FLAGS
 #define LIBURING_INT_FLAGS
 
+#define INT_FLAGS_MASK		(IORING_ENTER_REGISTERED_RING)
+
 enum {
-	INT_FLAG_REG_RING	= 1,
+	INT_FLAG_REG_RING	= IORING_ENTER_REGISTERED_RING,
+	INT_FLAG_REG_REG_RING	= 1,
+	INT_FLAG_APP_MEM	= 2,
 };
 
+static inline int ring_enter_flags(struct io_uring *ring)
+{
+	return ring->int_flags & INT_FLAGS_MASK;
+}
+
 #endif
diff --git a/src/lib.h b/src/lib.h
index 6672cc5..ab1f095 100644
--- a/src/lib.h
+++ b/src/lib.h
@@ -6,52 +6,47 @@
 #include <string.h>
 #include <unistd.h>
 
-#define __INTERNAL__LIBURING_LIB_H
 #if defined(__x86_64__) || defined(__i386__)
-	#include "arch/x86/lib.h"
+#include "arch/x86/lib.h"
+#elif defined(__aarch64__)
+#include "arch/aarch64/lib.h"
+#elif defined(__riscv) && __riscv_xlen == 64
+#include "arch/riscv64/lib.h"
 #else
-	/*
-	 * We don't have nolibc support for this arch. Must use libc!
-	 */
-	#ifdef CONFIG_NOLIBC
-		#error "This arch doesn't support building liburing without libc"
-	#endif
-	/* libc wrappers. */
-	#include "arch/generic/lib.h"
+/*
+ * We don't have nolibc support for this arch. Must use libc!
+ */
+#ifdef CONFIG_NOLIBC
+#error "This arch doesn't support building liburing without libc"
 #endif
-#undef __INTERNAL__LIBURING_LIB_H
+/* libc wrappers. */
+#include "arch/generic/lib.h"
+#endif
 
 
 #ifndef offsetof
-	#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
 #endif
 
 #ifndef container_of
-	#define container_of(PTR, TYPE, FIELD) ({			\
-		__typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR);	\
-		(TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD));	\
-	})
+#define container_of(PTR, TYPE, FIELD) ({			\
+	__typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR);	\
+	(TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD));	\
+})
 #endif
 
+#define __maybe_unused		__attribute__((__unused__))
+#define __hot			__attribute__((__hot__))
+#define __cold			__attribute__((__cold__))
+
+#ifdef CONFIG_NOLIBC
+void *__uring_memset(void *s, int c, size_t n);
 void *__uring_malloc(size_t len);
 void __uring_free(void *p);
 
-static inline void *uring_malloc(size_t len)
-{
-#ifdef CONFIG_NOLIBC
-	return __uring_malloc(len);
-#else
-	return malloc(len);
+#define malloc(LEN)		__uring_malloc(LEN)
+#define free(PTR)		__uring_free(PTR)
+#define memset(PTR, C, LEN)	__uring_memset(PTR, C, LEN)
 #endif
-}
-
-static inline void uring_free(void *ptr)
-{
-#ifdef CONFIG_NOLIBC
-	__uring_free(ptr);
-#else
-	free(ptr);
-#endif
-}
 
 #endif /* #ifndef LIBURING_LIB_H */
diff --git a/src/liburing-ffi.map b/src/liburing-ffi.map
new file mode 100644
index 0000000..02766e1
--- /dev/null
+++ b/src/liburing-ffi.map
@@ -0,0 +1,217 @@
+LIBURING_2.4 {
+	global:
+		io_uring_get_probe;
+		io_uring_get_probe_ring;
+		io_uring_free_probe;
+		io_uring_get_sqe;
+		io_uring_peek_batch_cqe;
+		io_uring_queue_exit;
+		io_uring_queue_init;
+		io_uring_queue_init_params;
+		io_uring_queue_mmap;
+		io_uring_register_buffers;
+		io_uring_register_eventfd;
+		io_uring_register_eventfd_async;
+		io_uring_register_files;
+		io_uring_register_files_update;
+		io_uring_register_personality;
+		io_uring_register_probe;
+		io_uring_ring_dontfork;
+		io_uring_submit;
+		io_uring_submit_and_wait;
+		io_uring_unregister_buffers;
+		io_uring_unregister_eventfd;
+		io_uring_unregister_files;
+		io_uring_unregister_personality;
+		io_uring_wait_cqe_timeout;
+		io_uring_wait_cqes;
+
+		__io_uring_get_cqe;
+		__io_uring_sqring_wait;
+
+		io_uring_mlock_size_params;
+		io_uring_mlock_size;
+		io_uring_register_buffers_tags;
+		io_uring_register_buffers_update_tag;
+		io_uring_register_files_tags;
+		io_uring_register_files_update_tag;
+		io_uring_register_iowq_aff;
+		io_uring_unregister_iowq_aff;
+		io_uring_register_iowq_max_workers;
+
+		io_uring_submit_and_wait_timeout;
+		io_uring_register_ring_fd;
+		io_uring_unregister_ring_fd;
+		io_uring_register_files_sparse;
+		io_uring_register_buffers_sparse;
+		io_uring_register_buf_ring;
+		io_uring_unregister_buf_ring;
+		io_uring_close_ring_fd;
+		io_uring_setup_buf_ring;
+		io_uring_free_buf_ring;
+
+		io_uring_register_sync_cancel;
+		io_uring_register_file_alloc_range;
+		io_uring_enter;
+		io_uring_enter2;
+		io_uring_setup;
+		io_uring_register;
+		io_uring_get_events;
+		io_uring_submit_and_get_events;
+
+		io_uring_major_version;
+		io_uring_minor_version;
+		io_uring_check_version;
+
+		io_uring_peek_cqe;
+		io_uring_prep_timeout_update;
+		io_uring_buf_ring_init;
+		io_uring_prep_mkdirat;
+		io_uring_prep_recv_multishot;
+		io_uring_cq_advance;
+		io_uring_prep_multishot_accept;
+		io_uring_prep_fallocate;
+		io_uring_prep_link_timeout;
+		io_uring_prep_fsync;
+		io_uring_prep_openat_direct;
+		io_uring_prep_multishot_accept_direct;
+		io_uring_opcode_supported;
+		io_uring_prep_madvise;
+		io_uring_prep_send_set_addr;
+		io_uring_recvmsg_payload_length;
+		io_uring_prep_readv2;
+		io_uring_prep_msg_ring;
+		io_uring_prep_rename;
+		io_uring_prep_fadvise;
+		io_uring_prep_send_zc;
+		io_uring_buf_ring_advance;
+		io_uring_cqe_get_data;
+		io_uring_prep_symlinkat;
+		io_uring_prep_writev;
+		io_uring_cq_eventfd_toggle;
+		io_uring_prep_provide_buffers;
+		io_uring_cq_has_overflow;
+		io_uring_prep_cancel_fd;
+		io_uring_prep_socket;
+		io_uring_prep_close_direct;
+		io_uring_recvmsg_name;
+		io_uring_prep_timeout_remove;
+		io_uring_sqring_wait;
+		io_uring_cq_eventfd_enabled;
+		io_uring_prep_remove_buffers;
+		io_uring_prep_tee;
+		io_uring_prep_accept_direct;
+		io_uring_prep_nop;
+		io_uring_prep_getxattr;
+		io_uring_prep_link;
+		io_uring_prep_cancel;
+		io_uring_prep_readv;
+		io_uring_prep_connect;
+		io_uring_cq_ready;
+		io_uring_enable_rings;
+		io_uring_prep_shutdown;
+		io_uring_prep_openat;
+		io_uring_sq_space_left;
+		io_uring_recvmsg_payload;
+		io_uring_prep_send;
+		io_uring_buf_ring_add;
+		io_uring_prep_send_zc_fixed;
+		io_uring_prep_epoll_ctl;
+		io_uring_recvmsg_cmsg_firsthdr;
+		io_uring_prep_socket_direct;
+		io_uring_buf_ring_cq_advance;
+		__io_uring_buf_ring_cq_advance;
+		io_uring_prep_mkdir;
+		io_uring_wait_cqe_nr;
+		io_uring_prep_unlink;
+		io_uring_prep_writev2;
+		io_uring_prep_openat2_direct;
+		io_uring_sqe_set_flags;
+		io_uring_sqe_set_data;
+		io_uring_prep_accept;
+		io_uring_prep_poll_update;
+		io_uring_prep_splice;
+		io_uring_prep_poll_multishot;
+		io_uring_prep_symlink;
+		io_uring_sqe_set_data64;
+		io_uring_prep_cancel64;
+		io_uring_prep_fsetxattr;
+		io_uring_prep_recvmsg_multishot;
+		io_uring_cqe_seen;
+		io_uring_prep_sendmsg_zc;
+		io_uring_prep_read;
+		io_uring_prep_statx;
+		io_uring_prep_sendmsg;
+		io_uring_prep_unlinkat;
+		io_uring_prep_setxattr;
+		io_uring_cqe_get_data64;
+		io_uring_prep_renameat;
+		io_uring_prep_poll_remove;
+		io_uring_prep_close;
+		io_uring_sq_ready;
+		io_uring_prep_files_update;
+		io_uring_wait_cqe;
+		io_uring_prep_fgetxattr;
+		io_uring_prep_socket_direct_alloc;
+		io_uring_prep_sync_file_range;
+		io_uring_prep_read_fixed;
+		io_uring_prep_openat2;
+		io_uring_prep_recvmsg;
+		io_uring_recvmsg_cmsg_nexthdr;
+		io_uring_recvmsg_validate;
+		io_uring_prep_rw;
+		io_uring_prep_timeout;
+		io_uring_prep_linkat;
+		io_uring_prep_write_fixed;
+		io_uring_prep_poll_add;
+		io_uring_buf_ring_mask;
+		io_uring_register_restrictions;
+		io_uring_prep_write;
+		io_uring_prep_recv;
+		io_uring_prep_msg_ring_cqe_flags;
+		io_uring_prep_msg_ring_fd;
+		io_uring_prep_msg_ring_fd_alloc;
+		io_uring_prep_sendto;
+		io_uring_register_napi;		/* Added in 2.6. */
+		io_uring_unregister_napi;	/* Added in 2.6. */
+	local:
+		*;
+};
+
+LIBURING_2.5 {
+	global:
+		io_uring_queue_init_mem;
+		io_uring_prep_cmd_sock;		/* Added in 2.5,
+						   exported in 2.6. */
+		io_uring_prep_read_multishot;	/* Added in 2.6. */
+		io_uring_prep_waitid;		/* Added in 2.6. */
+		io_uring_prep_futex_wake;	/* Added in 2.6. */
+		io_uring_prep_futex_wait;	/* Added in 2.6. */
+		io_uring_prep_futex_waitv;	/* Added in 2.6. */
+} LIBURING_2.4;
+
+LIBURING_2.6 {
+	global:
+		io_uring_prep_fixed_fd_install;
+		io_uring_buf_ring_available;
+		io_uring_prep_ftruncate;
+		io_uring_prep_send_bundle;
+} LIBURING_2.5;
+
+LIBURING_2.7 {
+	global:
+		io_uring_prep_fadvise64;
+		io_uring_prep_madvise64;
+		io_uring_prep_bind;
+		io_uring_prep_listen;
+} LIBURING_2.6;
+
+LIBURING_2.8 {
+	global:
+		io_uring_register_clock;
+		io_uring_submit_and_wait_min_timeout;
+		io_uring_wait_cqes_min_timeout;
+		io_uring_clone_buffers;
+		io_uring_prep_cmd_discard;
+		io_uring_prep_open;
+		io_uring_prep_open_direct;
+} LIBURING_2.7;
diff --git a/src/liburing.map b/src/liburing.map
index 879f791..c13cd36 100644
--- a/src/liburing.map
+++ b/src/liburing.map
@@ -55,3 +55,51 @@
 		io_uring_register_buf_ring;
 		io_uring_unregister_buf_ring;
 } LIBURING_2.1;
+
+LIBURING_2.3 {
+	global:
+		io_uring_register_sync_cancel;
+		io_uring_register_file_alloc_range;
+		io_uring_enter;
+		io_uring_enter2;
+		io_uring_setup;
+		io_uring_register;
+		io_uring_get_events;
+		io_uring_submit_and_get_events;
+} LIBURING_2.2;
+
+LIBURING_2.4 {
+	global:
+		io_uring_major_version;
+		io_uring_minor_version;
+		io_uring_check_version;
+
+		io_uring_close_ring_fd;
+		io_uring_enable_rings;
+		io_uring_register_restrictions;
+		io_uring_setup_buf_ring;
+		io_uring_free_buf_ring;
+} LIBURING_2.3;
+
+LIBURING_2.5 {
+	global:
+		io_uring_queue_init_mem;
+} LIBURING_2.4;
+
+LIBURING_2.6 {
+	global:
+		io_uring_buf_ring_head;
+		io_uring_register_napi;
+		io_uring_unregister_napi;
+} LIBURING_2.5;
+
+LIBURING_2.7 {
+} LIBURING_2.6;
+
+LIBURING_2.8 {
+	global:
+		io_uring_register_clock;
+		io_uring_submit_and_wait_min_timeout;
+		io_uring_wait_cqes_min_timeout;
+		io_uring_clone_buffers;
+} LIBURING_2.7;
diff --git a/src/nolibc.c b/src/nolibc.c
index 9a04ead..ac81575 100644
--- a/src/nolibc.c
+++ b/src/nolibc.c
@@ -7,14 +7,21 @@
 #include "lib.h"
 #include "syscall.h"
 
-void *memset(void *s, int c, size_t n)
+void *__uring_memset(void *s, int c, size_t n)
 {
 	size_t i;
 	unsigned char *p = s;
 
-	for (i = 0; i < n; i++)
+	for (i = 0; i < n; i++) {
 		p[i] = (unsigned char) c;
 
+		/*
+		 * An empty inline ASM statement to keep the compiler from
+		 * auto-vectorizing this loop; the vectorized code is too
+		 * bloated for liburing.
+		 */
+		__asm__ volatile ("");
+	}
+
 	return s;
 }
 
diff --git a/src/queue.c b/src/queue.c
index ce0ecf6..dc79072 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -5,6 +5,7 @@
 #include "syscall.h"
 #include "liburing.h"
 #include "int_flags.h"
+#include "liburing/sanitize.h"
 #include "liburing/compat.h"
 #include "liburing/io_uring.h"
 
@@ -12,9 +13,15 @@
  * Returns true if we're not using SQ thread (thus nobody submits but us)
  * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly
  * awakened. For the latter case, we set the thread wakeup flag.
+ * If no SQEs are ready for submission, returns false.
  */
-static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
+static inline bool sq_ring_needs_enter(struct io_uring *ring,
+				       unsigned submit,
+				       unsigned *flags)
 {
+	if (!submit)
+		return false;
+
 	if (!(ring->flags & IORING_SETUP_SQPOLL))
 		return true;
 
@@ -49,6 +56,7 @@
 	unsigned wait_nr;
 	unsigned get_flags;
 	int sz;
+	int has_ts;
 	void *arg;
 };
 
@@ -58,52 +66,65 @@
 {
 	struct io_uring_cqe *cqe = NULL;
 	bool looped = false;
-	int err;
+	int err = 0;
 
 	do {
 		bool need_enter = false;
-		unsigned flags = 0;
+		unsigned flags = ring_enter_flags(ring);
 		unsigned nr_available;
 		int ret;
 
-		err = __io_uring_peek_cqe(ring, &cqe, &nr_available);
-		if (err)
+		ret = __io_uring_peek_cqe(ring, &cqe, &nr_available);
+		if (ret) {
+			if (!err)
+				err = ret;
 			break;
+		}
 		if (!cqe && !data->wait_nr && !data->submit) {
 			/*
-			 * If we already looped once, we already entererd
+			 * If we already looped once, we already entered
 			 * the kernel. Since there's nothing to submit or
 			 * wait for, don't keep retrying.
 			 */
 			if (looped || !cq_ring_needs_enter(ring)) {
-				err = -EAGAIN;
+				if (!err)
+					err = -EAGAIN;
 				break;
 			}
 			need_enter = true;
 		}
 		if (data->wait_nr > nr_available || need_enter) {
-			flags = IORING_ENTER_GETEVENTS | data->get_flags;
+			flags |= IORING_ENTER_GETEVENTS | data->get_flags;
 			need_enter = true;
 		}
-		if (data->submit && sq_ring_needs_enter(ring, &flags))
+		if (sq_ring_needs_enter(ring, data->submit, &flags))
 			need_enter = true;
 		if (!need_enter)
 			break;
+		if (looped && data->has_ts) {
+			struct io_uring_getevents_arg *arg = data->arg;
 
-		if (ring->int_flags & INT_FLAG_REG_RING)
-			flags |= IORING_ENTER_REGISTERED_RING;
-		ret = ____sys_io_uring_enter2(ring->enter_ring_fd, data->submit,
-					      data->wait_nr, flags, data->arg,
-					      data->sz);
+			if (!cqe && arg->ts && !err)
+				err = -ETIME;
+			break;
+		}
+
+		ret = __sys_io_uring_enter2(ring->enter_ring_fd, data->submit,
+					    data->wait_nr, flags, data->arg,
+					    data->sz);
 		if (ret < 0) {
-			err = ret;
+			if (!err)
+				err = ret;
 			break;
 		}
 
 		data->submit -= ret;
 		if (cqe)
 			break;
-		looped = true;
+		if (!looped) {
+			looped = true;
+			err = ret;
+		}
 	} while (1);
 
 	*cqe_ptr = cqe;
@@ -124,6 +145,13 @@
 	return _io_uring_get_cqe(ring, cqe_ptr, &data);
 }
 
+int io_uring_get_events(struct io_uring *ring)
+{
+	int flags = IORING_ENTER_GETEVENTS | ring_enter_flags(ring);
+
+	return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
+}
+
 /*
  * Fill in an array of IO completions up to count, if any are available.
  * Returns the amount of IO completions filled.
@@ -142,7 +170,7 @@
 	ready = io_uring_cq_ready(ring);
 	if (ready) {
 		unsigned head = *ring->cq.khead;
-		unsigned mask = *ring->cq.kring_mask;
+		unsigned mask = ring->cq.ring_mask;
 		unsigned last;
 		int i = 0;
 
@@ -155,19 +183,14 @@
 	}
 
 	if (overflow_checked)
-		goto done;
+		return 0;
 
 	if (cq_ring_needs_flush(ring)) {
-		int flags = IORING_ENTER_GETEVENTS;
-
-		if (ring->int_flags & INT_FLAG_REG_RING)
-			flags |= IORING_ENTER_REGISTERED_RING;
-		____sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
+		io_uring_get_events(ring);
 		overflow_checked = true;
 		goto again;
 	}
 
-done:
 	return 0;
 }
 
@@ -175,43 +198,29 @@
  * Sync internal state with kernel ring state on the SQ side. Returns the
  * number of pending items in the SQ ring, for the shared ring.
  */
-int __io_uring_flush_sq(struct io_uring *ring)
+static unsigned __io_uring_flush_sq(struct io_uring *ring)
 {
 	struct io_uring_sq *sq = &ring->sq;
-	const unsigned mask = *sq->kring_mask;
-	unsigned ktail = *sq->ktail;
-	unsigned to_submit = sq->sqe_tail - sq->sqe_head;
+	unsigned tail = sq->sqe_tail;
 
-	if (!to_submit)
-		goto out;
-
+	if (sq->sqe_head != tail) {
+		sq->sqe_head = tail;
+		/*
+		 * Ensure kernel sees the SQE updates before the tail update.
+		 */
+		if (!(ring->flags & IORING_SETUP_SQPOLL))
+			*sq->ktail = tail;
+		else
+			io_uring_smp_store_release(sq->ktail, tail);
+	}
 	/*
-	 * Fill in sqes that we have queued up, adding them to the kernel ring
-	 */
-	do {
-		sq->array[ktail & mask] = sq->sqe_head & mask;
-		ktail++;
-		sq->sqe_head++;
-	} while (--to_submit);
-
-	/*
-	 * Ensure that the kernel sees the SQE updates before it sees the tail
-	 * update.
-	 */
-	io_uring_smp_store_release(sq->ktail, ktail);
-out:
-	/*
-	 * This _may_ look problematic, as we're not supposed to be reading
-	 * SQ->head without acquire semantics. When we're in SQPOLL mode, the
-	 * kernel submitter could be updating this right now. For non-SQPOLL,
-	 * task itself does it, and there's no potential race. But even for
-	 * SQPOLL, the load is going to be potentially out-of-date the very
-	 * instant it's done, regardless or whether or not it's done
-	 * atomically. Worst case, we're going to be over-estimating what
-	 * we can submit. The point is, we need to be able to deal with this
-	 * situation regardless of any perceived atomicity.
-	 */
-	return ktail - *sq->khead;
+	 * This load needs to be atomic, since sq->khead is written concurrently
+	 * by the kernel, but it doesn't need to be load_acquire, since the
+	 * kernel doesn't store to the submission queue; it advances khead just
+	 * to indicate that it's finished reading the submission queue entries
+	 * so they're available for us to write to.
+	 */
+	return tail - IO_URING_READ_ONCE(*sq->khead);
 }
 
 /*
@@ -222,6 +231,7 @@
 				  struct io_uring_cqe **cqe_ptr,
 				  unsigned wait_nr,
 				  struct __kernel_timespec *ts,
+				  unsigned int min_wait_usec,
 				  sigset_t *sigmask)
 {
 	struct io_uring_getevents_arg arg = {
@@ -233,9 +243,13 @@
 		.wait_nr	= wait_nr,
 		.get_flags	= IORING_ENTER_EXT_ARG,
 		.sz		= sizeof(arg),
+		.has_ts		= ts != NULL,
 		.arg		= &arg
 	};
 
+	if (min_wait_usec && ring->features & IORING_FEAT_MIN_TIMEOUT)
+		arg.min_wait_usec = min_wait_usec;
+
 	return _io_uring_get_cqe(ring, cqe_ptr, &data);
 }
 
@@ -288,7 +302,7 @@
 	if (ts) {
 		if (ring->features & IORING_FEAT_EXT_ARG)
 			return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr,
-							ts, sigmask);
+							ts, 0, sigmask);
 		to_submit = __io_uring_submit_timeout(ring, wait_nr, ts);
 		if (to_submit < 0)
 			return to_submit;
@@ -297,11 +311,20 @@
 	return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
 }
 
-int io_uring_submit_and_wait_timeout(struct io_uring *ring,
-				     struct io_uring_cqe **cqe_ptr,
-				     unsigned wait_nr,
-				     struct __kernel_timespec *ts,
-				     sigset_t *sigmask)
+int io_uring_wait_cqes_min_timeout(struct io_uring *ring,
+				   struct io_uring_cqe **cqe_ptr,
+				   unsigned wait_nr,
+				   struct __kernel_timespec *ts,
+				   unsigned int min_wait_usec, sigset_t *sigmask)
+{
+	return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr, ts, min_wait_usec,
+					sigmask);
+}
+
+static int __io_uring_submit_and_wait_timeout(struct io_uring *ring,
+			struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
+			struct __kernel_timespec *ts,
+			unsigned int min_wait, sigset_t *sigmask)
 {
 	int to_submit;
 
@@ -310,6 +333,7 @@
 			struct io_uring_getevents_arg arg = {
 				.sigmask	= (unsigned long) sigmask,
 				.sigmask_sz	= _NSIG / 8,
+				.min_wait_usec	= min_wait,
 				.ts		= (unsigned long) ts
 			};
 			struct get_data data = {
@@ -317,6 +341,7 @@
 				.wait_nr	= wait_nr,
 				.get_flags	= IORING_ENTER_EXT_ARG,
 				.sz		= sizeof(arg),
+				.has_ts		= ts != NULL,
 				.arg		= &arg
 			};
 
@@ -331,6 +356,29 @@
 	return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
 }
 
+int io_uring_submit_and_wait_min_timeout(struct io_uring *ring,
+					 struct io_uring_cqe **cqe_ptr,
+					 unsigned wait_nr,
+					 struct __kernel_timespec *ts,
+					 unsigned min_wait,
+					 sigset_t *sigmask)
+{
+	if (!(ring->features & IORING_FEAT_MIN_TIMEOUT))
+		return -EINVAL;
+	return __io_uring_submit_and_wait_timeout(ring, cqe_ptr, wait_nr, ts,
+						  min_wait, sigmask);
+}
+
+int io_uring_submit_and_wait_timeout(struct io_uring *ring,
+				     struct io_uring_cqe **cqe_ptr,
+				     unsigned wait_nr,
+				     struct __kernel_timespec *ts,
+				     sigset_t *sigmask)
+{
+	return __io_uring_submit_and_wait_timeout(ring, cqe_ptr, wait_nr, ts, 0,
+						  sigmask);
+}
+
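A usage sketch for the new min-timeout wait (2.8); the batch size of 8, the 100us window, and the 1s cap are illustrative:

```c
#include <liburing.h>

static int wait_batch(struct io_uring *ring)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_cqe *cqe;

	/* Batch up to 8 CQEs; once the 100us minimum window has passed
	 * with at least one completion in hand, return early. The full
	 * 1s timeout applies only if nothing completes at all. Fails
	 * with -EINVAL without IORING_FEAT_MIN_TIMEOUT. */
	return io_uring_submit_and_wait_min_timeout(ring, &cqe, 8, &ts,
						    100, NULL);
}
```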
 /*
  * See io_uring_wait_cqes() - this function is the same, it just always uses
  * '1' as the wait_nr.
@@ -348,20 +396,20 @@
  * Returns number of sqes submitted
  */
 static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
-			     unsigned wait_nr)
+			     unsigned wait_nr, bool getevents)
 {
-	unsigned flags;
+	bool cq_needs_enter = getevents || wait_nr || cq_ring_needs_enter(ring);
+	unsigned flags = ring_enter_flags(ring);
 	int ret;
 
-	flags = 0;
-	if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
-		if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
-			flags |= IORING_ENTER_GETEVENTS;
-		if (ring->int_flags & INT_FLAG_REG_RING)
-			flags |= IORING_ENTER_REGISTERED_RING;
+	liburing_sanitize_ring(ring);
 
-		ret = ____sys_io_uring_enter(ring->enter_ring_fd, submitted,
-						wait_nr, flags, NULL);
+	if (sq_ring_needs_enter(ring, submitted, &flags) || cq_needs_enter) {
+		if (cq_needs_enter)
+			flags |= IORING_ENTER_GETEVENTS;
+
+		ret = __sys_io_uring_enter(ring->enter_ring_fd, submitted,
+					   wait_nr, flags, NULL);
 	} else
 		ret = submitted;
 
@@ -370,7 +418,7 @@
 
 static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
 {
-	return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr);
+	return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr, false);
 }
 
 /*
@@ -393,6 +441,11 @@
 	return __io_uring_submit_and_wait(ring, wait_nr);
 }
 
+int io_uring_submit_and_get_events(struct io_uring *ring)
+{
+	return __io_uring_submit(ring, __io_uring_flush_sq(ring), 0, true);
+}
+
 #ifdef LIBURING_INTERNAL
 struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 {
@@ -402,10 +455,7 @@
 
 int __io_uring_sqring_wait(struct io_uring *ring)
 {
-	int flags = IORING_ENTER_SQ_WAIT;
+	int flags = IORING_ENTER_SQ_WAIT | ring_enter_flags(ring);
 
-	if (ring->int_flags & INT_FLAG_REG_RING)
-		flags |= IORING_ENTER_REGISTERED_RING;
-
-	return  ____sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
+	return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
 }
diff --git a/src/register.c b/src/register.c
index 993c450..4fa2255 100644
--- a/src/register.c
+++ b/src/register.c
@@ -7,12 +7,32 @@
 #include "int_flags.h"
 #include "liburing/compat.h"
 #include "liburing/io_uring.h"
+#include "liburing/sanitize.h"
+
+static inline int do_register(struct io_uring *ring, unsigned int opcode,
+			      const void *arg, unsigned int nr_args)
+{
+	int fd;
+
+	liburing_sanitize_address(arg);
+
+	if (ring->int_flags & INT_FLAG_REG_REG_RING) {
+		opcode |= IORING_REGISTER_USE_REGISTERED_RING;
+		fd = ring->enter_ring_fd;
+	} else {
+		fd = ring->ring_fd;
+	}
+
+	return __sys_io_uring_register(fd, opcode, arg, nr_args);
+}
 
 int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off,
 					 const struct iovec *iovecs,
 					 const __u64 *tags,
 					 unsigned nr)
 {
+	liburing_sanitize_iovecs(iovecs, nr);
+
 	struct io_uring_rsrc_update2 up = {
 		.offset	= off,
 		.data = (unsigned long)iovecs,
@@ -20,9 +40,7 @@
 		.nr = nr,
 	};
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_BUFFERS_UPDATE, &up,
-					 sizeof(up));
+	return do_register(ring, IORING_REGISTER_BUFFERS_UPDATE, &up, sizeof(up));
 }
 
 int io_uring_register_buffers_tags(struct io_uring *ring,
@@ -30,15 +48,15 @@
 				   const __u64 *tags,
 				   unsigned nr)
 {
+	liburing_sanitize_iovecs(iovecs, nr);
+
 	struct io_uring_rsrc_register reg = {
 		.nr = nr,
 		.data = (unsigned long)iovecs,
 		.tags = (unsigned long)tags,
 	};
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_BUFFERS2, &reg,
-					 sizeof(reg));
+	return do_register(ring, IORING_REGISTER_BUFFERS2, &reg, sizeof(reg));
 }
 
 int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr)
@@ -48,34 +66,29 @@
 		.nr = nr,
 	};
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_BUFFERS2, &reg,
-					 sizeof(reg));
+	return do_register(ring, IORING_REGISTER_BUFFERS2, &reg, sizeof(reg));
 }
 
 int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
 			      unsigned nr_iovecs)
 {
-	int ret;
+	liburing_sanitize_iovecs(iovecs, nr_iovecs);
 
-	ret = ____sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS,
-					iovecs, nr_iovecs);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
 }
 
 int io_uring_unregister_buffers(struct io_uring *ring)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd,
-					IORING_UNREGISTER_BUFFERS, NULL, 0);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_UNREGISTER_BUFFERS, NULL, 0);
 }
 
 int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
 					const int *files, const __u64 *tags,
 					unsigned nr_files)
 {
+	liburing_sanitize_address(files);
+	liburing_sanitize_address(tags);
+
 	struct io_uring_rsrc_update2 up = {
 		.offset	= off,
 		.data = (unsigned long)files,
@@ -83,9 +96,7 @@
 		.nr = nr_files,
 	};
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_FILES_UPDATE2, &up,
-					 sizeof(up));
+	return do_register(ring, IORING_REGISTER_FILES_UPDATE2, &up, sizeof(up));
 }
 
 /*
@@ -96,16 +107,16 @@
  * Returns number of files updated on success, -ERROR on failure.
  */
 int io_uring_register_files_update(struct io_uring *ring, unsigned off,
-				   int *files, unsigned nr_files)
+				   const int *files, unsigned nr_files)
 {
+	liburing_sanitize_address(files);
+
 	struct io_uring_files_update up = {
 		.offset	= off,
 		.fds	= (unsigned long) files,
 	};
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_FILES_UPDATE, &up,
-					 nr_files);
+	return do_register(ring, IORING_REGISTER_FILES_UPDATE, &up, nr_files);
 }
 
 static int increase_rlimit_nofile(unsigned nr)
@@ -134,9 +145,7 @@
 	int ret, did_increase = 0;
 
 	do {
-		ret = ____sys_io_uring_register(ring->ring_fd,
-						IORING_REGISTER_FILES2, &reg,
-						sizeof(reg));
+		ret = do_register(ring, IORING_REGISTER_FILES2, &reg, sizeof(reg));
 		if (ret >= 0)
 			break;
 		if (ret == -EMFILE && !did_increase) {
@@ -153,6 +162,9 @@
 int io_uring_register_files_tags(struct io_uring *ring, const int *files,
 				 const __u64 *tags, unsigned nr)
 {
+	liburing_sanitize_address(files);
+	liburing_sanitize_address(tags);
+
 	struct io_uring_rsrc_register reg = {
 		.nr = nr,
 		.data = (unsigned long)files,
@@ -161,9 +173,7 @@
 	int ret, did_increase = 0;
 
 	do {
-		ret = ____sys_io_uring_register(ring->ring_fd,
-						IORING_REGISTER_FILES2, &reg,
-						sizeof(reg));
+		ret = do_register(ring, IORING_REGISTER_FILES2, &reg, sizeof(reg));
 		if (ret >= 0)
 			break;
 		if (ret == -EMFILE && !did_increase) {
@@ -182,10 +192,10 @@
 {
 	int ret, did_increase = 0;
 
+	liburing_sanitize_address(files);
+
 	do {
-		ret = ____sys_io_uring_register(ring->ring_fd,
-						IORING_REGISTER_FILES, files,
-						nr_files);
+		ret = do_register(ring, IORING_REGISTER_FILES, files, nr_files);
 		if (ret >= 0)
 			break;
 		if (ret == -EMFILE && !did_increase) {
@@ -201,100 +211,69 @@
 
 int io_uring_unregister_files(struct io_uring *ring)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_FILES,
-					NULL, 0);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_UNREGISTER_FILES, NULL, 0);
 }
 
 int io_uring_register_eventfd(struct io_uring *ring, int event_fd)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd, IORING_REGISTER_EVENTFD,
-					&event_fd, 1);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_REGISTER_EVENTFD, &event_fd, 1);
 }
 
 int io_uring_unregister_eventfd(struct io_uring *ring)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd,
-					IORING_UNREGISTER_EVENTFD, NULL, 0);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_UNREGISTER_EVENTFD, NULL, 0);
 }
 
 int io_uring_register_eventfd_async(struct io_uring *ring, int event_fd)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd,
-					IORING_REGISTER_EVENTFD_ASYNC,
-					&event_fd, 1);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_REGISTER_EVENTFD_ASYNC, &event_fd, 1);
 }
 
 int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
 			    unsigned int nr_ops)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd, IORING_REGISTER_PROBE, p,
-					nr_ops);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_REGISTER_PROBE, p, nr_ops);
 }
 
 int io_uring_register_personality(struct io_uring *ring)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_PERSONALITY, NULL, 0);
+	return do_register(ring, IORING_REGISTER_PERSONALITY, NULL, 0);
 }
 
 int io_uring_unregister_personality(struct io_uring *ring, int id)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_UNREGISTER_PERSONALITY, NULL,
-					 id);
+	return do_register(ring, IORING_UNREGISTER_PERSONALITY, NULL, id);
 }
 
 int io_uring_register_restrictions(struct io_uring *ring,
 				   struct io_uring_restriction *res,
 				   unsigned int nr_res)
 {
-	int ret;
-
-	ret = ____sys_io_uring_register(ring->ring_fd,
-					IORING_REGISTER_RESTRICTIONS, res,
-					nr_res);
-	return (ret < 0) ? ret : 0;
+	return do_register(ring, IORING_REGISTER_RESTRICTIONS, res, nr_res);
 }
 
 int io_uring_enable_rings(struct io_uring *ring)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_ENABLE_RINGS, NULL, 0);
+	return do_register(ring, IORING_REGISTER_ENABLE_RINGS, NULL, 0);
 }
 
 int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
 			       const cpu_set_t *mask)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_IOWQ_AFF, mask, cpusz);
+	if (cpusz >= (1U << 31))
+		return -EINVAL;
+
+	return do_register(ring, IORING_REGISTER_IOWQ_AFF, mask, (int) cpusz);
 }
 
 int io_uring_unregister_iowq_aff(struct io_uring *ring)
 {
-	return  ____sys_io_uring_register(ring->ring_fd,
-					  IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
+	return do_register(ring, IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
 }
 
 int io_uring_register_iowq_max_workers(struct io_uring *ring, unsigned int *val)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_IOWQ_MAX_WORKERS, val,
-					 2);
+	return do_register(ring, IORING_REGISTER_IOWQ_MAX_WORKERS, val, 2);
 }
 
 int io_uring_register_ring_fd(struct io_uring *ring)
@@ -305,11 +284,16 @@
 	};
 	int ret;
 
-	ret = ____sys_io_uring_register(ring->ring_fd, IORING_REGISTER_RING_FDS,
-					&up, 1);
+	if (ring->int_flags & INT_FLAG_REG_RING)
+		return -EEXIST;
+
+	ret = do_register(ring, IORING_REGISTER_RING_FDS, &up, 1);
 	if (ret == 1) {
 		ring->enter_ring_fd = up.offset;
 		ring->int_flags |= INT_FLAG_REG_RING;
+		if (ring->features & IORING_FEAT_REG_REG_RING) {
+			ring->int_flags |= INT_FLAG_REG_REG_RING;
+		}
 	}
 	return ret;
 }
@@ -322,26 +306,105 @@
 	};
 	int ret;
 
-	ret = ____sys_io_uring_register(ring->ring_fd,
-					IORING_UNREGISTER_RING_FDS, &up, 1);
+	if (!(ring->int_flags & INT_FLAG_REG_RING))
+		return -EINVAL;
+
+	ret = do_register(ring, IORING_UNREGISTER_RING_FDS, &up, 1);
 	if (ret == 1) {
 		ring->enter_ring_fd = ring->ring_fd;
-		ring->int_flags &= ~INT_FLAG_REG_RING;
+		ring->int_flags &= ~(INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING);
 	}
 	return ret;
 }
 
-int io_uring_register_buf_ring(struct io_uring *ring,
-			       struct io_uring_buf_reg *reg, unsigned int flags)
+int io_uring_close_ring_fd(struct io_uring *ring)
 {
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_REGISTER_PBUF_RING, reg, 1);
+	if (!(ring->features & IORING_FEAT_REG_REG_RING))
+		return -EOPNOTSUPP;
+	if (!(ring->int_flags & INT_FLAG_REG_RING))
+		return -EINVAL;
+	if (ring->ring_fd == -1)
+		return -EBADF;
+
+	__sys_close(ring->ring_fd);
+	ring->ring_fd = -1;
+	return 1;
+}
+
+int io_uring_register_buf_ring(struct io_uring *ring,
+			       struct io_uring_buf_reg *reg,
+			       unsigned int __maybe_unused flags)
+{
+	reg->flags |= flags;
+	return do_register(ring, IORING_REGISTER_PBUF_RING, reg, 1);
 }
 
 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid)
 {
 	struct io_uring_buf_reg reg = { .bgid = bgid };
 
-	return ____sys_io_uring_register(ring->ring_fd,
-					 IORING_UNREGISTER_PBUF_RING, &reg, 1);
+	return do_register(ring, IORING_UNREGISTER_PBUF_RING, &reg, 1);
+}
+
+int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head)
+{
+	liburing_sanitize_address(head);
+
+	struct io_uring_buf_status buf_status = {
+		.buf_group	= buf_group,
+	};
+	int ret;
+
+	ret = do_register(ring, IORING_REGISTER_PBUF_STATUS, &buf_status, 1);
+	if (ret)
+		return ret;
+	*head = buf_status.head;
+	return 0;
+}
+
+int io_uring_register_sync_cancel(struct io_uring *ring,
+				  struct io_uring_sync_cancel_reg *reg)
+{
+	return do_register(ring, IORING_REGISTER_SYNC_CANCEL, reg, 1);
+}
+
+int io_uring_register_file_alloc_range(struct io_uring *ring,
+					unsigned off, unsigned len)
+{
+	struct io_uring_file_index_range range = {
+		.off = off,
+		.len = len
+	};
+
+	return do_register(ring, IORING_REGISTER_FILE_ALLOC_RANGE, &range, 0);
+}
+
+int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi)
+{
+	return do_register(ring, IORING_REGISTER_NAPI, napi, 1);
+}
+
+int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi)
+{
+	return do_register(ring, IORING_UNREGISTER_NAPI, napi, 1);
+}
+
+int io_uring_register_clock(struct io_uring *ring,
+			    struct io_uring_clock_register *arg)
+{
+	return do_register(ring, IORING_REGISTER_CLOCK, arg, 0);
+}
+
+int io_uring_clone_buffers(struct io_uring *dst, struct io_uring *src)
+{
+	struct io_uring_clone_buffers buf = { .src_fd = src->ring_fd, };
+
+	if (src->int_flags & INT_FLAG_REG_REG_RING) {
+		buf.src_fd = src->enter_ring_fd;
+		buf.flags = IORING_REGISTER_SRC_REGISTERED;
+	} else {
+		buf.src_fd = src->ring_fd;
+	}
+
+	return do_register(dst, IORING_REGISTER_CLONE_BUFFERS, &buf, 1);
 }
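A sketch of the intended use of buffer cloning, new in 2.8: register once on a source ring, then share the registration with a second ring rather than paying for it twice. The helper name and parameters are assumptions of this example; the call requires IORING_REGISTER_CLONE_BUFFERS kernel support.

```c
#include <liburing.h>

static int share_buffers(struct io_uring *dst, struct io_uring *src,
			 const struct iovec *iovs, unsigned nr)
{
	int ret = io_uring_register_buffers(src, iovs, nr);

	if (ret)
		return ret;
	/* dst now sees the same registered buffers, no re-registration */
	return io_uring_clone_buffers(dst, src);
}
```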
diff --git a/src/sanitize.c b/src/sanitize.c
new file mode 100644
index 0000000..46391a6
--- /dev/null
+++ b/src/sanitize.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "liburing/sanitize.h"
+
+#include <sanitizer/asan_interface.h>
+#include <stdlib.h>
+#include "liburing.h"
+
+static inline void sanitize_sqe_addr(struct io_uring_sqe *sqe)
+{
+	if (__asan_address_is_poisoned((void *) (unsigned long) sqe->addr) != 0) {
+		__asan_describe_address((void *) (unsigned long) sqe->addr);
+		exit(1);
+	}
+}
+static inline void sanitize_sqe_optval(struct io_uring_sqe *sqe)
+{
+	if (__asan_region_is_poisoned((void *) (unsigned long) sqe->optval, sqe->optlen) != 0) {
+		__asan_describe_address((void *) (unsigned long) sqe->optval);
+		exit(1);
+	}
+}
+static inline void sanitize_sqe_addr2(struct io_uring_sqe *sqe)
+{
+	if (__asan_address_is_poisoned((void *) (unsigned long) sqe->addr2) != 0) {
+		__asan_describe_address((void *) (unsigned long) sqe->addr2);
+		exit(1);
+	}
+}
+static inline void sanitize_sqe_addr3(struct io_uring_sqe *sqe)
+{
+	if (__asan_address_is_poisoned((void *) (unsigned long) sqe->addr3) != 0) {
+		__asan_describe_address((void *) (unsigned long) sqe->addr3);
+		exit(1);
+	}
+}
+static inline void sanitize_sqe_addr_and_add2(struct io_uring_sqe *sqe)
+{
+	sanitize_sqe_addr(sqe);
+	sanitize_sqe_addr2(sqe);
+}
+static inline void sanitize_sqe_addr_and_add3(struct io_uring_sqe *sqe)
+{
+	sanitize_sqe_addr(sqe);
+	sanitize_sqe_addr3(sqe);
+}
+static inline void sanitize_sqe_nop(struct io_uring_sqe *sqe)
+{
+}
+
+typedef void (*sanitize_sqe_handler)(struct io_uring_sqe *sqe);
+static sanitize_sqe_handler sanitize_handlers[IORING_OP_LAST];
+static bool sanitize_handlers_initialized = false;
+
+static inline void initialize_sanitize_handlers(void)
+{
+	if (sanitize_handlers_initialized)
+		return;
+
+	sanitize_handlers[IORING_OP_NOP] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_READV] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_WRITEV] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FSYNC] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_READ_FIXED] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_WRITE_FIXED] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_POLL_ADD] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_POLL_REMOVE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_SYNC_FILE_RANGE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SENDMSG] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_RECVMSG] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_TIMEOUT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_TIMEOUT_REMOVE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_ACCEPT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_ASYNC_CANCEL] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_LINK_TIMEOUT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_CONNECT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FALLOCATE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_OPENAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_CLOSE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FILES_UPDATE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_STATX] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_READ] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_WRITE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FADVISE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_MADVISE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SEND] = sanitize_sqe_addr_and_add2;
+	sanitize_handlers[IORING_OP_RECV] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_OPENAT2] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_EPOLL_CTL] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SPLICE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_PROVIDE_BUFFERS] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_REMOVE_BUFFERS] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_TEE] = sanitize_sqe_nop;
+	sanitize_handlers[IORING_OP_SHUTDOWN] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_RENAMEAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_UNLINKAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_MKDIRAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SYMLINKAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_LINKAT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_MSG_RING] = sanitize_sqe_addr_and_add3;
+	sanitize_handlers[IORING_OP_FSETXATTR] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SETXATTR] = sanitize_sqe_addr_and_add3;
+	sanitize_handlers[IORING_OP_FGETXATTR] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_GETXATTR] = sanitize_sqe_addr_and_add3;
+	sanitize_handlers[IORING_OP_SOCKET] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_URING_CMD] = sanitize_sqe_optval;
+	sanitize_handlers[IORING_OP_SEND_ZC] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_SENDMSG_ZC] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_READ_MULTISHOT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_WAITID] = sanitize_sqe_addr_and_add2;
+	sanitize_handlers[IORING_OP_FUTEX_WAIT] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FUTEX_WAKE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FUTEX_WAITV] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FIXED_FD_INSTALL] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_FTRUNCATE] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_BIND] = sanitize_sqe_addr;
+	sanitize_handlers[IORING_OP_LISTEN] = sanitize_sqe_addr;
+	sanitize_handlers_initialized = true;
+}
+
+void liburing_sanitize_ring(struct io_uring *ring)
+{
+	struct io_uring_sq *sq = &ring->sq;
+	struct io_uring_sqe *sqe;
+	unsigned int head;
+	int shift = 0;
+
+	initialize_sanitize_handlers();
+
+	if (ring->flags & IORING_SETUP_SQE128)
+		shift = 1;
+	if (!(ring->flags & IORING_SETUP_SQPOLL))
+		head = *sq->khead;
+	else
+		head = io_uring_smp_load_acquire(sq->khead);
+
+	while (head != sq->sqe_tail) {
+		sqe = &sq->sqes[(head & sq->ring_mask) << shift];
+		if (sqe->opcode < IORING_OP_LAST)
+			sanitize_handlers[sqe->opcode](sqe);
+		head++;
+	}
+}
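
/*
 * The shift-based indexing above is the IORING_SETUP_SQE128 rule: each
 * 128-byte SQE spans two slots of the base array, so the masked head is
 * doubled. A minimal standalone sketch of just that arithmetic:
 */
#include <stdio.h>

int main(void)
{
	unsigned ring_mask = 7;	/* 8-entry SQ ring */
	int shift = 1;		/* SQE128: one SQE == two base slots */
	unsigned head;

	/* walk a few head values, including wrap-around past the mask */
	for (head = 6; head < 10; head++)
		printf("head=%u -> sqes[] slot %u\n", head,
		       (head & ring_mask) << shift);
	return 0;
}
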
+
+void liburing_sanitize_address(const void *addr)
+{
+	if (__asan_address_is_poisoned(addr) != 0) {
+		__asan_describe_address((void *)addr);
+		exit(1);
+	}
+}
+
+void liburing_sanitize_region(const void *addr, unsigned int len)
+{
+	if (__asan_region_is_poisoned((void *)addr, len) != 0) {
+		__asan_describe_address((void *)addr);
+		exit(1);
+	}
+}
+
+void liburing_sanitize_iovecs(const struct iovec *iovecs, unsigned nr)
+{
+	unsigned i;
+
+	if (__asan_address_is_poisoned((void *)iovecs) != 0) {
+		__asan_describe_address((void *)iovecs);
+		exit(1);
+	}
+
+	for (i = 0; i < nr; i++) {
+		if (__asan_region_is_poisoned((void *)iovecs[i].iov_base, iovecs[i].iov_len) != 0) {
+			__asan_describe_address((void *)iovecs[i].iov_base);
+			exit(1);
+		}
+	}
+}
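
/*
 * For reference, the three helpers above are thin wrappers over the
 * AddressSanitizer manual-poisoning interface. A standalone sketch of that
 * interface (compile with -fsanitize=address; buffer and sizes arbitrary):
 */
#include <sanitizer/asan_interface.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char *buf = malloc(64);

	/* manually poison the second half of the allocation */
	__asan_poison_memory_region(buf + 32, 32);

	/* as in liburing_sanitize_region(): non-NULL means some byte in
	 * the range is poisoned */
	if (__asan_region_is_poisoned(buf, 64) != NULL)
		printf("poisoned byte found in range\n");

	/* as in liburing_sanitize_address(): single-address check */
	printf("buf[0] poisoned: %d\n", __asan_address_is_poisoned(buf));

	__asan_unpoison_memory_region(buf + 32, 32);
	free(buf);
	return 0;
}
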
diff --git a/src/setup.c b/src/setup.c
index d2adc7f..da2020e 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -5,16 +5,96 @@
 #include "syscall.h"
 #include "liburing.h"
 #include "int_flags.h"
+#include "setup.h"
 #include "liburing/compat.h"
 #include "liburing/io_uring.h"
 
+#define KERN_MAX_ENTRIES	32768
+#define KERN_MAX_CQ_ENTRIES	(2 * KERN_MAX_ENTRIES)
+
+static inline int __fls(int x)
+{
+	if (!x)
+		return 0;
+	return 8 * sizeof(x) - __builtin_clz(x);
+}
+
+static unsigned roundup_pow2(unsigned depth)
+{
+	return 1U << __fls(depth - 1);
+}
+
+static int get_sq_cq_entries(unsigned entries, struct io_uring_params *p,
+			     unsigned *sq, unsigned *cq)
+{
+	unsigned cq_entries;
+
+	if (!entries)
+		return -EINVAL;
+	if (entries > KERN_MAX_ENTRIES) {
+		if (!(p->flags & IORING_SETUP_CLAMP))
+			return -EINVAL;
+		entries = KERN_MAX_ENTRIES;
+	}
+
+	entries = roundup_pow2(entries);
+	if (p->flags & IORING_SETUP_CQSIZE) {
+		if (!p->cq_entries)
+			return -EINVAL;
+		cq_entries = p->cq_entries;
+		if (cq_entries > KERN_MAX_CQ_ENTRIES) {
+			if (!(p->flags & IORING_SETUP_CLAMP))
+				return -EINVAL;
+			cq_entries = KERN_MAX_CQ_ENTRIES;
+		}
+		cq_entries = roundup_pow2(cq_entries);
+		if (cq_entries < entries)
+			return -EINVAL;
+	} else {
+		cq_entries = 2 * entries;
+	}
+
+	*sq = entries;
+	*cq = cq_entries;
+	return 0;
+}
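
/*
 * To make the sizing rule concrete: SQ entries round up to the next power
 * of two, and the CQ defaults to twice the SQ unless IORING_SETUP_CQSIZE
 * overrides it. A standalone sketch of the arithmetic (assumes 32-bit
 * unsigned; the depth <= 1 guard stands in for the __fls(0) case above):
 */
#include <stdio.h>

static unsigned roundup_pow2(unsigned depth)
{
	if (depth <= 1)
		return 1;
	return 1U << (32 - __builtin_clz(depth - 1));
}

int main(void)
{
	unsigned entries[] = { 1, 3, 32, 33, 1000 };
	int i;

	for (i = 0; i < 5; i++)
		printf("%u -> sq=%u cq=%u\n", entries[i],
		       roundup_pow2(entries[i]),
		       2 * roundup_pow2(entries[i]));
	return 0;
}
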
+
 static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq)
 {
-	__sys_munmap(sq->ring_ptr, sq->ring_sz);
-	if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr)
+	if (sq->ring_sz)
+		__sys_munmap(sq->ring_ptr, sq->ring_sz);
+	if (cq->ring_ptr && cq->ring_sz && cq->ring_ptr != sq->ring_ptr)
 		__sys_munmap(cq->ring_ptr, cq->ring_sz);
 }
 
+static void io_uring_setup_ring_pointers(struct io_uring_params *p,
+					 struct io_uring_sq *sq,
+					 struct io_uring_cq *cq)
+{
+	sq->khead = sq->ring_ptr + p->sq_off.head;
+	sq->ktail = sq->ring_ptr + p->sq_off.tail;
+	sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
+	sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
+	sq->kflags = sq->ring_ptr + p->sq_off.flags;
+	sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
+	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
+		sq->array = sq->ring_ptr + p->sq_off.array;
+
+	cq->khead = cq->ring_ptr + p->cq_off.head;
+	cq->ktail = cq->ring_ptr + p->cq_off.tail;
+	cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
+	cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
+	cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
+	cq->cqes = cq->ring_ptr + p->cq_off.cqes;
+	if (p->cq_off.flags)
+		cq->kflags = cq->ring_ptr + p->cq_off.flags;
+
+	sq->ring_mask = *sq->kring_mask;
+	sq->ring_entries = *sq->kring_entries;
+	cq->ring_mask = *cq->kring_mask;
+	cq->ring_entries = *cq->kring_entries;
+}
+
 static int io_uring_mmap(int fd, struct io_uring_params *p,
 			 struct io_uring_sq *sq, struct io_uring_cq *cq)
 {
@@ -52,14 +132,6 @@
 		}
 	}
 
-	sq->khead = sq->ring_ptr + p->sq_off.head;
-	sq->ktail = sq->ring_ptr + p->sq_off.tail;
-	sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
-	sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
-	sq->kflags = sq->ring_ptr + p->sq_off.flags;
-	sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
-	sq->array = sq->ring_ptr + p->sq_off.array;
-
 	size = sizeof(struct io_uring_sqe);
 	if (p->flags & IORING_SETUP_SQE128)
 		size += 64;
@@ -72,14 +144,7 @@
 		return ret;
 	}
 
-	cq->khead = cq->ring_ptr + p->cq_off.head;
-	cq->ktail = cq->ring_ptr + p->cq_off.tail;
-	cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
-	cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
-	cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
-	cq->cqes = cq->ring_ptr + p->cq_off.cqes;
-	if (p->cq_off.flags)
-		cq->kflags = cq->ring_ptr + p->cq_off.flags;
+	io_uring_setup_ring_pointers(p, sq, cq);
 	return 0;
 }
 
@@ -89,25 +154,18 @@
  * Returns -errno on error, or zero on success.  On success, 'ring'
  * contains the necessary information to read/write to the rings.
  */
-int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring)
+__cold int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+			       struct io_uring *ring)
 {
-	int ret;
-
 	memset(ring, 0, sizeof(*ring));
-	ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
-	if (!ret) {
-		ring->flags = p->flags;
-		ring->ring_fd = ring->enter_ring_fd = fd;
-		ring->int_flags = 0;
-	}
-	return ret;
+	return io_uring_mmap(fd, p, &ring->sq, &ring->cq);
 }
 
 /*
  * Ensure that the mmap'ed rings aren't available to a child after a fork(2).
  * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges.
  */
-int io_uring_ring_dontfork(struct io_uring *ring)
+__cold int io_uring_ring_dontfork(struct io_uring *ring)
 {
 	size_t len;
 	int ret;
@@ -118,7 +176,7 @@
 	len = sizeof(struct io_uring_sqe);
 	if (ring->flags & IORING_SETUP_SQE128)
 		len += 64;
-	len *= *ring->sq.kring_entries;
+	len *= ring->sq.ring_entries;
 	ret = __sys_madvise(ring->sq.sqes, len, MADV_DONTFORK);
 	if (ret < 0)
 		return ret;
@@ -138,30 +196,228 @@
 	return 0;
 }
 
+/* FIXME */
+static size_t huge_page_size = 2 * 1024 * 1024;
+
+#define KRING_SIZE	64
+
+/*
+ * Returns negative for error, or number of bytes used in the buffer on success
+ */
+static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
+			       struct io_uring_sq *sq, struct io_uring_cq *cq,
+			       void *buf, size_t buf_size)
+{
+	unsigned long page_size = get_page_size();
+	unsigned sq_entries, cq_entries;
+	size_t ring_mem, sqes_mem, cqes_mem;
+	unsigned long mem_used = 0;
+	void *ptr;
+	int ret;
+
+	ret = get_sq_cq_entries(entries, p, &sq_entries, &cq_entries);
+	if (ret)
+		return ret;
+
+	ring_mem = KRING_SIZE;
+
+	sqes_mem = sq_entries * sizeof(struct io_uring_sqe);
+	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
+		sqes_mem += sq_entries * sizeof(unsigned);
+	sqes_mem = (sqes_mem + page_size - 1) & ~(page_size - 1);
+
+	cqes_mem = cq_entries * sizeof(struct io_uring_cqe);
+	if (p->flags & IORING_SETUP_CQE32)
+		cqes_mem *= 2;
+	ring_mem += sqes_mem + cqes_mem;
+	mem_used = ring_mem;
+	mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
+
+	/*
+	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
+	 * huge page by itself, so the SQ entries won't fit in the same huge
+	 * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
+	 * but check that too to future-proof (e.g. against different huge page
+	 * sizes). Bail out early so we don't overrun.
+	 */
+	if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
+		return -ENOMEM;
+
+	if (buf) {
+		if (mem_used > buf_size)
+			return -ENOMEM;
+		ptr = buf;
+	} else {
+		int map_hugetlb = 0;
+		if (sqes_mem <= page_size)
+			buf_size = page_size;
+		else {
+			buf_size = huge_page_size;
+			map_hugetlb = MAP_HUGETLB;
+		}
+		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
+					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
+					-1, 0);
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
+	}
+
+	sq->sqes = ptr;
+	if (mem_used <= buf_size) {
+		sq->ring_ptr = (void *) sq->sqes + sqes_mem;
+		/* clear ring sizes; we have just one mmap() to undo */
+		cq->ring_sz = 0;
+		sq->ring_sz = 0;
+	} else {
+		int map_hugetlb = 0;
+		if (ring_mem <= page_size)
+			buf_size = page_size;
+		else {
+			buf_size = huge_page_size;
+			map_hugetlb = MAP_HUGETLB;
+		}
+		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
+					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
+					-1, 0);
+		if (IS_ERR(ptr)) {
+			__sys_munmap(sq->sqes, 1);
+			return PTR_ERR(ptr);
+		}
+		sq->ring_ptr = ptr;
+		sq->ring_sz = buf_size;
+		cq->ring_sz = 0;
+	}
+
+	cq->ring_ptr = (void *) sq->ring_ptr;
+	p->sq_off.user_addr = (unsigned long) sq->sqes;
+	p->cq_off.user_addr = (unsigned long) sq->ring_ptr;
+	return (int) mem_used;
+}
+
+int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+				 struct io_uring_params *p, void *buf,
+				 size_t buf_size)
+{
+	int fd, ret = 0;
+	unsigned *sq_array;
+	unsigned sq_entries, index;
+
+	memset(ring, 0, sizeof(*ring));
+
+	/*
+	 * The kernel does this check already, but checking it here allows us
+	 * to avoid handling it below.
+	 */
+	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY
+	    && !(p->flags & IORING_SETUP_NO_MMAP))
+		return -EINVAL;
+
+	if (p->flags & IORING_SETUP_NO_MMAP) {
+		ret = io_uring_alloc_huge(entries, p, &ring->sq, &ring->cq,
+						buf, buf_size);
+		if (ret < 0)
+			return ret;
+		if (buf)
+			ring->int_flags |= INT_FLAG_APP_MEM;
+	}
+
+	fd = __sys_io_uring_setup(entries, p);
+	if (fd < 0) {
+		if ((p->flags & IORING_SETUP_NO_MMAP) &&
+		    !(ring->int_flags & INT_FLAG_APP_MEM)) {
+			__sys_munmap(ring->sq.sqes, 1);
+			io_uring_unmap_rings(&ring->sq, &ring->cq);
+		}
+		return fd;
+	}
+
+	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
+		ret = io_uring_queue_mmap(fd, p, ring);
+		if (ret) {
+			__sys_close(fd);
+			return ret;
+		}
+	} else {
+		io_uring_setup_ring_pointers(p, &ring->sq, &ring->cq);
+	}
+
+	/*
+	 * Directly map SQ slots to SQEs
+	 */
+	sq_entries = ring->sq.ring_entries;
+
+	if (!(p->flags & IORING_SETUP_NO_SQARRAY)) {
+		sq_array = ring->sq.array;
+		for (index = 0; index < sq_entries; index++)
+			sq_array[index] = index;
+	}
+	ring->features = p->features;
+	ring->flags = p->flags;
+	ring->enter_ring_fd = fd;
+	if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY) {
+		ring->ring_fd = -1;
+		ring->int_flags |= INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING;
+	} else {
+		ring->ring_fd = fd;
+	}
+
+	return ret;
+}
+
+static int io_uring_queue_init_try_nosqarr(unsigned entries, struct io_uring *ring,
+					   struct io_uring_params *p, void *buf,
+					   size_t buf_size)
+{
+	unsigned flags = p->flags;
+	int ret;
+
+	p->flags |= IORING_SETUP_NO_SQARRAY;
+	ret = __io_uring_queue_init_params(entries, ring, p, buf, buf_size);
+
+	/* don't fall back if NOSQARRAY was explicitly asked for */
+	if (ret != -EINVAL || (flags & IORING_SETUP_NO_SQARRAY))
+		return ret;
+
+	p->flags = flags;
+	return __io_uring_queue_init_params(entries, ring, p, buf, buf_size);
+}
+
+/*
+ * Like io_uring_queue_init_params(), except it allows the application to pass
+ * in a pre-allocated memory range that is used for the shared data between
+ * the kernel and the application. This includes the sqe array and the two
+ * rings. The memory must be contiguous; the expected use case is that the
+ * app allocates a huge page and passes it in.
+ *
+ * Returns the number of bytes used in the buffer; the app can then reuse
+ * the buffer at the returned offset to put more rings in the same huge
+ * page. Returns -ENOMEM if there's not enough room left in the buffer to
+ * host the ring.
+ */
+int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
+			    struct io_uring_params *p,
+			    void *buf, size_t buf_size)
+{
+	/* should already be set... */
+	p->flags |= IORING_SETUP_NO_MMAP;
+	return io_uring_queue_init_try_nosqarr(entries, ring, p, buf, buf_size);
+}
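
/*
 * Usage sketch for io_uring_queue_init_mem(), assuming a kernel with
 * IORING_SETUP_NO_MMAP support. The 2MB MAP_HUGETLB allocation mirrors what
 * io_uring_alloc_huge() would otherwise create internally; with app-provided
 * memory the app also owns the unmap.
 */
#include <liburing.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	struct io_uring_params p = { };
	struct io_uring ring;
	size_t size = 2 * 1024 * 1024;
	void *buf;
	int used;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	used = io_uring_queue_init_mem(8, &ring, &p, buf, size);
	if (used < 0) {
		fprintf(stderr, "init_mem: %d\n", used);
		return 1;
	}
	/* a second ring could be placed at buf + used */
	printf("ring consumed %d bytes of the buffer\n", used);

	io_uring_queue_exit(&ring);	/* INT_FLAG_APP_MEM: no munmap here */
	munmap(buf, size);
	return 0;
}
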
+
 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
 			       struct io_uring_params *p)
 {
-	int fd, ret;
+	int ret;
 
-	fd = ____sys_io_uring_setup(entries, p);
-	if (fd < 0)
-		return fd;
-
-	ret = io_uring_queue_mmap(fd, p, ring);
-	if (ret) {
-		__sys_close(fd);
-		return ret;
-	}
-
-	ring->features = p->features;
-	return 0;
+	ret = io_uring_queue_init_try_nosqarr(entries, ring, p, NULL, 0);
+	return ret >= 0 ? 0 : ret;
 }
 
 /*
  * Returns -errno on error, or zero on success. On success, 'ring'
  * contains the necessary information to read/write to the rings.
  */
-int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
+__cold int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+			       unsigned flags)
 {
 	struct io_uring_params p;
 
@@ -171,34 +427,44 @@
 	return io_uring_queue_init_params(entries, ring, &p);
 }
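
/*
 * For completeness, the common path this feeds: a minimal init/submit/reap
 * round trip against a default ring.
 */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		printf("nop completed, res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}
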
 
-void io_uring_queue_exit(struct io_uring *ring)
+__cold void io_uring_queue_exit(struct io_uring *ring)
 {
 	struct io_uring_sq *sq = &ring->sq;
 	struct io_uring_cq *cq = &ring->cq;
 	size_t sqe_size;
 
-	sqe_size = sizeof(struct io_uring_sqe);
-	if (ring->flags & IORING_SETUP_SQE128)
-		sqe_size += 64;
-	__sys_munmap(sq->sqes, sqe_size * *sq->kring_entries);
-	io_uring_unmap_rings(sq, cq);
+	if (!sq->ring_sz && !(ring->int_flags & INT_FLAG_APP_MEM)) {
+		sqe_size = sizeof(struct io_uring_sqe);
+		if (ring->flags & IORING_SETUP_SQE128)
+			sqe_size += 64;
+		__sys_munmap(sq->sqes, sqe_size * sq->ring_entries);
+		io_uring_unmap_rings(sq, cq);
+	} else {
+		if (!(ring->int_flags & INT_FLAG_APP_MEM)) {
+			__sys_munmap(sq->sqes,
+				*sq->kring_entries * sizeof(struct io_uring_sqe));
+			io_uring_unmap_rings(sq, cq);
+		}
+	}
+
 	/*
 	 * Not strictly required, but frees up the slot we used now rather
 	 * than at process exit time.
 	 */
 	if (ring->int_flags & INT_FLAG_REG_RING)
 		io_uring_unregister_ring_fd(ring);
-	__sys_close(ring->ring_fd);
+	if (ring->ring_fd != -1)
+		__sys_close(ring->ring_fd);
 }
 
-struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
+__cold struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
 {
 	struct io_uring_probe *probe;
 	size_t len;
 	int r;
 
 	len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op);
-	probe = uring_malloc(len);
+	probe = malloc(len);
 	if (!probe)
 		return NULL;
 	memset(probe, 0, len);
@@ -207,11 +473,11 @@
 	if (r >= 0)
 		return probe;
 
-	uring_free(probe);
+	free(probe);
 	return NULL;
 }
 
-struct io_uring_probe *io_uring_get_probe(void)
+__cold struct io_uring_probe *io_uring_get_probe(void)
 {
 	struct io_uring ring;
 	struct io_uring_probe *probe;
@@ -226,34 +492,20 @@
 	return probe;
 }
 
-void io_uring_free_probe(struct io_uring_probe *probe)
+__cold void io_uring_free_probe(struct io_uring_probe *probe)
 {
-	uring_free(probe);
+	free(probe);
 }
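
/*
 * Usage sketch for the probe API above: ask the running kernel which
 * opcodes it supports before relying on them.
 */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring_probe *probe = io_uring_get_probe();

	if (!probe) {
		fprintf(stderr, "probe failed (kernel too old?)\n");
		return 1;
	}
	printf("IORING_OP_SEND supported: %d\n",
	       io_uring_opcode_supported(probe, IORING_OP_SEND));
	io_uring_free_probe(probe);
	return 0;
}
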
 
-static inline int __fls(int x)
-{
-	if (!x)
-		return 0;
-	return 8 * sizeof(x) - __builtin_clz(x);
-}
-
-static unsigned roundup_pow2(unsigned depth)
-{
-	return 1UL << __fls(depth - 1);
-}
-
-static size_t npages(size_t size, unsigned page_size)
+static size_t npages(size_t size, long page_size)
 {
 	size--;
 	size /= page_size;
-	return __fls(size);
+	return __fls((int) size);
 }
 
-#define KRING_SIZE	320
-
 static size_t rings_size(struct io_uring_params *p, unsigned entries,
-			 unsigned cq_entries, unsigned page_size)
+			 unsigned cq_entries, long page_size)
 {
 	size_t pages, sq_size, cq_size;
 
@@ -273,9 +525,6 @@
 	return pages * page_size;
 }
 
-#define KERN_MAX_ENTRIES	32768
-#define KERN_MAX_CQ_ENTRIES	(2 * KERN_MAX_ENTRIES)
-
 /*
  * Return the required ulimit -l memlock memory required for a given ring
  * setup, in bytes. May return -errno on error. On newer (5.12+) kernels,
@@ -284,13 +533,17 @@
  * return the required memory so that the caller can ensure that enough space
  * is available before setting up a ring with the specified parameters.
  */
-ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p)
+__cold ssize_t io_uring_mlock_size_params(unsigned entries,
+					  struct io_uring_params *p)
 {
-	struct io_uring_params lp = { };
+	struct io_uring_params lp;
 	struct io_uring ring;
-	unsigned cq_entries;
+	unsigned cq_entries, sq;
 	long page_size;
 	ssize_t ret;
+	int cret;
+
+	memset(&lp, 0, sizeof(lp));
 
 	/*
 	 * We only really use this inited ring to see if the kernel is newer
@@ -318,34 +571,119 @@
 		entries = KERN_MAX_ENTRIES;
 	}
 
-	entries = roundup_pow2(entries);
-	if (p->flags & IORING_SETUP_CQSIZE) {
-		if (!p->cq_entries)
-			return -EINVAL;
-		cq_entries = p->cq_entries;
-		if (cq_entries > KERN_MAX_CQ_ENTRIES) {
-			if (!(p->flags & IORING_SETUP_CLAMP))
-				return -EINVAL;
-			cq_entries = KERN_MAX_CQ_ENTRIES;
-		}
-		cq_entries = roundup_pow2(cq_entries);
-		if (cq_entries < entries)
-			return -EINVAL;
-	} else {
-		cq_entries = 2 * entries;
-	}
+	cret = get_sq_cq_entries(entries, p, &sq, &cq_entries);
+	if (cret)
+		return cret;
 
 	page_size = get_page_size();
-	return rings_size(p, entries, cq_entries, page_size);
+	return rings_size(p, sq, cq_entries, page_size);
 }
 
 /*
  * Return required ulimit -l memory space for a given ring setup. See
  * @io_uring_mlock_size_params().
  */
-ssize_t io_uring_mlock_size(unsigned entries, unsigned flags)
+__cold ssize_t io_uring_mlock_size(unsigned entries, unsigned flags)
 {
-	struct io_uring_params p = { .flags = flags, };
+	struct io_uring_params p;
 
+	memset(&p, 0, sizeof(p));
+	p.flags = flags;
 	return io_uring_mlock_size_params(entries, &p);
 }
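
/*
 * Usage sketch: on pre-5.12 kernels, which charge ring memory against
 * RLIMIT_MEMLOCK, this lets a caller verify the budget before setting up.
 * Newer kernels report zero since the memory is accounted elsewhere.
 */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	ssize_t need = io_uring_mlock_size(256, 0);

	if (need < 0)
		fprintf(stderr, "mlock_size: %zd\n", need);
	else if (!need)
		printf("no memlock budget required on this kernel\n");
	else
		printf("256 entries need %zd bytes of memlock\n", need);
	return 0;
}
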
+
+#if defined(__hppa__)
+static struct io_uring_buf_ring *br_setup(struct io_uring *ring,
+					  unsigned int nentries, int bgid,
+					  unsigned int flags, int *err)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_buf_reg reg;
+	size_t ring_size;
+	off_t off;
+	int lret;
+
+	memset(&reg, 0, sizeof(reg));
+	reg.ring_entries = nentries;
+	reg.bgid = bgid;
+	reg.flags = IOU_PBUF_RING_MMAP;
+
+	*err = 0;
+	lret = io_uring_register_buf_ring(ring, &reg, flags);
+	if (lret) {
+		*err = lret;
+		return NULL;
+	}
+
+	off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
+	ring_size = nentries * sizeof(struct io_uring_buf);
+	br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, ring->ring_fd, off);
+	if (IS_ERR(br)) {
+		*err = PTR_ERR(br);
+		return NULL;
+	}
+
+	return br;
+}
+#else
+static struct io_uring_buf_ring *br_setup(struct io_uring *ring,
+					  unsigned int nentries, int bgid,
+					  unsigned int flags, int *err)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_buf_reg reg;
+	size_t ring_size;
+	int lret;
+
+	memset(&reg, 0, sizeof(reg));
+	ring_size = nentries * sizeof(struct io_uring_buf);
+	br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (IS_ERR(br)) {
+		*err = PTR_ERR(br);
+		return NULL;
+	}
+
+	reg.ring_addr = (unsigned long) (uintptr_t) br;
+	reg.ring_entries = nentries;
+	reg.bgid = bgid;
+
+	*err = 0;
+	lret = io_uring_register_buf_ring(ring, &reg, flags);
+	if (lret) {
+		__sys_munmap(br, ring_size);
+		*err = lret;
+		br = NULL;
+	}
+
+	return br;
+}
+#endif
+
+struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring,
+						  unsigned int nentries,
+						  int bgid, unsigned int flags,
+						  int *err)
+{
+	struct io_uring_buf_ring *br;
+
+	br = br_setup(ring, nentries, bgid, flags, err);
+	if (br)
+		io_uring_buf_ring_init(br);
+
+	return br;
+}
+
+int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br,
+			   unsigned int nentries, int bgid)
+{
+	int ret;
+
+	ret = io_uring_unregister_buf_ring(ring, bgid);
+	if (ret)
+		return ret;
+
+	__sys_munmap(br, nentries * sizeof(struct io_uring_buf));
+	return 0;
+}
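
/*
 * Usage sketch for the new helpers: set up a provided-buffer ring, populate
 * it, then tear it down. Buffer group id 0 and the sizes are arbitrary.
 */
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUFS	8
#define BUF_SZ	4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_buf_ring *br;
	char *bufs;
	int i, err;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	br = io_uring_setup_buf_ring(&ring, NBUFS, 0, 0, &err);
	if (!br) {
		fprintf(stderr, "buf ring setup: %d\n", err);
		return 1;
	}

	bufs = malloc(NBUFS * BUF_SZ);
	for (i = 0; i < NBUFS; i++)
		io_uring_buf_ring_add(br, bufs + i * BUF_SZ, BUF_SZ, i,
				      io_uring_buf_ring_mask(NBUFS), i);
	io_uring_buf_ring_advance(br, NBUFS);

	/* ... issue IOSQE_BUFFER_SELECT requests against group 0 here ... */

	io_uring_free_buf_ring(&ring, br, NBUFS, 0);
	free(bufs);
	io_uring_queue_exit(&ring);
	return 0;
}
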
diff --git a/src/setup.h b/src/setup.h
new file mode 100644
index 0000000..ae44314
--- /dev/null
+++ b/src/setup.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_SETUP_H
+#define LIBURING_SETUP_H
+
+int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+				 struct io_uring_params *p, void *buf,
+				 size_t buf_size);
+
+#endif
diff --git a/src/syscall.c b/src/syscall.c
index 362f1f5..2054d17 100644
--- a/src/syscall.c
+++ b/src/syscall.c
@@ -1,47 +1,29 @@
 /* SPDX-License-Identifier: MIT */
-#define _DEFAULT_SOURCE
 
-/*
- * Functions in this file require libc, only build them when we use libc.
- *
- * Note:
- * liburing's tests still need these functions.
- */
-#if defined(CONFIG_NOLIBC) && !defined(LIBURING_BUILD_TEST)
-#error "This file should only be compiled for libc build, or for liburing tests"
-#endif
-
-/*
- * Will go away once libc support is there
- */
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <sys/uio.h>
-#include "liburing/compat.h"
-#include "liburing/io_uring.h"
 #include "syscall.h"
+#include <liburing.h>
 
-int __sys_io_uring_register(int fd, unsigned opcode, const void *arg,
-			    unsigned nr_args)
+int io_uring_enter(unsigned int fd, unsigned int to_submit,
+		   unsigned int min_complete, unsigned int flags, sigset_t *sig)
 {
-	return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
+	return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig);
 }
 
-int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p)
-{
-	return syscall(__NR_io_uring_setup, entries, p);
-}
-
-int __sys_io_uring_enter2(int fd, unsigned to_submit, unsigned min_complete,
-			  unsigned flags, sigset_t *sig, int sz)
-{
-	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags,
-		       sig, sz);
-}
-
-int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
-			 unsigned flags, sigset_t *sig)
+int io_uring_enter2(unsigned int fd, unsigned int to_submit,
+		    unsigned int min_complete, unsigned int flags,
+		    sigset_t *sig, size_t sz)
 {
 	return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
-				     _NSIG / 8);
+				     sz);
+}
+
+int io_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+	return __sys_io_uring_setup(entries, p);
+}
+
+int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
+		      unsigned int nr_args)
+{
+	return __sys_io_uring_register(fd, opcode, arg, nr_args);
 }
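
/*
 * The wrappers above return -errno directly rather than setting errno
 * (matching the internal __sys_* convention, as an assumption from how the
 * arch syscall helpers behave). A sketch using the setup wrapper on its
 * own, without the higher-level queue API:
 */
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring_params p;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = io_uring_setup(4, &p);
	if (fd < 0) {
		fprintf(stderr, "io_uring_setup: %d\n", fd);
		return 1;
	}
	printf("kernel sized the ring: sq=%u cq=%u\n",
	       p.sq_entries, p.cq_entries);
	close(fd);	/* nothing was mmap'ed; dropping the fd suffices */
	return 0;
}
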
diff --git a/src/syscall.h b/src/syscall.h
index 214789d..6327f0f 100644
--- a/src/syscall.h
+++ b/src/syscall.h
@@ -10,45 +10,8 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
-
 #include <liburing.h>
 
-#ifdef __alpha__
-/*
- * alpha and mips are exception, other architectures have
- * common numbers for new system calls.
- */
-#ifndef __NR_io_uring_setup
-#define __NR_io_uring_setup		535
-#endif
-#ifndef __NR_io_uring_enter
-#define __NR_io_uring_enter		536
-#endif
-#ifndef __NR_io_uring_register
-#define __NR_io_uring_register	537
-#endif
-#elif defined __mips__
-#ifndef __NR_io_uring_setup
-#define __NR_io_uring_setup		(__NR_Linux + 425)
-#endif
-#ifndef __NR_io_uring_enter
-#define __NR_io_uring_enter		(__NR_Linux + 426)
-#endif
-#ifndef __NR_io_uring_register
-#define __NR_io_uring_register	(__NR_Linux + 427)
-#endif
-#else /* !__alpha__ and !__mips__ */
-#ifndef __NR_io_uring_setup
-#define __NR_io_uring_setup		425
-#endif
-#ifndef __NR_io_uring_enter
-#define __NR_io_uring_enter		426
-#endif
-#ifndef __NR_io_uring_register
-#define __NR_io_uring_register		427
-#endif
-#endif
-
 /*
  * Don't put this below the #include "arch/$arch/syscall.h", that
  * file may need it.
@@ -60,9 +23,9 @@
 	return (void *) n;
 }
 
-static inline intptr_t PTR_ERR(const void *ptr)
+static inline int PTR_ERR(const void *ptr)
 {
-	return (intptr_t) ptr;
+	return (int) (intptr_t) ptr;
 }
 
 static inline bool IS_ERR(const void *ptr)
@@ -70,11 +33,12 @@
 	return uring_unlikely((uintptr_t) ptr >= (uintptr_t) -4095UL);
 }
 
-#define __INTERNAL__LIBURING_SYSCALL_H
 #if defined(__x86_64__) || defined(__i386__)
 #include "arch/x86/syscall.h"
 #elif defined(__aarch64__)
 #include "arch/aarch64/syscall.h"
+#elif defined(__riscv) && __riscv_xlen == 64
+#include "arch/riscv64/syscall.h"
 #else
 /*
  * We don't have native syscall wrappers
@@ -86,18 +50,4 @@
 /* libc syscall wrappers. */
 #include "arch/generic/syscall.h"
 #endif
-#undef __INTERNAL__LIBURING_SYSCALL_H
-
-/*
- * For backward compatibility.
- * (these __sys* functions always use libc, see syscall.c)
- */
-int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p);
-int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
-			 unsigned flags, sigset_t *sig);
-int __sys_io_uring_enter2(int fd, unsigned to_submit, unsigned min_complete,
-			  unsigned flags, sigset_t *sig, int sz);
-int __sys_io_uring_register(int fd, unsigned int opcode, const void *arg,
-			    unsigned int nr_args);
-
 #endif
diff --git a/src/version.c b/src/version.c
new file mode 100644
index 0000000..e6a884c
--- /dev/null
+++ b/src/version.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "liburing.h"
+#include "liburing/io_uring_version.h"
+
+int io_uring_major_version(void)
+{
+	return IO_URING_VERSION_MAJOR;
+}
+
+int io_uring_minor_version(void)
+{
+	return IO_URING_VERSION_MINOR;
+}
+
+bool io_uring_check_version(int major, int minor)
+{
+	return major > io_uring_major_version() ||
+		(major == io_uring_major_version() &&
+		 minor > io_uring_minor_version());
+}
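
/*
 * The polarity of io_uring_check_version() is easy to get backwards: it
 * returns true when the *requested* version is newer than the linked
 * library. Usage sketch:
 */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	printf("linked against liburing %d.%d\n",
	       io_uring_major_version(), io_uring_minor_version());

	if (io_uring_check_version(2, 8))
		printf("library is older than 2.8\n");
	else
		printf("library is at least 2.8\n");
	return 0;
}
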
diff --git a/test/232c93d07b74.c b/test/232c93d07b74.c
index 8a7810b..d3053b9 100644
--- a/test/232c93d07b74.c
+++ b/test/232c93d07b74.c
@@ -21,21 +21,21 @@
 #include <netinet/in.h>
 #include <arpa/inet.h>
 
+#include "helpers.h"
 #include "liburing.h"
 
 #define RECV_BUFF_SIZE 2
 #define SEND_BUFF_SIZE 3
 
-#define PORT	0x1234
-
 struct params {
 	int tcp;
 	int non_blocking;
+	__be16 bind_port;
 };
 
-pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-int rcv_ready = 0;
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static int rcv_ready = 0;
 
 static void set_rcv_ready(void)
 {
@@ -64,8 +64,7 @@
 	int res;
 
 	if (p->tcp) {
-		int val = 1;
-                
+		int ret, val = 1;
 
 		s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
 		res = setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
@@ -76,10 +75,10 @@
 		struct sockaddr_in addr;
 
 		addr.sin_family = AF_INET;
-		addr.sin_port = htons(PORT);
 		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-		res = bind(s0, (struct sockaddr *) &addr, sizeof(addr));
-		assert(res != -1);
+		ret = t_bind_ephemeral_port(s0, &addr);
+		assert(!ret);
+		p->bind_port = addr.sin_port;
 	} else {
 		s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
 		assert(s0 != -1);
@@ -191,7 +190,7 @@
 		struct sockaddr_in addr;
 
 		addr.sin_family = AF_INET;
-		addr.sin_port = htons(PORT);
+		addr.sin_port = p->bind_port;
 		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
 		ret = connect(s0, (struct sockaddr*) &addr, sizeof(addr));
 		assert(ret != -1);
@@ -281,10 +280,10 @@
 	struct params p;
 	pthread_t t1, t2;
 	void *res1, *res2;
-	int i, exit_val = 0;
+	int i, exit_val = T_EXIT_PASS;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	for (i = 0; i < 4; i++) {
 		p.tcp = i & 1;
@@ -298,7 +297,7 @@
 		pthread_join(t2, &res2);
 		if (res1 || res2) {
 			fprintf(stderr, "Failed tcp=%d, non_blocking=%d\n", p.tcp, p.non_blocking);
-			exit_val = 1;
+			exit_val = T_EXIT_FAIL;
 		}
 	}
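
/*
 * The fixed PORT define gives way to t_bind_ephemeral_port(), a helper from
 * test/helpers.h that avoids collisions between parallel test runs. A
 * sketch of the underlying pattern: bind to port 0, then read the kernel's
 * pick back with getsockname().
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	socklen_t len = sizeof(addr);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
	addr.sin_port = 0;	/* let the kernel choose */

	if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) ||
	    getsockname(fd, (struct sockaddr *) &addr, &len)) {
		perror("bind/getsockname");
		return 1;
	}
	printf("bound to ephemeral port %d\n", ntohs(addr.sin_port));
	close(fd);
	return 0;
}
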
 
diff --git a/test/35fa71a030ca.c b/test/35fa71a030ca.c
index 9a6ddb6..7914f59 100644
--- a/test/35fa71a030ca.c
+++ b/test/35fa71a030ca.c
@@ -25,8 +25,10 @@
 #include <linux/futex.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
+#ifndef CONFIG_USE_SANITIZER
 #if !defined(SYS_futex) && defined(SYS_futex_time64)
 # define SYS_futex SYS_futex_time64
 #endif
@@ -175,7 +177,7 @@
 }
 
 #define SYZ_HAVE_SETUP_TEST 1
-static void setup_test()
+static void setup_test(void)
 {
   prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
   setpgrp();
@@ -261,7 +263,7 @@
   }
 }
 
-uint64_t r[1] = {0xffffffffffffffff};
+static uint64_t r[1] = {0xffffffffffffffff};
 
 void execute_call(int call)
 {
@@ -317,12 +319,18 @@
 int main(int argc, char *argv[])
 {
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 	signal(SIGINT, sig_int);
-	mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+	mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);
 	signal(SIGALRM, sig_int);
 	alarm(5);
 
 	loop();
-	return 0;
+	return T_EXIT_PASS;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
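
/*
 * The reproducers here switch from raw 0/1 exits to the T_EXIT_* codes from
 * helpers.h. A sketch of the convention; the numeric values are an
 * assumption based on the automake convention (77 == skipped) that the
 * runner follows:
 */
#include <stdio.h>

enum t_test_result {
	T_EXIT_PASS = 0,
	T_EXIT_FAIL = 1,
	T_EXIT_SKIP = 77,	/* automake-style "skipped" */
};

int main(int argc, char *argv[])
{
	/* liburing tests skip when handed any argument, as seen above */
	if (argc > 1)
		return T_EXIT_SKIP;
	printf("running\n");
	return T_EXIT_PASS;
}
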
diff --git a/test/500f9fbadef8.c b/test/500f9fbadef8.c
index dbd5751..69dd9d0 100644
--- a/test/500f9fbadef8.c
+++ b/test/500f9fbadef8.c
@@ -27,7 +27,7 @@
 	int ret, fd;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	t_posix_memalign(&iov.iov_base, 4096, 4096);
 	iov.iov_len = 4096;
@@ -35,15 +35,17 @@
 	ret = io_uring_queue_init(1, &ring, IORING_SETUP_IOPOLL);
 	if (ret) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
 	sprintf(buf, "./XXXXXX");
 	fd = mkostemp(buf, O_WRONLY | O_DIRECT | O_CREAT);
 	if (fd < 0) {
+		if (errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("mkostemp");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	offset = 0;
@@ -73,17 +75,19 @@
 		io_uring_cqe_seen(&ring, cqe);
 		offset += 4096;
 	} while (--blocks);
-		
+
 	close(fd);
 	unlink(buf);
-	return 0;
+	free(iov.iov_base);
+	return T_EXIT_PASS;
 err:
 	close(fd);
 	unlink(buf);
-	return 1;
+	free(iov.iov_base);
+	return T_EXIT_FAIL;
 skipped:
 	fprintf(stderr, "Polling not supported in current dir, test skipped\n");
 	close(fd);
 	unlink(buf);
-	return 0;
+	return T_EXIT_SKIP;
 }
diff --git a/test/7ad0e4b2f83c.c b/test/7ad0e4b2f83c.c
index 4d760e1..6b3bbc9 100644
--- a/test/7ad0e4b2f83c.c
+++ b/test/7ad0e4b2f83c.c
@@ -3,31 +3,7 @@
 #include <time.h>
 #include <sys/time.h>
 #include "liburing.h"
-
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -40,12 +16,12 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(32, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "io_uring_queue_init=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -53,7 +29,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "io_uring_submit1=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 
@@ -62,7 +38,7 @@
 	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts1);
 	if (ret) {
 		fprintf(stderr, "io_uring_wait_cqe_timeout=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	io_uring_cqe_seen(&ring, cqe);
 	gettimeofday(&tv, NULL);
@@ -75,7 +51,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "io_uring_submit2=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_wait_cqe(&ring, &cqe);
@@ -83,11 +59,11 @@
 	msec = mtime_since_now(&tv);
 	if (msec >= 900 && msec <= 1100) {
 		io_uring_queue_exit(&ring);
-		return 0;
+		return T_EXIT_PASS;
 	}
 
 	fprintf(stderr, "%s: Timeout seems wonky (got %lu)\n", __FUNCTION__,
 								msec);
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/8a9973408177.c b/test/8a9973408177.c
index 94bf781..9c8bb28 100644
--- a/test/8a9973408177.c
+++ b/test/8a9973408177.c
@@ -7,6 +7,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int register_file(struct io_uring *ring)
 {
@@ -85,12 +86,12 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = register_file(&ring);
@@ -102,5 +103,5 @@
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/917257daa0fe.c b/test/917257daa0fe.c
index 1d00ef1..f4fcd7a 100644
--- a/test/917257daa0fe.c
+++ b/test/917257daa0fe.c
@@ -11,14 +11,16 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
+#ifndef CONFIG_USE_SANITIZER
 int main(int argc, char *argv[])
 {
   if (argc > 1)
-    return 0;
+    return T_EXIT_SKIP;
 
-  mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+  mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);
 
   *(uint32_t*)0x20000000 = 0;
   *(uint32_t*)0x20000004 = 0;
@@ -49,5 +51,11 @@
   *(uint32_t*)0x2000006c = 0;
   *(uint64_t*)0x20000070 = 0;
   __sys_io_uring_setup(0x7a6, (struct io_uring_params *) 0x20000000UL);
-  return 0;
+  return T_EXIT_PASS;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/Makefile b/test/Makefile
index e3204a7..c446cae 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -13,9 +13,10 @@
 	-D_GNU_SOURCE \
 	-D__SANE_USERSPACE_TYPES__ \
 	-I../src/include/ \
-	-include ../config-host.h
+	-include ../config-host.h \
+	-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 
-CFLAGS ?= -g -O2 -Wall -Wextra
+CFLAGS ?= -g -O3 -Wall -Wextra
 XCFLAGS = -Wno-unused-parameter -Wno-sign-compare
 
 ifdef CONFIG_HAVE_STRINGOP_OVERFLOW
@@ -26,6 +27,10 @@
 	XCFLAGS += -Warray-bounds=0
 endif
 
+ifeq ($(CONFIG_USE_SANITIZER),y)
+	XCFLAGS += -fsanitize=address,undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls
+endif
+
 CXXFLAGS ?= $(CFLAGS)
 override CFLAGS += $(XCFLAGS) -DLIBURING_BUILD_TEST
 override CXXFLAGS += $(XCFLAGS) -std=c++11 -DLIBURING_BUILD_TEST
@@ -45,15 +50,22 @@
 	a4c0b3decb33.c \
 	accept.c \
 	accept-link.c \
+	accept-non-empty.c \
 	accept-reuse.c \
 	accept-test.c \
 	across-fork.c \
 	b19062a56726.c \
 	b5837bd5311d.c \
+	bind-listen.c \
 	buf-ring.c \
+	buf-ring-nommap.c \
+	buf-ring-put.c \
 	ce593a6c480a.c \
 	close-opath.c \
 	connect.c \
+	connect-rep.c \
+	coredump.c \
+	cmd-discard.c \
 	cq-full.c \
 	cq-overflow.c \
 	cq-peek-batch.c \
@@ -62,51 +74,79 @@
 	d4ae271dfaae.c \
 	d77a67ed5f27.c \
 	defer.c \
+	defer-taskrun.c \
+	defer-tw-timeout.c \
 	double-poll-crash.c \
 	drop-submit.c \
 	eeed8b54e0df.c \
 	empty-eownerdead.c \
+	eploop.c \
 	eventfd.c \
 	eventfd-disable.c \
 	eventfd-reg.c \
 	eventfd-ring.c \
+	evloop.c \
 	exec-target.c \
 	exit-no-cleanup.c \
 	fadvise.c \
 	fallocate.c \
 	fc2a85cb02ef.c \
+	fd-install.c \
+	fd-pass.c \
+	fdinfo.c \
+	fifo-nonblock-read.c \
 	file-register.c \
 	files-exit-hang-poll.c \
 	files-exit-hang-timeout.c \
 	file-update.c \
 	file-verify.c \
 	fixed-buf-iter.c \
+	fixed-buf-merge.c \
+	fixed-hugepage.c \
 	fixed-link.c \
 	fixed-reuse.c \
 	fpos.c \
+	fsnotify.c \
 	fsync.c \
+	futex.c \
 	hardlink.c \
+	ignore-single-mmap.c \
+	init-mem.c \
 	io-cancel.c \
 	iopoll.c \
+	iopoll-leak.c \
+	iopoll-overflow.c \
 	io_uring_enter.c \
+	io_uring_passthrough.c \
 	io_uring_register.c \
 	io_uring_setup.c \
+	kallsyms.c \
 	lfs-openat.c \
 	lfs-openat-write.c \
 	link.c \
 	link_drain.c \
 	link-timeout.c \
+	linked-defer-close.c \
 	madvise.c \
+	min-timeout.c \
+	min-timeout-wait.c \
 	mkdir.c \
 	msg-ring.c \
+	msg-ring-fd.c \
+	msg-ring-flags.c \
+	msg-ring-overflow.c \
 	multicqes_drain.c \
+	napi-test.c \
+	no-mmap-inval.c \
 	nop-all-sizes.c \
 	nop.c \
+	ooo-file-unreg.c \
 	openat2.c \
 	open-close.c \
 	open-direct-link.c \
 	open-direct-pick.c \
 	personality.c \
+	pipe-bug.c \
 	pipe-eof.c \
 	pipe-reuse.c \
 	poll.c \
@@ -115,65 +155,100 @@
 	poll-cancel-ton.c \
 	poll-link.c \
 	poll-many.c \
+	poll-mshot-overflow.c \
 	poll-mshot-update.c \
+	poll-race.c \
+	poll-race-mshot.c \
 	poll-ring.c \
 	poll-v-poll.c \
 	pollfree.c \
 	probe.c \
 	read-before-exit.c \
+	read-mshot.c \
+	read-mshot-empty.c \
+	read-mshot-stdin.c \
 	read-write.c \
 	recv-msgall.c \
 	recv-msgall-stream.c \
+	recv-multishot.c \
+	reg-fd-only.c \
+	reg-hint.c \
+	reg-reg-ring.c \
+	regbuf-clone.c \
+	regbuf-merge.c \
 	register-restrictions.c \
 	rename.c \
 	ringbuf-read.c \
+	ringbuf-status.c \
 	ring-leak2.c \
 	ring-leak.c \
 	rsrc_tags.c \
 	rw_merge_test.c \
 	self.c \
-	sendmsg_fs_cve.c \
+	recvsend_bundle.c \
+	recvsend_bundle-inc.c \
 	send_recv.c \
 	send_recvmsg.c \
+	send-zerocopy.c \
 	shared-wq.c \
 	short-read.c \
 	shutdown.c \
 	sigfd-deadlock.c \
+	single-issuer.c \
 	skip-cqe.c \
 	socket.c \
+	socket-io-cmd.c \
+	socket-getsetsock-cmd.c \
 	socket-rw.c \
 	socket-rw-eagain.c \
 	socket-rw-offset.c \
 	splice.c \
 	sq-full.c \
 	sq-full-cpp.cc \
-	sqpoll-cancel-hang.c \
 	sqpoll-disable-exit.c \
+	sqpoll-exec.c \
 	sq-poll-dup.c \
 	sqpoll-exit-hang.c \
 	sq-poll-kthread.c \
 	sq-poll-share.c \
 	sqpoll-sleep.c \
 	sq-space_left.c \
+	sqwait.c \
 	stdout.c \
+	submit-and-wait.c \
 	submit-link-fail.c \
 	submit-reuse.c \
 	symlink.c \
+	sync-cancel.c \
 	teardowns.c \
 	thread-exit.c \
 	timeout.c \
 	timeout-new.c \
-	timeout-overflow.c \
+	truncate.c \
 	tty-write-dpoll.c \
 	unlink.c \
+	uring_cmd_ublk.c \
+	version.c \
+	waitid.c \
+	wait-timeout.c \
 	wakeup-hang.c \
+	wq-aff.c \
 	xattr.c \
-	skip-cqe.c \
+	# EOL
+
+# Please keep this list sorted alphabetically.
+asan_test_srcs := \
+	xfail_prep_link_timeout_out_of_scope.c \
+	xfail_register_buffers_out_of_scope.c \
 	# EOL
 
 all_targets :=
 include ../Makefile.quiet
 
+ifeq ($(CONFIG_NOLIBC),y)
+	test_srcs += nolibc.c
+endif
+
 ifdef CONFIG_HAVE_STATX
 	test_srcs += statx.c
 else ifdef CONFIG_HAVE_GLIBC_STATX
@@ -192,30 +267,37 @@
 run_test_targets := $(patsubst %,%.run_test,$(test_targets))
 test_targets := $(patsubst %,%.t,$(test_targets))
 all_targets += $(test_targets)
+helpers = helpers.o
 
-#
-# Build ../src/syscall.c manually from test's Makefile to support
-# liburing nolibc.
-#
-# Functions in ../src/syscall.c require libc to work with, if we
-# build liburing without libc, we don't have those functions
-# in liburing.a. So build it manually here.
-#
-helpers = helpers.o ../src/syscall.o
+ifeq ($(CONFIG_USE_SANITIZER),y)
+	asan_test_targets := $(patsubst %.c,%,$(asan_test_srcs))
+	asan_test_targets := $(patsubst %.cc,%,$(asan_test_targets))
+	asan_run_test_targets := $(patsubst %,%.run_test,$(asan_test_targets))
+	asan_test_targets := $(patsubst %,%.t,$(asan_test_targets))
+	all_targets += $(asan_test_targets)
+endif
 
-all: $(test_targets)
-
-../src/syscall.o: ../src/syscall.c
-	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
+all: $(test_targets) $(asan_test_targets)
 
 helpers.o: helpers.c
 	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
 
-%.t: %.c $(helpers) helpers.h ../src/liburing.a
+LIBURING := $(shell if [ -e ../src/liburing.a ]; then echo ../src/liburing.a; fi)
+
+%.t: %.c $(helpers) helpers.h $(LIBURING)
 	$(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS)
 
-%.t: %.cc $(helpers) helpers.h ../src/liburing.a
-	$(QUIET_CXX)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< $(helpers) $(LDFLAGS)
+#
+# C++ compilers are not happy with -Wmissing-prototypes (g++ shown here):
+#
+#   cc1plus: warning: command-line option '-Wmissing-prototypes' \
+#   is valid for C/ObjC but not for C++
+#
+%.t: %.cc $(helpers) helpers.h $(LIBURING)
+	$(QUIET_CXX)$(CXX) \
+	$(patsubst -Wmissing-prototypes,,$(CPPFLAGS)) \
+	$(patsubst -Wmissing-prototypes,,$(CXXFLAGS)) \
+	-o $@ $< $(helpers) $(LDFLAGS)
 
 
 install: $(test_targets) runtests.sh runtests-loop.sh
@@ -224,15 +306,18 @@
 	$(INSTALL) -D -m 755 runtests.sh  $(datadir)/liburing-test/
 	$(INSTALL) -D -m 755 runtests-loop.sh  $(datadir)/liburing-test/
 
+uninstall:
+	@rm -rf $(datadir)/liburing-test/
+
 clean:
 	@rm -f $(all_targets) helpers.o output/*
 	@rm -rf output/
 
 runtests: all
-	@./runtests.sh $(test_targets)
+	@./runtests.sh $(test_targets) $(asan_test_targets)
 
 runtests-loop: all
-	@./runtests-loop.sh $(test_targets)
+	@./runtests-loop.sh $(test_targets) $(asan_test_targets)
 
 %.run_test: %.t
 	@./runtests-quiet.sh $<
diff --git a/test/a0908ae19763.c b/test/a0908ae19763.c
index 00cb559..2df5501 100644
--- a/test/a0908ae19763.c
+++ b/test/a0908ae19763.c
@@ -11,15 +11,17 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
-uint64_t r[1] = {0xffffffffffffffff};
+#ifndef CONFIG_USE_SANITIZER
+static uint64_t r[1] = {0xffffffffffffffff};
 
 int main(int argc, char *argv[])
 {
   if (argc > 1)
-    return 0;
-  mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+    return T_EXIT_SKIP;
+  mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);
   intptr_t res = 0;
   *(uint32_t*)0x20000080 = 0;
   *(uint32_t*)0x20000084 = 0;
@@ -54,5 +56,11 @@
     r[0] = res;
   *(uint32_t*)0x20000280 = -1;
   __sys_io_uring_register(r[0], 2, (const void *) 0x20000280, 1);
-  return 0;
+  return T_EXIT_PASS;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/a4c0b3decb33.c b/test/a4c0b3decb33.c
index 34b0af2..60cbf74 100644
--- a/test/a4c0b3decb33.c
+++ b/test/a4c0b3decb33.c
@@ -21,8 +21,10 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
+#ifndef CONFIG_USE_SANITIZER
 static void sleep_ms(uint64_t ms)
 {
 	usleep(ms * 1000);
@@ -94,7 +96,7 @@
 	}
 }
 
-static void setup_test()
+static void setup_test(void)
 {
 	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
 	setpgrp();
@@ -108,7 +110,7 @@
 static void loop(void)
 {
 	int iter;
-	for (iter = 0; iter < 5000; iter++) {
+	for (iter = 0; iter < 50; iter++) {
 		int pid = fork();
 		if (pid < 0)
 			exit(1);
@@ -172,9 +174,15 @@
 int main(int argc, char *argv[])
 {
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 	signal(SIGINT, sig_int);
-	mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+	mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);
 	loop();
-	return 0;
+	return T_EXIT_PASS;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/accept-link.c b/test/accept-link.c
index f111275..9fa6cc6 100644
--- a/test/accept-link.c
+++ b/test/accept-link.c
@@ -14,9 +14,10 @@
 #include <arpa/inet.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
-pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
 
 static int recv_thread_ready = 0;
 static int recv_thread_done = 0;
@@ -76,7 +77,7 @@
 	return NULL;
 }
 
-void *recv_thread(void *arg)
+static void *recv_thread(void *arg)
 {
 	struct data *data = arg;
 	struct io_uring ring;
@@ -194,7 +195,7 @@
 	if (ret) {
 		fprintf(stderr, "queue_init: %d\n", ret);
 		return 1;
-	};
+	}
 
 	fast_poll = (p.features & IORING_FEAT_FAST_POLL) != 0;
 	io_uring_queue_exit(&ring);
@@ -239,16 +240,16 @@
 int main(int argc, char *argv[])
 {
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 	if (test_accept_timeout(0, 200000000)) {
 		fprintf(stderr, "accept timeout 0 failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (test_accept_timeout(1, 1000000000)) {
 		fprintf(stderr, "accept and connect timeout 0 failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/accept-non-empty.c b/test/accept-non-empty.c
new file mode 100644
index 0000000..bc3f781
--- /dev/null
+++ b/test/accept-non-empty.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Check that kernels that support it will return IORING_CQE_F_SOCK_NONEMPTY
+ * on accept requests where more connections are pending.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/un.h>
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int no_more_accept;
+
+#define MAX_ACCEPTS	8
+
+struct data {
+	pthread_t thread;
+	pthread_barrier_t barrier;
+	pthread_barrier_t conn_barrier;
+	int connects;
+};
+
+static int start_accept_listen(int port_off, int extra_flags)
+{
+	struct sockaddr_in addr;
+	int32_t val = 1;
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_STREAM | extra_flags, IPPROTO_TCP);
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
+	assert(ret != -1);
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+	assert(ret != -1);
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(0x1235 + port_off);
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+	assert(ret != -1);
+	ret = listen(fd, 20000);
+	assert(ret != -1);
+
+	return fd;
+}
+
+static void *connect_fn(void *data)
+{
+	struct sockaddr_in addr = { };
+	struct data *d = data;
+	int i;
+
+	pthread_barrier_wait(&d->barrier);
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(0x1235);
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	for (i = 0; i < d->connects; i++) {
+		int s;
+
+		s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+		if (s < 0) {
+			perror("socket");
+			break;
+		}
+		if (connect(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+			perror("connect");
+			break;
+		}
+	}
+
+	if (i)
+		pthread_barrier_wait(&d->conn_barrier);
+
+	return NULL;
+}
+
+static void setup_thread(struct data *d, int nconns)
+{
+	d->connects = nconns;
+	pthread_barrier_init(&d->barrier, NULL, 2);
+	pthread_barrier_init(&d->conn_barrier, NULL, 2);
+	pthread_create(&d->thread, NULL, connect_fn, d);
+}
+
+static int test_maccept(struct data *d, int flags, int fixed)
+{
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int err = 0, fd, ret, i, *fds;
+
+	p.flags = flags;
+	ret = io_uring_queue_init_params(8, &ring, &p);
+	if (ret == -EINVAL) {
+		return T_EXIT_SKIP;
+	} else if (ret < 0) {
+		fprintf(stderr, "ring setup failure: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) {
+		no_more_accept = 1;
+		return 0;
+	}
+
+	setup_thread(d, MAX_ACCEPTS);
+
+	fds = malloc(MAX_ACCEPTS * sizeof(int));
+	memset(fds, -1, MAX_ACCEPTS * sizeof(int));
+
+	if (fixed) {
+		io_uring_register_ring_fd(&ring);
+
+		ret = io_uring_register_files(&ring, fds, MAX_ACCEPTS);
+		if (ret) {
+			fprintf(stderr, "file reg %d\n", ret);
+			return -1;
+		}
+	}
+
+	fd = start_accept_listen(0, 0);
+
+	pthread_barrier_wait(&d->barrier);
+
+	if (d->connects > 1)
+		pthread_barrier_wait(&d->conn_barrier);
+
+	for (i = 0; i < d->connects; i++) {
+		sqe = io_uring_get_sqe(&ring);
+		if (fixed)
+			io_uring_prep_accept_direct(sqe, fd, NULL, NULL, 0, i);
+		else
+			io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
+
+		ret = io_uring_submit_and_wait(&ring, 1);
+		assert(ret != -1);
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		assert(!ret);
+		if (cqe->res < 0) {
+			fprintf(stderr, "res=%d\n", cqe->res);
+			break;
+		}
+		fds[i] = cqe->res;
+		if (d->connects == 1) {
+			if (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) {
+				fprintf(stderr, "Non-empty sock on single?\n");
+				err = 1;
+				break;
+			}
+		} else {
+			int last = i + 1 == d->connects;
+
+			if (last && cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) {
+				fprintf(stderr, "Non-empty sock on last?\n");
+				err = 1;
+				break;
+			} else if (!last && !(cqe->flags & IORING_CQE_F_SOCK_NONEMPTY)) {
+				fprintf(stderr, "Empty on multi connect?\n");
+				err = 1;
+				break;
+			}
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	close(fd);
+	if (!fixed) {
+		for (i = 0; i < MAX_ACCEPTS; i++)
+			if (fds[i] != -1)
+				close(fds[i]);
+	}
+	free(fds);
+	io_uring_queue_exit(&ring);
+	return err;
+}
+
+static int test(int flags, int fixed)
+{
+	struct data d;
+	void *tret;
+	int ret;
+
+	ret = test_maccept(&d, flags, fixed);
+	if (ret) {
+		fprintf(stderr, "test conns=1 failed\n");
+		return ret;
+	}
+	if (no_more_accept)
+		return T_EXIT_SKIP;
+
+	pthread_join(d.thread, &tret);
+
+	ret = test_maccept(&d, flags, fixed);
+	if (ret) {
+		fprintf(stderr, "test conns=MAX failed\n");
+		return ret;
+	}
+
+	pthread_join(d.thread, &tret);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 0);
+	if (no_more_accept)
+		return T_EXIT_SKIP;
+	if (ret) {
+		fprintf(stderr, "test 0 0 failed\n");
+		return ret;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN, 0);
+	if (ret) {
+		fprintf(stderr, "test DEFER 0 failed\n");
+		return ret;
+	}
+
+	ret = test(0, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 1 failed\n");
+		return ret;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN, 1);
+	if (ret) {
+		fprintf(stderr, "test DEFER 1 failed\n");
+		return ret;
+	}
+
+	return 0;
+}
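
/*
 * Outside the harness, the flag this test exercises lets a server decide
 * whether to queue another accept immediately. A hypothetical handler
 * sketch (handle_accept_cqe and listen_fd are illustrative names):
 */
#include <liburing.h>
#include <stdio.h>

/* called per accept completion; re-arms only while the backlog is non-empty */
void handle_accept_cqe(struct io_uring *ring, struct io_uring_cqe *cqe,
		       int listen_fd)
{
	if (cqe->res < 0) {
		fprintf(stderr, "accept: %d\n", cqe->res);
		return;
	}
	printf("accepted fd %d\n", cqe->res);
	if (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
	}
}
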
diff --git a/test/accept-reuse.c b/test/accept-reuse.c
index c95ac70..716f201 100644
--- a/test/accept-reuse.c
+++ b/test/accept-reuse.c
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: MIT */
-#include <liburing.h>
 #include <netdb.h>
 #include <string.h>
 #include <sys/socket.h>
@@ -8,24 +7,24 @@
 #include <stdio.h>
 #include <errno.h>
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
-struct io_uring io_uring;
+static struct io_uring io_uring;
 
-int sys_io_uring_enter(const int fd,
-		       const unsigned to_submit,
-		       const unsigned min_complete,
-		       const unsigned flags, sigset_t * const sig)
+static int sys_io_uring_enter(const int fd, const unsigned to_submit,
+			      const unsigned min_complete,
+			      const unsigned flags, sigset_t * const sig)
 {
 	return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig);
 }
 
-int submit_sqe(void)
+static int submit_sqe(void)
 {
 	struct io_uring_sq *sq = &io_uring.sq;
 	const unsigned tail = *sq->ktail;
 
-	sq->array[tail & *sq->kring_mask] = 0;
+	sq->array[tail & sq->ring_mask] = 0;
 	io_uring_smp_store_release(sq->ktail, tail + 1);
 
 	return sys_io_uring_enter(io_uring.ring_fd, 1, 0, 0, NULL);
@@ -43,17 +42,17 @@
 	int ret, listen_fd, connect_fd, val, i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(&params, 0, sizeof(params));
-	ret = io_uring_queue_init_params(4, &io_uring, &params);
+	ret = t_io_uring_init_sqarray(4, &io_uring, &params);
 	if (ret) {
 		fprintf(stderr, "io_uring_init_failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	if (!(params.features & IORING_FEAT_SUBMIT_STABLE)) {
 		fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n");
-		return 0;
+		return T_EXIT_SKIP;
 	}
 
 	memset(&hints, 0, sizeof(hints));
@@ -64,7 +63,7 @@
 	ret = getaddrinfo(NULL, "12345", &hints, &addr_info_list);
 	if (ret < 0) {
 		perror("getaddrinfo");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (ai = addr_info_list; ai; ai = ai->ai_next) {
@@ -75,7 +74,7 @@
 	}
 	if (!addr_info) {
 		fprintf(stderr, "addrinfo not found\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = &io_uring.sq.sqes[0];
@@ -85,7 +84,7 @@
 			   addr_info->ai_protocol);
 	if (ret < 0) {
 		perror("socket");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	listen_fd = ret;
 
@@ -96,13 +95,13 @@
 	ret = bind(listen_fd, addr_info->ai_addr, addr_info->ai_addrlen);
 	if (ret < 0) {
 		perror("bind");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = listen(listen_fd, SOMAXCONN);
 	if (ret < 0) {
 		perror("listen");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	memset(&sa, 0, sizeof(sa));
@@ -112,14 +111,14 @@
 	ret = submit_sqe();
 	if (ret != 1) {
 		fprintf(stderr, "submit failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	connect_fd = -1;
 	ret = socket(addr_info->ai_family, SOCK_STREAM, addr_info->ai_protocol);
 	if (ret < 0) {
 		perror("socket");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	connect_fd = ret;
 
@@ -129,7 +128,7 @@
 	ret = submit_sqe();
 	if (ret != 1) {
 		fprintf(stderr, "submit failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 2; i++) {
@@ -138,20 +137,20 @@
 		ret = io_uring_wait_cqe(&io_uring, &cqe);
 		if (ret) {
 			fprintf(stderr, "io_uring_wait_cqe: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
 		case 1:
 			if (cqe->res < 0) {
 				fprintf(stderr, "accept failed: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		case 2:
 			if (cqe->res) {
 				fprintf(stderr, "connect failed: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
@@ -160,5 +159,5 @@
 
 	freeaddrinfo(addr_info_list);
 	io_uring_queue_exit(&io_uring);
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/accept-test.c b/test/accept-test.c
index 4a904e4..99f6080 100644
--- a/test/accept-test.c
+++ b/test/accept-test.c
@@ -8,6 +8,7 @@
 #include <sys/un.h>
 #include <assert.h>
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -23,11 +24,11 @@
 	};
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	if (io_uring_queue_init(4, &ring, 0) != 0) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -45,7 +46,7 @@
 	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	io_uring_prep_accept(sqe, fd, (struct sockaddr*)&addr, &addrlen, 0);
 	sqe->user_data = 1;
@@ -53,27 +54,30 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "Got submit %d, expected 1\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
 	if (!ret) {
 		if (cqe->res == -EBADF || cqe->res == -EINVAL) {
 			fprintf(stdout, "Accept not supported, skipping\n");
-			goto out;
+			goto skip;
 		} else if (cqe->res < 0) {
 			fprintf(stderr, "cqe error %d\n", cqe->res);
 			goto err;
 		}
 	} else if (ret != -ETIME) {
 		fprintf(stderr, "accept() failed to use addr & addrlen parameters!\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-out:
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
+
+skip:
+	io_uring_queue_exit(&ring);
+	return T_EXIT_SKIP;
 err:
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/accept.c b/test/accept.c
index 8078ccb..04d1e1b 100644
--- a/test/accept.c
+++ b/test/accept.c
@@ -24,6 +24,9 @@
 #include "liburing.h"
 
 #define MAX_FDS 32
+#define NOP_USER_DATA (1LLU << 50)
+#define INITIAL_USER_DATA 1000
+
 static int no_accept;
 static int no_accept_multi;
 
@@ -39,6 +42,7 @@
 	bool queue_accept_before_connect;
 	bool multishot;
 	int extra_loops;
+	bool overflow;
 };
 
 static void close_fds(int fds[], int nr)
@@ -56,7 +60,7 @@
 	close_fds(c_fd, nr);
 }
 
-static void queue_send(struct io_uring *ring, int fd)
+static void *queue_send(struct io_uring *ring, int fd)
 {
 	struct io_uring_sqe *sqe;
 	struct data *d;
@@ -68,9 +72,11 @@
 	sqe = io_uring_get_sqe(ring);
 	io_uring_prep_writev(sqe, fd, &d->iov, 1, 0);
 	sqe->user_data = 1;
+
+	return d;
 }
 
-static void queue_recv(struct io_uring *ring, int fd, bool fixed)
+static void *queue_recv(struct io_uring *ring, int fd, bool fixed)
 {
 	struct io_uring_sqe *sqe;
 	struct data *d;
@@ -84,6 +90,26 @@
 	sqe->user_data = 2;
 	if (fixed)
 		sqe->flags |= IOSQE_FIXED_FILE;
+
+	return d;
+}
+
+static void queue_accept_multishot(struct io_uring *ring, int fd,
+				   int idx, bool fixed)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+	int ret;
+
+	if (fixed)
+		io_uring_prep_multishot_accept_direct(sqe, fd,
+						NULL, NULL,
+						0);
+	else
+		io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0);
+
+	io_uring_sqe_set_data64(sqe, idx);
+	ret = io_uring_submit(ring);
+	assert(ret != -1);
 }
 
 static void queue_accept_conn(struct io_uring *ring, int fd,
@@ -93,40 +119,51 @@
 	int ret;
 	int fixed_idx = args.fixed ? 0 : -1;
 	int count = 1 + args.extra_loops;
-	bool multishot = args.multishot;
+
+	if (args.multishot) {
+		queue_accept_multishot(ring, fd, INITIAL_USER_DATA, args.fixed);
+		return;
+	}
 
 	while (count--) {
 		sqe = io_uring_get_sqe(ring);
 		if (fixed_idx < 0) {
-			if (!multishot)
-				io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
-			else
-				io_uring_prep_multishot_accept(sqe, fd, NULL,
-							       NULL, 0);
+			io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
 		} else {
-			if (!multishot)
-				io_uring_prep_accept_direct(sqe, fd, NULL, NULL,
-							    0, fixed_idx);
-			else
-				io_uring_prep_multishot_accept_direct(sqe, fd,
-								      NULL, NULL,
-								      0);
+			io_uring_prep_accept_direct(sqe, fd, NULL, NULL,
+						    0, fixed_idx);
 		}
-
 		ret = io_uring_submit(ring);
 		assert(ret != -1);
 	}
 }
 
-static int accept_conn(struct io_uring *ring, int fixed_idx, bool multishot)
+static int accept_conn(struct io_uring *ring, int fixed_idx, int *multishot, int fd)
 {
-	struct io_uring_cqe *cqe;
+	struct io_uring_cqe *pcqe;
+	struct io_uring_cqe cqe;
 	int ret;
 
-	ret = io_uring_wait_cqe(ring, &cqe);
-	assert(!ret);
-	ret = cqe->res;
-	io_uring_cqe_seen(ring, cqe);
+	do {
+		ret = io_uring_wait_cqe(ring, &pcqe);
+		assert(!ret);
+		cqe = *pcqe;
+		io_uring_cqe_seen(ring, pcqe);
+	} while (cqe.user_data == NOP_USER_DATA);
+
+	if (*multishot) {
+		if (!(cqe.flags & IORING_CQE_F_MORE)) {
+			(*multishot)++;
+			queue_accept_multishot(ring, fd, *multishot, fixed_idx == 0);
+		} else {
+			if (cqe.user_data != *multishot) {
+				fprintf(stderr, "received multishot after told done!\n");
+				return -ECANCELED;
+			}
+		}
+	}
+
+	ret = cqe.res;
 
 	if (fixed_idx >= 0) {
 		if (ret > 0) {
@@ -161,11 +198,9 @@
 		addr = &laddr;
 
 	addr->sin_family = AF_INET;
-	addr->sin_port = htons(0x1235 + port_off);
 	addr->sin_addr.s_addr = inet_addr("127.0.0.1");
-
-	ret = bind(fd, (struct sockaddr*)addr, sizeof(*addr));
-	assert(ret != -1);
+	ret = t_bind_ephemeral_port(fd, addr);
+	assert(!ret);
 	ret = listen(fd, 128);
 	assert(ret != -1);
 
@@ -203,6 +238,32 @@
 	return fd;
 }
 
+static void cause_overflow(struct io_uring *ring)
+{
+	int i, ret;
+
+	for (i = 0; i < ring->cq.ring_entries; i++) {
+		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+
+		io_uring_prep_nop(sqe);
+		io_uring_sqe_set_data64(sqe, NOP_USER_DATA);
+		ret = io_uring_submit(ring);
+		assert(ret != -1);
+	}
+
+}
+
+static void clear_overflow(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+
+	while (!io_uring_peek_cqe(ring, &cqe)) {
+		if (cqe->user_data != NOP_USER_DATA)
+			break;
+		io_uring_cqe_seen(ring, cqe);
+	}
+}
+
 static int test_loop(struct io_uring *ring,
 		     struct accept_test_args args,
 		     int recv_s0,
@@ -215,15 +276,25 @@
 	bool multishot = args.multishot;
 	uint32_t multishot_mask = 0;
 	int nr_fds = multishot ? MAX_FDS : 1;
+	int multishot_idx = multishot ? INITIAL_USER_DATA : 0;
+	int err_ret = T_EXIT_FAIL;
+	void* send_d = 0;
+	void* recv_d = 0;
 
-	for (i = 0; i < nr_fds; i++)
+	if (args.overflow)
+		cause_overflow(ring);
+
+	for (i = 0; i < nr_fds; i++) {
 		c_fd[i] = set_client_fd(addr);
+		if (args.overflow && i == nr_fds / 2)
+			clear_overflow(ring);
+	}
 
 	if (!args.queue_accept_before_connect)
 		queue_accept_conn(ring, recv_s0, args);
 
 	for (i = 0; i < nr_fds; i++) {
-		s_fd[i] = accept_conn(ring, fixed ? 0 : -1, multishot);
+		s_fd[i] = accept_conn(ring, fixed ? 0 : -1, &multishot_idx, recv_s0);
 		if (s_fd[i] == -EINVAL) {
 			if (args.accept_should_error)
 				goto out;
@@ -235,6 +306,7 @@
 				no_accept_multi = 1;
 			else
 				no_accept = 1;
+			ret = T_EXIT_SKIP;
 			goto out;
 		} else if (s_fd[i] < 0) {
 			if (args.accept_should_error &&
@@ -245,6 +317,9 @@
 				multishot ? "Multishot" : "",
 				i, s_fd[i]);
 			goto err;
+		} else if (s_fd[i] == 195 && args.overflow) {
+			fprintf(stderr, "Broken overflow handling\n");
+			goto err;
 		}
 
 		if (multishot && fixed) {
@@ -271,8 +346,8 @@
 		goto out;
 	}
 
-	queue_send(ring, c_fd[0]);
-	queue_recv(ring, s_fd[0], fixed);
+	send_d = queue_send(ring, c_fd[0]);
+	recv_d = queue_recv(ring, s_fd[0], fixed);
 
 	ret = io_uring_submit_and_wait(ring, 2);
 	assert(ret != -1);
@@ -296,11 +371,15 @@
 	}
 
 out:
+	free(send_d);
+	free(recv_d);
 	close_sock_fds(s_fd, c_fd, nr_fds, fixed);
-	return 0;
+	return T_EXIT_PASS;
 err:
+	free(send_d);
+	free(recv_d);
 	close_sock_fds(s_fd, c_fd, nr_fds, fixed);
-	return 1;
+	return err_ret;
 }
 
 static int test(struct io_uring *ring, struct accept_test_args args)
@@ -309,7 +388,7 @@
 	int ret = 0;
 	int loop;
 	int32_t recv_s0 = start_accept_listen(&addr, 0,
-					      args.nonblock ? O_NONBLOCK : 0);
+					      args.nonblock ? SOCK_NONBLOCK : 0);
 	if (args.queue_accept_before_connect)
 		queue_accept_conn(ring, recv_s0, args);
 	for (loop = 0; loop < 1 + args.extra_loops; loop++) {
@@ -362,7 +441,7 @@
 };
 
 /*
- * Test issue many accepts and see if we handle cancellation on exit
+ * Test issue many accepts and see if we handle cancelation on exit
  */
 static int test_accept_many(struct test_accept_many_args args)
 {
@@ -395,7 +474,7 @@
 
 	for (i = 0; i < nr_socks; i++)
 		fds[i] = start_accept_listen(NULL, i,
-					     args.nonblock ? O_NONBLOCK : 0);
+					     args.nonblock ? SOCK_NONBLOCK : 0);
 
 	for (i = 0; i < nr; i++) {
 		int sock_idx = args.single_sock ? 0 : i;
@@ -417,7 +496,7 @@
 		if (io_uring_peek_cqe(&m_io_uring, &cqe))
 			break;
 		if (cqe->res != -ECANCELED) {
-			fprintf(stderr, "Expected cqe to be cancelled %d\n", cqe->res);
+			fprintf(stderr, "Expected cqe to be canceled %d\n", cqe->res);
 			ret = 1;
 			goto out;
 		}
@@ -444,7 +523,7 @@
 	int fd, i, ret;
 
 	if (multishot && no_accept_multi)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(32, &m_io_uring, 0);
 	assert(ret >= 0);
@@ -489,6 +568,9 @@
 			fprintf(stderr, "unexpected 0 user data\n");
 			goto err;
 		} else if (cqe->user_data <= nr) {
+			/* no multishot */
+			if (cqe->res == -EINVAL)
+				return T_EXIT_SKIP;
 			if (cqe->res != -EINTR && cqe->res != -ECANCELED) {
 				fprintf(stderr, "Cancelled accept got %d\n", cqe->res);
 				goto err;
@@ -527,18 +609,19 @@
 	return ret;
 }
 
-static int test_multishot_accept(int count, bool before)
+static int test_multishot_accept(int count, bool before, bool overflow)
 {
 	struct io_uring m_io_uring;
 	int ret;
 	struct accept_test_args args = {
 		.queue_accept_before_connect = before,
 		.multishot = true,
-		.extra_loops = count - 1
+		.extra_loops = count - 1,
+		.overflow = overflow
 	};
 
 	if (no_accept_multi)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(MAX_FDS + 10, &m_io_uring, 0);
 	assert(ret >= 0);
@@ -547,7 +630,7 @@
 	return ret;
 }
 
-static int test_accept_multishot_wrong_arg()
+static int test_accept_multishot_wrong_arg(void)
 {
 	struct io_uring m_io_uring;
 	struct io_uring_cqe *cqe;
@@ -612,7 +695,12 @@
 	ret = io_uring_queue_init(32, &m_io_uring, 0);
 	assert(ret >= 0);
 	ret = io_uring_register_files(&m_io_uring, &fd, 1);
-	assert(ret == 0);
+	if (ret) {
+		/* kernel doesn't support sparse registered files, skip */
+		if (ret == -EBADF || ret == -EINVAL)
+			return T_EXIT_SKIP;
+		return T_EXIT_FAIL;
+	}
 	ret = test(&m_io_uring, args);
 	io_uring_queue_exit(&m_io_uring);
 	return ret;
@@ -628,13 +716,18 @@
 	};
 
 	if (no_accept_multi)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(fd, -1, sizeof(fd));
 	ret = io_uring_queue_init(MAX_FDS + 10, &m_io_uring, 0);
 	assert(ret >= 0);
 	ret = io_uring_register_files(&m_io_uring, fd, MAX_FDS);
-	assert(ret == 0);
+	if (ret) {
+		/* kernel doesn't support sparse registered files, skip */
+		if (ret == -EBADF || ret == -EINVAL)
+			return T_EXIT_SKIP;
+		return T_EXIT_FAIL;
+	}
 	ret = test(&m_io_uring, args);
 	io_uring_queue_exit(&m_io_uring);
 	return ret;
@@ -668,145 +761,152 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
+
 	ret = test_accept(1, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept failed\n");
 		return ret;
 	}
 	if (no_accept)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = test_accept(2, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept(2) failed\n");
 		return ret;
 	}
 
 	ret = test_accept(2, true);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept(2, true) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_nonblock(false, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_nonblock failed\n");
 		return ret;
 	}
 
 	ret = test_accept_nonblock(true, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_nonblock(before, 1) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_nonblock(true, 3);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_nonblock(before,3) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_fixed();
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_fixed failed\n");
 		return ret;
 	}
 
 	ret = test_multishot_fixed_accept();
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_multishot_fixed_accept failed\n");
 		return ret;
 	}
 
 	ret = test_accept_multishot_wrong_arg();
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_multishot_wrong_arg failed\n");
 		return ret;
 	}
 
 	ret = test_accept_sqpoll();
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_sqpoll failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(0, 1, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel nodelay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(10000, 1, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel delay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(0, 4, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel nodelay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(10000, 4, false);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel delay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(0, 1, true);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel multishot nodelay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(10000, 1, true);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel multishot delay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(0, 4, true);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel multishot nodelay failed\n");
 		return ret;
 	}
 
 	ret = test_accept_cancel(10000, 4, true);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_cancel multishot delay failed\n");
 		return ret;
 	}
 
-	ret = test_multishot_accept(1, false);
-	if (ret) {
-		fprintf(stderr, "test_multishot_accept(1, false) failed\n");
+	ret = test_multishot_accept(1, true, true);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_multishot_accept(1, false, true) failed\n");
 		return ret;
 	}
 
-	ret = test_multishot_accept(1, true);
-	if (ret) {
-		fprintf(stderr, "test_multishot_accept(1, true) failed\n");
+	ret = test_multishot_accept(1, false, false);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_multishot_accept(1, false, false) failed\n");
+		return ret;
+	}
+
+	ret = test_multishot_accept(1, true, false);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_multishot_accept(1, true, false) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_many((struct test_accept_many_args) {});
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_many failed\n");
 		return ret;
 	}
 
 	ret = test_accept_many((struct test_accept_many_args) {
 				.usecs = 100000 });
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_many(sleep) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_many((struct test_accept_many_args) {
 				.nonblock = true });
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_many(nonblock) failed\n");
 		return ret;
 	}
@@ -815,15 +915,15 @@
 				.nonblock = true,
 				.single_sock = true,
 				.close_fds = true });
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_many(nonblock,close) failed\n");
 		return ret;
 	}
 
 	ret = test_accept_pending_on_exit();
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_accept_pending_on_exit failed\n");
 		return ret;
 	}
-	return 0;
+	return T_EXIT_PASS;
 }
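
The accept.c rework above funnels all multishot arming through queue_accept_multishot() and makes accept_conn() re-arm whenever a CQE comes back without IORING_CQE_F_MORE set, which is exactly what a CQ overflow provokes. A condensed sketch of that pattern, assuming listen_fd is a listening socket and leaving out SQ-full and error handling:

#include "liburing.h"

static void arm_multishot_accept(struct io_uring *ring, int listen_fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	/* one SQE, potentially many CQEs: one per accepted connection */
	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
	io_uring_sqe_set_data64(sqe, 1);
	io_uring_submit(ring);
}

static int reap_one_conn(struct io_uring *ring, int listen_fd)
{
	struct io_uring_cqe *cqe;
	int conn_fd;

	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	conn_fd = cqe->res;	/* accepted fd, or -errno */
	/* F_MORE cleared: the multishot state is gone, re-arm it */
	if (!(cqe->flags & IORING_CQE_F_MORE))
		arm_multishot_accept(ring, listen_fd);
	io_uring_cqe_seen(ring, cqe);
	return conn_fd;
}
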
diff --git a/test/across-fork.c b/test/across-fork.c
index 009fe52..08dec13 100644
--- a/test/across-fork.c
+++ b/test/across-fork.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 
 struct forktestmem
@@ -141,13 +142,13 @@
 	pid_t p;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	shmem = mmap(0, sizeof(struct forktestmem), PROT_READ|PROT_WRITE,
 		   MAP_SHARED | MAP_ANONYMOUS, 0, 0);
 	if (!shmem) {
 		fprintf(stderr, "mmap failed\n");
-		exit(1);
+		exit(T_EXIT_FAIL);
 	}
 
 	pthread_barrierattr_init(&shmem->barrierattr);
@@ -157,12 +158,12 @@
 	ret = io_uring_queue_init(10, &shmem->ring, 0);
 	if (ret < 0) {
 		fprintf(stderr, "queue init failed\n");
-		exit(1);
+		exit(T_EXIT_FAIL);
 	}
 
 	if (mkdtemp(tmpdir) == NULL) {
 		fprintf(stderr, "temp directory creation failed\n");
-		exit(1);
+		exit(T_EXIT_FAIL);
 	}
 
 	shared_fd = open_tempfile(tmpdir, "shared");
@@ -275,9 +276,9 @@
 		goto errcleanup;
 
 	cleanup(tmpdir);
-	exit(0);
+	exit(T_EXIT_PASS);
 
 errcleanup:
 	cleanup(tmpdir);
-	exit(1);
+	exit(T_EXIT_FAIL);
 }
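
For context on the file above: across-fork works by putting the struct io_uring itself, plus a process-shared barrier, into anonymous shared memory before fork(), so both processes drive one ring. A trimmed sketch of that setup, with error handling and the actual I/O omitted:

#include <pthread.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include "liburing.h"

struct shared {
	pthread_barrier_t barrier;
	pthread_barrierattr_t attr;
	struct io_uring ring;
};

int main(void)
{
	struct shared *shmem;

	shmem = mmap(NULL, sizeof(*shmem), PROT_READ | PROT_WRITE,
		     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (shmem == MAP_FAILED)
		return 1;

	/* the barrier must be marked process-shared to work across fork */
	pthread_barrierattr_init(&shmem->attr);
	pthread_barrierattr_setpshared(&shmem->attr, PTHREAD_PROCESS_SHARED);
	pthread_barrier_init(&shmem->barrier, &shmem->attr, 2);

	if (io_uring_queue_init(10, &shmem->ring, 0) < 0)
		return 1;

	if (fork() == 0) {
		/* child: rendezvous, then submit via shmem->ring */
		pthread_barrier_wait(&shmem->barrier);
		_exit(0);
	}
	pthread_barrier_wait(&shmem->barrier);
	wait(NULL);
	io_uring_queue_exit(&shmem->ring);
	return 0;
}
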
diff --git a/test/b19062a56726.c b/test/b19062a56726.c
index 6a0f686..dba563c 100644
--- a/test/b19062a56726.c
+++ b/test/b19062a56726.c
@@ -11,14 +11,16 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
+#ifndef CONFIG_USE_SANITIZER
 int main(int argc, char *argv[])
 {
   if (argc > 1)
-    return 0;
+    return T_EXIT_SKIP;
 
-  mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+  mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);
 
   *(uint32_t*)0x20000200 = 0;
   *(uint32_t*)0x20000204 = 0;
@@ -49,5 +51,11 @@
   *(uint32_t*)0x2000026c = 0;
   *(uint64_t*)0x20000270 = 0;
   __sys_io_uring_setup(0xc9f, (struct io_uring_params *) 0x20000200);
-  return 0;
+  return T_EXIT_PASS;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
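
The change above is less about the reproducer itself than the guard around it: syzkaller reproducers poke raw fixed addresses, which address sanitizer would (rightly) trap, so the whole body is compiled out and the test skips. CONFIG_USE_SANITIZER is assumed here to be defined by the build when sanitizers are enabled; the shape of the guard is:

#include "helpers.h"

#ifndef CONFIG_USE_SANITIZER
int main(int argc, char *argv[])
{
	/* reproducer body: raw fixed-address memory pokes go here */
	return T_EXIT_PASS;
}
#else
int main(int argc, char *argv[])
{
	return T_EXIT_SKIP;
}
#endif
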
diff --git a/test/b5837bd5311d.c b/test/b5837bd5311d.c
index 57a2b58..0d0b4c0 100644
--- a/test/b5837bd5311d.c
+++ b/test/b5837bd5311d.c
@@ -4,6 +4,7 @@
  */
 #include <stdio.h>
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -17,11 +18,11 @@
 	};
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	if (io_uring_queue_init(4, &ring, 0) != 0) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	/*
@@ -31,14 +32,14 @@
 	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	io_uring_prep_timeout(sqe, &ts, (unsigned)-1, 0);
 
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "Got submit %d, expected 1\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	/*
@@ -50,28 +51,28 @@
 	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	io_uring_prep_nop(sqe);
 
 	ret = io_uring_submit_and_wait(&ring, 2);
 	if (ret != 1) {
 		fprintf(stderr, "Got submit %d, expected 1\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (io_uring_peek_cqe(&ring, &cqe) != 0) {
 		fprintf(stderr, "Unable to peek cqe!\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_cqe_seen(&ring, cqe);
 
 	if (io_uring_peek_cqe(&ring, &cqe) != 0) {
 		fprintf(stderr, "Unable to peek cqe!\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 }
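
The test above leans on IORING_OP_TIMEOUT semantics: the request completes when either the timespec expires or the given count of other CQEs has posted, so a huge count turns it into a parked request. A standalone sketch using the io_uring_prep_timeout(sqe, ts, count, flags) prototype seen in the hunk:

#include <stdio.h>
#include "liburing.h"

int main(void)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;

	if (io_uring_queue_init(4, &ring, 0) != 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	/* a count of (unsigned)-1 will never be reached, so ts decides */
	io_uring_prep_timeout(sqe, &ts, (unsigned)-1, 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		/* an expired timeout completes with -ETIME */
		printf("timeout completed: %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}
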
diff --git a/test/bind-listen.c b/test/bind-listen.c
new file mode 100644
index 0000000..6f80f17
--- /dev/null
+++ b/test/bind-listen.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Configure and operate a TCP socket solely with io_uring.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <liburing.h>
+#include <err.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <netinet/ip.h>
+#include "liburing.h"
+#include "helpers.h"
+
+static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec)
+{
+	ts->tv_sec = msec / 1000;
+	ts->tv_nsec = (msec % 1000) * 1000000;
+}
+
+static const char *magic = "Hello World!";
+static int use_port = 8000;
+
+enum {
+	SRV_INDEX = 0,
+	CLI_INDEX,
+	CONN_INDEX,
+};
+
+static int connect_client(struct io_uring *ring, unsigned short peer_port)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int head, ret, submitted = 0;
+	struct sockaddr_in peer_addr;
+	socklen_t addr_len = sizeof(peer_addr);
+
+	peer_addr.sin_family = AF_INET;
+	peer_addr.sin_port = peer_port;
+	peer_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_socket_direct(sqe, AF_INET, SOCK_STREAM, 0,
+				    CLI_INDEX, 0);
+	sqe->flags |= IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_connect(sqe, CLI_INDEX, (struct sockaddr*) &peer_addr, addr_len);
+	sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send(sqe, CLI_INDEX, magic, strlen(magic), 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	submitted = ret = io_uring_submit(ring);
+	if (ret < 0)
+		return T_SETUP_SKIP;
+
+	msec_to_ts(&ts, 300);
+	ret = io_uring_wait_cqes(ring, &cqe, submitted, &ts, NULL);
+	if (ret < 0)
+		return T_SETUP_SKIP;
+
+	io_uring_for_each_cqe(ring, head, cqe) {
+		ret = cqe->res;
+		if (ret < 0)
+			return T_SETUP_SKIP;
+	} io_uring_cq_advance(ring, submitted);
+
+	return T_SETUP_OK;
+}
+
+static int setup_srv(struct io_uring *ring, struct sockaddr_in *server_addr)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct __kernel_timespec ts;
+	int ret, val, submitted;
+	unsigned head;
+
+	memset(server_addr, 0, sizeof(struct sockaddr_in));
+	server_addr->sin_family = AF_INET;
+	server_addr->sin_port = htons(use_port++);
+	server_addr->sin_addr.s_addr = htonl(INADDR_ANY);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_socket_direct(sqe, AF_INET, SOCK_STREAM, 0, SRV_INDEX, 0);
+	sqe->flags |= IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	val = 1;
+	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SETSOCKOPT, 0, SOL_SOCKET,
+			       SO_REUSEADDR, &val, sizeof(val));
+	sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_bind(sqe, SRV_INDEX, (struct sockaddr *) server_addr,
+			   sizeof(struct sockaddr_in));
+	sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_listen(sqe, SRV_INDEX, 1);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	submitted = ret = io_uring_submit(ring);
+	if (ret < 0) {
+		fprintf(stderr, "submission failed. %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	msec_to_ts(&ts, 300);
+	ret = io_uring_wait_cqes(ring, &cqe, ret, &ts, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "submission failed. %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_for_each_cqe(ring, head, cqe) {
+		ret = cqe->res;
+		if (ret < 0) {
+			fprintf(stderr, "Server startup failed. step %d got %d \n", head, ret);
+			return T_EXIT_FAIL;
+		}
+	} io_uring_cq_advance(ring, submitted);
+
+	return T_SETUP_OK;
+}
+
+static int test_good_server(unsigned int ring_flags)
+{
+	struct sockaddr_in server_addr;
+	struct __kernel_timespec ts;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret;
+	int fds[3];
+	char buf[1024];
+
+	memset(fds, -1, sizeof(fds));
+
+	ret = t_create_ring(10, &ring, ring_flags | IORING_SETUP_SUBMIT_ALL);
+	if (ret < 0) {
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+		return T_SETUP_SKIP;
+	}
+
+	ret = io_uring_register_files(&ring, fds, 3);
+	if (ret) {
+		fprintf(stderr, "server file register %d\n", ret);
+		return T_SETUP_SKIP;
+	}
+
+	ret = setup_srv(&ring, &server_addr);
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "srv startup failed.\n");
+		return T_EXIT_FAIL;
+	}
+
+	if (connect_client(&ring, server_addr.sin_port) != T_SETUP_OK) {
+		fprintf(stderr, "cli startup failed.\n");
+		return T_SETUP_SKIP;
+	}
+
+	/* Wait for a request */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_accept_direct(sqe, SRV_INDEX, NULL, NULL, 0, CONN_INDEX);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	io_uring_submit(&ring);
+	io_uring_wait_cqe(&ring, &cqe);
+	if (cqe->res < 0) {
+		fprintf(stderr, "accept failed. %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_recv(sqe, CONN_INDEX, buf, sizeof(buf), 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+
+	io_uring_submit(&ring);
+	io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
+
+	if (cqe->res < 0) {
+		fprintf(stderr, "bad receive cqe. %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+
+	io_uring_queue_exit(&ring);
+
+	if (ret != strlen(magic) || strncmp(buf, magic, ret)) {
+		fprintf(stderr, "didn't receive expected string. Got %d '%s'\n", ret, buf);
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int test_bad_bind(void)
+{
+	struct sockaddr_in server_addr;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int sock = -1, err;
+	int ret = T_EXIT_FAIL;
+
+	memset(&server_addr, 0, sizeof(struct sockaddr_in));
+	server_addr.sin_family = AF_INET;
+	server_addr.sin_port = htons(9001);
+	server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	err = t_create_ring(1, &ring, 0);
+	if (err < 0) {
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+		return T_SETUP_SKIP;
+	}
+
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0) {
+		perror("socket");
+		goto fail;
+	}
+
+	/* Bind with size 0 */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_bind(sqe, sock, (struct sockaddr *) &server_addr, 0);
+	err = io_uring_submit(&ring);
+	if (err < 0)
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (err)
+		goto fail;
+
+	if (cqe->res != -EINVAL)
+		goto fail;
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* Bind with bad fd */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_bind(sqe, 0, (struct sockaddr *) &server_addr,  sizeof(struct sockaddr_in));
+	err = io_uring_submit(&ring);
+	if (err < 0)
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (err)
+		goto fail;
+	if (cqe->res != -ENOTSOCK)
+		goto fail;
+	io_uring_cqe_seen(&ring, cqe);
+
+	ret = T_EXIT_PASS;
+
+	/* bind with weird value */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_bind(sqe, sock, (struct sockaddr *) &server_addr,  sizeof(struct sockaddr_in));
+	sqe->rw_flags = 1;
+	err = io_uring_submit(&ring);
+	if (err < 0)
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (err)
+		goto fail;
+	if (cqe->res != -EINVAL)
+		goto fail;
+	io_uring_cqe_seen(&ring, cqe);
+
+	ret = T_EXIT_PASS;
+
+fail:
+	io_uring_queue_exit(&ring);
+	if (sock != -1)
+		close(sock);
+	return ret;
+}
+
+static int test_bad_listen(void)
+{
+	struct sockaddr_in server_addr;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int sock = -1, err;
+	int ret = T_EXIT_FAIL;
+
+	memset(&server_addr, 0, sizeof(struct sockaddr_in));
+	server_addr.sin_family = AF_INET;
+	server_addr.sin_port = htons(8001);
+	server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	err = t_create_ring(1, &ring, 0);
+	if (err < 0) {
+		fprintf(stderr, "queue_init: %d\n", err);
+		return T_SETUP_SKIP;
+	}
+
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock < 0) {
+		perror("socket");
+		goto fail;
+	}
+
+	err = t_bind_ephemeral_port(sock, &server_addr);
+	if (err) {
+		fprintf(stderr, "bind: %s\n", strerror(-err));
+		goto fail;
+	}
+
+	/* listen on bad sock */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_listen(sqe, 0, 1);
+	err = io_uring_submit(&ring);
+	if (err < 0)
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (err)
+		goto fail;
+
+	if (cqe->res != -ENOTSOCK)
+		goto fail;
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* listen with weird parameters */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_listen(sqe, sock, 1);
+	sqe->addr2 = 0xffffff;
+	err = io_uring_submit(&ring);
+	if (err < 0)
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (err)
+		goto fail;
+
+	if (cqe->res != -EINVAL)
+		goto fail;
+	io_uring_cqe_seen(&ring, cqe);
+
+	ret = T_EXIT_PASS;
+fail:
+	io_uring_queue_exit(&ring);
+	if (sock != -1)
+		close(sock);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_probe *probe;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	/*
+	 * This test is not supported on older kernels. Check for
+	 * OP_LISTEN, since that is the last feature required to support
+	 * it.
+	 */
+	probe = io_uring_get_probe();
+	if (!probe)
+		return T_EXIT_SKIP;
+	if (!io_uring_opcode_supported(probe, IORING_OP_LISTEN))
+		return T_EXIT_SKIP;
+
+	ret = test_good_server(0);
+	if (ret) {
+		fprintf(stderr, "good 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_good_server(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "good defer failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_good_server(IORING_SETUP_SQPOLL);
+	if (ret) {
+		fprintf(stderr, "good sqpoll failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_bad_bind();
+	if (ret) {
+		fprintf(stderr, "bad bind failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_bad_listen();
+	if (ret) {
+		fprintf(stderr, "bad listen failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
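
bind-listen.c is new with this release and exercises the liburing-2.8 bind/listen opcodes, chaining the whole server bring-up with IOSQE_IO_LINK so the steps run in order and a failure cancels the rest. A condensed sketch of just the link chain, assuming a plain (non-direct) socket fd and the io_uring_prep_bind()/io_uring_prep_listen() prototypes used above:

#include <netinet/in.h>
#include "liburing.h"

static int bring_up_listener(struct io_uring *ring, int sock,
			     struct sockaddr_in *addr)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret = 0;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_bind(sqe, sock, (struct sockaddr *) addr, sizeof(*addr));
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_listen(sqe, sock, 1);

	if (io_uring_submit(ring) != 2)
		return -1;

	for (i = 0; i < 2; i++) {
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		/* if bind fails, the linked listen completes with -ECANCELED */
		if (cqe->res < 0)
			ret = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	return ret;
}
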
diff --git a/test/buf-ring-nommap.c b/test/buf-ring-nommap.c
new file mode 100644
index 0000000..a4396e8
--- /dev/null
+++ b/test/buf-ring-nommap.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test IOU_PBUF_RING_MMAP with a ring setup without
+ *		mmap'ing sq/cq arrays
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int bgid = 5;
+static int bid = 89;
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	size_t ring_size;
+	int ret, ring_mask, fds[2];
+	struct io_uring_buf_reg reg = {
+		.ring_entries = 1,
+		.bgid = bgid,
+		.flags = IOU_PBUF_RING_MMAP,
+	};
+	struct io_uring_params p = { };
+	void *ring_mem;
+	char buf[32];
+	off_t off;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	if (posix_memalign(&ring_mem, 16384, 16384))
+		return T_EXIT_FAIL;
+
+	memset(ring_mem, 0, 16384);
+
+	p.flags = IORING_SETUP_NO_MMAP;
+	ret = io_uring_queue_init_mem(1, &ring, &p, ring_mem, 16384);
+	if (ret < 0) {
+		if (ret == -EINVAL || ret == -ENOMEM) {
+			free(ring_mem);
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "queue init failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ring_size = sizeof(struct io_uring_buf);
+	ring_mask = io_uring_buf_ring_mask(1);
+
+	ret = io_uring_register_buf_ring(&ring, &reg, 0);
+	if (ret) {
+		if (ret == -EINVAL) {
+			free(ring_mem);
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "reg buf ring: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	off = IORING_OFF_PBUF_RING |
+			(unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
+	br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, ring.ring_fd, off);
+	if (br == MAP_FAILED) {
+		if (errno == ENOMEM) {
+			free(ring_mem);
+			return T_EXIT_SKIP;
+		}
+		perror("mmap");
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_buf_ring_add(br, buf, sizeof(buf), bid, ring_mask, 0);
+	io_uring_buf_ring_advance(br, 1);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fds[0], NULL, 0, 0);
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = bgid;
+
+	io_uring_submit(&ring);
+
+	ret = write(fds[1], "Hello", 5);
+	if (ret < 0) {
+		perror("write");
+		return T_EXIT_FAIL;
+	} else if (ret != 5) {
+		fprintf(stderr, "short write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+		fprintf(stderr, "buffer not selected in cqe\n");
+		return T_EXIT_FAIL;
+	}
+	if ((cqe->flags >> IORING_CQE_BUFFER_SHIFT) != bid) {
+		fprintf(stderr, "wrong buffer id returned\n");
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(&ring, cqe);
+
+	io_uring_queue_exit(&ring);
+	free(ring_mem);
+	return T_EXIT_PASS;
+}
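
The key piece of buf-ring-nommap.c is the mmap offset arithmetic: with IOU_PBUF_RING_MMAP the kernel allocates the buffer ring, and userspace maps it through the ring fd at an offset that encodes the buffer group id. Pulled out as a helper for clarity, assuming the group was already registered on an initialized ring:

#include <sys/mman.h>
#include "liburing.h"

static struct io_uring_buf_ring *map_kernel_buf_ring(struct io_uring *ring,
						     int bgid, unsigned entries)
{
	size_t len = entries * sizeof(struct io_uring_buf);
	/* IORING_OFF_PBUF_RING tags the mapping, the bgid selects the group */
	off_t off = IORING_OFF_PBUF_RING |
		(unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
	void *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_SHARED | MAP_POPULATE, ring->ring_fd, off);
	return p == MAP_FAILED ? NULL : p;
}
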
diff --git a/test/buf-ring-put.c b/test/buf-ring-put.c
new file mode 100644
index 0000000..9480c73
--- /dev/null
+++ b/test/buf-ring-put.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test persistence of mmap'ed provided ring buffers. Use a range
+ *		of buffer group IDs that puts us into both the lower end array
+ *		and higher end xarray.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BGID_START	60
+#define BGID_NR		10
+#define ENTRIES		512
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_buf_ring *br[BGID_NR];
+	struct io_uring ring;
+	size_t ring_size;
+	int ret, i, j;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ring_size = ENTRIES * sizeof(struct io_uring_buf);
+
+	for (i = 0; i < BGID_NR; i++) {
+		int bgid = BGID_START + i;
+		struct io_uring_buf_reg reg = {
+			.ring_entries = ENTRIES,
+			.bgid = bgid,
+			.flags = IOU_PBUF_RING_MMAP,
+		};
+		off_t off;
+
+		ret = io_uring_register_buf_ring(&ring, &reg, 0);
+		if (ret) {
+			if (ret == -EINVAL)
+				return T_EXIT_SKIP;
+			fprintf(stderr, "reg buf ring: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		off = IORING_OFF_PBUF_RING |
+			(unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
+		br[i] = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, ring.ring_fd, off);
+		if (br[i] == MAP_FAILED) {
+			perror("mmap");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	for (i = 0; i < BGID_NR; i++) {
+		ret = io_uring_unregister_buf_ring(&ring, BGID_START + i);
+		if (ret) {
+			fprintf(stderr, "reg buf ring: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	for (j = 0; j < 1000; j++) {
+		for (i = 0; i < BGID_NR; i++)
+			memset(br[i], 0x5a, ring_size);
+		usleep(1000);
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
diff --git a/test/buf-ring.c b/test/buf-ring.c
index 3d12ef6..924b5df 100644
--- a/test/buf-ring.c
+++ b/test/buf-ring.c
@@ -9,20 +9,22 @@
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
+#include <sys/mman.h>
 
 #include "liburing.h"
 #include "helpers.h"
 
 static int no_buf_ring;
+static int pagesize;
 
 /* test trying to register classic group when ring group exists */
 static int test_mixed_reg2(int bgid)
 {
-	struct io_uring_buf_reg reg = { };
+	struct io_uring_buf_ring *br;
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
 	struct io_uring ring;
-	void *ptr, *bufs;
+	void *bufs;
 	int ret;
 
 	ret = t_create_ring(1, &ring, 0);
@@ -31,15 +33,8 @@
 	else if (ret != T_SETUP_OK)
 		return 1;
 
-	if (posix_memalign(&ptr, 4096, 4096))
-		return 1;
-
-	reg.ring_addr = (unsigned long) ptr;
-	reg.ring_entries = 32;
-	reg.bgid = bgid;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret) {
+	br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret);
+	if (!br) {
 		fprintf(stderr, "Buffer ring register failed %d\n", ret);
 		return 1;
 	}
@@ -60,18 +55,20 @@
 	}
 	io_uring_cqe_seen(&ring, cqe);
 
+	io_uring_free_buf_ring(&ring, br, 32, bgid);
 	io_uring_queue_exit(&ring);
+	free(bufs);
 	return 0;
 }
 
 /* test trying to register ring group when  classic group exists */
 static int test_mixed_reg(int bgid)
 {
-	struct io_uring_buf_reg reg = { };
+	struct io_uring_buf_ring *br;
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
 	struct io_uring ring;
-	void *ptr, *bufs;
+	void *bufs;
 	int ret;
 
 	ret = t_create_ring(1, &ring, 0);
@@ -96,28 +93,22 @@
 	}
 	io_uring_cqe_seen(&ring, cqe);
 
-	if (posix_memalign(&ptr, 4096, 4096))
-		return 1;
-
-	reg.ring_addr = (unsigned long) ptr;
-	reg.ring_entries = 32;
-	reg.bgid = bgid;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret != -EEXIST) {
-		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+	br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret);
+	if (br) {
+		fprintf(stderr, "Buffer ring setup succeeded unexpectedly %d\n", ret);
 		return 1;
 	}
 
 	io_uring_queue_exit(&ring);
+	free(bufs);
 	return 0;
 }
 
 static int test_double_reg_unreg(int bgid)
 {
 	struct io_uring_buf_reg reg = { };
+	struct io_uring_buf_ring *br;
 	struct io_uring ring;
-	void *ptr;
 	int ret;
 
 	ret = t_create_ring(1, &ring, 0);
@@ -126,21 +117,14 @@
 	else if (ret != T_SETUP_OK)
 		return 1;
 
-	if (posix_memalign(&ptr, 4096, 4096))
-		return 1;
-
-	reg.ring_addr = (unsigned long) ptr;
-	reg.ring_entries = 32;
-	reg.bgid = bgid;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret) {
+	br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret);
+	if (!br) {
 		fprintf(stderr, "Buffer ring register failed %d\n", ret);
 		return 1;
 	}
 
 	/* check that 2nd register with same bgid fails */
-	reg.ring_addr = (unsigned long) ptr;
+	reg.ring_addr = (unsigned long) br;
 	reg.ring_entries = 32;
 	reg.bgid = bgid;
 
@@ -150,7 +134,7 @@
 		return 1;
 	}
 
-	ret = io_uring_unregister_buf_ring(&ring, bgid);
+	ret = io_uring_free_buf_ring(&ring, br, 32, bgid);
 	if (ret) {
 		fprintf(stderr, "Buffer ring register failed %d\n", ret);
 		return 1;
@@ -168,9 +152,8 @@
 
 static int test_reg_unreg(int bgid)
 {
-	struct io_uring_buf_reg reg = { };
+	struct io_uring_buf_ring *br;
 	struct io_uring ring;
-	void *ptr;
 	int ret;
 
 	ret = t_create_ring(1, &ring, 0);
@@ -179,15 +162,8 @@
 	else if (ret != T_SETUP_OK)
 		return 1;
 
-	if (posix_memalign(&ptr, 4096, 4096))
-		return 1;
-
-	reg.ring_addr = (unsigned long) ptr;
-	reg.ring_entries = 32;
-	reg.bgid = bgid;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret) {
+	br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret);
+	if (!br) {
 		if (ret == -EINVAL) {
 			no_buf_ring = 1;
 			return 0;
@@ -196,9 +172,9 @@
 		return 1;
 	}
 
-	ret = io_uring_unregister_buf_ring(&ring, bgid);
+	ret = io_uring_free_buf_ring(&ring, br, 32, bgid);
 	if (ret) {
-		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		fprintf(stderr, "Buffer ring unregister failed %d\n", ret);
 		return 1;
 	}
 
@@ -206,6 +182,78 @@
 	return 0;
 }
 
+static int test_bad_reg(int bgid)
+{
+	struct io_uring ring;
+	int ret;
+	struct io_uring_buf_reg reg = { };
+
+	ret = t_create_ring(1, &ring, 0);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	else if (ret != T_SETUP_OK)
+		return 1;
+
+	reg.ring_addr = 4096;
+	reg.ring_entries = 32;
+	reg.bgid = bgid;
+
+	ret = io_uring_register_buf_ring(&ring, &reg, 0);
+	if (!ret)
+		fprintf(stderr, "Buffer ring register worked unexpectedly\n");
+
+	io_uring_queue_exit(&ring);
+	return !ret;
+}
+
+static int test_full_page_reg(int bgid)
+{
+#if defined(__hppa__)
+	return T_EXIT_SKIP;
+#else
+	struct io_uring ring;
+	int ret;
+	void *ptr;
+	struct io_uring_buf_reg reg = { };
+	int entries = pagesize / sizeof(struct io_uring_buf);
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = posix_memalign(&ptr, pagesize, pagesize * 2);
+	if (ret) {
+		fprintf(stderr, "posix_memalign failed %d\n", ret);
+		goto err;
+	}
+
+	ret = mprotect(ptr + pagesize, pagesize, PROT_NONE);
+	if (ret) {
+		fprintf(stderr, "mprotect failed %d\n", errno);
+		goto err1;
+	}
+
+	reg.ring_addr = (unsigned long) ptr;
+	reg.ring_entries = entries;
+	reg.bgid = bgid;
+
+	ret = io_uring_register_buf_ring(&ring, &reg, 0);
+	if (ret)
+		fprintf(stderr, "register buf ring failed %d\n", ret);
+
+	if (mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_WRITE))
+		fprintf(stderr, "reverting mprotect failed %d\n", errno);
+
+err1:
+	free(ptr);
+err:
+	io_uring_queue_exit(&ring);
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+#endif
+}
+
 static int test_one_read(int fd, int bgid, struct io_uring *ring)
 {
 	int ret;
@@ -246,51 +294,63 @@
 	return cqe->flags >> 16;
 }
 
-static int test_running(int bgid, int entries, int loops)
+static int test_running(int bgid, int entries, int loops, int use_mmap)
 {
-	struct io_uring_buf_reg reg = { };
-	struct io_uring ring;
-	void *ptr;
-	char buffer[8];
-	int ret;
-	int ring_size = (entries * sizeof(struct io_uring_buf) + 4095) & (~4095);
 	int ring_mask = io_uring_buf_ring_mask(entries);
-
-	int loop, idx;
-	bool *buffers;
 	struct io_uring_buf_ring *br;
-	int read_fd;
+	int ret, loop, idx, read_fd;
+	struct io_uring ring;
+	char buffer[8];
+	bool *buffers;
 
 	ret = t_create_ring(1, &ring, 0);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret != T_SETUP_OK)
-		return 1;
+		return T_EXIT_FAIL;
 
-	if (posix_memalign(&ptr, 4096, ring_size))
-		return 1;
+	if (!use_mmap) {
+		br = io_uring_setup_buf_ring(&ring, entries, bgid, 0, &ret);
+		if (!br) {
+			/* by now should have checked if this is supported or not */
+			fprintf(stderr, "Buffer ring register failed %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	} else {
+		struct io_uring_buf_reg reg = {
+			.ring_entries = entries,
+			.bgid = bgid,
+			.flags = IOU_PBUF_RING_MMAP,
+		};
+		size_t ring_size;
+		off_t off;
 
-	br = (struct io_uring_buf_ring *)ptr;
-	io_uring_buf_ring_init(br);
+		ret = io_uring_register_buf_ring(&ring, &reg, 0);
+		if (ret) {
+			if (ret == -EINVAL)
+				return T_EXIT_SKIP;
+			fprintf(stderr, "mmap ring register failed %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		off = IORING_OFF_PBUF_RING |
+			(unsigned long long) bgid << IORING_OFF_PBUF_SHIFT;
+		ring_size = sizeof(struct io_uring_buf) * entries;
+		br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, ring.ring_fd, off);
+		if (br == MAP_FAILED) {
+			perror("mmap");
+			return T_EXIT_FAIL;
+		}
+	}
 
 	buffers = malloc(sizeof(bool) * entries);
 	if (!buffers)
-		return 1;
+		return T_EXIT_SKIP;
 
 	read_fd = open("/dev/zero", O_RDONLY);
 	if (read_fd < 0)
-		return 1;
-
-	reg.ring_addr = (unsigned long) ptr;
-	reg.ring_entries = entries;
-	reg.bgid = bgid;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret) {
-		/* by now should have checked if this is supported or not */
-		fprintf(stderr, "Buffer ring register failed %d\n", ret);
-		return 1;
-	}
+		return T_EXIT_SKIP;
 
 	for (loop = 0; loop < loops; loop++) {
 		memset(buffers, 0, sizeof(bool) * entries);
@@ -303,28 +363,28 @@
 			ret = test_one_read(read_fd, bgid, &ring);
 			if (ret < 0) {
 				fprintf(stderr, "bad run %d/%d = %d\n", loop, idx, ret);
-				return ret;
+				return T_EXIT_FAIL;
 			}
 			if (buffers[ret]) {
 				fprintf(stderr, "reused buffer %d/%d = %d!\n", loop, idx, ret);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			if (buffer[0] != 0) {
 				fprintf(stderr, "unexpected read %d %d/%d = %d!\n",
 						(int)buffer[0], loop, idx, ret);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			if (buffer[1] != 1) {
 				fprintf(stderr, "unexpected spilled read %d %d/%d = %d!\n",
 						(int)buffer[1], loop, idx, ret);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			buffers[ret] = true;
 		}
 		ret = test_one_read(read_fd, bgid, &ring);
 		if (ret != -ENOBUFS) {
 			fprintf(stderr, "expected enobufs run %d = %d\n", loop, ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 	}
@@ -332,13 +392,13 @@
 	ret = io_uring_unregister_buf_ring(&ring, bgid);
 	if (ret) {
 		fprintf(stderr, "Buffer ring register failed %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	close(read_fd);
 	io_uring_queue_exit(&ring);
 	free(buffers);
-	return 0;
+	return T_EXIT_PASS;
 }
 
 int main(int argc, char *argv[])
@@ -348,43 +408,68 @@
 	int ret, i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
+
+	pagesize = getpagesize();
 
 	for (i = 0; bgids[i] != -1; i++) {
 		ret = test_reg_unreg(bgids[i]);
 		if (ret) {
 			fprintf(stderr, "test_reg_unreg failed\n");
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		if (no_buf_ring)
 			break;
 
+		ret = test_bad_reg(bgids[i]);
+		if (ret) {
+			fprintf(stderr, "test_bad_reg failed\n");
+			return T_EXIT_FAIL;
+		}
+
 		ret = test_double_reg_unreg(bgids[i]);
 		if (ret) {
 			fprintf(stderr, "test_double_reg_unreg failed\n");
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		ret = test_mixed_reg(bgids[i]);
 		if (ret) {
 			fprintf(stderr, "test_mixed_reg failed\n");
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		ret = test_mixed_reg2(bgids[i]);
 		if (ret) {
 			fprintf(stderr, "test_mixed_reg2 failed\n");
-			return 1;
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_full_page_reg(bgids[i]);
+		if (ret == T_EXIT_FAIL) {
+			fprintf(stderr, "test_full_page_reg failed\n");
+			return T_EXIT_FAIL;
 		}
 	}
 
 	for (i = 0; !no_buf_ring && entries[i] != -1; i++) {
-		ret = test_running(2, entries[i], 3);
+		ret = test_running(2, entries[i], 3, 0);
 		if (ret) {
 			fprintf(stderr, "test_running(%d) failed\n", entries[i]);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 	}
 
-	return 0;
+	for (i = 0; !no_buf_ring && entries[i] != -1; i++) {
+		ret = test_running(2, entries[i], 3, 1);
+		if (ret == T_EXIT_SKIP) {
+			break;
+		} else if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "test_running(%d) mmap failed\n", entries[i]);
+			return T_EXIT_FAIL;
+		}
+	}
+
+
+	return T_EXIT_PASS;
 }
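
The buf-ring.c conversion above swaps manual posix_memalign() plus io_uring_register_buf_ring() for the io_uring_setup_buf_ring()/io_uring_free_buf_ring() helpers, which allocate, map and register in one call. A minimal provided-buffer read using that pair; /dev/zero as the data source and a single-entry ring are choices made for brevity:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include "liburing.h"

int main(void)
{
	struct io_uring_buf_ring *br;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;
	char buf[64];
	int ret, fd, bgid = 7;

	if (io_uring_queue_init(1, &ring, 0) != 0)
		return 1;
	br = io_uring_setup_buf_ring(&ring, 1, bgid, 0, &ret);
	if (!br)
		return 1;

	/* publish one buffer (id 0) into group bgid */
	io_uring_buf_ring_add(br, buf, sizeof(buf), 0,
			      io_uring_buf_ring_mask(1), 0);
	io_uring_buf_ring_advance(br, 1);

	fd = open("/dev/zero", O_RDONLY);
	sqe = io_uring_get_sqe(&ring);
	/* NULL buffer: the kernel picks one from the group instead */
	io_uring_prep_read(sqe, fd, NULL, sizeof(buf), 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = bgid;
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;

		printf("read %d bytes into buffer %d\n", cqe->res, bid);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_free_buf_ring(&ring, br, 1, bgid);
	close(fd);
	io_uring_queue_exit(&ring);
	return 0;
}
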
diff --git a/test/ce593a6c480a.c b/test/ce593a6c480a.c
index 47de128..9fa74a9 100644
--- a/test/ce593a6c480a.c
+++ b/test/ce593a6c480a.c
@@ -15,7 +15,7 @@
 
 static int use_sqpoll = 0;
 
-void notify_fd(int fd)
+static void notify_fd(int fd)
 {
 	char buf[8] = {0, 0, 0, 0, 0, 0, 1};
 	int ret;
@@ -25,7 +25,7 @@
 		perror("write");
 }
 
-void *delay_set_fd_from_thread(void *data)
+static void *delay_set_fd_from_thread(void *data)
 {
 	int fd = (intptr_t) data;
 
@@ -46,7 +46,7 @@
 	pthread_t tid;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	/* Create an eventfd to be registered with the loop to be
 	 * notified of events being ready
@@ -54,14 +54,14 @@
 	loop_fd = eventfd(0, EFD_CLOEXEC);
 	if (loop_fd == -1) {
 		fprintf(stderr, "eventfd errno=%d\n", errno);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	/* Create an eventfd that can create events */
 	use_fd = other_fd = eventfd(0, EFD_CLOEXEC);
 	if (other_fd == -1) {
 		fprintf(stderr, "eventfd errno=%d\n", errno);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (use_sqpoll)
@@ -70,21 +70,21 @@
 	/* Setup the ring with a registered event fd to be notified on events */
 	ret = t_create_ring_params(8, &ring, &p);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_PASS;
 	else if (ret < 0)
 		return ret;
 
 	ret = io_uring_register_eventfd(&ring, loop_fd);
 	if (ret < 0) {
 		fprintf(stderr, "register_eventfd=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (use_sqpoll) {
 		ret = io_uring_register_files(&ring, &other_fd, 1);
 		if (ret < 0) {
 			fprintf(stderr, "register_files=%d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		use_fd = 0;
 	}
@@ -98,7 +98,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	/*
@@ -111,13 +111,16 @@
 			(void*) (intptr_t) other_fd);
 
 	/* Wait on the event fd for an event to be ready */
-	ret = read(loop_fd, buf, 8);
+	do {
+		ret = read(loop_fd, buf, 8);
+	} while (ret < 0 && errno == EINTR);
+
 	if (ret < 0) {
 		perror("read");
-		return 1;
+		return T_EXIT_FAIL;
 	} else if (ret != 8) {
 		fprintf(stderr, "Odd-sized eventfd read: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 
@@ -128,9 +131,9 @@
 	}
 	if (cqe->res < 0) {
 		fprintf(stderr, "cqe->res=%d\n", cqe->res);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_cqe_seen(&ring, cqe);
-	return 0;
+	return T_EXIT_PASS;
 }
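
The functional change in this file is the EINTR retry around the eventfd read; signals delivered to the test can interrupt the blocking read, and a bare failure there is a false negative. The idiom on its own, independent of any ring:

#include <errno.h>
#include <stdint.h>
#include <unistd.h>

static int read_event(int efd, uint64_t *val)
{
	ssize_t ret;

	/* restart the read if a signal interrupts it */
	do {
		ret = read(efd, val, sizeof(*val));
	} while (ret < 0 && errno == EINTR);

	return ret == sizeof(*val) ? 0 : -1;
}
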
diff --git a/test/close-opath.c b/test/close-opath.c
index f267dad..5199a69 100644
--- a/test/close-opath.c
+++ b/test/close-opath.c
@@ -15,12 +15,13 @@
 
 #include <errno.h>
 #include <fcntl.h>
-#include <liburing.h>
 #include <sys/stat.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 
+#include "liburing.h"
+
 typedef struct
 {
 	const char *const flnames;
diff --git a/test/cmd-discard.c b/test/cmd-discard.c
new file mode 100644
index 0000000..eddb5b7
--- /dev/null
+++ b/test/cmd-discard.c
@@ -0,0 +1,425 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define MAX_TEST_LBAS		1024
+
+static const char *filename;
+struct opcode {
+	int op;
+	bool test;
+	bool not_supported;
+};
+
+#define TEST_BLOCK_URING_CMD_MAX		3
+
+static struct opcode opcodes[TEST_BLOCK_URING_CMD_MAX] = {
+	{ .op = BLOCK_URING_CMD_DISCARD, .test = true, },
+	{ .test = false, },
+	{ .test = false, },
+};
+
+static int lba_size;
+static uint64_t bdev_size;
+static uint64_t bdev_size_lbas;
+static char *buffer;
+
+static void prep_blk_cmd(struct io_uring_sqe *sqe, int fd,
+			 uint64_t from, uint64_t len,
+			 int cmd_op)
+{
+	assert(cmd_op == BLOCK_URING_CMD_DISCARD);
+
+	io_uring_prep_cmd_discard(sqe, fd, from, len);
+}
+
+static int queue_cmd_range(struct io_uring *ring, int bdev_fd,
+			   uint64_t from, uint64_t len, int cmd_op)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int err;
+
+	sqe = io_uring_get_sqe(ring);
+	assert(sqe != NULL);
+	prep_blk_cmd(sqe, bdev_fd, from, len, cmd_op);
+
+	err = io_uring_submit_and_wait(ring, 1);
+	if (err != 1) {
+		fprintf(stderr, "io_uring_submit_and_wait failed %d\n", err);
+		exit(1);
+	}
+
+	err = io_uring_wait_cqe(ring, &cqe);
+	if (err) {
+		fprintf(stderr, "io_uring_wait_cqe failed %d (op %i)\n",
+				err, cmd_op);
+		exit(1);
+	}
+
+	err = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return err;
+}
+
+static int queue_cmd_lba(struct io_uring *ring, int bdev_fd,
+			 uint64_t from, uint64_t nr_lba, int cmd_op)
+{
+	return queue_cmd_range(ring, bdev_fd, from * lba_size,
+				nr_lba * lba_size, cmd_op);
+}
+
+static int queue_discard_lba(struct io_uring *ring, int bdev_fd,
+			     uint64_t from, uint64_t nr_lba)
+{
+	return queue_cmd_lba(ring, bdev_fd, from, nr_lba,
+				BLOCK_URING_CMD_DISCARD);
+}
+
+static int test_parallel(struct io_uring *ring, int fd, int cmd_op)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int inflight = 0;
+	int max_inflight = 16;
+	int left = 1000;
+	int ret;
+
+	while (left || inflight) {
+		int queued = 0;
+		unsigned head, nr_cqes = 0;
+		int lba_len = 8;
+
+		while (inflight < max_inflight && left) {
+			int off = rand() % (MAX_TEST_LBAS - lba_len);
+			sqe = io_uring_get_sqe(ring);
+			assert(sqe != NULL);
+
+			prep_blk_cmd(sqe, fd, off * lba_size,
+				     lba_len * lba_size, cmd_op);
+			if (rand() & 1)
+				sqe->flags |= IOSQE_ASYNC;
+
+			queued++;
+			left--;
+			inflight++;
+		}
+		if (queued) {
+			ret = io_uring_submit(ring);
+			if (ret != queued) {
+				fprintf(stderr, "io_uring_submit failed %d\n", ret);
+				return T_EXIT_FAIL;
+			}
+		}
+
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "io_uring_wait_cqe failed %d\n", ret);
+			exit(1);
+		}
+
+		io_uring_for_each_cqe(ring, head, cqe) {
+			nr_cqes++;
+			inflight--;
+			if (cqe->res != 0) {
+				fprintf(stderr, "cmd %i failed %i\n", cmd_op,
+						cqe->res);
+				return T_EXIT_FAIL;
+			}
+		}
+		io_uring_cq_advance(ring, nr_cqes);
+	}
+
+	return 0;
+}
+
+static int cmd_issue_verify(struct io_uring *ring, int fd, int lba, int len,
+			    int cmd_op)
+{
+	int verify = (cmd_op != BLOCK_URING_CMD_DISCARD);
+	int ret, i;
+	ssize_t res;
+
+	if (verify) {
+		for (i = 0; i < len; i++) {
+			size_t off = (i + lba) * lba_size;
+
+			res = pwrite(fd, buffer, lba_size, off);
+			if (res == -1) {
+				fprintf(stderr, "pwrite failed\n");
+				return T_EXIT_FAIL;
+			}
+		}
+	}
+
+	ret = queue_cmd_lba(ring, fd, lba, len, cmd_op);
+	if (ret) {
+		if (ret == -EINVAL || ret == -EOPNOTSUPP)
+			return T_EXIT_SKIP;
+
+		fprintf(stderr, "cmd_issue_verify %i fail lba %i len %i  ret %i\n",
+				cmd_op, lba, len, ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (verify) {
+		for (i = 0; i < len; i++) {
+			size_t off = (i + lba) * lba_size;
+
+			res = pread(fd, buffer, lba_size, off);
+			if (res == -1) {
+				fprintf(stderr, "pread failed\n");
+				return T_EXIT_FAIL;
+			}
+			if (!memchr(buffer, 0, lba_size)) {
+				fprintf(stderr, "mem cmp failed, lba %i\n", lba + i);
+				return T_EXIT_FAIL;
+			}
+		}
+	}
+	return 0;
+}
+
+static int basic_cmd_test(struct io_uring *ring, int op)
+{
+	int cmd_op = opcodes[op].op;
+	int ret, fd;
+
+	if (!opcodes[op].test)
+		return T_EXIT_SKIP;
+
+	fd = open(filename, O_DIRECT | O_RDWR | O_EXCL);
+	if (fd < 0) {
+		fprintf(stderr, "open failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ret = cmd_issue_verify(ring, fd, 0, 1, cmd_op);
+	if (ret == T_EXIT_SKIP) {
+		printf("cmd %i not supported, skip\n", cmd_op);
+		opcodes[op].not_supported = 1;
+		close(fd);
+		return T_EXIT_SKIP;
+	} else if (ret) {
+		fprintf(stderr, "cmd %i fail 0 1\n", cmd_op);
+		return T_EXIT_FAIL;
+	}
+
+	ret = cmd_issue_verify(ring, fd, 7, 15, cmd_op);
+	if (ret) {
+		fprintf(stderr, "cmd %i fail 7 15 %i\n", cmd_op, ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = cmd_issue_verify(ring, fd, 1, MAX_TEST_LBAS - 1, cmd_op);
+	if (ret) {
+		fprintf(stderr, "large cmd %i failed %i\n", cmd_op, ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_parallel(ring, fd, cmd_op);
+	if (ret) {
+		fprintf(stderr, "test_parallel() %i failed %i\n", cmd_op, ret);
+		return T_EXIT_FAIL;
+	}
+
+	close(fd);
+	return 0;
+}
+
+static int test_fail_edge_cases(struct io_uring *ring, int op)
+{
+	int cmd_op = opcodes[op].op;
+	int ret, fd;
+
+	if (!opcodes[op].test)
+		return T_EXIT_SKIP;
+
+	fd = open(filename, O_DIRECT | O_RDWR | O_EXCL);
+	if (fd < 0) {
+		fprintf(stderr, "open failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ret = queue_cmd_lba(ring, fd, bdev_size_lbas, 1, cmd_op);
+	if (ret >= 0) {
+		fprintf(stderr, "cmd %i beyond capacity %i\n",
+				cmd_op, ret);
+		return 1;
+	}
+
+	ret = queue_cmd_lba(ring, fd, bdev_size_lbas - 1, 2, cmd_op);
+	if (ret >= 0) {
+		fprintf(stderr, "cmd %i beyond capacity with overlap %i\n",
+				cmd_op, ret);
+		return 1;
+	}
+
+	ret = queue_cmd_range(ring, fd, (uint64_t)-lba_size, lba_size + 2,
+			      cmd_op);
+	if (ret >= 0) {
+		fprintf(stderr, "cmd %i range overflow %i\n",
+				cmd_op, ret);
+		return 1;
+	}
+
+	ret = queue_cmd_range(ring, fd, lba_size / 2, lba_size, cmd_op);
+	if (ret >= 0) {
+		fprintf(stderr, "cmd %i unaligned offset %i\n",
+				cmd_op, ret);
+		return 1;
+	}
+
+	ret = queue_cmd_range(ring, fd, 0, lba_size / 2, cmd_op);
+	if (ret >= 0) {
+		fprintf(stderr, "cmd %i unaligned size %i\n",
+				cmd_op, ret);
+		return 1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+static int test_rdonly(struct io_uring *ring, int op)
+{
+	int ret, fd;
+	int ro;
+
+	if (!opcodes[op].test)
+		return T_EXIT_SKIP;
+
+	fd = open(filename, O_DIRECT | O_RDONLY | O_EXCL);
+	if (fd < 0) {
+		fprintf(stderr, "open failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ret = queue_discard_lba(ring, fd, 0, 1);
+	if (ret >= 0) {
+		fprintf(stderr, "discarded with O_RDONLY %i\n", ret);
+		return 1;
+	}
+	close(fd);
+
+	fd = open(filename, O_DIRECT | O_RDWR | O_EXCL);
+	if (fd < 0) {
+		fprintf(stderr, "open failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ro = 1;
+	ret = ioctl(fd, BLKROSET, &ro);
+	if (ret) {
+		fprintf(stderr, "BLKROSET 1 failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ret = queue_discard_lba(ring, fd, 0, 1);
+	if (ret >= 0) {
+		fprintf(stderr, "discarded with O_RDONLY %i\n", ret);
+		return 1;
+	}
+
+	ro = 0;
+	ret = ioctl(fd, BLKROSET, &ro);
+	if (ret) {
+		fprintf(stderr, "BLKROSET 0 failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+	close(fd);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int fd, ret, i, fret;
+	int cmd_op;
+
+	if (argc != 2)
+		return T_EXIT_SKIP;
+	filename = argv[1];
+
+	fd = open(filename, O_DIRECT | O_RDONLY | O_EXCL);
+	if (fd < 0) {
+		fprintf(stderr, "open failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	ret = ioctl(fd, BLKGETSIZE64, &bdev_size);
+	if (ret < 0) {
+		fprintf(stderr, "BLKGETSIZE64 failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+	ret = ioctl(fd, BLKSSZGET, &lba_size);
+	if (ret < 0) {
+		fprintf(stderr, "BLKSSZGET failed %i\n", errno);
+		return T_EXIT_FAIL;
+	}
+	assert(bdev_size % lba_size == 0);
+	bdev_size_lbas = bdev_size / lba_size;
+	close(fd);
+
+	buffer = aligned_alloc(lba_size, lba_size);
+	if (!buffer) {
+		fprintf(stderr, "aligned_alloc failed\n");
+		return T_EXIT_FAIL;
+	}
+	for (i = 0; i < lba_size; i++)
+		buffer[i] = i ^ 0xA7;
+
+	if (bdev_size_lbas < MAX_TEST_LBAS) {
+		fprintf(stderr, "the device is too small, skip\n");
+		return T_EXIT_SKIP;
+	}
+
+	ret = io_uring_queue_init(16, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	fret = T_EXIT_SKIP;
+	for (cmd_op = 0; cmd_op < TEST_BLOCK_URING_CMD_MAX; cmd_op++) {
+		if (!opcodes[cmd_op].test)
+			continue;
+		ret = basic_cmd_test(&ring, cmd_op);
+		if (ret) {
+			if (ret == T_EXIT_SKIP)
+				continue;
+
+			fprintf(stderr, "basic_cmd_test() failed, cmd %i\n",
+					cmd_op);
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_rdonly(&ring, cmd_op);
+		if (ret) {
+			fprintf(stderr, "test_rdonly() failed, cmd %i\n",
+					cmd_op);
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_fail_edge_cases(&ring, cmd_op);
+		if (ret) {
+			fprintf(stderr, "test_fail_edge_cases() failed, cmd %i\n",
+					cmd_op);
+			return T_EXIT_FAIL;
+		}
+		fret = T_EXIT_PASS;
+	}
+
+	io_uring_queue_exit(&ring);
+	free(buffer);
+	return fret;
+}
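
cmd-discard.c is new in 2.8 and drives BLOCK_URING_CMD_DISCARD through io_uring_prep_cmd_discard(sqe, fd, offset, nbytes). Reduced to a single helper, assuming a block device fd opened for writing and byte offsets aligned to the logical block size, as the edge-case tests above demand:

#include <stdint.h>
#include "liburing.h"

static int discard_range(struct io_uring *ring, int bdev_fd,
			 uint64_t offset, uint64_t nbytes)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_cmd_discard(sqe, bdev_fd, offset, nbytes);

	if (io_uring_submit(ring) != 1)
		return -1;
	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	/* 0 on success; -EOPNOTSUPP if the device can't discard */
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
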
diff --git a/test/config b/test/config
index 6c0925a..29da086 100644
--- a/test/config
+++ b/test/config
@@ -1,5 +1,9 @@
 # Copy this to config.local, uncomment and define values
 #
+# NOTE: any files or devices added here will be used by tests that take
+# a file or device argument. This includes tests that are destructive with
+# respect to data contents. They may get erased or overwritten as part of tests.
+#
 # Define tests to exclude from running
 # TEST_EXCLUDE=""
 #
diff --git a/test/connect-rep.c b/test/connect-rep.c
new file mode 100644
index 0000000..431df96
--- /dev/null
+++ b/test/connect-rep.c
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Check that repeated IORING_OP_CONNECT to a socket without a listener keeps
+ * yielding -ECONNREFUSED rather than -ECONNABORTED. Based on a reproducer
+ * from:
+ *
+ * https://github.com/axboe/liburing/issues/828
+ *
+ * and adapted to our usual test cases. Other changes were made, like looping,
+ * using different ring types, adding a memset() for reuse, etc.
+ *
+ */
+#include <stdio.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static unsigned long ud;
+
+static int init_test_server(struct sockaddr_in *serv_addr)
+{
+	socklen_t servaddr_len = sizeof(struct sockaddr_in);
+	int fd;
+
+	/* Init server socket. Bind but don't listen */
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		perror("socket");
+		return -1;
+	}
+
+	serv_addr->sin_family = AF_INET;
+	serv_addr->sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	if (bind(fd, (struct sockaddr *) serv_addr, servaddr_len) < 0) {
+		perror("bind");
+		return -1;
+	}
+
+	/*
+	 * Get the address the socket is bound to because the port is chosen
+	 * by the network stack.
+	 */
+	if (getsockname(fd, (struct sockaddr *)serv_addr, &servaddr_len) < 0) {
+		perror("getsockname");
+		return -1;
+	}
+
+	return fd;
+}
+
+static int init_test_client(void)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_in);
+	struct sockaddr_in client_addr = {};
+	int clientfd;
+
+	clientfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (clientfd < 0) {
+		perror("socket");
+		return -1;
+	}
+
+	client_addr.sin_family = AF_INET;
+	client_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	if (bind(clientfd, (struct sockaddr *)&client_addr, addr_len) < 0) {
+		perror("bind");
+		close(clientfd);
+		return -1;
+	}
+
+	/*
+	 * Get the address the socket is bound to because the port is chosen
+	 * by the network stack.
+	 */
+	if (getsockname(clientfd, (struct sockaddr *)&client_addr, &addr_len) < 0) {
+		perror("getsockname");
+		close(clientfd);
+		return -1;
+	}
+
+	return clientfd;
+}
+
+static int get_completion_and_print(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int ret, res;
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait_cqe=%d\n", ret);
+		return -1;
+	}
+
+	/* Mark this completion as seen */
+	res = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return res;
+}
+
+static int test_connect(struct io_uring *ring,
+			int clientfd, struct sockaddr_in *serv_addr)
+{
+	struct sockaddr_in local_sa;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	/* connect via a private copy of the address so it can be clobbered */
+	memcpy(&local_sa, serv_addr, sizeof(local_sa));
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_connect(sqe, clientfd, (const struct sockaddr *)&local_sa,
+				sizeof(struct sockaddr_in));
+	sqe->user_data = ++ud;
+
+	ret = io_uring_submit_and_wait(ring, 1);
+	if (ret != 1) {
+		fprintf(stderr, "submit=%d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* clobber the copy to check that the address was grabbed at submit */
+	memset(&local_sa, 0xff, sizeof(local_sa));
+
+	ret = get_completion_and_print(ring);
+	if (ret != -ECONNREFUSED) {
+		fprintf(stderr, "Connect got %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	return T_EXIT_PASS;
+}
+
+static int test(int flags)
+{
+	struct io_uring_params params = { .flags = flags, };
+	struct sockaddr_in serv_addr = {};
+	struct io_uring ring;
+	int ret, clientfd, s_fd, i;
+
+	if (flags & IORING_SETUP_SQPOLL)
+		params.sq_thread_idle = 50;
+
+	ret = io_uring_queue_init_params(8, &ring, &params);
+	if (ret < 0) {
+		fprintf(stderr, "Queue init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	s_fd = init_test_server(&serv_addr);
+	if (s_fd < 0)
+		return T_EXIT_FAIL;
+
+	clientfd = init_test_client();
+	if (clientfd < 0) {
+		close(s_fd);
+		return T_EXIT_FAIL;
+	}
+
+	/* make sure SQPOLL thread is sleeping */
+	if (flags & IORING_SETUP_SQPOLL)
+		usleep(100000);
+
+	for (i = 0; i < 32; i++) {
+		ret = test_connect(&ring, clientfd, &serv_addr);
+		if (ret == T_EXIT_SKIP)
+			return T_EXIT_SKIP;
+		else if (ret == T_EXIT_PASS)
+			continue;
+
+		return T_EXIT_FAIL;
+	}
+
+	close(s_fd);
+	close(clientfd);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test(0) failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SQPOLL);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test(SQPOLL) failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return 0;
+}
diff --git a/test/connect.c b/test/connect.c
index 3ae10de..71f0e39 100644
--- a/test/connect.c
+++ b/test/connect.c
@@ -15,8 +15,10 @@
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <arpa/inet.h>
+#include <sys/stat.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int no_connect;
 static unsigned short use_port;
@@ -131,7 +133,7 @@
 	return ret;
 }
 
-static int connect_socket(struct io_uring *ring, int fd, int *code)
+static int connect_socket(struct io_uring *ring, int fd, int *code, int async)
 {
 	struct sockaddr_in addr;
 	int ret, res;
@@ -148,6 +150,8 @@
 	}
 
 	io_uring_prep_connect(sqe, fd, (struct sockaddr*)&addr, sizeof(addr));
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
 	sqe->user_data = 1;
 
 	ret = submit_and_wait(ring, &res);
@@ -184,7 +188,7 @@
 	if (connect_fd == -1)
 		return -1;
 
-	ret = connect_socket(ring, connect_fd, &code);
+	ret = connect_socket(ring, connect_fd, &code, 0);
 	if (ret == -1)
 		goto err;
 
@@ -207,7 +211,7 @@
 	return -1;
 }
 
-static int test_connect(struct io_uring *ring)
+static int test_connect(struct io_uring *ring, int async)
 {
 	int accept_fd;
 	int connect_fd;
@@ -225,7 +229,7 @@
 	if (connect_fd == -1)
 		goto err1;
 
-	ret = connect_socket(ring, connect_fd, &code);
+	ret = connect_socket(ring, connect_fd, &code, async);
 	if (ret == -1)
 		goto err2;
 
@@ -255,6 +259,13 @@
 	struct sockaddr_in addr;
 	struct io_uring_sqe *sqe;
 	struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 100000};
+	struct stat sb;
+
+	/*
+	 * Test reliably fails if syncookies isn't enabled
+	 */
+	if (stat("/proc/sys/net/ipv4/tcp_syncookies", &sb) < 0)
+		return T_EXIT_SKIP;
 
 	connect_fd[0] = create_socket();
 	if (connect_fd[0] == -1)
@@ -287,7 +298,7 @@
 	}
 
 	// We first connect with one client socket in order to fill the accept queue.
-	ret = connect_socket(ring, connect_fd[0], &code);
+	ret = connect_socket(ring, connect_fd[0], &code, 0);
 	if (ret == -1 || code != 0) {
 		fprintf(stderr, "unable to connect\n");
 		goto err;
@@ -354,18 +365,15 @@
 	return -1;
 }
 
-int main(int argc, char *argv[])
+static int test(int flags)
 {
 	struct io_uring ring;
 	int ret;
 
-	if (argc > 1)
-		return 0;
-
-	ret = io_uring_queue_init(8, &ring, 0);
+	ret = io_uring_queue_init(8, &ring, flags);
 	if (ret) {
 		fprintf(stderr, "io_uring_queue_setup() = %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	srand(getpid());
@@ -376,23 +384,59 @@
 	ret = test_connect_with_no_peer(&ring);
 	if (ret == -1) {
 		fprintf(stderr, "test_connect_with_no_peer(): failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	if (no_connect)
-		return 0;
+		return T_EXIT_SKIP;
 
-	ret = test_connect(&ring);
+	ret = test_connect(&ring, 0);
 	if (ret == -1) {
 		fprintf(stderr, "test_connect(): failed\n");
-		return 1;
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_connect(&ring, 1);
+	if (ret == -1) {
+		fprintf(stderr, "test_connect(): failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_connect_timeout(&ring);
 	if (ret == -1) {
 		fprintf(stderr, "test_connect_timeout(): failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_connect)
+		return T_EXIT_SKIP;
+
+	ret = test(IORING_SETUP_SQPOLL);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test SQPOLL failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test DEFER failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
 }
diff --git a/test/coredump.c b/test/coredump.c
new file mode 100644
index 0000000..9abc7a3
--- /dev/null
+++ b/test/coredump.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: trigger segfault. A recent 6.4-rc kernel introduced a bug
+ *		via vhost where segfaults for applications using io_uring
+ *		would hang in D state forever upon trying to generate the
+ *		core file. Perform a trivial test where a child process
+ *		generates a NULL pointer dereference and ensure that we don't
+ *		hang.
+ *
+ */
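+
+/*
+ * In outline, the child parks an async read on a pipe that never sees any
+ * data, then faults, so core dump generation has to deal with the pending
+ * io_uring request:
+ *
+ *	io_uring_prep_read(sqe, fds[0], &r1, sizeof(r1), 0);
+ *	sqe->flags = IOSQE_ASYNC;
+ *	io_uring_submit(&ring);
+ *	*ptr = 0;	// NULL dereference -> SIGSEGV with request in flight
+ */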
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#ifndef CONFIG_USE_SANITIZER
+static void test(void)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int *ptr = NULL;
+	int fds[2];
+	char r1;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		exit(0);
+	}
+
+	io_uring_queue_init(8, &ring, 0);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fds[0], &r1, sizeof(r1), 0);
+	sqe->flags = IOSQE_ASYNC;
+	sqe->user_data = 1;
+
+	io_uring_submit(&ring);
+	*ptr = 0;
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	pid_t pid;
+	int wstat;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		return T_EXIT_SKIP;
+	} else if (!pid) {
+		test();
+	}
+
+	wait(&wstat);
+	unlink("core");
+	return T_EXIT_PASS;
+}
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/cq-full.c b/test/cq-full.c
index 5c4041b..9f4a065 100644
--- a/test/cq-full.c
+++ b/test/cq-full.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int queue_n_nops(struct io_uring *ring, int n)
 {
@@ -49,13 +50,13 @@
 	int i, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(&p, 0, sizeof(p));
 	ret = io_uring_queue_init_params(4, &ring, &p);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
@@ -89,8 +90,8 @@
 	}
 
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/cq-overflow.c b/test/cq-overflow.c
index 057570e..c2a6848 100644
--- a/test/cq-overflow.c
+++ b/test/cq-overflow.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
+#include <assert.h>
 
 #include "helpers.h"
 #include "liburing.h"
@@ -21,7 +22,34 @@
 
 #define ENTRIES	8
 
-static int test_io(const char *file, unsigned long usecs, unsigned *drops, int fault)
+/*
+ * io_uring has rare cases where CQEs are lost.
+ * This happens when there is no space in the CQ ring, and also there is no
+ * GFP_ATOMIC memory available. In reality this probably means that the process
+ * is about to be killed as many other things might start failing, but we still
+ * want to test that liburing and the kernel deal with this properly. The fault
+ * injection framework allows us to test this scenario. Unfortunately this
+ * requires some system wide changes and so we do not enable this by default.
+ * The tests in this file should work in both cases (where overflows are queued
+ * and where they are dropped) on recent kernels.
+ *
+ * In order to test dropped CQEs you should enable fault injection in the kernel
+ * config:
+ *
+ * CONFIG_FAULT_INJECTION=y
+ * CONFIG_FAILSLAB=y
+ * CONFIG_FAULT_INJECTION_DEBUG_FS=y
+ *
+ * and then run the test as follows:
+ * echo Y > /sys/kernel/debug/failslab/task-filter
+ * echo 100 > /sys/kernel/debug/failslab/probability
+ * echo 0 > /sys/kernel/debug/failslab/verbose
+ * echo 100000 > /sys/kernel/debug/failslab/times
+ * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
+ */
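+
+/*
+ * The liburing-visible symptom of a drop is io_uring_wait_cqe() returning
+ * -EBADR. A consumer that wants to survive lost completions needs a pattern
+ * like this sketch (as used throughout this file):
+ *
+ *	ret = io_uring_wait_cqe(ring, &cqe);
+ *	if (ret == -EBADR) {
+ *		cqe_dropped = true;	// some CQEs are gone for good
+ *		continue;		// keep reaping whatever is left
+ *	}
+ */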
+
+static int test_io(const char *file, unsigned long usecs, unsigned *drops,
+		   int fault)
 {
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
@@ -29,18 +57,22 @@
 	unsigned reaped, total;
 	struct io_uring ring;
 	int nodrop, i, fd, ret;
+	bool cqe_dropped = false;
 
 	fd = open(file, O_RDONLY | O_DIRECT);
 	if (fd < 0) {
+		if (errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("file open");
-		goto err;
+		return T_EXIT_FAIL;
 	}
 
 	memset(&p, 0, sizeof(p));
 	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
 	if (ret) {
+		close(fd);
 		fprintf(stderr, "ring create failed: %d\n", ret);
-		goto err;
+		return T_EXIT_FAIL;
 	}
 	nodrop = 0;
 	if (p.features & IORING_FEAT_NODROP)
@@ -56,8 +88,10 @@
 			goto err;
 		}
 		offset = BS * (rand() % BUFFERS);
-		if (fault && i == ENTRIES + 4)
+		if (fault && i == ENTRIES + 4) {
+			free(vecs[i].iov_base);
 			vecs[i].iov_base = NULL;
+		}
 		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
 
 		ret = io_uring_submit(&ring);
@@ -103,8 +137,8 @@
 reap_it:
 	reaped = 0;
 	do {
-		if (nodrop) {
-			/* nodrop should never lose events */
+		if (nodrop && !cqe_dropped) {
+			/* nodrop should never lose events unless cqe_dropped */
 			if (reaped == total)
 				break;
 		} else {
@@ -112,7 +146,10 @@
 				break;
 		}
 		ret = io_uring_wait_cqe(&ring, &cqe);
-		if (ret) {
+		if (nodrop && ret == -EBADR) {
+			cqe_dropped = true;
+			continue;
+		} else if (ret) {
 			fprintf(stderr, "wait_cqe=%d\n", ret);
 			goto err;
 		}
@@ -132,7 +169,7 @@
 		goto err;
 	}
 
-	if (!nodrop) {
+	if (!nodrop || cqe_dropped) {
 		*drops = *ring.cq.koverflow;
 	} else if (*ring.cq.koverflow) {
 		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
@@ -141,30 +178,41 @@
 
 	io_uring_queue_exit(&ring);
 	close(fd);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	if (fd != -1)
 		close(fd);
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_SKIP;
 }
 
 static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
 {
 	struct io_uring_cqe *cqe;
 	int i, ret = 0, seq = 0;
+	unsigned int start_overflow = *ring->cq.koverflow;
+	bool dropped = false;
 
 	for (i = 0; i < nr_events; i++) {
 		if (do_wait)
 			ret = io_uring_wait_cqe(ring, &cqe);
 		else
 			ret = io_uring_peek_cqe(ring, &cqe);
-		if (ret) {
+		if (do_wait && ret == -EBADR) {
+			unsigned int this_drop = *ring->cq.koverflow -
+				start_overflow;
+
+			dropped = true;
+			start_overflow = *ring->cq.koverflow;
+			assert(this_drop > 0);
+			i += (this_drop - 1);
+			continue;
+		} else if (ret) {
 			if (ret != -EAGAIN)
 				fprintf(stderr, "cqe peek failed: %d\n", ret);
 			break;
 		}
-		if (cqe->user_data != seq) {
+		if (!dropped && cqe->user_data != seq) {
 			fprintf(stderr, "cqe sequence out-of-order\n");
 			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
 					seq);
@@ -241,19 +289,206 @@
 	return 1;
 }
 
+
+static void submit_one_nop(struct io_uring *ring, int ud)
+{
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	assert(sqe);
+	io_uring_prep_nop(sqe);
+	sqe->user_data = ud;
+	ret = io_uring_submit(ring);
+	assert(ret == 1);
+}
+
+/*
+ * Create an overflow condition and ensure that SQEs are still processed
+ */
+static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
+				  bool defer)
+{
+	struct io_uring ring;
+	struct io_uring_params p;
+	int ret, i, j, ud, cqe_count;
+	unsigned int count;
+	int const N = 8;
+	int const LOOPS = 128;
+	int const QUEUE_LENGTH = 1024;
+	int completions[N];
+	int queue[QUEUE_LENGTH];
+	int queued = 0;
+	int outstanding = 0;
+	bool cqe_dropped = false;
+
+	memset(&completions, 0, sizeof(int) * N);
+	memset(&p, 0, sizeof(p));
+	p.cq_entries = 2 * cqe_multiple;
+	p.flags |= IORING_SETUP_CQSIZE;
+
+	if (poll)
+		p.flags |= IORING_SETUP_IOPOLL;
+
+	if (defer)
+		p.flags |= IORING_SETUP_SINGLE_ISSUER |
+			   IORING_SETUP_DEFER_TASKRUN;
+
+	ret = io_uring_queue_init_params(2, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
+		return 1;
+	}
+
+	assert(p.cq_entries < N);
+	/* submit N SQEs, some should overflow */
+	for (i = 0; i < N; i++) {
+		submit_one_nop(&ring, i);
+		outstanding++;
+	}
+
+	for (i = 0; i < LOOPS; i++) {
+		struct io_uring_cqe *cqes[N];
+
+		if (io_uring_cq_has_overflow(&ring)) {
+			/*
+			 * Flush any overflowed CQEs and process those. Actively
+			 * flush these to make sure CQEs arrive in roughly the
+			 * order they were sent.
+			 */
+			ret = io_uring_get_events(&ring);
+			if (ret != 0) {
+				fprintf(stderr,
+					"io_uring_get_events returned %d\n",
+					ret);
+				goto err;
+			}
+		} else if (!cqe_dropped) {
+			for (j = 0; j < queued; j++) {
+				submit_one_nop(&ring, queue[j]);
+				outstanding++;
+			}
+			queued = 0;
+		}
+
+		/* We have lost some random CQEs; stop if none remain. */
+		if (cqe_dropped && outstanding == *ring.cq.koverflow)
+			break;
+
+		ret = io_uring_wait_cqe(&ring, &cqes[0]);
+		if (ret == -EBADR) {
+			cqe_dropped = true;
+			fprintf(stderr, "CQE dropped\n");
+			continue;
+		} else if (ret != 0) {
+			fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret);
+			goto err;
+		}
+		cqe_count = 1;
+		if (batch) {
+			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
+			if (ret < 0) {
+				fprintf(stderr,
+					"io_uring_peek_batch_cqe failed %d\n",
+					ret);
+				goto err;
+			}
+			cqe_count = ret;
+		}
+		for (j = 0; j < cqe_count; j++) {
+			assert(cqes[j]->user_data < N);
+			ud = cqes[j]->user_data;
+			completions[ud]++;
+			assert(queued < QUEUE_LENGTH);
+			queue[queued++] = (int)ud;
+		}
+		io_uring_cq_advance(&ring, cqe_count);
+		outstanding -= cqe_count;
+	}
+
+	/* See if there were any drops by flushing the CQ ring *and* overflow */
+	do {
+		struct io_uring_cqe *cqe;
+
+		ret = io_uring_get_events(&ring);
+		if (ret < 0) {
+			if (ret == -EBADR) {
+				fprintf(stderr, "CQE dropped\n");
+				cqe_dropped = true;
+				break;
+			}
+			goto err;
+		}
+		if (outstanding && !io_uring_cq_ready(&ring))
+			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);
+
+		if (ret && ret != -ETIME) {
+			if (ret == -EBADR) {
+				fprintf(stderr, "CQE dropped\n");
+				cqe_dropped = true;
+				break;
+			}
+			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
+			goto err;
+		}
+		count = io_uring_cq_ready(&ring);
+		io_uring_cq_advance(&ring, count);
+		outstanding -= count;
+	} while (count);
+
+	io_uring_queue_exit(&ring);
+
+	/*
+	 * Make sure that completions come back in roughly the same order they
+	 * were sent. If they come back unfairly, then this will concentrate
+	 * on a couple of indices.
+	 */
+	for (i = 1; !cqe_dropped && i < N; i++) {
+		if (abs(completions[i] - completions[i - 1]) > 1) {
+			fprintf(stderr, "bad completion size %d %d\n",
+				completions[i], completions[i - 1]);
+			goto err;
+		}
+	}
+	return 0;
+err:
+	io_uring_queue_exit(&ring);
+	return 1;
+}
+
 int main(int argc, char *argv[])
 {
 	const char *fname = ".cq-overflow";
 	unsigned iters, drops;
 	unsigned long usecs;
 	int ret;
+	int i;
+	bool can_defer;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
+
+	can_defer = t_probe_defer_taskrun();
+	for (i = 0; i < 16; i++) {
+		bool batch = i & 1;
+		int mult = (i & 2) ? 1 : 2;
+		bool poll = i & 4;
+		bool defer = i & 8;
+
+		if (defer && !can_defer)
+			continue;
+
+		ret = test_overflow_handling(batch, mult, poll, defer);
+		if (ret) {
+			fprintf(stderr, "test_overflow_handling("
+				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
+				batch, mult, poll, defer);
+			goto err;
+		}
+	}
 
 	ret = test_overflow();
 	if (ret) {
-		printf("test_overflow failed\n");
+		fprintf(stderr, "test_overflow failed\n");
 		return ret;
 	}
 
@@ -266,7 +501,10 @@
 	do {
 		drops = 0;
 
-		if (test_io(fname, usecs, &drops, 0)) {
+		ret = test_io(fname, usecs, &drops, 0);
+		if (ret == T_EXIT_SKIP)
+			break;
+		else if (ret != T_EXIT_PASS) {
 			fprintf(stderr, "test_io nofault failed\n");
 			goto err;
 		}
@@ -276,19 +514,29 @@
 		iters++;
 	} while (iters < 40);
 
-	if (test_io(fname, usecs, &drops, 0)) {
+	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
 		fprintf(stderr, "test_io nofault failed\n");
 		goto err;
 	}
 
-	if (test_io(fname, usecs, &drops, 1)) {
+	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
 		fprintf(stderr, "test_io fault failed\n");
 		goto err;
 	}
 
 	unlink(fname);
-	return 0;
+	if (vecs != NULL) {
+		for (i = 0; i < BUFFERS; i++)
+			free(vecs[i].iov_base);
+	}
+	free(vecs);
+	return T_EXIT_PASS;
 err:
 	unlink(fname);
-	return 1;
+	if (vecs != NULL) {
+		for (i = 0; i < BUFFERS; i++)
+			free(vecs[i].iov_base);
+	}
+	free(vecs);
+	return T_EXIT_FAIL;
 }
diff --git a/test/cq-peek-batch.c b/test/cq-peek-batch.c
index 6c47bec..3dd30da 100644
--- a/test/cq-peek-batch.c
+++ b/test/cq-peek-batch.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int queue_n_nops(struct io_uring *ring, int n, int offset)
 {
@@ -58,12 +59,12 @@
 	unsigned got;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(4, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
@@ -95,8 +96,8 @@
 
 	io_uring_cq_advance(&ring, 8);
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/cq-ready.c b/test/cq-ready.c
index 7af7e54..455e770 100644
--- a/test/cq-ready.c
+++ b/test/cq-ready.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int queue_n_nops(struct io_uring *ring, int n)
 {
@@ -56,12 +57,12 @@
 	unsigned ready;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(4, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
@@ -87,8 +88,8 @@
 	CHECK_READY(&ring, 0);
 
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/cq-size.c b/test/cq-size.c
index 4e6e3d1..215a52e 100644
--- a/test/cq-size.c
+++ b/test/cq-size.c
@@ -10,6 +10,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -18,7 +19,7 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(&p, 0, sizeof(p));
 	p.flags = IORING_SETUP_CQSIZE;
@@ -31,7 +32,7 @@
 			goto done;
 		}
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (p.cq_entries < 64) {
@@ -58,7 +59,7 @@
 	}
 
 done:
-	return 0;
+	return T_EXIT_PASS;
 err:
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/d4ae271dfaae.c b/test/d4ae271dfaae.c
index 397b94b..9621cf8 100644
--- a/test/d4ae271dfaae.c
+++ b/test/d4ae271dfaae.c
@@ -19,21 +19,21 @@
 int main(int argc, char *argv[])
 {
 	struct io_uring ring;
-	int i, fd, ret;
+	int i, fd, ret, __e;
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
-	struct iovec *iovecs;
+	struct iovec *iovecs = NULL;
 	struct io_uring_params p;
 	char *fname;
 	void *buf;
 
 	memset(&p, 0, sizeof(p));
 	p.flags = IORING_SETUP_SQPOLL;
-	ret = t_create_ring_params(4, &ring, &p);
+	ret = t_create_ring_params(16, &ring, &p);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	if (argc > 1) {
 		fname = argv[1];
@@ -43,10 +43,13 @@
 	}
 
 	fd = open(fname, O_RDONLY | O_DIRECT);
+	__e = errno;
 	if (fname != argv[1])
 		unlink(fname);
 	if (fd < 0) {
-		perror("open");
+		if (__e == EINVAL || __e == EPERM || __e == EACCES)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "open: %s\n", strerror(__e));
 		goto out;
 	}
 
@@ -92,5 +95,10 @@
 	close(fd);
 out:
 	io_uring_queue_exit(&ring);
+	if (iovecs != NULL) {
+		for (i = 0; i < 10; i++)
+			free(iovecs[i].iov_base);
+		free(iovecs);
+	}
 	return ret;
 }
diff --git a/test/d77a67ed5f27.c b/test/d77a67ed5f27.c
index e56fdcd..69011af 100644
--- a/test/d77a67ed5f27.c
+++ b/test/d77a67ed5f27.c
@@ -22,7 +22,7 @@
 	int ret, data;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	signal(SIGALRM, sig_alrm);
 
@@ -31,9 +31,9 @@
 	p.flags = IORING_SETUP_SQPOLL;
 	ret = t_create_ring_params(4, &ring, &p);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	/* make sure sq thread is sleeping at this point */
 	usleep(150000);
@@ -42,7 +42,7 @@
 	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		fprintf(stderr, "sqe get failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_prep_nop(sqe);
@@ -58,8 +58,8 @@
 	data = (unsigned long) io_uring_cqe_get_data(cqe);
 	if (data != 42) {
 		fprintf(stderr, "invalid data: %d\n", data);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/defer-taskrun.c b/test/defer-taskrun.c
new file mode 100644
index 0000000..3ef7801
--- /dev/null
+++ b/test/defer-taskrun.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: MIT
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <signal.h>
+#include <poll.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "test.h"
+#include "helpers.h"
+
+#define EXEC_FILENAME ".defer-taskrun"
+#define EXEC_FILESIZE (1U<<20)
+
+static bool can_read_t(int fd, int time)
+{
+	int ret;
+	struct pollfd p = {
+		.fd = fd,
+		.events = POLLIN,
+	};
+
+	ret = poll(&p, 1, time);
+
+	return ret == 1;
+}
+
+static bool can_read(int fd)
+{
+	return can_read_t(fd, 0);
+}
+
+static void eventfd_clear(int fd)
+{
+	uint64_t val;
+	int ret;
+
+	assert(can_read(fd));
+	ret = read(fd, &val, 8);
+	assert(ret == 8);
+}
+
+static void eventfd_trigger(int fd)
+{
+	uint64_t val = 1;
+	int ret;
+
+	ret = write(fd, &val, sizeof(val));
+	assert(ret == sizeof(val));
+}
+
+#define CHECK(x)								\
+do {										\
+	if (!(x)) {								\
+		fprintf(stderr, "%s:%d %s failed\n", __FILE__, __LINE__, #x);	\
+		return -1;							\
+	}									\
+} while (0)
+
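+/*
+ * Note that on failure CHECK() does 'return -1' from the enclosing
+ * function, so it is only usable inside int-returning helpers, e.g.:
+ *
+ *	CHECK(fd >= 0);
+ */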
+
+static int test_eventfd(void)
+{
+	struct io_uring ring;
+	int ret;
+	int fda, fdb;
+	struct io_uring_cqe *cqe;
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret)
+		return ret;
+
+	fda = eventfd(0, EFD_NONBLOCK);
+	fdb = eventfd(0, EFD_NONBLOCK);
+
+	CHECK(fda >= 0 && fdb >= 0);
+
+	ret = io_uring_register_eventfd(&ring, fda);
+	if (ret)
+		return ret;
+
+	CHECK(!can_read(fda));
+	CHECK(!can_read(fdb));
+
+	io_uring_prep_poll_add(io_uring_get_sqe(&ring), fdb, POLLIN);
+	io_uring_submit(&ring);
+	CHECK(!can_read(fda)); /* poll should not have completed */
+
+	io_uring_prep_nop(io_uring_get_sqe(&ring));
+	io_uring_submit(&ring);
+	CHECK(can_read(fda)); /* nop should have */
+
+	CHECK(io_uring_peek_cqe(&ring, &cqe) == 0);
+	CHECK(cqe->res == 0);
+	io_uring_cqe_seen(&ring, cqe);
+	eventfd_clear(fda);
+
+	eventfd_trigger(fdb);
+	/* can take time due to rcu_call */
+	CHECK(can_read_t(fda, 1000));
+
+	/* should not have processed the cqe yet */
+	CHECK(io_uring_cq_ready(&ring) == 0);
+
+	io_uring_get_events(&ring);
+	CHECK(io_uring_cq_ready(&ring) == 1);
+
+
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+struct thread_data {
+	struct io_uring ring;
+	int efd;
+	char buff[8];
+};
+
+static void *thread(void *t)
+{
+	struct thread_data *td = t;
+
+	io_uring_enable_rings(&td->ring);
+	io_uring_prep_read(io_uring_get_sqe(&td->ring), td->efd, td->buff, sizeof(td->buff), 0);
+	io_uring_submit(&td->ring);
+
+	return NULL;
+}
+
+static int test_thread_shutdown(void)
+{
+	pthread_t t1;
+	int ret;
+	struct thread_data td;
+	struct io_uring_cqe *cqe;
+	uint64_t val = 1;
+
+	ret = io_uring_queue_init(8, &td.ring, IORING_SETUP_SINGLE_ISSUER |
+					       IORING_SETUP_DEFER_TASKRUN |
+					       IORING_SETUP_R_DISABLED);
+	if (ret)
+		return ret;
+
+	CHECK(io_uring_get_events(&td.ring) == -EBADFD);
+
+	td.efd = eventfd(0, 0);
+	CHECK(td.efd >= 0);
+
+	CHECK(pthread_create(&t1, NULL, thread, &td) == 0);
+	CHECK(pthread_join(t1, NULL) == 0);
+
+	CHECK(io_uring_get_events(&td.ring) == -EEXIST);
+
+	CHECK(write(td.efd, &val, sizeof(val)) == sizeof(val));
+	CHECK(io_uring_wait_cqe(&td.ring, &cqe) == -EEXIST);
+
+	close(td.efd);
+	io_uring_queue_exit(&td.ring);
+	return 0;
+}
+
+static int test_exec(const char *filename)
+{
+	int ret;
+	int fd;
+	struct io_uring ring;
+	pid_t fork_pid;
+	static char * const new_argv[] = {"1", "2", "3", NULL};
+	static char * const new_env[] = {NULL};
+	char *buff;
+
+	fork_pid = fork();
+	CHECK(fork_pid >= 0);
+	if (fork_pid > 0) {
+		int wstatus;
+
+		CHECK(waitpid(fork_pid, &wstatus, 0) != (pid_t)-1);
+		if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) == T_EXIT_FAIL) {
+			fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus));
+			return -1;
+		}
+		return T_EXIT_PASS;
+	}
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret)
+		return ret;
+
+	if (filename) {
+		fd = open(filename, O_RDONLY | O_DIRECT);
+		if (fd < 0 && (errno == EINVAL || errno == EPERM || errno == EACCES))
+			return T_EXIT_SKIP;
+	} else {
+		t_create_file(EXEC_FILENAME, EXEC_FILESIZE);
+		fd = open(EXEC_FILENAME, O_RDONLY | O_DIRECT);
+		if (fd < 0 && (errno == EINVAL || errno == EPERM || errno == EACCES)) {
+			unlink(EXEC_FILENAME);
+			return T_EXIT_SKIP;
+		}
+		unlink(EXEC_FILENAME);
+	}
+	/* O_DIRECT needs an aligned buffer */
+	CHECK(posix_memalign((void **)&buff, 4096, EXEC_FILESIZE) == 0);
+	CHECK(buff);
+
+	CHECK(fd >= 0);
+	io_uring_prep_read(io_uring_get_sqe(&ring), fd, buff, EXEC_FILESIZE, 0);
+	io_uring_submit(&ring);
+	ret = execve("/proc/self/exe", new_argv, new_env);
+	/* if we get here it failed anyway */
+	fprintf(stderr, "execve failed %d\n", ret);
+	return T_EXIT_FAIL;
+}
+
+static int test_flag(void)
+{
+	struct io_uring ring;
+	int ret;
+	int fd;
+	struct io_uring_cqe *cqe;
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN |
+					    IORING_SETUP_TASKRUN_FLAG);
+	CHECK(!ret);
+
+	fd = eventfd(0, EFD_NONBLOCK);
+	CHECK(fd >= 0);
+
+	io_uring_prep_poll_add(io_uring_get_sqe(&ring), fd, POLLIN);
+	io_uring_submit(&ring);
+	CHECK(!can_read(fd)); /* poll should not have completed */
+
+	eventfd_trigger(fd);
+	CHECK(can_read(fd));
+
+	/* should not have processed the poll cqe yet */
+	CHECK(io_uring_cq_ready(&ring) == 0);
+
+	/* flag should be set */
+	CHECK(IO_URING_READ_ONCE(*ring.sq.kflags) & IORING_SQ_TASKRUN);
+
+	/*
+	 * Specifically peek, knowing we have no CQE ready; but because the
+	 * flag is set, liburing should try and get more.
+	 */
+	ret = io_uring_peek_cqe(&ring, &cqe);
+
+	CHECK(ret == 0 && cqe);
+	CHECK(!(IO_URING_READ_ONCE(*ring.sq.kflags) & IORING_SQ_TASKRUN));
+
+	close(fd);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+static int test_ring_shutdown(void)
+{
+	struct io_uring ring;
+	int ret;
+	int fd[2];
+	char buff = '\0';
+	char send = 'X';
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN |
+					    IORING_SETUP_TASKRUN_FLAG);
+	CHECK(!ret);
+
+	ret = t_create_socket_pair(fd, true);
+	CHECK(!ret);
+
+	io_uring_prep_recv(io_uring_get_sqe(&ring), fd[0], &buff, 1, 0);
+	io_uring_submit(&ring);
+
+	ret = write(fd[1], &send, 1);
+	CHECK(ret == 1);
+
+	/* should not have processed the poll cqe yet */
+	CHECK(io_uring_cq_ready(&ring) == 0);
+	io_uring_queue_exit(&ring);
+
+	/* task work should have been processed by now */
+	CHECK(buff == 'X');
+
+	return 0;
+}
+
+static int test_drain(void)
+{
+	struct io_uring ring;
+	int ret, i, fd[2];
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct iovec iovecs[128];
+	char buff[ARRAY_SIZE(iovecs)];
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN |
+					    IORING_SETUP_TASKRUN_FLAG);
+	CHECK(!ret);
+
+	for (i = 0; i < ARRAY_SIZE(iovecs); i++) {
+		iovecs[i].iov_base = &buff[i];
+		iovecs[i].iov_len = 1;
+	}
+
+	ret = t_create_socket_pair(fd, true);
+	CHECK(!ret);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_writev(sqe, fd[1], &iovecs[0], ARRAY_SIZE(iovecs), 0);
+	sqe->flags |= IOSQE_IO_DRAIN;
+	io_uring_submit(&ring);
+
+	for (i = 0; i < ARRAY_SIZE(iovecs); i++)
+		iovecs[i].iov_base = NULL;
+
+	CHECK(io_uring_wait_cqe(&ring, &cqe) == 0);
+	CHECK(cqe->res == 128);
+
+	close(fd[0]);
+	close(fd[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+	const char *filename = NULL;
+
+	if (argc > 2)
+		return T_EXIT_SKIP;
+	if (argc == 2) {
+		/* This test exposes interesting behaviour with a null-blk
+		 * device configured like:
+		 * $ modprobe null-blk completion_nsec=100000000 irqmode=2
+		 * and then run with $ defer-taskrun.t /dev/nullb0
+		 */
+		filename = argv[1];
+	}
+
+	if (!t_probe_defer_taskrun())
+		return T_EXIT_SKIP;
+
+	ret = test_thread_shutdown();
+	if (ret) {
+		fprintf(stderr, "test_thread_shutdown failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_exec(filename);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_exec failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_eventfd();
+	if (ret) {
+		fprintf(stderr, "eventfd failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_flag();
+	if (ret) {
+		fprintf(stderr, "flag failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_ring_shutdown();
+	if (ret) {
+		fprintf(stderr, "test_ring_shutdown failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_drain();
+	if (ret) {
+		fprintf(stderr, "test_drain failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/defer-tw-timeout.c b/test/defer-tw-timeout.c
new file mode 100644
index 0000000..27742c4
--- /dev/null
+++ b/test/defer-tw-timeout.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test waiting for more events than what will be posted with
+ *		a timeout with DEFER_TASKRUN. All kernels should time out,
+ *		but a non-buggy kernel will end up with one CQE available
+ *		for reaping. Buggy kernels will not have processed the
+ *		task_work and will have 0 events.
+ *
+ */
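+
+/*
+ * The pattern both tests below drive, in outline: submit one request that
+ * will complete, then wait for two with a timeout. The wait must time out,
+ * but exactly one CQE should be reapable afterwards:
+ *
+ *	struct __kernel_timespec ts = { .tv_sec = 1, };
+ *	ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL);
+ *	// expect ret == 1 here, with one CQE left for io_uring_peek_cqe()
+ */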
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+struct d {
+	int fd;
+};
+
+static void *thread_fn(void *data)
+{
+	struct d *d = data;
+	int ret;
+
+	usleep(100000);
+	ret = write(d->fd, "Hello", 5);
+	if (ret < 0)
+		perror("write");
+	return NULL;
+}
+
+static int test_poll(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	int ret, fds[2], i;
+	pthread_t thread;
+	char buf[32];
+	struct d d;
+	void *tret;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+	d.fd = fds[1];
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
+
+	pthread_create(&thread, NULL, thread_fn, &d);
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+
+	ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL);
+	if (ret != 1) {
+		fprintf(stderr, "unexpected wait ret %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_peek_cqe(ring, &cqe);
+		if (ret)
+			break;
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (i != 1) {
+		fprintf(stderr, "Got %d requests, expected 1\n", i);
+		return T_EXIT_FAIL;
+	}
+
+	pthread_join(thread, &tret);
+	return T_EXIT_PASS;
+}
+
+static int test_file(struct io_uring *ring, char *__fname)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	char filename[64], *fname;
+	int fd, ret, i;
+	void *buf;
+
+	if (!__fname) {
+		fname = filename;
+		sprintf(fname, ".defer-tw-timeout.%d", getpid());
+		t_create_file(fname, 128*1024);
+	} else {
+		fname = __fname;
+	}
+
+	fd = open(fname, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES) {
+			if (!__fname)
+				unlink(fname);
+			return T_EXIT_SKIP;
+		}
+		perror("open");
+		if (!__fname)
+			unlink(fname);
+		return T_EXIT_FAIL;
+	}
+
+	if (!__fname)
+		unlink(fname);
+
+	if (posix_memalign(&buf, 4096, 4096)) {
+		close(fd);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, fd, buf, 4096, 0);
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+
+	ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL);
+	if (ret != 1) {
+		fprintf(stderr, "unexpected wait ret %d\n", ret);
+		close(fd);
+		free(buf);
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_peek_cqe(ring, &cqe);
+		if (ret)
+			break;
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (i != 1) {
+		fprintf(stderr, "Got %d requests, expected 1\n", i);
+		close(fd);
+		free(buf);
+		return T_EXIT_FAIL;
+	}
+
+	close(fd);
+	free(buf);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	char *fname = NULL;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret == -EINVAL)
+		return T_EXIT_SKIP;
+	else if (ret)
+		return T_EXIT_FAIL;
+
+	if (argc > 1)
+		fname = argv[1];
+
+	ret = test_file(&ring, fname);
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	ret = test_poll(&ring);
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	return T_EXIT_PASS;
+}
diff --git a/test/defer.c b/test/defer.c
index 68ee4b4..b0770ef 100644
--- a/test/defer.c
+++ b/test/defer.c
@@ -57,7 +57,7 @@
 		case OP_REMOVE_BUFFERS:
 			io_uring_prep_remove_buffers(sqe, 10, 1);
 			break;
-		};
+		}
 		sqe->user_data = i;
 		ctx->sqes[i] = sqe;
 	}
@@ -88,7 +88,7 @@
 	return 0;
 }
 
-static int test_cancelled_userdata(struct io_uring *ring)
+static int test_canceled_userdata(struct io_uring *ring)
 {
 	struct test_context ctx;
 	int ret, i, nr = 100;
@@ -96,7 +96,7 @@
 	if (init_context(&ctx, ring, nr, OP_NOP))
 		return 1;
 
-	for (i = 0; i < nr; i++)
+	for (i = 0; i < nr - 1; i++)
 		ctx.sqes[i]->flags |= IOSQE_IO_LINK;
 
 	ret = io_uring_submit(ring);
@@ -130,7 +130,7 @@
 	if (init_context(&ctx, ring, nr, OP_REMOVE_BUFFERS))
 		return 1;
 
-	for (i = 0; i < nr; i++)
+	for (i = 0; i < nr - 1; i++)
 		ctx.sqes[i]->flags |= IOSQE_IO_LINK;
 
 	ret = io_uring_submit(ring);
@@ -260,25 +260,25 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(&p, 0, sizeof(p));
 	ret = io_uring_queue_init_params(RING_SIZE, &ring, &p);
 	if (ret) {
 		printf("ring setup failed %i\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(RING_SIZE, &poll_ring, IORING_SETUP_IOPOLL);
 	if (ret) {
 		printf("poll_ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 
-	ret = test_cancelled_userdata(&poll_ring);
+	ret = test_canceled_userdata(&poll_ring);
 	if (ret) {
-		printf("test_cancelled_userdata failed\n");
+		printf("test_canceled_userdata failed\n");
 		return ret;
 	}
 
@@ -305,9 +305,9 @@
 	ret = t_create_ring(RING_SIZE, &sqthread_ring,
 				IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	ret = test_thread_link_cancel(&sqthread_ring);
 	if (ret) {
@@ -315,5 +315,5 @@
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/double-poll-crash.c b/test/double-poll-crash.c
index 231c7da..6ddf431 100644
--- a/test/double-poll-crash.c
+++ b/test/double-poll-crash.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
 #define SIZEOF_IO_URING_SQE 64
@@ -108,24 +109,24 @@
   }
 }
 
-uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff};
+static uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff};
 
 int main(int argc, char *argv[])
 {
   void *mmap_ret;
-#if !defined(__i386) && !defined(__x86_64__)
-  return 0;
+#if (!defined(__i386) && !defined(__x86_64__)) || defined(CONFIG_USE_SANITIZER)
+  return T_EXIT_SKIP;
 #endif
 
   if (argc > 1)
-    return 0;
+    return T_EXIT_SKIP;
 
-  mmap_ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
+  mmap_ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul);
   if (mmap_ret == MAP_FAILED)
-    return 0;
-  mmap_ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
+    return T_EXIT_SKIP;
+  mmap_ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul);
   if (mmap_ret == MAP_FAILED)
-    return 0;
+    return T_EXIT_SKIP;
   intptr_t res = 0;
   *(uint32_t*)0x20000484 = 0;
   *(uint32_t*)0x20000488 = 0;
@@ -190,5 +191,5 @@
                             "\xbd\x43\x7d\x16\x69\x3e\x05",
          19);
   ioctl(r[3], 0x5404, 0x20000080ul);
-  return 0;
+  return T_EXIT_PASS;
 }
diff --git a/test/drop-submit.c b/test/drop-submit.c
index 7b15f26..6af0630 100644
--- a/test/drop-submit.c
+++ b/test/drop-submit.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int test(struct io_uring *ring, int expect_drops)
 {
@@ -63,7 +64,7 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SUBMIT_ALL);
 	if (ret)
@@ -80,7 +81,7 @@
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed\n");
-		return 0;
+		return T_EXIT_FAIL;
 	}
 
 	ret = test(&ring, 1);
@@ -89,5 +90,5 @@
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/eeed8b54e0df.c b/test/eeed8b54e0df.c
index 62f6f45..58fd9d9 100644
--- a/test/eeed8b54e0df.c
+++ b/test/eeed8b54e0df.c
@@ -33,6 +33,7 @@
 	}
 
 	buf = t_malloc(BLOCK);
+	memset(buf, 0, BLOCK);
 	ret = write(fd, buf, BLOCK);
 	if (ret != BLOCK) {
 		if (ret < 0)
@@ -64,7 +65,7 @@
 	int ret, fd;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	iov.iov_base = t_malloc(4096);
 	iov.iov_len = 4096;
@@ -72,19 +73,19 @@
 	ret = io_uring_queue_init(2, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
 	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		printf("get sqe failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	fd = get_file_fd();
 	if (fd < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	io_uring_prep_readv(sqe, fd, &iov, 1, 0);
 	sqe->rw_flags = RWF_NOWAIT;
@@ -101,14 +102,21 @@
 		goto err;
 	}
 
+	ret = T_EXIT_PASS;
 	if (cqe->res != -EAGAIN && cqe->res != 4096) {
-		printf("cqe error: %d\n", cqe->res);
-		goto err;
+		if (cqe->res == -EOPNOTSUPP) {
+			ret = T_EXIT_SKIP;
+		} else {
+			printf("cqe error: %d\n", cqe->res);
+			goto err;
+		}
 	}
 
 	close(fd);
-	return 0;
+	free(iov.iov_base);
+	return ret;
 err:
 	close(fd);
-	return 1;
+	free(iov.iov_base);
+	return T_EXIT_FAIL;
 }
diff --git a/test/empty-eownerdead.c b/test/empty-eownerdead.c
index 40f854f..110fdf7 100644
--- a/test/empty-eownerdead.c
+++ b/test/empty-eownerdead.c
@@ -17,14 +17,14 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	p.flags = IORING_SETUP_SQPOLL;
 	p.sq_thread_idle = 100;
 
 	ret = t_create_ring_params(1, &ring, &p);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret < 0)
 		goto err;
 
@@ -39,7 +39,7 @@
 		goto err;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 err:
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/eploop.c b/test/eploop.c
new file mode 100644
index 0000000..5225c05
--- /dev/null
+++ b/test/eploop.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test that we don't recursively generate completion events if an io_uring
+ * fd is added to an epoll context, and the ring itself polls for events on
+ * the epollfd. Older kernels will stop on overflow, newer kernels will
+ * detect this earlier and abort correctly.
+ */
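+
+/*
+ * The loop being guarded against, in outline: the epoll context watches the
+ * ring fd, and the ring polls the epoll fd. Each posted CQE makes the ring
+ * fd readable, which makes the epollfd readable, which fires the multishot
+ * poll and posts another CQE, and so on:
+ *
+ *	epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev);	// epoll -> ring
+ *	io_uring_prep_poll_multishot(sqe, epollfd, POLLIN);	// ring -> epoll
+ */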
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <poll.h>
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct epoll_event ev = { };
+	int epollfd, ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "Ring init failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	epollfd = epoll_create1(0);
+	if (epollfd < 0) {
+		perror("epoll_create");
+		return T_EXIT_FAIL;
+	}
+
+	ev.events = EPOLLIN;
+	ev.data.fd = ring.ring_fd;
+	ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev);
+	if (ret < 0) {
+		perror("epoll_ctl");
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_poll_multishot(sqe, epollfd, POLLIN);
+	sqe->user_data = 1;
+	io_uring_submit(&ring);
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 2;
+	io_uring_prep_nop(sqe);
+	io_uring_submit(&ring);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe ret = %d\n", ret);
+			break;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		fprintf(stderr, "Generated too many events\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/eventfd-disable.c b/test/eventfd-disable.c
index 6567be0..162f9f9 100644
--- a/test/eventfd-disable.c
+++ b/test/eventfd-disable.c
@@ -13,8 +13,9 @@
 #include <sys/eventfd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
-int main(int argc, char *argv[])
+static int test(bool defer)
 {
 	struct io_uring_params p = {};
 	struct io_uring_sqe *sqe;
@@ -27,36 +28,37 @@
 	};
 	int ret, evfd, i;
 
-	if (argc > 1)
-		return 0;
+	if (defer)
+		p.flags |= IORING_SETUP_SINGLE_ISSUER |
+			   IORING_SETUP_DEFER_TASKRUN;
 
 	ret = io_uring_queue_init_params(64, &ring, &p);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	evfd = eventfd(0, EFD_CLOEXEC);
 	if (evfd < 0) {
 		perror("eventfd");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_eventfd(&ring, evfd);
 	if (ret) {
 		fprintf(stderr, "failed to register evfd: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (!io_uring_cq_eventfd_enabled(&ring)) {
 		fprintf(stderr, "eventfd disabled\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_cq_eventfd_toggle(&ring, false);
 	if (ret) {
 		fprintf(stdout, "Skipping, CQ flags not available!\n");
-		return 0;
+		return T_EXIT_SKIP;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -66,7 +68,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 63; i++) {
@@ -78,24 +80,24 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 63) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 63; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
 		case 1: /* eventfd */
 			fprintf(stderr, "eventfd unexpected: %d\n", (int)ptr);
-			return 1;
+			return T_EXIT_FAIL;
 		case 2:
 			if (cqe->res) {
 				fprintf(stderr, "nop: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
@@ -105,7 +107,7 @@
 	ret = io_uring_cq_eventfd_toggle(&ring, true);
 	if (ret) {
 		fprintf(stderr, "io_uring_cq_eventfd_toggle: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -115,37 +117,63 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 2; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
 		case 1: /* eventfd */
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "read res: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 
 			if (ptr != 1) {
 				fprintf(stderr, "eventfd: %d\n", (int)ptr);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		case 2:
 			if (cqe->res) {
 				fprintf(stderr, "nop: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
 		io_uring_cqe_seen(&ring, cqe);
 	}
 
-	return 0;
+	io_uring_queue_exit(&ring);
+	close(evfd);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(false);
+	if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "%s: test(false) failed\n", argv[0]);
+		return ret;
+	}
+
+	if (t_probe_defer_taskrun()) {
+		ret = test(true);
+		if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "%s: test(true) failed\n", argv[0]);
+			return ret;
+		}
+	}
+
+	return ret;
 }
diff --git a/test/eventfd-reg.c b/test/eventfd-reg.c
index b447455..6514f48 100644
--- a/test/eventfd-reg.c
+++ b/test/eventfd-reg.c
@@ -13,6 +13,7 @@
 #include <sys/eventfd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -21,39 +22,39 @@
 	int ret, evfd[2], i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init_params(8, &ring, &p);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	evfd[0] = eventfd(0, EFD_CLOEXEC);
 	evfd[1] = eventfd(0, EFD_CLOEXEC);
 	if (evfd[0] < 0 || evfd[1] < 0) {
 		perror("eventfd");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_eventfd(&ring, evfd[0]);
 	if (ret) {
 		fprintf(stderr, "failed to register evfd: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-	/* Check that registrering again will get -EBUSY */
+	/* Check that registering again will get -EBUSY */
 	ret = io_uring_register_eventfd(&ring, evfd[1]);
 	if (ret != -EBUSY) {
 		fprintf(stderr, "unexpected 2nd register: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	close(evfd[1]);
 
 	ret = io_uring_unregister_eventfd(&ring);
 	if (ret) {
 		fprintf(stderr, "unexpected unregister: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	/* loop 100 registers/unregister */
@@ -61,16 +62,16 @@
 		ret = io_uring_register_eventfd(&ring, evfd[0]);
 		if (ret) {
 			fprintf(stderr, "failed to register evfd: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 
 		ret = io_uring_unregister_eventfd(&ring);
 		if (ret) {
 			fprintf(stderr, "unexpected unregister: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 	}
 
 	close(evfd[0]);
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/eventfd-ring.c b/test/eventfd-ring.c
index d217312..b6bcf72 100644
--- a/test/eventfd-ring.c
+++ b/test/eventfd-ring.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Description: run various nop tests
+ * Description: test use of eventfds with multiple rings
  *
  */
 #include <errno.h>
@@ -13,6 +13,7 @@
 #include <sys/eventfd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -22,45 +23,45 @@
 	int ret, evfd1, evfd2;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init_params(8, &ring1, &p);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) {
 		fprintf(stdout, "Skipping\n");
-		return 0;
+		return T_EXIT_SKIP;
 	}
 	ret = io_uring_queue_init(8, &ring2, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	evfd1 = eventfd(0, EFD_CLOEXEC);
 	if (evfd1 < 0) {
 		perror("eventfd");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	evfd2 = eventfd(0, EFD_CLOEXEC);
 	if (evfd2 < 0) {
 		perror("eventfd");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_eventfd(&ring1, evfd1);
 	if (ret) {
 		fprintf(stderr, "failed to register evfd: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_eventfd(&ring2, evfd2);
 	if (ret) {
 		fprintf(stderr, "failed to register evfd: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring1);
@@ -74,13 +75,13 @@
 	ret = io_uring_submit(&ring1);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_submit(&ring2);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring1);
@@ -90,8 +91,8 @@
 	ret = io_uring_submit(&ring1);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/eventfd.c b/test/eventfd.c
index 5c49ece..0ab15dd 100644
--- a/test/eventfd.c
+++ b/test/eventfd.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Description: run various nop tests
+ * Description: run various eventfd tests
  *
  */
 #include <errno.h>
@@ -13,6 +13,7 @@
 #include <sys/eventfd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -28,28 +29,28 @@
 	int ret, evfd, i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init_params(8, &ring, &p);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) {
 		fprintf(stdout, "Skipping\n");
-		return 0;
+		return T_EXIT_SKIP;
 	}
 
 	evfd = eventfd(0, EFD_CLOEXEC);
 	if (evfd < 0) {
 		perror("eventfd");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_eventfd(&ring, evfd);
 	if (ret) {
 		fprintf(stderr, "failed to register evfd: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -59,13 +60,12 @@
 
 	sqe = io_uring_get_sqe(&ring);
 	io_uring_prep_readv(sqe, evfd, &vec, 1, 0);
-	sqe->flags |= IOSQE_IO_LINK;
 	sqe->user_data = 2;
 
 	ret = io_uring_submit(&ring);
 	if (ret != 2) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -75,38 +75,38 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 3; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		switch (cqe->user_data) {
 		case 1:
 			/* POLLIN */
 			if (cqe->res != 1) {
 				fprintf(stderr, "poll: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		case 2:
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "read: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		case 3:
 			if (cqe->res) {
 				fprintf(stderr, "nop: %d\n", cqe->res);
-				return 1;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
 		io_uring_cqe_seen(&ring, cqe);
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/evloop.c b/test/evloop.c
new file mode 100644
index 0000000..399b615
--- /dev/null
+++ b/test/evloop.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test that we don't recursively generate completion events if an io_uring
+ * has an eventfd registered that triggers on completions, and we add a poll
+ * request with multishot on the eventfd. Older kernels will stop on overflow,
+ * newer kernels will detect this earlier and abort correctly.
+ */
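+
+/*
+ * Outline of the potential loop: the registered eventfd is signaled for
+ * every posted CQE, and the multishot poll on that same eventfd posts a CQE
+ * whenever it becomes readable:
+ *
+ *	io_uring_register_eventfd(&ring, efd);			// CQE -> eventfd
+ *	io_uring_prep_poll_multishot(sqe, efd, POLLIN);		// eventfd -> CQE
+ */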
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <assert.h>
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, efd, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "Ring init failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	efd = eventfd(0, 0);
+	if (efd < 0) {
+		perror("eventfd");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_eventfd(&ring, efd);
+	if (ret) {
+		fprintf(stderr, "Ring eventfd register failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_poll_multishot(sqe, efd, POLLIN);
+	sqe->user_data = 1;
+	io_uring_submit(&ring);
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 2;
+	io_uring_prep_nop(sqe);
+	io_uring_submit(&ring);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe ret = %d\n", ret);
+			break;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		fprintf(stderr, "Generated too many events\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/exit-no-cleanup.c b/test/exit-no-cleanup.c
index b3fd5a4..9389af8 100644
--- a/test/exit-no-cleanup.c
+++ b/test/exit-no-cleanup.c
@@ -19,6 +19,11 @@
 #include "liburing.h"
 #include "helpers.h"
 
+// On fast enough machines with enough cores, the first few threads will post
+// enough semaphores to cause the main thread to exit while other threads are
+// halfway through initialization. This causes a NULL dereference somewhere in
+// thread cleanup, which trips ASAN.
+#ifndef CONFIG_USE_SANITIZER
 #define IORING_ENTRIES 8
 
 static pthread_t *threads;
@@ -26,7 +31,7 @@
 static int sleep_fd, notify_fd;
 static sem_t sem;
 
-void *thread_func(void *arg)
+static void *thread_func(void *arg)
 {
 	struct io_uring ring;
 	int res;
@@ -74,7 +79,7 @@
 	const uint64_t n = 0x42;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	cpus = get_nprocs();
 	res = pthread_barrier_init(&init_barrier, NULL, cpus);
@@ -115,3 +120,9 @@
 	// Exit without resource cleanup
 	exit(EXIT_SUCCESS);
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/fadvise.c b/test/fadvise.c
index 278a045..7a1b611 100644
--- a/test/fadvise.c
+++ b/test/fadvise.c
@@ -18,30 +18,6 @@
 #define LOOPS		100
 #define MIN_LOOPS	10
 
-static unsigned long long utime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000000;
-	return sec + usec;
-}
-
-static unsigned long long utime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return utime_since(tv, &end);
-}
-
 static int do_fadvise(struct io_uring *ring, int fd, off_t offset, off_t len,
 		      int advice)
 {
@@ -73,7 +49,7 @@
 	if (ret == -EINVAL || ret == -EBADF) {
 		fprintf(stdout, "Fadvise not supported, skipping\n");
 		unlink(".fadvise.tmp");
-		exit(0);
+		exit(T_EXIT_SKIP);
 	} else if (ret) {
 		fprintf(stderr, "cqe->res=%d\n", cqe->res);
 	}
@@ -92,7 +68,7 @@
 		perror("lseek");
 		return -1;
 	}
-	
+
 	gettimeofday(&tv, NULL);
 	ret = read(fd, buf, FILE_SIZE);
 	t = utime_since_now(&tv);
@@ -115,6 +91,8 @@
 
 	fd = open(filename, O_RDONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -148,9 +126,12 @@
 		return 1;
 
 	if (cached_read < uncached_read &&
-	    cached_read2 < uncached_read)
+		cached_read2 < uncached_read) {
+		free(buf);
 		return 0;
+	}
 
+	free(buf);
 	return 2;
 }
 
@@ -174,6 +155,8 @@
 	good = bad = 0;
 	for (i = 0; i < LOOPS; i++) {
 		ret = test_fadvise(&ring, fname);
+		if (ret == T_EXIT_SKIP)
+			return T_EXIT_SKIP;
 		if (ret == 1) {
 			fprintf(stderr, "read_fadvise failed\n");
 			goto err;
@@ -186,7 +169,7 @@
 	}
 
 	/* too hard to reliably test, just ignore */
-	if (0 && bad > good) {
+	if ((0) && bad > good) {
 		fprintf(stderr, "Suspicious timings\n");
 		goto err;
 	}
@@ -194,9 +177,9 @@
 	if (fname != argv[1])
 		unlink(fname);
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	if (fname != argv[1])
 		unlink(fname);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/fallocate.c b/test/fallocate.c
index ddb53a6..735cc5b 100644
--- a/test/fallocate.c
+++ b/test/fallocate.c
@@ -12,8 +12,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
+#include <signal.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int no_fallocate;
 
@@ -66,14 +68,15 @@
 	if (cqe->res == -EINVAL) {
 		fprintf(stdout, "Fallocate not supported, skipping\n");
 		no_fallocate = 1;
-		goto out;
+		goto skip;
 	} else if (cqe->res != -EFBIG) {
 		fprintf(stderr, "Expected -EFBIG: %d\n", cqe->res);
 		goto err;
 	}
 	io_uring_cqe_seen(ring, cqe);
-out:
 	return 0;
+skip:
+	return T_EXIT_SKIP;
 err:
 	return 1;
 }
@@ -116,7 +119,7 @@
 	if (cqe->res == -EINVAL) {
 		fprintf(stdout, "Fallocate not supported, skipping\n");
 		no_fallocate = 1;
-		goto out;
+		goto skip;
 	}
 	if (cqe->res) {
 		fprintf(stderr, "cqe->res=%d\n", cqe->res);
@@ -135,8 +138,9 @@
 		goto err;
 	}
 
-out:
 	return 0;
+skip:
+	return T_EXIT_SKIP;
 err:
 	return 1;
 }
@@ -213,23 +217,33 @@
 	return 1;
 }
 
+static void sig_xfsz(int sig)
+{
+}
+
 int main(int argc, char *argv[])
 {
+	struct sigaction act = { };
 	struct io_uring ring;
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
+
+	act.sa_handler = sig_xfsz;
+	sigaction(SIGXFSZ, &act, NULL);
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_fallocate(&ring);
 	if (ret) {
-		fprintf(stderr, "test_fallocate failed\n");
+		if (ret != T_EXIT_SKIP) {
+			fprintf(stderr, "test_fallocate failed\n");
+		}
 		return ret;
 	}
 
@@ -241,9 +255,11 @@
 
 	ret = test_fallocate_rlimit(&ring);
 	if (ret) {
-		fprintf(stderr, "test_fallocate_rlimit failed\n");
+		if (ret != T_EXIT_SKIP) {
+			fprintf(stderr, "test_fallocate_rlimit failed\n");
+		}
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/fc2a85cb02ef.c b/test/fc2a85cb02ef.c
index bdc3c48..9834bd6 100644
--- a/test/fc2a85cb02ef.c
+++ b/test/fc2a85cb02ef.c
@@ -16,6 +16,7 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
 static bool write_file(const char* file, const char* what, ...)
@@ -53,7 +54,7 @@
   return fd;
 }
 
-static int setup_fault()
+static int setup_fault(void)
 {
   static struct {
     const char* file;
@@ -78,16 +79,16 @@
   return 0;
 }
 
-uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};
+static uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};
 
 int main(int argc, char *argv[])
 {
   if (argc > 1)
-    return 0;
-  mmap((void *) 0x20000000ul, 0x1000000ul, 3ul, 0x32ul, -1, 0);
+    return T_EXIT_SKIP;
+  mmap((void *) 0x20000000ul, 0x1000000ul, 3ul, MAP_ANON|MAP_PRIVATE, -1, 0);
   if (setup_fault()) {
     printf("Test needs failslab/fail_futex/fail_page_alloc enabled, skipped\n");
-    return 0;
+    return T_EXIT_SKIP;
   }
   intptr_t res = 0;
   *(uint32_t*)0x20000000 = 0;
@@ -127,5 +128,5 @@
   *(uint32_t*)0x20000080 = r[1];
   inject_fault(1);
   __sys_io_uring_register(r[0], 2ul, (const void *) 0x20000080ul, 1ul);
-  return 0;
+  return T_EXIT_PASS;
 }
diff --git a/test/fd-install.c b/test/fd-install.c
new file mode 100644
index 0000000..e2462a1
--- /dev/null
+++ b/test/fd-install.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test installing a direct descriptor into the regular
+ *		file table
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int no_fd_install;
+
+/* test that O_CLOEXEC is accepted, and others are not */
+static int test_flags(struct io_uring *ring, int async)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2], fd;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_files(ring, &fds[0], 1);
+	if (ret) {
+		fprintf(stderr, "failed register files %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* check that setting an invalid flag fails */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, 1U << 17);
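+	/* no IORING_FIXED_FD_* flag uses bit 17, so this must be rejected */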
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	/* check that IORING_FIXED_FD_NO_CLOEXEC is accepted */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, IORING_FIXED_FD_NO_CLOEXEC);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	fd = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	close(fds[0]);
+	close(fds[1]);
+	close(fd);
+	io_uring_unregister_files(ring);
+	
+	return T_EXIT_PASS;
+}
+
+static int test_linked(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2], fd, i;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_files(ring, &fds[0], 1);
+	if (ret) {
+		fprintf(stderr, "failed register files %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_nop(sqe);
+	sqe->flags |= IOSQE_IO_LINK;
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, 0);
+	sqe->user_data = 2;
+
+	ret = io_uring_submit(ring);
+	if (ret != 2) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	fd = -1;
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait cqe %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 2)
+			fd = cqe->res;
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	if (fd != -1)
+		close(fd);
+	io_uring_unregister_files(ring);
+	return T_EXIT_PASS;
+}
+
+/* test not setting IOSQE_FIXED_FILE */
+static int test_not_fixed(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2];
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_files(ring, &fds[0], 1);
+	if (ret) {
+		fprintf(stderr, "failed register files %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, 0);
+	sqe->flags &= ~IOSQE_FIXED_FILE;
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EBADF) {
+		fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_unregister_files(ring);
+	
+	return T_EXIT_PASS;
+}
+
+/* test invalid direct descriptor indexes */
+static int test_bad_fd(struct io_uring *ring, int some_fd)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, some_fd, 0);
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EBADF) {
+		fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return T_EXIT_PASS;
+}
+
+/* test basic functionality of shifting a direct descriptor to a normal file */
+static int test_working(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2];
+	char buf[32];
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	/* register read side */
+	ret = io_uring_register_files(ring, &fds[0], 1);
+	if (ret) {
+		fprintf(stderr, "failed register files %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* close normal descriptor */
+	close(fds[0]);
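+	/* the ring's registered-file table still holds its own reference */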
+
+	/* normal read should fail */
+	ret = read(fds[0], buf, 1);
+	if (ret != -1) {
+		fprintf(stderr, "unexpected read ret %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (errno != EBADF) {
+		fprintf(stderr, "unexpected read failure %d\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	/* verify we can read the data */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+	io_uring_submit(ring);
+
+	/* put some data in the pipe */
+	ret = write(fds[1], "Hello", 5);
+	if (ret < 0) {
+		perror("write");
+		return T_EXIT_FAIL;
+	} else if (ret != 5) {
+		fprintf(stderr, "short write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != 5) {
+		fprintf(stderr, "weird pipe read ret %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	/* fixed pipe read worked, now re-install as a regular fd */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, 0);
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res == -EINVAL) {
+		no_fd_install = 1;
+		return T_EXIT_SKIP;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "failed install fd: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	/* stash new pipe read side fd in old spot */
+	fds[0] = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = write(fds[1], "Hello", 5);
+	if (ret < 0) {
+		perror("write");
+		return T_EXIT_FAIL;
+	} else if (ret != 5) {
+		fprintf(stderr, "short write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* normal pipe read should now work with new fd */
+	ret = read(fds[0], buf, sizeof(buf));
+	if (ret != 5) {
+		fprintf(stderr, "unexpected read ret %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* close fixed file */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_close_direct(sqe, 0);
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res) {
+		fprintf(stderr, "close fixed fd %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = write(fds[1], "Hello", 5);
+	if (ret < 0) {
+		perror("write");
+		return T_EXIT_FAIL;
+	} else if (ret != 5) {
+		fprintf(stderr, "short write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* normal pipe read should still work with new fd */
+	ret = read(fds[0], buf, sizeof(buf));
+	if (ret != 5) {
+		fprintf(stderr, "unexpected read ret %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* fixed fd pipe read should now fail */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0);
+	sqe->flags = IOSQE_FIXED_FILE;
+	io_uring_submit(ring);
+
+	/* put some data in the pipe */
+	ret = write(fds[1], "Hello", 5);
+	if (ret < 0) {
+		perror("write");
+		return T_EXIT_FAIL;
+	} else if (ret != 5) {
+		fprintf(stderr, "short write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EBADF) {
+		fprintf(stderr, "weird pipe read ret %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_unregister_files(ring);
+	return T_EXIT_PASS;
+}
+
+static int test_creds(struct io_uring *ring, int async)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int cred_id, ret, fds[2];
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_files(ring, &fds[0], 1);
+	if (ret) {
+		fprintf(stderr, "failed register files %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	cred_id = io_uring_register_personality(ring);
+	if (cred_id < 0) {
+		fprintf(stderr, "Failed registering creds: %d\n", cred_id);
+		return T_EXIT_FAIL;
+	}
+
+	/* check that asking for creds fails */
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_fixed_fd_install(sqe, 0, 0);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	sqe->personality = cred_id;
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res > 0) {
+		fprintf(stderr, "install succeeded with creds\n");
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EPERM) {
+		fprintf(stderr, "unexpected cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_unregister_files(ring);
+	io_uring_unregister_personality(ring, cred_id);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(4, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_working(&ring);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_working failed\n");
+		return ret;
+	}
+	if (no_fd_install)
+		return T_EXIT_SKIP;
+
+	ret = test_bad_fd(&ring, 0);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_bad_fd 0 failed\n");
+		return ret;
+	}
+
+	ret = test_bad_fd(&ring, 500);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_bad_fd 500 failed\n");
+		return ret;
+	}
+	
+	ret = test_not_fixed(&ring);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_not_fixed failed\n");
+		return ret;
+	}
+
+	ret = test_flags(&ring, 0);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_flags 0 failed\n");
+		return ret;
+	}
+
+	ret = test_flags(&ring, 1);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_flags 1 failed\n");
+		return ret;
+	}
+
+	ret = test_creds(&ring, 0);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_creds 0 failed\n");
+		return ret;
+	}
+
+	ret = test_creds(&ring, 1);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_creds 1 failed\n");
+		return ret;
+	}
+
+	ret = test_linked(&ring);
+	if (ret != T_EXIT_PASS) {
+		if (ret == T_EXIT_FAIL)
+			fprintf(stderr, "test_linked failed\n");
+		return ret;
+	}
+	
+	return T_EXIT_PASS;
+}
diff --git a/test/fd-pass.c b/test/fd-pass.c
new file mode 100644
index 0000000..0c981fb
--- /dev/null
+++ b/test/fd-pass.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various fixed file fd passing tests
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define FSIZE		128
+#define PAT		0x9a
+#define USER_DATA	0x89
+
+static int no_fd_pass;
+
+static int verify_fixed_read(struct io_uring *ring, int fixed_fd, int fail)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	unsigned char buf[FSIZE];
+	int i;
+	
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, fixed_fd, buf, FSIZE, 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+	io_uring_submit(ring);
+
+	io_uring_wait_cqe(ring, &cqe);
+	if (cqe->res != FSIZE) {
+		if (fail && cqe->res == -EBADF)
+			return 0;
+		fprintf(stderr, "Read: %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	for (i = 0; i < FSIZE; i++) {
+		if (buf[i] != PAT) {
+			fprintf(stderr, "got %x, wanted %x\n", buf[i], PAT);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int test(const char *filename, int source_fd, int target_fd)
+{
+	struct io_uring sring, dring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	ret = io_uring_queue_init(8, &sring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(8, &dring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_files_sparse(&sring, 8);
+	if (ret) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "register files failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_register_files_sparse(&dring, 8);
+	if (ret) {
+		fprintf(stderr, "register files failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (target_fd == IORING_FILE_INDEX_ALLOC) {
+		/* we want to test installing into a non-zero slot */
+		ret = io_uring_register_file_alloc_range(&dring, 1, 1);
+		if (ret) {
+			fprintf(stderr, "io_uring_register_file_alloc_range %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	/* open direct descriptor */
+	sqe = io_uring_get_sqe(&sring);
+	io_uring_prep_openat_direct(sqe, AT_FDCWD, filename, 0, 0644, source_fd);
+	io_uring_submit(&sring);
+	ret = io_uring_wait_cqe(&sring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(&sring, cqe);
+
+	/* verify data is sane for source ring */
+	if (verify_fixed_read(&sring, source_fd, 0))
+		return T_EXIT_FAIL;
+
+	/* send direct descriptor to destination ring */
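+	/*
+	 * msg_ring fd passing installs the source ring's fixed file into
+	 * the destination ring's file table and posts a CQE on that ring.
+	 */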
+	sqe = io_uring_get_sqe(&sring);
+	if (target_fd == IORING_FILE_INDEX_ALLOC) {
+		io_uring_prep_msg_ring_fd_alloc(sqe, dring.ring_fd, source_fd,
+						USER_DATA, 0);
+	} else {
+
+		io_uring_prep_msg_ring_fd(sqe, dring.ring_fd, source_fd,
+					  target_fd, USER_DATA, 0);
+	}
+	io_uring_submit(&sring);
+
+	ret = io_uring_wait_cqe(&sring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res < 0) {
+		if (cqe->res == -EINVAL && !no_fd_pass) {
+			no_fd_pass = 1;
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "msg_ring failed %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(&sring, cqe);
+
+	/* get posted completion for the passing */
+	ret = io_uring_wait_cqe(&dring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->user_data != USER_DATA) {
+		fprintf(stderr, "bad user_data %ld\n", (long) cqe->user_data);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "bad result %i\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (target_fd == IORING_FILE_INDEX_ALLOC) {
+		if (cqe->res != 1) {
+			fprintf(stderr, "invalid allocated index %i\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		target_fd = cqe->res;
+	}
+	io_uring_cqe_seen(&dring, cqe);
+
+	/* now verify we can read the sane data from the destination ring */
+	if (verify_fixed_read(&dring, target_fd, 0))
+		return T_EXIT_FAIL;
+
+	/* close descriptor in source ring */
+	sqe = io_uring_get_sqe(&sring);
+	io_uring_prep_close_direct(sqe, source_fd);
+	io_uring_submit(&sring);
+
+	ret = io_uring_wait_cqe(&sring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res) {
+		fprintf(stderr, "direct close failed %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(&sring, cqe);
+
+	/* check that source ring fails after close */
+	if (verify_fixed_read(&sring, source_fd, 1))
+		return T_EXIT_FAIL;
+
+	/* check we can still read from destination ring */
+	if (verify_fixed_read(&dring, target_fd, 0))
+		return T_EXIT_FAIL;
+
+	io_uring_queue_exit(&sring);
+	io_uring_queue_exit(&dring);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	char fname[80];
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	sprintf(fname, ".fd-pass.%d", getpid());
+	t_create_file_pattern(fname, FSIZE, PAT);
+
+	ret = test(fname, 0, 1);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed 0 1\n");
+		ret = T_EXIT_FAIL;
+	}
+
+	ret = test(fname, 0, 2);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed 0 2\n");
+		ret = T_EXIT_FAIL;
+	}
+
+	ret = test(fname, 1, 1);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed 1 1\n");
+		ret = T_EXIT_FAIL;
+	}
+
+	ret = test(fname, 1, 0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed 1 0\n");
+		ret = T_EXIT_FAIL;
+	}
+
+	ret = test(fname, 1, IORING_FILE_INDEX_ALLOC);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed 1 ALLOC\n");
+		ret = T_EXIT_FAIL;
+	}
+
+	unlink(fname);
+	return ret;
+}
diff --git a/test/fdinfo.c b/test/fdinfo.c
new file mode 100644
index 0000000..f9ef3c4
--- /dev/null
+++ b/test/fdinfo.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: read the ring's fdinfo while running basic read/write tests
+ * with buffered, O_DIRECT, and SQPOLL
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+
+#include "helpers.h"
+#include "liburing.h"
+
+#define FILE_SIZE	(256 * 1024)
+#define BS		8192
+#define BUFFERS		(FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int no_read;
+static int warned;
+
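+/*
+ * Read the ring's /proc/self/fdinfo file to exercise the kernel's io_uring
+ * fdinfo show path while requests are inflight.
+ */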
+static void fdinfo_read(struct io_uring *ring)
+{
+	char fd_name[128];
+	char *buf;
+	int fd;
+
+	buf = malloc(4096);
+
+	sprintf(fd_name, "/proc/self/fdinfo/%d", ring->ring_fd);
+	fd = open(fd_name, O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return;
+	}
+
+	do {
+		int ret = read(fd, buf, 4096);
+
+		if (ret < 0) {
+			perror("fdinfo read");
+			break;
+		} else if (ret == 4096) {
+			continue;
+		}
+		break;
+	} while (1);
+
+	close(fd);
+	free(buf);
+}
+
+static int __test_io(const char *file, struct io_uring *ring, int write,
+		     int buffered, int sqthread, int fixed, int nonvec,
+		     int buf_select, int seq, int exp_len)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int open_flags;
+	int i, fd = -1, ret;
+	off_t offset;
+
+#ifdef VERBOSE
+	fprintf(stdout, "%s: start %d/%d/%d/%d/%d: ", __FUNCTION__, write,
+							buffered, sqthread,
+							fixed, nonvec);
+#endif
+	if (write)
+		open_flags = O_WRONLY;
+	else
+		open_flags = O_RDONLY;
+	if (!buffered)
+		open_flags |= O_DIRECT;
+
+	if (fixed) {
+		ret = t_register_buffers(ring, vecs, BUFFERS);
+		if (ret == T_SETUP_SKIP)
+			return T_EXIT_SKIP;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		if (errno == EINVAL)
+			return 0;
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
+		perror("file open");
+		goto err;
+	}
+
+	if (sqthread) {
+		ret = io_uring_register_files(ring, &fd, 1);
+		if (ret) {
+			fprintf(stderr, "file reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	offset = 0;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (!seq)
+			offset = BS * (rand() % BUFFERS);
+		if (write) {
+			int do_fixed = fixed;
+			int use_fd = fd;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+			} else if (nonvec) {
+				io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+			} else {
+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+								offset);
+			}
+		} else {
+			int do_fixed = fixed;
+			int use_fd = fd;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+			} else if (nonvec) {
+				io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+			} else {
+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+								offset);
+			}
+
+		}
+		sqe->user_data = i;
+		if (sqthread)
+			sqe->flags |= IOSQE_FIXED_FILE;
+		if (buf_select) {
+			if (nonvec)
+				sqe->addr = 0;
+			sqe->flags |= IOSQE_BUFFER_SELECT;
+			sqe->buf_group = buf_select;
+		}
+		if (seq)
+			offset += BS;
+	}
+
+	fdinfo_read(ring);
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < 10; i++) {
+		fdinfo_read(ring);
+		usleep(2);
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (cqe->res == -EINVAL && nonvec) {
+			if (!warned) {
+				fprintf(stdout, "Non-vectored IO not "
+					"supported, skipping\n");
+				warned = 1;
+				no_read = 1;
+			}
+		} else if (exp_len == -1) {
+			int iov_len = vecs[cqe->user_data].iov_len;
+
+			if (cqe->res != iov_len) {
+				fprintf(stderr, "cqe res %d, wanted %d\n",
+					cqe->res, iov_len);
+				goto err;
+			}
+		} else if (cqe->res != exp_len) {
+			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
+			goto err;
+		}
+		if (buf_select && exp_len == BS) {
+			int bid = cqe->flags >> 16;
+			unsigned char *ptr = vecs[bid].iov_base;
+			int j;
+
+			for (j = 0; j < BS; j++) {
+				if (ptr[j] == cqe->user_data)
+					continue;
+
+				fprintf(stderr, "Data mismatch! bid=%d, "
+						"wanted=%d, got=%d\n", bid,
+						(int)cqe->user_data, ptr[j]);
+				return 1;
+			}
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (fixed) {
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+	if (sqthread) {
+		ret = io_uring_unregister_files(ring);
+		if (ret) {
+			fprintf(stderr, "file unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	close(fd);
+#ifdef VERBOSE
+	fprintf(stdout, "PASS\n");
+#endif
+	return 0;
+err:
+#ifdef VERBOSE
+	fprintf(stderr, "FAILED\n");
+#endif
+	if (fd != -1)
+		close(fd);
+	return 1;
+}
+
+static int test_io(const char *file, int write, int buffered, int sqthread,
+		   int fixed, int nonvec, int exp_len)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+
+	if (sqthread)
+		ring_flags = IORING_SETUP_SQPOLL;
+
+	ret = t_create_ring(64, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = __test_io(file, &ring, write, buffered, sqthread, fixed, nonvec,
+			0, 0, exp_len);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+static int has_nonvec_read(void)
+{
+	struct io_uring_probe *p;
+	struct io_uring ring;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		exit(ret);
+	}
+
+	p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
+	ret = io_uring_register_probe(&ring, p, 256);
+	/* if we don't have PROBE_REGISTER, we don't have OP_READ/WRITE */
+	if (ret == -EINVAL) {
+out:
+		io_uring_queue_exit(&ring);
+		free(p);
+		return 0;
+	} else if (ret) {
+		fprintf(stderr, "register_probe: %d\n", ret);
+		goto out;
+	}
+
+	if (p->ops_len <= IORING_OP_READ)
+		goto out;
+	if (!(p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED))
+		goto out;
+	io_uring_queue_exit(&ring);
+	free(p);
+	return 1;
+}
+
+static int test_eventfd_read(int flags)
+{
+	struct io_uring ring;
+	int fd, ret;
+	eventfd_t event;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+
+	if (no_read)
+		return 0;
+	ret = t_create_ring(64, &ring, flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		if (ret == -EINVAL)
+			return 0;
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	fd = eventfd(1, 0);
+	if (fd < 0) {
+		perror("eventfd");
+		return 1;
+	}
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, &event, sizeof(eventfd_t), 0);
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submitted %d\n", ret);
+		return 1;
+	}
+	fdinfo_read(&ring);
+	eventfd_write(fd, 1);
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe=%d\n", ret);
+		return 1;
+	}
+	if (cqe->res == -EINVAL) {
+		fprintf(stdout, "eventfd IO not supported, skipping\n");
+	} else if (cqe->res != sizeof(eventfd_t)) {
+		fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res,
+						(int) sizeof(eventfd_t));
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, ret, nr;
+	char buf[256];
+	char *fname;
+
+	if (argc > 1) {
+		fname = argv[1];
+	} else {
+		srand((unsigned)time(NULL));
+		snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+			(unsigned)rand(), (unsigned)getpid());
+		fname = buf;
+		t_create_file(fname, FILE_SIZE);
+	}
+
+	signal(SIGXFSZ, SIG_IGN);
+
+	vecs = t_create_buffers(BUFFERS, BS);
+
+	/* if we don't have nonvec read, skip testing that */
+	nr = has_nonvec_read() ? 32 : 16;
+
+	for (i = 0; i < nr; i++) {
+		int write = (i & 1) != 0;
+		int buffered = (i & 2) != 0;
+		int sqthread = (i & 4) != 0;
+		int fixed = (i & 8) != 0;
+		int nonvec = (i & 16) != 0;
+
+		ret = test_io(fname, write, buffered, sqthread, fixed, nonvec,
+			      BS);
+		if (ret == T_EXIT_SKIP)
+			continue;
+		if (ret) {
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
+				write, buffered, sqthread, fixed, nonvec);
+			goto err;
+		}
+	}
+
+	ret = test_eventfd_read(0);
+	if (ret) {
+		fprintf(stderr, "eventfd read 0 failed\n");
+		goto err;
+	}
+
+	ret = test_eventfd_read(IORING_SETUP_DEFER_TASKRUN|IORING_SETUP_SINGLE_ISSUER);
+	if (ret) {
+		fprintf(stderr, "eventfd read defer failed\n");
+		goto err;
+	}
+
+	ret = test_eventfd_read(IORING_SETUP_SQPOLL);
+	if (ret) {
+		fprintf(stderr, "eventfd read sqpoll failed\n");
+		goto err;
+	}
+
+	if (fname != argv[1])
+		unlink(fname);
+	return 0;
+err:
+	if (fname != argv[1])
+		unlink(fname);
+	return 1;
+}
diff --git a/test/fifo-nonblock-read.c b/test/fifo-nonblock-read.c
new file mode 100644
index 0000000..3e21383
--- /dev/null
+++ b/test/fifo-nonblock-read.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Test O_NONBLOCK reading from a fifo, which should result in a
+ *		proper retry and a positive read result. A buggy kernel would
+ *		return -EAGAIN to the user instead.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	char buf[32];
+	int fds[2];
+	int flags;
+	int ret;
+
+	io_uring_queue_init(1, &ring, 0);
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	flags = fcntl(fds[0], F_GETFL, 0);
+	if (flags < 0) {
+		perror("fcntl get");
+		return T_EXIT_FAIL;
+	}
+	flags |= O_NONBLOCK;
+	ret = fcntl(fds[0], F_SETFL, flags);
+	if (ret < 0) {
+		perror("fcntl set");
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
+	io_uring_submit(&ring);
+
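+	/* give the initial nonblocking read time to hit -EAGAIN and arm poll retry */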
+	usleep(10000);
+
+	ret = write(fds[1], "Hello\n", 6);
+	if (ret < 0) {
+		perror("pipe write");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait=%d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
diff --git a/test/file-register.c b/test/file-register.c
index 6889dbf..a08cf9c 100644
--- a/test/file-register.c
+++ b/test/file-register.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <sys/resource.h>
 
 #include "helpers.h"
@@ -119,16 +120,21 @@
 		fds = open_files(1, 0, off);
 		ret = io_uring_register_files_update(ring, off, fds, 1);
 		if (ret != 1) {
-			if (off == 300 && ret == -EINVAL)
+			if (off == 300 && ret == -EINVAL) {
+				free(fds);
 				break;
+			}
 			fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret);
+			free(fds);
 			break;
 		}
 		if (off >= 300) {
 			fprintf(stderr, "%s: Succeeded beyond end-of-list?\n", __FUNCTION__);
+			free(fds);
 			goto err;
 		}
 		off++;
+		free(fds);
 	} while (1);
 
 	ret = io_uring_unregister_files(ring);
@@ -304,8 +310,8 @@
 	files = open_files(100, 100, 0);
 	ret = io_uring_register_files(ring, files, 200);
 	if (ret) {
-		if (ret == -EBADF) {
-			fprintf(stdout, "Sparse files not supported\n");
+		if (ret == -EBADF || ret == -EINVAL) {
+			fprintf(stdout, "Sparse files not supported, skipping\n");
 			no_update = 1;
 			goto done;
 		}
@@ -351,15 +357,21 @@
 static int test_basic(struct io_uring *ring, int fail)
 {
 	int *files;
-	int ret;
+	int ret, i;
 	int nr_files = fail ? 10 : 100;
 
-	files = open_files(nr_files, 0, 0);
+	files = open_files(nr_files, fail ? 90 : 0, 0);
+	if (fail) {
+		for (i = nr_files; i < nr_files + 90; i++)
+			files[i] = -2;
+	}
 	ret = io_uring_register_files(ring, files, 100);
 	if (ret) {
 		if (fail) {
-			if (ret == -EBADF || ret == -EFAULT)
+			if (ret == -EBADF || ret == -EFAULT) {
+				close_files(files, nr_files, 0);
 				return 0;
+			}
 		}
 		fprintf(stderr, "%s: register %d\n", __FUNCTION__, ret);
 		goto err;
@@ -626,6 +638,7 @@
 	ret = io_uring_register_files(&ring, fds, 256);
 	if (ret) {
 		fprintf(stderr, "file_register: %d\n", ret);
+		free(fds);
 		return ret;
 	}
 
@@ -634,6 +647,7 @@
 		ret = io_uring_register_files_update(&ring, i, &newfd, 1);
 		if (ret != 1) {
 			fprintf(stderr, "file_update: %d\n", ret);
+			free(fds);
 			return ret;
 		}
 	}
@@ -645,6 +659,7 @@
 	ret = io_uring_register_files(&ring, fds, 256);
 	if (ret) {
 		fprintf(stderr, "file_register: %d\n", ret);
+		free(fds);
 		return ret;
 	}
 
@@ -653,12 +668,14 @@
 		ret = io_uring_register_files_update(&ring, i, &newfd, 1);
 		if (ret != 1) {
 			fprintf(stderr, "file_update: %d\n", ret);
+			free(fds);
 			return ret;
 		}
 	}
 	io_uring_unregister_files(&ring);
 
 	io_uring_queue_exit(&ring);
+	free(fds);
 	return 0;
 }
 
@@ -830,124 +847,355 @@
 	return 0;
 }
 
+static int file_update_alloc(struct io_uring *ring, int *fd)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_files_update(sqe, fd, 1, IORING_FILE_INDEX_ALLOC);
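+	/* an offset of IORING_FILE_INDEX_ALLOC asks the kernel to pick a free slot */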
+
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret);
+		return -1;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret);
+		return -1;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+}
+
+static int test_out_of_range_file_ranges(struct io_uring *ring)
+{
+	int ret;
+
+	ret = io_uring_register_file_alloc_range(ring, 8, 3);
+	if (ret != -EINVAL) {
+		fprintf(stderr, "overlapping range %i\n", ret);
+		return 1;
+	}
+
+	ret = io_uring_register_file_alloc_range(ring, 10, 1);
+	if (ret != -EINVAL) {
+		fprintf(stderr, "out of range index %i\n", ret);
+		return 1;
+	}
+
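+	/* 7 + ~1U wraps past the 32-bit range, so this must fail with -EOVERFLOW */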
+	ret = io_uring_register_file_alloc_range(ring, 7, ~1U);
+	if (ret != -EOVERFLOW) {
+		fprintf(stderr, "overflow %i\n", ret);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int test_overallocating_file_range(struct io_uring *ring, int fds[2])
+{
+	int roff = 7, rlen = 2;
+	int ret, i, fd;
+
+	ret = io_uring_register_file_alloc_range(ring, roff, rlen);
+	if (ret) {
+		fprintf(stderr, "io_uring_register_file_alloc_range %i\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < rlen; i++) {
+		fd = fds[0];
+		ret = file_update_alloc(ring, &fd);
+		if (ret != 1) {
+			fprintf(stderr, "file_update_alloc\n");
+			return 1;
+		}
+
+		if (fd < roff || fd >= roff + rlen) {
+			fprintf(stderr, "invalid off result %i\n", fd);
+			return 1;
+		}
+	}
+
+	fd = fds[0];
+	ret = file_update_alloc(ring, &fd);
+	if (ret != -ENFILE) {
+		fprintf(stderr, "overallocated %i, off %i\n", ret, fd);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int test_zero_range_alloc(struct io_uring *ring, int fds[2])
+{
+	int ret, fd;
+
+	ret = io_uring_register_file_alloc_range(ring, 7, 0);
+	if (ret) {
+		fprintf(stderr, "io_uring_register_file_alloc_range failed %i\n", ret);
+		return 1;
+	}
+
+	fd = fds[0];
+	ret = file_update_alloc(ring, &fd);
+	if (ret != -ENFILE) {
+		fprintf(stderr, "zero alloc %i\n", ret);
+		return 1;
+	}
+	return 0;
+}
+
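+/*
+ * Unregistering files while a fixed-file request is in flight used to
+ * misbehave on DEFER_TASKRUN rings; exercise that path explicitly.
+ */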
+static int test_defer_taskrun(void)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int ret, fds[2];
+	char buff = 'x';
+
+	ret = io_uring_queue_init(8, &ring,
+				  IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER);
+	if (ret) {
+		fprintf(stderr, "ring init\n");
+		return 1;
+	}
+
+	ret = pipe(fds);
+	if (ret) {
+		fprintf(stderr, "bad pipes\n");
+		return 1;
+	}
+
+	ret = io_uring_register_files(&ring, &fds[0], 2);
+	if (ret) {
+		fprintf(stderr, "bad register %d\n", ret);
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, 0, &buff, 1, 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "bad submit\n");
+		return 1;
+	}
+
+	ret = write(fds[1], &buff, 1);
+	if (ret != 1) {
+		fprintf(stderr, "bad pipe write\n");
+		return 1;
+	}
+
+	ret = io_uring_unregister_files(&ring);
+	if (ret) {
+		fprintf(stderr, "bad unregister %d\n", ret);
+		return 1;
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+static int test_file_alloc_ranges(void)
+{
+	struct io_uring ring;
+	int ret, pipe_fds[2];
+
+	if (pipe(pipe_fds)) {
+		fprintf(stderr, "pipes\n");
+		return 1;
+	}
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue_init: %d\n", ret);
+		return 1;
+	}
+
+	ret = io_uring_register_files_sparse(&ring, 10);
+	if (ret == -EINVAL) {
+not_supported:
+		close(pipe_fds[0]);
+		close(pipe_fds[1]);
+		io_uring_queue_exit(&ring);
+		printf("file alloc ranges are not supported, skip\n");
+		return 0;
+	} else if (ret) {
+		fprintf(stderr, "io_uring_register_files_sparse %i\n", ret);
+		return ret;
+	}
+
+	ret = io_uring_register_file_alloc_range(&ring, 0, 1);
+	if (ret) {
+		if (ret == -EINVAL)
+			goto not_supported;
+		fprintf(stderr, "io_uring_register_file_alloc_range %i\n", ret);
+		return 1;
+	}
+
+	ret = test_overallocating_file_range(&ring, pipe_fds);
+	if (ret) {
+		fprintf(stderr, "test_overallocating_file_range() failed\n");
+		return 1;
+	}
+
+	ret = test_out_of_range_file_ranges(&ring);
+	if (ret) {
+		fprintf(stderr, "test_out_of_range_file_ranges() failed\n");
+		return 1;
+	}
+
+	ret = test_zero_range_alloc(&ring, pipe_fds);
+	if (ret) {
+		fprintf(stderr, "test_zero_range_alloc() failed\n");
+		return 1;
+	}
+
+	close(pipe_fds[0]);
+	close(pipe_fds[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	struct io_uring ring;
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
-		printf("ring setup failed\n");
-		return 1;
+		fprintf(stderr, "ring setup failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_basic(&ring, 0);
 	if (ret) {
-		printf("test_basic failed\n");
-		return ret;
+		fprintf(stderr, "test_basic failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_basic(&ring, 1);
 	if (ret) {
-		printf("test_basic failed\n");
-		return ret;
+		fprintf(stderr, "test_basic failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_basic_many(&ring);
 	if (ret) {
-		printf("test_basic_many failed\n");
-		return ret;
+		fprintf(stderr, "test_basic_many failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_sparse(&ring);
 	if (ret) {
-		printf("test_sparse failed\n");
-		return ret;
+		fprintf(stderr, "test_sparse failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	if (no_update)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = test_additions(&ring);
 	if (ret) {
-		printf("test_additions failed\n");
-		return ret;
+		fprintf(stderr, "test_additions failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_removals(&ring);
 	if (ret) {
-		printf("test_removals failed\n");
-		return ret;
+		fprintf(stderr, "test_removals failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_replace(&ring);
 	if (ret) {
-		printf("test_replace failed\n");
-		return ret;
+		fprintf(stderr, "test_replace failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_replace_all(&ring);
 	if (ret) {
-		printf("test_replace_all failed\n");
-		return ret;
+		fprintf(stderr, "test_replace_all failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_grow(&ring);
 	if (ret) {
-		printf("test_grow failed\n");
-		return ret;
+		fprintf(stderr, "test_grow failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_shrink(&ring);
 	if (ret) {
-		printf("test_shrink failed\n");
-		return ret;
+		fprintf(stderr, "test_shrink failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_zero(&ring);
 	if (ret) {
-		printf("test_zero failed\n");
-		return ret;
+		fprintf(stderr, "test_zero failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_huge(&ring);
 	if (ret) {
-		printf("test_huge failed\n");
-		return ret;
+		fprintf(stderr, "test_huge failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_skip(&ring);
 	if (ret) {
-		printf("test_skip failed\n");
-		return 1;
+		fprintf(stderr, "test_skip failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_sparse_updates();
 	if (ret) {
-		printf("test_sparse_updates failed\n");
-		return ret;
+		fprintf(stderr, "test_sparse_updates failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_fixed_removal_ordering();
 	if (ret) {
-		printf("test_fixed_removal_ordering failed\n");
-		return 1;
+		fprintf(stderr, "test_fixed_removal_ordering failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_mixed_af_unix();
 	if (ret) {
-		printf("test_mixed_af_unix failed\n");
-		return 1;
+		fprintf(stderr, "test_mixed_af_unix failed\n");
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_partial_register_fail();
 	if (ret) {
-		printf("test_partial_register_fail failed\n");
-		return ret;
+		fprintf(stderr, "test_partial_register_fail failed\n");
+		return T_EXIT_FAIL;
 	}
 
-	return 0;
+	ret = test_file_alloc_ranges();
+	if (ret) {
+		fprintf(stderr, "test_file_alloc_ranges failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	if (t_probe_defer_taskrun()) {
+		ret = test_defer_taskrun();
+		if (ret) {
+			fprintf(stderr, "test_defer_taskrun failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	return T_EXIT_PASS;
 }
diff --git a/test/file-update.c b/test/file-update.c
index 578017e..d1f2d7e 100644
--- a/test/file-update.c
+++ b/test/file-update.c
@@ -131,18 +131,68 @@
 	free(fds);
 	if (ret == -EINVAL) {
 		fprintf(stdout, "IORING_OP_FILES_UPDATE not supported, skipping\n");
-		return 0;
+		return T_EXIT_SKIP;
 	}
 	return ret != 10;
 }
 
+static int test_update_no_table(void)
+{
+	int up_fd, fds[4] = {-1, 0, 1, 4};
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret;
+
+	ret = t_create_ring(2, &ring, 0);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret != T_SETUP_OK)
+		return ret;
+
+	ret = io_uring_register_files(&ring, fds, 4);
+	/* ignore other failures */
+	if (ret && ret != -EBADF) {
+		fprintf(stderr, "Failed registering file table: %d\n", ret);
+		goto fail;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	up_fd = ring.ring_fd;
+	io_uring_prep_files_update(sqe, &up_fd, 1, -1); //offset = -1
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "Failed submit: %d\n", ret);
+		goto fail;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "Failed wait: %d\n", ret);
+		goto fail;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+	if (ret != -EMFILE && ret != -EINVAL && ret != -EOVERFLOW &&
+	    ret != -ENXIO && ret != -EBADF) {
+		fprintf(stderr, "Bad cqe res: %d\n", ret);
+		goto fail;
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+fail:
+	io_uring_queue_exit(&ring);
+	return T_EXIT_FAIL;
+}
+
 int main(int argc, char *argv[])
 {
 	struct io_uring r1, r2, r3;
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	if (io_uring_queue_init(8, &r1, 0) ||
 	    io_uring_queue_init(8, &r2, 0) ||
@@ -165,9 +215,17 @@
 
 	ret = test_sqe_update(&r1);
 	if (ret) {
-		fprintf(stderr, "test_sqe_update failed\n");
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "test_sqe_update failed\n");
 		return ret;
 	}
 
-	return 0;
+	ret = test_update_no_table();
+	if (ret) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "test_update_no_table failed\n");
+		return ret;
+	}
+
+	return T_EXIT_PASS;
 }
diff --git a/test/file-verify.c b/test/file-verify.c
index 327cb1d..2c6bdb4 100644
--- a/test/file-verify.c
+++ b/test/file-verify.c
@@ -10,7 +10,6 @@
 #include <string.h>
 #include <fcntl.h>
 #include <assert.h>
-#include <string.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <linux/fs.h>
@@ -29,24 +28,42 @@
 #define MAX_VECS	16
 
 /*
- * Can be anything, let's just do something for a bit of parallellism
+ * Can be anything, let's just do something for a bit of parallelism
  */
 #define READ_BATCH	16
 
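+/*
+ * On hppa (PA-RISC), user and kernel mappings of a registered buffer may
+ * alias in the cache, so flush the data cache ("fdc") over the buffer
+ * before verifying its contents. This is a no-op on other architectures.
+ */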
+static void verify_buf_sync(void *buf, size_t size, bool registered)
+{
+#if defined(__hppa__)
+	if (registered) {
+		unsigned long off = (unsigned long) buf & 4095;
+		unsigned long p = (unsigned long) buf & ~4095;
+		int i;
+
+		size += off;
+		for (i = 0; i < size; i += 32)
+			asm volatile("fdc 0(%0)" : : "r" (p + i));
+	}
+#endif
+}
+
 /*
  * Each offset in the file has the offset / sizeof(int) stored for every
  * sizeof(int) address.
  */
-static int verify_buf(void *buf, size_t size, off_t off)
+static int verify_buf(void *buf, size_t size, off_t off, bool registered)
 {
 	int i, u_in_buf = size / sizeof(unsigned int);
 	unsigned int *ptr;
 
+	verify_buf_sync(buf, size, registered);
+
 	off /= sizeof(unsigned int);
 	ptr = buf;
 	for (i = 0; i < u_in_buf; i++) {
 		if (off != *ptr) {
-			fprintf(stderr, "Found %u, wanted %lu\n", *ptr, off);
+			fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
+					(unsigned long long) off);
 			return 1;
 		}
 		ptr++;
@@ -73,6 +90,8 @@
 	else
 		fd = open(fname, O_DIRECT | O_RDWR);
 	if (fd < 0) {
+		if (!buffered && errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -197,7 +216,7 @@
 		goto err;
 	}
 
-	if (verify_buf(buf, CHUNK_SIZE / 2, 0))
+	if (verify_buf(buf, CHUNK_SIZE / 2, 0, false))
 		goto err;
 
 	/*
@@ -329,6 +348,8 @@
 		flags |= O_DIRECT;
 	fd = open(fname, flags);
 	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -364,9 +385,12 @@
 			v[i].iov_base = buf[i];
 			v[i].iov_len = CHUNK_SIZE;
 		}
-		ret = io_uring_register_buffers(ring, v, READ_BATCH);
+		ret = t_register_buffers(ring, v, READ_BATCH);
 		if (ret) {
-			fprintf(stderr, "Error buffer reg %d\n", ret);
+			if (ret == T_SETUP_SKIP) {
+				ret = 0;
+				goto free_bufs;
+			}
 			goto err;
 		}
 	}
@@ -430,6 +454,10 @@
 				fprintf(stderr, "bad read %d, read %d\n", cqe->res, i);
 				goto err;
 			}
+			if (cqe->res < CHUNK_SIZE) {
+				fprintf(stderr, "short read %d, read %d\n", cqe->res, i);
+				goto err;
+			}
 			if (cqe->flags & IORING_CQE_F_BUFFER)
 				index = cqe->flags >> 16;
 			else
@@ -441,12 +469,12 @@
 					void *buf = vecs[index][j].iov_base;
 					size_t len = vecs[index][j].iov_len;
 
-					if (verify_buf(buf, len, voff))
+					if (verify_buf(buf, len, voff, registered))
 						goto err;
 					voff += len;
 				}
 			} else {
-				if (verify_buf(buf[index], CHUNK_SIZE, voff))
+				if (verify_buf(buf[index], CHUNK_SIZE, voff, registered))
 					goto err;
 			}
 		}
@@ -456,6 +484,7 @@
 done:
 	if (registered)
 		io_uring_unregister_buffers(ring);
+free_bufs:
 	if (vectored) {
 		for (j = 0; j < READ_BATCH; j++)
 			for (i = 0; i < nr_vecs; i++)
@@ -480,6 +509,8 @@
 
 	fd = open(fname, O_WRONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -532,98 +563,103 @@
 		goto err;
 	}
 
-	if (fill_pattern(fname))
+	ret = fill_pattern(fname);
+	if (ret == T_EXIT_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret)
 		goto err;
 
 	ret = test(&ring, fname, 1, 0, 0, 0, 0);
+	if (ret == T_EXIT_SKIP)
+		return T_EXIT_SKIP;
 	if (ret) {
 		fprintf(stderr, "Buffered novec test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 1, 0, 0, 1, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered novec reg test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 1, 0, 0, 0, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered novec provide test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 1, 1, 0, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered vec test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 1, 1, 1, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered small vec test failed\n");
 		goto err;
 	}
 
 	ret = test(&ring, fname, 0, 0, 0, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT novec test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 0, 0, 0, 1, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT novec reg test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 0, 0, 0, 0, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT novec provide test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 0, 1, 0, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT vec test failed\n");
 		goto err;
 	}
 	ret = test(&ring, fname, 0, 1, 1, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT small vec test failed\n");
 		goto err;
 	}
 
 	ret = test_truncate(&ring, fname, 1, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered end truncate read failed\n");
 		goto err;
 	}
 	ret = test_truncate(&ring, fname, 1, 1, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered end truncate vec read failed\n");
 		goto err;
 	}
 	ret = test_truncate(&ring, fname, 1, 0, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "Buffered end truncate pbuf read failed\n");
 		goto err;
 	}
 
 	ret = test_truncate(&ring, fname, 0, 0, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT end truncate read failed\n");
 		goto err;
 	}
 	ret = test_truncate(&ring, fname, 0, 1, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT end truncate vec read failed\n");
 		goto err;
 	}
 	ret = test_truncate(&ring, fname, 0, 0, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "O_DIRECT end truncate pbuf read failed\n");
 		goto err;
 	}
 
 	if (buf == fname)
 		unlink(fname);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	if (buf == fname)
 		unlink(fname);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/files-exit-hang-poll.c b/test/files-exit-hang-poll.c
index c3f7fb7..04febc8 100644
--- a/test/files-exit-hang-poll.c
+++ b/test/files-exit-hang-poll.c
@@ -15,11 +15,10 @@
 #include <unistd.h>
 #include <poll.h>
 #include "liburing.h"
+#include "helpers.h"
 
 #define BACKLOG 512
 
-#define PORT 9100
-
 static struct io_uring ring;
 
 static void add_poll(struct io_uring *ring, int fd)
@@ -63,15 +62,14 @@
 	struct io_uring_cqe *cqe;
 	int ret, sock_listen_fd;
 	const int val = 1;
-	int i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	sock_listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
 	if (sock_listen_fd < 0) {
 		perror("socket");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
@@ -80,29 +78,18 @@
 	serv_addr.sin_family = AF_INET;
 	serv_addr.sin_addr.s_addr = INADDR_ANY;
 
-	for (i = 0; i < 100; i++) {
-		serv_addr.sin_port = htons(PORT + i);
-
-		ret = bind(sock_listen_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
-		if (!ret)
-			break;
-		if (errno != EADDRINUSE) {
-			fprintf(stderr, "bind: %s\n", strerror(errno));
-			return 1;
-		}
-		if (i == 99) {
-			printf("Gave up on finding a port, skipping\n");
-			goto out;
-		}
+	if (t_bind_ephemeral_port(sock_listen_fd, &serv_addr)) {
+		perror("bind");
+		return T_EXIT_FAIL;
 	}
 
 	if (listen(sock_listen_fd, BACKLOG) < 0) {
 		perror("Error listening on socket\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (setup_io_uring())
-		return 1;
+		return T_EXIT_FAIL;
 
 	add_poll(&ring, sock_listen_fd);
 	add_accept(&ring, sock_listen_fd);
@@ -110,7 +97,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 2) {
 		fprintf(stderr, "submit=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	signal(SIGALRM, alarm_sig);
@@ -119,10 +106,9 @@
 	ret = io_uring_wait_cqe(&ring, &cqe);
 	if (ret) {
 		fprintf(stderr, "wait_cqe=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-out:
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/files-exit-hang-timeout.c b/test/files-exit-hang-timeout.c
index 09efc4f..b4f9eca 100644
--- a/test/files-exit-hang-timeout.c
+++ b/test/files-exit-hang-timeout.c
@@ -15,14 +15,15 @@
 #include <unistd.h>
 #include <poll.h>
 #include "liburing.h"
+#include "helpers.h"
 
 #define BACKLOG 512
 
 #define PORT 9100
 
-struct io_uring ring;
+static struct io_uring ring;
 
-struct __kernel_timespec ts = {
+static struct __kernel_timespec ts = {
 	.tv_sec		= 300,
 	.tv_nsec	= 0,
 };
@@ -42,13 +43,12 @@
 
 	sqe = io_uring_get_sqe(ring);
 	io_uring_prep_accept(sqe, fd, 0, 0, SOCK_NONBLOCK | SOCK_CLOEXEC);
-	sqe->flags |= IOSQE_IO_LINK;
 }
 
 static int setup_io_uring(void)
 {
 	int ret;
-       
+
 	ret = io_uring_queue_init(16, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret));
@@ -72,12 +72,12 @@
 	int i;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	sock_listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
 	if (sock_listen_fd < 0) {
 		perror("socket");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
@@ -94,21 +94,21 @@
 			break;
 		if (errno != EADDRINUSE) {
 			fprintf(stderr, "bind: %s\n", strerror(errno));
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		if (i == 99) {
 			printf("Gave up on finding a port, skipping\n");
-			goto out;
+			goto skip;
 		}
 	}
 
 	if (listen(sock_listen_fd, BACKLOG) < 0) {
 		perror("Error listening on socket\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (setup_io_uring())
-		return 1;
+		return T_EXIT_FAIL;
 
 	add_timeout(&ring, sock_listen_fd);
 	add_accept(&ring, sock_listen_fd);
@@ -116,7 +116,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 2) {
 		fprintf(stderr, "submit=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	signal(SIGALRM, alarm_sig);
@@ -125,10 +125,12 @@
 	ret = io_uring_wait_cqe(&ring, &cqe);
 	if (ret) {
 		fprintf(stderr, "wait_cqe=%d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
-out:
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
+skip:
+	io_uring_queue_exit(&ring);
+	return T_EXIT_SKIP;
 }
diff --git a/test/fixed-buf-iter.c b/test/fixed-buf-iter.c
index 9576993..fdd9459 100644
--- a/test/fixed-buf-iter.c
+++ b/test/fixed-buf-iter.c
@@ -63,7 +63,7 @@
 		return 1;
 	}
 
-	if (cqe->res < 0) { 
+	if (cqe->res < 0) {
 		fprintf(stderr, "Error in async operation: %s\n", strerror(-cqe->res));
 		return 1;
 	}
@@ -87,6 +87,8 @@
 		return 1;
 	}
 	io_uring_cqe_seen(ring, cqe);
+	for (i = 0; i < BUFFERS; i++)
+		free(iov[i].iov_base);
 	return 0;
 }
 
@@ -96,20 +98,20 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = t_create_ring(8, &ring, 0);
 	if (ret == T_SETUP_SKIP)
-		return 0;
+		return T_EXIT_SKIP;
 	else if (ret < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	ret = test(&ring);
 	if (ret) {
 		fprintf(stderr, "Test failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/fixed-buf-merge.c b/test/fixed-buf-merge.c
new file mode 100644
index 0000000..fbc94a4
--- /dev/null
+++ b/test/fixed-buf-merge.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test fixed buffer merging/skipping
+ *
+ * Taken from: https://github.com/axboe/liburing/issues/994
+ *
+ */
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	int ret, i, fd, initial_offset = 4096, num_requests = 3;
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct iovec iov;
+	char *buffer, *to_free;
+	unsigned head;
+	char filename[64];
+
+	ret = io_uring_queue_init(4, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	sprintf(filename, ".fixed-buf-%d", getpid());
+	t_create_file(filename, 4 * 4096);
+
+	fd = open(filename, O_RDONLY | O_DIRECT, 0644);
+	if (fd < 0) {
+		if (errno == EINVAL) {
+			unlink(filename);
+			return T_EXIT_SKIP;
+		}
+		perror("open");
+		goto err_unlink;
+	}
+
+	to_free = buffer = aligned_alloc(4096, 128 * 4096);
+	if (!buffer) {
+		perror("aligned_alloc");
+		goto err_unlink;
+	}
+
+	/* Register buffer */
+	iov.iov_base = buffer;
+	iov.iov_len = 128 * 4096;
+
+	ret = io_uring_register_buffers(&ring, &iov, 1);
+	if (ret) {
+		fprintf(stderr, "buf register: %d\n", ret);
+		goto err_unlink;
+	}
+
+	/* Prepare read requests */
+	buffer += initial_offset;
+	for (i = 0; i < num_requests; i++) {
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_read_fixed(sqe, fd, buffer, 4096, 4096 * i, 0);
+		buffer += 4096;
+	}
+
+	/* Submit requests and reap completions */
+	ret = io_uring_submit_and_wait(&ring, num_requests);
+	if (ret != num_requests) {
+		fprintf(stderr, "Submit and wait: %d\n", ret);
+		goto err_unlink;
+	}
+
+	i = 0;
+	io_uring_for_each_cqe(&ring, head, cqe) {
+		if (cqe->res != 4096) {
+			fprintf(stderr, "cqe: %d\n", cqe->res);
+			goto err_unlink;
+		}
+		i++;
+	}
+
+	if (i != num_requests) {
+		fprintf(stderr, "Got %d completions\n", i);
+		goto err_unlink;
+	}
+
+	io_uring_cq_advance(&ring, i);
+	io_uring_queue_exit(&ring);
+	close(fd);
+	free(to_free);
+	unlink(filename);
+	return T_EXIT_PASS;
+err_unlink:
+	unlink(filename);
+	return T_EXIT_FAIL;
+}
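
fixed-buf-merge exercises a detail of io_uring_prep_read_fixed() worth spelling out: the target address may point anywhere inside the buffer registered at buf_index, not only at its start. A minimal sketch of that call pattern follows; the helper name is illustrative, not from the test suite.

/* Sketch: fixed read targeting the middle of registered buffer index 0. */
#include "liburing.h"

static void queue_fixed_read_at(struct io_uring *ring, int fd,
				char *reg_buf, size_t off)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	/*
	 * reg_buf was previously registered with io_uring_register_buffers();
	 * reg_buf + off must still fall within that registration.
	 */
	io_uring_prep_read_fixed(sqe, fd, reg_buf + off, 4096, 0, 0);
}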
diff --git a/test/fixed-hugepage.c b/test/fixed-hugepage.c
new file mode 100644
index 0000000..88a6d20
--- /dev/null
+++ b/test/fixed-hugepage.c
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test fixed buffers consisting of hugepages.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+/*
+ * Before testing:
+ * echo (>=4) > /proc/sys/vm/nr_hugepages
+ * echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
+ * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-16kB/enabled
+ *
+ * Not 100% guaranteed to get THP-backed memory, but in general it will be.
+ */
+#define MTHP_16KB	(16UL * 1024)
+#define HUGEPAGE_SIZE	(2UL * 1024 * 1024)
+#define NR_BUFS		1
+#define IN_FD		"/dev/urandom"
+#define OUT_FD		"/dev/zero"
+
+static int open_files(char *fname_in, int *fd_in, int *fd_out)
+{
+	*fd_in = open(fname_in, O_RDONLY, 0644);
+	if (*fd_in < 0) {
+		printf("open %s failed\n", fname_in);
+		return -1;
+	}
+
+	*fd_out = open(OUT_FD, O_RDWR, 0644);
+	if (*fd_out < 0) {
+		printf("open %s failed\n", OUT_FD);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void unmap(struct iovec *iov, int nr_bufs, size_t offset)
+{
+	int i;
+
+	for (i = 0; i < nr_bufs; i++)
+		munmap(iov[i].iov_base - offset, iov[i].iov_len + offset);
+}
+
+static int mmap_hugebufs(struct iovec *iov, int nr_bufs, size_t buf_size, size_t offset)
+{
+	int i;
+
+	for (i = 0; i < nr_bufs; i++) {
+		void *base = NULL;
+
+		base = mmap(NULL, buf_size, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+		if (base == MAP_FAILED) {
+			printf("Unable to map hugetlb page. Try increasing the "
+				"value in /proc/sys/vm/nr_hugepages\n");
+			unmap(iov, i, offset);
+			return -1;
+		}
+
+		memset(base, 0, buf_size);
+		iov[i].iov_base = base + offset;
+		iov[i].iov_len = buf_size - offset;
+	}
+
+	return 0;
+}
+
+/* map a hugepage and a smaller page into contiguous memory */
+static int mmap_mixture(struct iovec *iov, int nr_bufs, size_t buf_size, bool huge_on_left)
+{
+	int i;
+	void *small_base = NULL, *huge_base = NULL, *start = NULL,
+	     *huge_start = NULL, *small_start = NULL;
+	size_t small_size = buf_size - HUGEPAGE_SIZE;
+	size_t seg_size = ((buf_size / HUGEPAGE_SIZE) + 1) * HUGEPAGE_SIZE;
+
+	start = mmap(NULL, seg_size * nr_bufs, PROT_NONE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+	if (start == MAP_FAILED) {
+		printf("Unable to preserve the page mixture memory. "
+			"Try increasing the RLIMIT_MEMLOCK resource limit\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_bufs; i++) {
+		if (huge_on_left) {
+			huge_start = start;
+			small_start = start + HUGEPAGE_SIZE;
+		} else {
+			huge_start = start + HUGEPAGE_SIZE;
+			small_start = start + HUGEPAGE_SIZE - small_size;
+		}
+
+		huge_base = mmap(huge_start, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED, -1, 0);
+		if (huge_base == MAP_FAILED) {
+			printf("Unable to map hugetlb page in the page mixture. "
+				"Try increasing the value in /proc/sys/vm/nr_hugepages\n");
+			unmap(iov, nr_bufs, 0);
+			return -1;
+		}
+
+		small_base = mmap(small_start, small_size, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+		if (small_base == MAP_FAILED) {
+			printf("Unable to map small page in the page mixture. "
+				"Try increasing the RLIMIT_MEMLOCK resource limit\n");
+			unmap(iov, nr_bufs, 0);
+			return -1;
+		}
+
+		if (huge_on_left) {
+			iov[i].iov_base = huge_base;
+			memset(huge_base, 0, buf_size);
+		}
+		else {
+			iov[i].iov_base = small_base;
+			memset(small_base, 0, buf_size);
+		}
+		iov[i].iov_len = buf_size;
+		start += seg_size;
+	}
+
+	return 0;
+}
+
+static void free_bufs(struct iovec *iov, int nr_bufs, size_t offset)
+{
+	int i;
+
+	for (i = 0; i < nr_bufs; i++)
+		free(iov[i].iov_base - offset);
+}
+
+static int get_mthp_bufs(struct iovec *iov, int nr_bufs, size_t buf_size,
+		size_t alignment, size_t offset)
+{
+	int i;
+
+	for (i = 0; i < nr_bufs; i++) {
+		void *base = NULL;
+
+		if (posix_memalign(&base, alignment, buf_size)) {
+			printf("Unable to allocate mthp pages. "
+				"Try increasing the RLIMIT_MEMLOCK resource limit\n");
+			free_bufs(iov, i, offset);
+			return -1;
+		}
+
+		memset(base, 0, buf_size);
+		iov[i].iov_base = base + offset;
+		iov[i].iov_len = buf_size - offset;
+	}
+
+	return 0;
+}
+
+static int do_read(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+
+	for (i = 0; i < nr_bufs; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "Could not get SQE.\n");
+			return -1;
+		}
+
+		io_uring_prep_read_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i);
+		io_uring_submit(ring);
+
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret));
+			return -1;
+		}
+
+		if (cqe->res < 0) {
+			fprintf(stderr, "Error in async read operation: %s\n", strerror(-cqe->res));
+			return -1;
+		}
+		if (cqe->res != iov[i].iov_len) {
+			fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len);
+			return -1;
+		}
+
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int do_write(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+
+	for (i = 0; i < nr_bufs; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "Could not get SQE.\n");
+			return -1;
+		}
+
+		io_uring_prep_write_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i);
+		io_uring_submit(ring);
+
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret));
+			return -1;
+		}
+
+		if (cqe->res < 0) {
+			fprintf(stderr, "Error in async write operation: %s\n", strerror(-cqe->res));
+			return -1;
+		}
+		if (cqe->res != iov[i].iov_len) {
+			fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len);
+			return -1;
+		}
+
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int register_submit(struct io_uring *ring, struct iovec *iov,
+						int nr_bufs, int fd_in, int fd_out)
+{
+	int ret;
+
+	ret = io_uring_register_buffers(ring, iov, nr_bufs);
+	if (ret) {
+		if (ret != -ENOMEM)
+			fprintf(stderr, "Error registering buffers: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	ret = do_read(ring, fd_in, iov, nr_bufs);
+	if (ret) {
+		fprintf(stderr, "Read test failed\n");
+		return ret;
+	}
+
+	ret = do_write(ring, fd_out, iov, nr_bufs);
+	if (ret) {
+		fprintf(stderr, "Write test failed\n");
+		return ret;
+	}
+
+	ret = io_uring_unregister_buffers(ring);
+	if (ret) {
+		fprintf(stderr, "Error unregistering buffers for one hugepage test: %s", strerror(-ret));
+		return ret;
+	}
+
+	return 0;
+}
+
+static int test_one_hugepage(struct io_uring *ring, int fd_in, int fd_out)
+{
+	struct iovec iov[NR_BUFS];
+	size_t buf_size = HUGEPAGE_SIZE;
+	int ret;
+
+	if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0))
+		return T_EXIT_SKIP;
+
+	ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
+	unmap(iov, NR_BUFS, 0);
+	if (ret == -ENOMEM)
+		return T_EXIT_SKIP;
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+}
+
+static int test_multi_hugepages(struct io_uring *ring, int fd_in, int fd_out)
+{
+	struct iovec iov[NR_BUFS];
+	size_t buf_size = 4 * HUGEPAGE_SIZE;
+	int ret;
+
+	if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0))
+		return T_EXIT_SKIP;
+
+	ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
+	unmap(iov, NR_BUFS, 0);
+	if (ret == -ENOMEM)
+		return T_EXIT_SKIP;
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+}
+
+static int test_unaligned_hugepage(struct io_uring *ring, int fd_in, int fd_out)
+{
+	struct iovec iov[NR_BUFS];
+	size_t buf_size = 3 * HUGEPAGE_SIZE;
+	size_t offset = 0x1234;
+	int ret;
+
+	if (mmap_hugebufs(iov, NR_BUFS, buf_size, offset))
+		return T_EXIT_SKIP;
+
+	ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
+	unmap(iov, NR_BUFS, offset);
+	if (ret == -ENOMEM)
+		return T_EXIT_SKIP;
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+}
+
+static int test_multi_unaligned_mthps(struct io_uring *ring, int fd_in, int fd_out)
+{
+	struct iovec iov[NR_BUFS];
+	int ret;
+	size_t buf_size = 3 * MTHP_16KB;
+	size_t offset = 0x1234;
+
+	if (get_mthp_bufs(iov, NR_BUFS, buf_size, MTHP_16KB, offset))
+		return T_EXIT_SKIP;
+
+	ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
+	free_bufs(iov, NR_BUFS, offset);
+	if (ret == -ENOMEM)
+		return T_EXIT_SKIP;
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+}
+
+/* Should not coalesce */
+static int test_page_mixture(struct io_uring *ring, int fd_in, int fd_out, int huge_on_left)
+{
+	struct iovec iov[NR_BUFS];
+	size_t buf_size = HUGEPAGE_SIZE + MTHP_16KB;
+	int ret;
+
+	if (mmap_mixture(iov, NR_BUFS, buf_size, huge_on_left))
+		return T_EXIT_SKIP;
+
+	ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
+	unmap(iov, NR_BUFS, 0);
+	if (ret == -ENOMEM)
+		return T_EXIT_SKIP;
+	return ret ? T_EXIT_FAIL : T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int ret, fd_in, fd_out;
+	char *fname_in;
+
+	if (argc > 1)
+		fname_in = argv[1];
+	else
+		fname_in = IN_FD;
+
+	if (open_files(fname_in, &fd_in, &fd_out))
+		return T_EXIT_SKIP;
+
+	ret = t_create_ring(8, &ring, 0);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret < 0)
+		return T_EXIT_FAIL;
+
+	ret = test_one_hugepage(&ring, fd_in, fd_out);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test one hugepage failed.\n");
+		return ret;
+	}
+
+	ret = test_multi_hugepages(&ring, fd_in, fd_out);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test multi hugepages failed.\n");
+		return ret;
+	}
+
+	ret = test_unaligned_hugepage(&ring, fd_in, fd_out);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test unaligned hugepage failed.\n");
+		return ret;
+	}
+
+	ret = test_multi_unaligned_mthps(&ring, fd_in, fd_out);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test unaligned multi-size'd THPs failed.\n");
+		return ret;
+	}
+
+	ret = test_page_mixture(&ring, fd_in, fd_out, true);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test huge small page mixture (start with huge) failed.\n");
+		return ret;
+	}
+
+	ret = test_page_mixture(&ring, fd_in, fd_out, false);
+	if (ret != T_EXIT_PASS) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "Test huge small page mixture (start with small) failed.\n");
+		return ret;
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
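
The sysfs setup described in the comment at the top of this test has to happen before it runs. A small self-contained sketch of how such a precondition could be checked; this is not part of the test suite, and the path and threshold are taken from the comment above.

/* Sketch: verify enough hugepages are reserved before running the test. */
#include <stdio.h>

static int hugepages_available(int needed)
{
	FILE *f = fopen("/proc/sys/vm/nr_hugepages", "r");
	int nr = 0;

	if (!f)
		return 0;
	if (fscanf(f, "%d", &nr) != 1)
		nr = 0;
	fclose(f);
	return nr >= needed;
}

int main(void)
{
	/* the fixed-hugepage comment asks for >= 4 reserved hugepages */
	return hugepages_available(4) ? 0 : 77; /* 77 == T_EXIT_SKIP */
}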
diff --git a/test/fixed-link.c b/test/fixed-link.c
index 60d96ec..574512d 100644
--- a/test/fixed-link.c
+++ b/test/fixed-link.c
@@ -19,29 +19,29 @@
 	int i, fd, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	fd = open("/dev/zero", O_RDONLY);
 	if (fd < 0) {
 		fprintf(stderr, "Failed to open /dev/zero\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (io_uring_queue_init(32, &ring, 0) < 0) {
-		fprintf(stderr, "Faild to init io_uring\n");
+		fprintf(stderr, "Failed to init io_uring\n");
 		close(fd);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < IOVECS_LEN; ++i) {
 		iovecs[i].iov_base = t_malloc(64);
 		iovecs[i].iov_len = 64;
-	};
+	}
 
 	ret = io_uring_register_buffers(&ring, iovecs, IOVECS_LEN);
 	if (ret) {
 		fprintf(stderr, "Failed to register buffers\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < IOVECS_LEN; ++i) {
@@ -58,10 +58,10 @@
 	ret = io_uring_submit_and_wait(&ring, IOVECS_LEN);
 	if (ret < 0) {
 		fprintf(stderr, "Failed to submit IO\n");
-		return 1;
+		return T_EXIT_FAIL;
 	} else if (ret < 2) {
 		fprintf(stderr, "Submitted %d, wanted %d\n", ret, IOVECS_LEN);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < IOVECS_LEN; i++) {
@@ -70,12 +70,12 @@
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait_cqe=%d\n", ret);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		if (cqe->res != iovecs[i].iov_len) {
 			fprintf(stderr, "read: wanted %ld, got %d\n",
 					(long) iovecs[i].iov_len, cqe->res);
-			return 1;
+			return T_EXIT_FAIL;
 		}
 		io_uring_cqe_seen(&ring, cqe);
 	}
@@ -86,5 +86,5 @@
 	for (i = 0; i < IOVECS_LEN; ++i)
 		free(iovecs[i].iov_base);
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/fixed-reuse.c b/test/fixed-reuse.c
index 4cd8e37..401251a 100644
--- a/test/fixed-reuse.c
+++ b/test/fixed-reuse.c
@@ -124,21 +124,21 @@
 	int ret, files[MAX_FILES];
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init_params(8, &ring, &p);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
 	}
 	if (!(p.features & IORING_FEAT_CQE_SKIP))
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(files, -1, sizeof(files));
 	ret = io_uring_register_files(&ring, files, ARRAY_SIZE(files));
 	if (ret) {
 		fprintf(stderr, "Failed registering files\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	t_create_file_pattern(FNAME1, 4096, PAT1);
@@ -152,9 +152,9 @@
 
 	unlink(FNAME1);
 	unlink(FNAME2);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	unlink(FNAME1);
 	unlink(FNAME2);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/fpos.c b/test/fpos.c
index 78a6152..348ac76 100644
--- a/test/fpos.c
+++ b/test/fpos.c
@@ -52,6 +52,9 @@
 	unsigned char buff[QUEUE_SIZE * blocksize];
 	unsigned char reordered[QUEUE_SIZE * blocksize];
 
+	memset(buff, 0, QUEUE_SIZE * blocksize);
+	memset(reordered, 0, QUEUE_SIZE * blocksize);
+
 	create_file(".test_fpos_read", FILE_SIZE);
 	fd = open(".test_fpos_read", O_RDONLY);
 	unlink(".test_fpos_read");
@@ -95,7 +98,7 @@
 			if (res == 0) {
 				done = true;
 			} else if (res == -ECANCELED) {
-				/* cancelled, probably ok */
+				/* canceled, probably ok */
 			} else if (res < 0 || res > blocksize) {
 				fprintf(stderr, "bad read: %d\n", res);
 				return -1;
@@ -225,12 +228,12 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(QUEUE_SIZE, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (int test = 0; test < 8; test++) {
@@ -248,5 +251,5 @@
 			return -1;
 		}
 	}
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/fsnotify.c b/test/fsnotify.c
new file mode 100644
index 0000000..27ca019
--- /dev/null
+++ b/test/fsnotify.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test fsnotify access off O_DIRECT read
+ */
+
+#include "helpers.h"
+
+#ifdef CONFIG_HAVE_FANOTIFY
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/fanotify.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int fan, ret, fd, err;
+	char fname[64], *f;
+	struct stat sb;
+	void *buf;
+
+	fan = fanotify_init(FAN_CLASS_NOTIF|FAN_CLASS_CONTENT, 0);
+	if (fan < 0) {
+		if (errno == ENOSYS)
+			return T_EXIT_SKIP;
+		if (geteuid())
+			return T_EXIT_SKIP;
+		perror("fanotify_init");
+		return T_EXIT_FAIL;
+	}
+
+	err = T_EXIT_FAIL;
+	if (argc > 1) {
+		f = argv[1];
+		fd = open(argv[1], O_RDONLY | O_DIRECT);
+		if (fd < 0 && errno == EINVAL)
+			return T_EXIT_SKIP;
+	} else {
+		sprintf(fname, ".fsnotify.%d", getpid());
+		f = fname;
+		t_create_file(fname, 8192);
+		fd = open(fname, O_RDONLY | O_DIRECT);
+		if (fd < 0 && errno == EINVAL) {
+			unlink(fname);
+			return T_EXIT_SKIP;
+		}
+	}
+	if (fd < 0) {
+		perror("open");
+		goto out;
+	}
+
+	if (fstat(fd, &sb) < 0) {
+		perror("fstat");
+		goto out;
+	}
+	if ((sb.st_mode & S_IFMT) != S_IFREG) {
+		err = T_EXIT_SKIP;
+		close(fd);
+		goto out;
+	}
+
+	ret = fanotify_mark(fan, FAN_MARK_ADD, FAN_ACCESS|FAN_MODIFY, fd, NULL);
+	if (ret < 0) {
+		perror("fanotify_mark");
+		goto out;
+	}
+
+	if (fork()) {
+		int wstat;
+
+		io_uring_queue_init(1, &ring, 0);
+		if (posix_memalign(&buf, 4096, 4096))
+			goto out;
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_read(sqe, fd, buf, 4096, 0);
+		io_uring_submit(&ring);
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_ret=%d\n", ret);
+			goto out;
+		}
+		wait(&wstat);
+		if (!WEXITSTATUS(wstat))
+			err = T_EXIT_PASS;
+		free(buf);
+	} else {
+		struct fanotify_event_metadata m;
+		int fret;
+
+		fret = read(fan, &m, sizeof(m));
+		if (fret < 0)
+			perror("fanotify read");
+		/* fail if mask isn't right or pid indicates non-task context */
+		else if (!(m.mask & 1) || !m.pid)
+			exit(1);
+		exit(0);
+	}
+
+out:
+	if (f == fname)
+		unlink(fname);
+	return err;
+}
+
+#else /* #ifdef CONFIG_HAVE_FANOTIFY */
+
+int main(void)
+{
+	return T_EXIT_SKIP;
+}
+#endif /* #ifdef CONFIG_HAVE_FANOTIFY */
diff --git a/test/fsync.c b/test/fsync.c
index 5ae8441..44dd7ec 100644
--- a/test/fsync.c
+++ b/test/fsync.c
@@ -193,12 +193,12 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
@@ -220,5 +220,5 @@
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/futex.c b/test/futex.c
new file mode 100644
index 0000000..19e5d74
--- /dev/null
+++ b/test/futex.c
@@ -0,0 +1,583 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: exercise futex wait/wake/waitv
+ *
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <linux/futex.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define LOOPS	500
+#define NFUTEX	8
+
+#ifndef FUTEX2_SIZE_U8
+#define FUTEX2_SIZE_U8		0x00
+#define FUTEX2_SIZE_U16		0x01
+#define FUTEX2_SIZE_U32		0x02
+#define FUTEX2_SIZE_U64		0x03
+#define FUTEX2_NUMA		0x04
+			/*	0x08 */
+			/*	0x10 */
+			/*	0x20 */
+			/*	0x40 */
+#define FUTEX2_PRIVATE		FUTEX_PRIVATE_FLAG
+
+#define FUTEX2_SIZE_MASK	0x03
+#endif
+
+static int no_futex;
+
+static void *fwake(void *data)
+{
+	unsigned int *futex = data;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init: %d\n", ret);
+		return NULL;
+	}
+
+	*futex = 1;
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY,
+				 FUTEX2_SIZE_U32, 0);
+	sqe->user_data = 3;
+
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait: %d\n", ret);
+		return NULL;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
+	return NULL;
+}
+
+static int __test(struct io_uring *ring, int vectored, int async,
+		  int async_cancel)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct futex_waitv fw[NFUTEX];
+	unsigned int *futex;
+	pthread_t threads[NFUTEX];
+	void *tret;
+	int ret, i, nfutex;
+
+	nfutex = NFUTEX;
+	if (!vectored)
+		nfutex = 1;
+
+	futex = calloc(nfutex, sizeof(*futex));
+	for (i = 0; i < nfutex; i++) {
+		fw[i].val = 0;
+		fw[i].uaddr = (unsigned long) &futex[i];
+		fw[i].flags = FUTEX2_SIZE_U32;
+		fw[i].__reserved = 0;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (vectored)
+		io_uring_prep_futex_waitv(sqe, fw, nfutex, 0);
+	else
+		io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+					 FUTEX2_SIZE_U32, 0);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	sqe->user_data = 1;
+
+	io_uring_submit(ring);
+
+	for (i = 0; i < nfutex; i++)
+		pthread_create(&threads[i], NULL, fwake, &futex[i]);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_cancel64(sqe, 1, 0);
+	if (async_cancel)
+		sqe->flags |= IOSQE_ASYNC;
+	sqe->user_data = 2;
+
+	io_uring_submit(ring);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "parent wait %d\n", ret);
+			return 1;
+		}
+
+		if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) {
+			no_futex = 1;
+			free(futex);
+			return 0;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	ret = io_uring_peek_cqe(ring, &cqe);
+	if (!ret) {
+		fprintf(stderr, "peek found cqe!\n");
+		return 1;
+	}
+
+	for (i = 0; i < nfutex; i++)
+		pthread_join(threads[i], &tret);
+
+	free(futex);
+	return 0;
+}
+
+static int test(int flags, int vectored)
+{
+	struct io_uring ring;
+	int ret, i;
+
+	ret = io_uring_queue_init(8, &ring, flags);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < LOOPS; i++) {
+		int async_cancel = !(i % 2);
+		int async_wait = !(i % 3);
+		ret = __test(&ring, vectored, async_wait, async_cancel);
+		if (ret) {
+			fprintf(stderr, "flags=%x, failed=%d\n", flags, i);
+			break;
+		}
+		if (no_futex)
+			break;
+	}
+
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+static int test_order(int vectored, int async)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct futex_waitv fw = { };
+	struct io_uring_sync_cancel_reg reg = { };
+	struct io_uring ring;
+	unsigned int *futex;
+	int ret, i;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret)
+		return ret;
+
+	futex = malloc(sizeof(*futex));
+	*futex = 0;
+
+	fw.uaddr = (unsigned long) futex;
+	fw.flags = FUTEX2_SIZE_U32;
+
+	/*
+	 * Submit two futex waits
+	 */
+	sqe = io_uring_get_sqe(&ring);
+	if (!vectored)
+		io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+					 FUTEX2_SIZE_U32, 0);
+	else
+		io_uring_prep_futex_waitv(sqe, &fw, 1, 0);
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!vectored)
+		io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+					 FUTEX2_SIZE_U32, 0);
+	else
+		io_uring_prep_futex_waitv(sqe, &fw, 1, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(&ring);
+
+	/*
+	 * Now submit wake for just one futex
+	 */
+	*futex = 1;
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY,
+				 FUTEX2_SIZE_U32, 0);
+	sqe->user_data = 100;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	io_uring_submit(&ring);
+
+	/*
+	 * We expect to find completions for the first futex wait, and
+	 * the futex wake. We should not see the last futex wait.
+	 */
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait %d\n", ret);
+			return 1;
+		}
+		if (cqe->user_data == 1 || cqe->user_data == 100) {
+			io_uring_cqe_seen(&ring, cqe);
+			continue;
+		}
+		fprintf(stderr, "unexpected cqe %lu, res %d\n", (unsigned long) cqe->user_data, cqe->res);
+		return 1;
+	}
+
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (ret != -EAGAIN) {
+		fprintf(stderr, "Unexpected cqe available: %d\n", cqe->res);
+		return 1;
+	}
+
+	reg.addr = 2;
+	ret = io_uring_register_sync_cancel(&ring, &reg);
+	if (ret != 1) {
+		fprintf(stderr, "Failed to cancel pending futex wait: %d\n", ret);
+		return 1;
+	}
+
+	io_uring_queue_exit(&ring);
+	free(futex);
+	return 0;
+}
+
+static int test_multi_wake(int vectored)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct futex_waitv fw;
+	struct io_uring ring;
+	unsigned int *futex;
+	int ret, i;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret)
+		return ret;
+
+	futex = malloc(sizeof(*futex));
+	*futex = 0;
+
+	fw.val = 0;
+	fw.uaddr = (unsigned long) futex;
+	fw.flags = FUTEX2_SIZE_U32;
+	fw.__reserved = 0;
+
+	/*
+	 * Submit two futex waits
+	 */
+	sqe = io_uring_get_sqe(&ring);
+	if (!vectored)
+		io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+					 FUTEX2_SIZE_U32, 0);
+	else
+		io_uring_prep_futex_waitv(sqe, &fw, 1, 0);
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!vectored)
+		io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+					 FUTEX2_SIZE_U32, 0);
+	else
+		io_uring_prep_futex_waitv(sqe, &fw, 1, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(&ring);
+
+	/*
+	 * Now submit wake for both futexes
+	 */
+	*futex = 1;
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_futex_wake(sqe, futex, 2, FUTEX_BITSET_MATCH_ANY,
+				 FUTEX2_SIZE_U32, 0);
+	sqe->user_data = 100;
+
+	io_uring_submit(&ring);
+
+	/*
+	 * We expect to find completions for both futex waits, and
+	 * the futex wake.
+	 */
+	for (i = 0; i < 3; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait %d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "cqe error %d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		fprintf(stderr, "peek found cqe!\n");
+		return 1;
+	}
+
+	io_uring_queue_exit(&ring);
+	free(futex);
+	return 0;
+}
+
+/*
+ * Test that waking 0 futexes returns 0
+ */
+static int test_wake_zero(void)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	unsigned int *futex;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret)
+		return ret;
+
+	futex = malloc(sizeof(*futex));
+	*futex = 0;
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 1;
+	io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+				 FUTEX2_SIZE_U32, 0);
+
+	io_uring_submit(&ring);
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 2;
+	io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY,
+				 FUTEX2_SIZE_U32, 0);
+
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+
+	/*
+	 * Should get zero res and it should be the wake
+	 */
+	if (cqe->res || cqe->user_data != 2) {
+		fprintf(stderr, "cqe res %d, data %ld\n", cqe->res, (long) cqe->user_data);
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	/*
+	 * Should not have the wait complete
+	 */
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		fprintf(stderr, "peek found cqe!\n");
+		return 1;
+	}
+
+	io_uring_queue_exit(&ring);
+	free(futex);
+	return 0;
+}
+
+/*
+ * Test invalid wait/wake/waitv flags
+ */
+static int test_invalid(void)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct futex_waitv fw;
+	struct io_uring ring;
+	unsigned int *futex;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret)
+		return ret;
+
+	futex = malloc(sizeof(*futex));
+	*futex = 0;
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 1;
+	io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000,
+				 0);
+
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+
+	/*
+	 * The wait should fail with -EINVAL due to the invalid futex flags
+	 */
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "wait cqe res %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 1;
+	io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000,
+				 0);
+
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+
+	/*
+	 * The wake should fail with -EINVAL due to the invalid futex flags
+	 */
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "wake cqe res %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	fw.val = 0;
+	fw.uaddr = (unsigned long) futex;
+	fw.flags = FUTEX2_SIZE_U32 | 0x1000;
+	fw.__reserved = 0;
+
+	sqe = io_uring_get_sqe(&ring);
+	sqe->user_data = 1;
+	io_uring_prep_futex_waitv(sqe, &fw, 1, 0);
+
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+
+	/*
+	 * The waitv should fail with -EINVAL due to the invalid waiter flags
+	 */
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "waitv cqe res %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	io_uring_queue_exit(&ring);
+	free(futex);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 0);
+	if (ret) {
+		fprintf(stderr, "test 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_futex)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_wake_zero();
+	if (ret) {
+		fprintf(stderr, "wake 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_invalid();
+	if (ret) {
+		fprintf(stderr, "test invalid failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SQPOLL, 0);
+	if (ret) {
+		fprintf(stderr, "test sqpoll 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SQPOLL, 1);
+	if (ret) {
+		fprintf(stderr, "test sqpoll 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 0);
+	if (ret) {
+		fprintf(stderr, "test single coop 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1);
+	if (ret) {
+		fprintf(stderr, "test single coop 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_COOP_TASKRUN, 0);
+	if (ret) {
+		fprintf(stderr, "test taskrun 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(IORING_SETUP_COOP_TASKRUN, 1);
+	if (ret) {
+		fprintf(stderr, "test taskrun 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_order(0, 0);
+	if (ret) {
+		fprintf(stderr, "test_order 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_order(1, 0);
+	if (ret) {
+		fprintf(stderr, "test_order 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_order(0, 1);
+	if (ret) {
+		fprintf(stderr, "test_order 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_order(1, 1);
+	if (ret) {
+		fprintf(stderr, "test_order 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_multi_wake(0);
+	if (ret) {
+		fprintf(stderr, "multi_wake 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_multi_wake(1);
+	if (ret) {
+		fprintf(stderr, "multi_wake 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/hardlink.c b/test/hardlink.c
index 634b8ed..cf170d0 100644
--- a/test/hardlink.c
+++ b/test/hardlink.c
@@ -10,40 +10,39 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
-
-static int do_linkat(struct io_uring *ring, const char *oldname, const char *newname)
+static int do_linkat(struct io_uring *ring, int olddirfd, const char *oldname,
+		     const char *newname, int flags)
 {
-	int ret;
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
+	int ret;
 
 	sqe = io_uring_get_sqe(ring);
 	if (!sqe) {
 		fprintf(stderr, "sqe get failed\n");
-		goto err;
+		return 1;
 	}
-	io_uring_prep_linkat(sqe, AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+	io_uring_prep_linkat(sqe, olddirfd, oldname, AT_FDCWD, newname, flags);
 
 	ret = io_uring_submit(ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit failed: %d\n", ret);
-		goto err;
+		return 1;
 	}
 
 	ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0);
 	if (ret) {
 		fprintf(stderr, "wait_cqe failed: %d\n", ret);
-		goto err;
+		return 1;
 	}
 	ret = cqe->res;
 	io_uring_cqe_seen(ring, cqe);
 	return ret;
-err:
-	return 1;
 }
 
-int files_linked_ok(const char* fn1, const char *fn2)
+static int files_linked_ok(const char* fn1, const char *fn2)
 {
 	struct stat s1, s2;
 
@@ -69,12 +68,14 @@
 int main(int argc, char *argv[])
 {
 	static const char target[] = "io_uring-linkat-test-target";
+	static const char emptyname[] = "io_uring-linkat-test-empty";
 	static const char linkname[] = "io_uring-linkat-test-link";
-	int ret;
+	static const char symlinkname[] = "io_uring-linkat-test-symlink";
 	struct io_uring ring;
+	int ret, fd, exit_status = T_EXIT_FAIL;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
@@ -82,55 +83,88 @@
 		return ret;
 	}
 
-	ret = open(target, O_CREAT | O_RDWR | O_EXCL, 0600);
+	ret = fd = open(target, O_CREAT | O_RDWR | O_EXCL, 0600);
 	if (ret < 0) {
 		perror("open");
-		goto err;
+		goto out;
 	}
-	if (write(ret, "linktest", 8) != 8) {
-		close(ret);
-		goto err1;
+	if (write(fd, "linktest", 8) != 8) {
+		close(fd);
+		goto out;
 	}
-	close(ret);
+	if (geteuid()) {
+		fprintf(stdout, "not root, skipping AT_EMPTY_PATH test\n");
+	} else {
+		ret = do_linkat(&ring, fd, "", emptyname, AT_EMPTY_PATH);
+		if (ret < 0) {
+			if (ret == -EBADF || ret == -EINVAL) {
+				fprintf(stdout, "linkat not supported, skipping\n");
+				exit_status = T_EXIT_SKIP;
+				goto out;
+			}
+			fprintf(stderr, "linkat: %s\n", strerror(-ret));
+			goto out;
+		} else if (ret) {
+			goto out;
+		}
+		if (!files_linked_ok(emptyname, target))
+			goto out;
+		unlinkat(AT_FDCWD, emptyname, 0);
+	}
+	close(fd);
 
-	ret = do_linkat(&ring, target, linkname);
+	ret = symlink(target, symlinkname);
+	if (ret < 0) {
+		perror("open");
+		goto out;
+	}
+
+	ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0);
 	if (ret < 0) {
 		if (ret == -EBADF || ret == -EINVAL) {
 			fprintf(stdout, "linkat not supported, skipping\n");
+			exit_status = T_EXIT_SKIP;
 			goto out;
 		}
 		fprintf(stderr, "linkat: %s\n", strerror(-ret));
-		goto err1;
+		goto out;
 	} else if (ret) {
-		goto err1;
+		goto out;
 	}
 
 	if (!files_linked_ok(linkname, target))
-		goto err2;
+		goto out;
 
-	ret = do_linkat(&ring, target, linkname);
+	unlinkat(AT_FDCWD, linkname, 0);
+
+	ret = do_linkat(&ring, AT_FDCWD, symlinkname, linkname, AT_SYMLINK_FOLLOW);
+	if (ret < 0) {
+		fprintf(stderr, "linkat: %s\n", strerror(-ret));
+		goto out;
+	} else if (ret) {
+		goto out;
+	}
+
+	if (!files_linked_ok(symlinkname, target))
+		goto out;
+
+	ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0);
 	if (ret != -EEXIST) {
 		fprintf(stderr, "test_linkat linkname already exists failed: %d\n", ret);
-		goto err2;
+		goto out;
 	}
 
-	ret = do_linkat(&ring, target, "surely/this/does/not/exist");
+	ret = do_linkat(&ring, AT_FDCWD, target, "surely/this/does/not/exist", 0);
 	if (ret != -ENOENT) {
 		fprintf(stderr, "test_linkat no parent failed: %d\n", ret);
-		goto err2;
+		goto out;
 	}
-
+	exit_status = T_EXIT_PASS;
 out:
+	unlinkat(AT_FDCWD, symlinkname, 0);
 	unlinkat(AT_FDCWD, linkname, 0);
+	unlinkat(AT_FDCWD, emptyname, 0);
 	unlinkat(AT_FDCWD, target, 0);
 	io_uring_queue_exit(&ring);
-	return 0;
-err2:
-	unlinkat(AT_FDCWD, linkname, 0);
-err1:
-	unlinkat(AT_FDCWD, target, 0);
-err:
-	io_uring_queue_exit(&ring);
-	return 1;
+	return exit_status;
 }
-
diff --git a/test/helpers.c b/test/helpers.c
index 491822e..e84aaa7 100644
--- a/test/helpers.c
+++ b/test/helpers.c
@@ -8,8 +8,13 @@
 #include <stdio.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <stdarg.h>
 #include <sys/types.h>
 
+#include <arpa/inet.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
 #include "helpers.h"
 #include "liburing.h"
 
@@ -25,6 +30,26 @@
 }
 
 /*
+ * Helper for binding a socket to an ephemeral port.
+ * The port number that was bound is returned in @addr->sin_port.
+ */
+int t_bind_ephemeral_port(int fd, struct sockaddr_in *addr)
+{
+	socklen_t addrlen;
+	int ret;
+
+	addr->sin_port = 0;
+	if (bind(fd, (struct sockaddr *)addr, sizeof(*addr)))
+		return -errno;
+
+	addrlen = sizeof(*addr);
+	ret = getsockname(fd, (struct sockaddr *)addr, &addrlen);
+	assert(!ret);
+	assert(addr->sin_port != 0);
+	return 0;
+}
+
+/*
  * Helper for allocating size bytes aligned on a boundary.
  */
 void t_posix_memalign(void **memptr, size_t alignment, size_t size)
@@ -53,7 +78,7 @@
 {
 	ssize_t ret;
 	char *buf;
-	int fd; 
+	int fd;
 
 	buf = t_malloc(size);
 	memset(buf, pattern, size);
@@ -90,7 +115,7 @@
 	vecs = t_malloc(buf_num * sizeof(struct iovec));
 	for (i = 0; i < buf_num; i++) {
 		t_posix_memalign(&vecs[i].iov_base, buf_size, buf_size);
-		vecs[i].iov_len = buf_size; 
+		vecs[i].iov_len = buf_size;
 	}
 	return vecs;
 }
@@ -112,7 +137,8 @@
 		return T_SETUP_SKIP;
 	}
 
-	fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+	if (ret != -EINVAL)
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
 	return ret;
 }
 
@@ -143,3 +169,197 @@
 	fprintf(stderr, "buffer register failed: %s\n", strerror(-ret));
 	return ret;
 }
+
+int t_create_socket_pair(int fd[2], bool stream)
+{
+	int ret;
+	int type = stream ? SOCK_STREAM : SOCK_DGRAM;
+	int val;
+	struct sockaddr_in serv_addr;
+	struct sockaddr *paddr;
+	socklen_t paddrlen;
+
+	type |= SOCK_CLOEXEC;
+	fd[0] = socket(AF_INET, type, 0);
+	if (fd[0] < 0)
+		return errno;
+	fd[1] = socket(AF_INET, type, 0);
+	if (fd[1] < 0) {
+		ret = errno;
+		close(fd[0]);
+		return ret;
+	}
+
+	val = 1;
+	if (setsockopt(fd[0], SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
+		goto errno_cleanup;
+
+	memset(&serv_addr, 0, sizeof(serv_addr));
+	serv_addr.sin_family = AF_INET;
+	serv_addr.sin_port = 0;
+	inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr);
+
+	paddr = (struct sockaddr *)&serv_addr;
+	paddrlen = sizeof(serv_addr);
+
+	if (bind(fd[0], paddr, paddrlen)) {
+		fprintf(stderr, "bind failed\n");
+		goto errno_cleanup;
+	}
+
+	if (stream && listen(fd[0], 16)) {
+		fprintf(stderr, "listen failed\n");
+		goto errno_cleanup;
+	}
+
+	if (getsockname(fd[0], (struct sockaddr *)&serv_addr,
+			(socklen_t *)&paddrlen)) {
+		fprintf(stderr, "getsockname failed\n");
+		goto errno_cleanup;
+	}
+	inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr);
+
+	if (connect(fd[1], (struct sockaddr *)&serv_addr, paddrlen)) {
+		fprintf(stderr, "connect failed\n");
+		goto errno_cleanup;
+	}
+
+	if (!stream) {
+		/* connect the other udp side */
+		if (getsockname(fd[1], (struct sockaddr *)&serv_addr,
+				(socklen_t *)&paddrlen)) {
+			fprintf(stderr, "getsockname failed\n");
+			goto errno_cleanup;
+		}
+		inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr);
+
+		if (connect(fd[0], (struct sockaddr *)&serv_addr, paddrlen)) {
+			fprintf(stderr, "connect failed\n");
+			goto errno_cleanup;
+		}
+		return 0;
+	}
+
+	/* for stream case we must accept and cleanup the listen socket */
+
+	ret = accept(fd[0], NULL, NULL);
+	if (ret < 0)
+		goto errno_cleanup;
+
+	close(fd[0]);
+	fd[0] = ret;
+
+	return 0;
+
+errno_cleanup:
+	ret = errno;
+	close(fd[0]);
+	close(fd[1]);
+	return ret;
+}
+
+bool t_probe_defer_taskrun(void)
+{
+	struct io_uring ring;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret < 0)
+		return false;
+	io_uring_queue_exit(&ring);
+	return true;
+}
+
+/*
+ * Sync internal state with kernel ring state on the SQ side. Returns the
+ * number of pending items in the SQ ring, for the shared ring.
+ */
+unsigned __io_uring_flush_sq(struct io_uring *ring)
+{
+	struct io_uring_sq *sq = &ring->sq;
+	unsigned tail = sq->sqe_tail;
+
+	if (sq->sqe_head != tail) {
+		sq->sqe_head = tail;
+		/*
+		 * Ensure kernel sees the SQE updates before the tail update.
+		 */
+		if (!(ring->flags & IORING_SETUP_SQPOLL))
+			*sq->ktail = tail;
+		else
+			io_uring_smp_store_release(sq->ktail, tail);
+	}
+	/*
+	 * This load needs to be atomic, since sq->khead is written concurrently
+	 * by the kernel, but it doesn't need to be load_acquire, since the
+	 * kernel doesn't store to the submission queue; it advances khead just
+	 * to indicate that it's finished reading the submission queue entries
+	 * so they're available for us to write to.
+	 */
+	return tail - IO_URING_READ_ONCE(*sq->khead);
+}
+
+/*
+ * Implementation of error(3), prints an error message and exits.
+ */
+void t_error(int status, int errnum, const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	if (errnum)
+		fprintf(stderr, ": %s", strerror(errnum));
+
+	fprintf(stderr, "\n");
+	va_end(args);
+	exit(status);
+}
+
+unsigned long long mtime_since(const struct timeval *s, const struct timeval *e)
+{
+	long long sec, usec;
+
+	sec = e->tv_sec - s->tv_sec;
+	usec = (e->tv_usec - s->tv_usec);
+	if (sec > 0 && usec < 0) {
+		sec--;
+		usec += 1000000;
+	}
+
+	sec *= 1000;
+	usec /= 1000;
+	return sec + usec;
+}
+
+unsigned long long mtime_since_now(struct timeval *tv)
+{
+	struct timeval end;
+
+	gettimeofday(&end, NULL);
+	return mtime_since(tv, &end);
+}
+
+unsigned long long utime_since(const struct timeval *s, const struct timeval *e)
+{
+	long long sec, usec;
+
+	sec = e->tv_sec - s->tv_sec;
+	usec = (e->tv_usec - s->tv_usec);
+	if (sec > 0 && usec < 0) {
+		sec--;
+		usec += 1000000;
+	}
+
+	sec *= 1000000;
+	return sec + usec;
+}
+
+unsigned long long utime_since_now(struct timeval *tv)
+{
+	struct timeval end;
+
+	gettimeofday(&end, NULL);
+	return utime_since(tv, &end);
+}
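
For reference, a minimal sketch of how the new t_bind_ephemeral_port() helper is used; the surrounding socket setup is illustrative rather than taken from any single test.

/* Sketch: bind a listener to a kernel-chosen port, then read the port back. */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#include "helpers.h"

static int listen_on_ephemeral_port(void)
{
	struct sockaddr_in addr;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	/* sin_port is zeroed by the helper; the bound port comes back in it */
	if (t_bind_ephemeral_port(fd, &addr))
		return -1;

	printf("bound to port %d\n", ntohs(addr.sin_port));
	return fd;
}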
diff --git a/test/helpers.h b/test/helpers.h
index d0beb93..9e1cdf5 100644
--- a/test/helpers.h
+++ b/test/helpers.h
@@ -10,12 +10,28 @@
 #endif
 
 #include "liburing.h"
+#include "../src/setup.h"
+#include <arpa/inet.h>
+#include <sys/time.h>
 
 enum t_setup_ret {
 	T_SETUP_OK	= 0,
 	T_SETUP_SKIP,
 };
 
+enum t_test_result {
+	T_EXIT_PASS   = 0,
+	T_EXIT_FAIL   = 1,
+	T_EXIT_SKIP   = 77,
+};
+
+/*
+ * Helper for binding a socket to an ephemeral port.
+ * The port number that was bound is returned in @addr->sin_port.
+ */
+int t_bind_ephemeral_port(int fd, struct sockaddr_in *addr);
+
+
 /*
  * Helper for allocating memory in tests.
  */
@@ -53,6 +69,11 @@
 struct iovec *t_create_buffers(size_t buf_num, size_t buf_size);
 
 /*
+ * Helper for creating connected socket pairs
+ */
+int t_create_socket_pair(int fd[2], bool stream);
+
+/*
  * Helper for setting up a ring and checking for user privs
  */
 enum t_setup_ret t_create_ring_params(int depth, struct io_uring *ring,
@@ -64,8 +85,28 @@
 				    const struct iovec *iovecs,
 				    unsigned nr_iovecs);
 
+bool t_probe_defer_taskrun(void);
+
+unsigned __io_uring_flush_sq(struct io_uring *ring);
+
+static inline int t_io_uring_init_sqarray(unsigned entries, struct io_uring *ring,
+					struct io_uring_params *p)
+{
+	int ret;
+
+	ret = __io_uring_queue_init_params(entries, ring, p, NULL, 0);
+	return ret >= 0 ? 0 : ret;
+}
+
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
+void t_error(int status, int errnum, const char *format, ...);
+
+unsigned long long mtime_since(const struct timeval *s, const struct timeval *e);
+unsigned long long mtime_since_now(struct timeval *tv);
+unsigned long long utime_since(const struct timeval *s, const struct timeval *e);
+unsigned long long utime_since_now(struct timeval *tv);
+
 #ifdef __cplusplus
 }
 #endif
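
T_EXIT_SKIP's value of 77 appears chosen to match the exit code that automake-style test harnesses treat as "skipped" (an assumption about intent, though the runner's handling is consistent with it). The canonical shape of a test main() after this change:

/* Sketch: the test skeleton that the t_test_result enum standardizes. */
#include "helpers.h"

int main(int argc, char *argv[])
{
	/* tests that can't operate on a caller-supplied file skip instead */
	if (argc > 1)
		return T_EXIT_SKIP;

	/* ... actual test work here, returning T_EXIT_FAIL on error ... */

	return T_EXIT_PASS;
}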
diff --git a/test/ignore-single-mmap.c b/test/ignore-single-mmap.c
new file mode 100644
index 0000000..455d90e
--- /dev/null
+++ b/test/ignore-single-mmap.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * The 6.10-rc merge window had a bug where the rewritten mmap support would
+ * return -EFAULT rather than a successful map for rings allocated with
+ * > 1 page but asking for smaller mappings. This hit applications either
+ * using an ancient liburing without IORING_FEAT_SINGLE_MMAP support, or
+ * applications just ignoring that feature flag and still doing 3 mmap
+ * operations to map the ring.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../src/syscall.h"
+#include "liburing.h"
+#include "helpers.h"
+
+#define ENTRIES	128
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_params p = { };
+	void *ptr;
+	int fd;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	fd = __sys_io_uring_setup(ENTRIES, &p);
+	if (fd < 0)
+		return T_EXIT_SKIP;
+
+	if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
+		close(fd);
+		return T_EXIT_SKIP;
+	}
+
+	ptr = __sys_mmap(0, ENTRIES * sizeof(unsigned), PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, fd,
+				IORING_OFF_SQ_RING);
+	if (!IS_ERR(ptr)) {
+		close(fd);
+		return T_EXIT_PASS;
+	}
+
+	fprintf(stderr, "ring sqe array mmap: %d\n", PTR_ERR(ptr));
+	return T_EXIT_FAIL;
+}
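
For context on the bug description above, a sketch of the legacy three-mmap sequence that applications without IORING_FEAT_SINGLE_MMAP handling perform; offsets and sizing follow the io_uring ABI, and error unwinding is elided for brevity.

/* Sketch: legacy per-region ring mapping, as pre-single-mmap liburing did. */
#include <linux/io_uring.h>
#include <sys/mman.h>

static int map_rings_legacy(int fd, struct io_uring_params *p,
			    void **sq, void **cq, void **sqes)
{
	/* p was filled in by io_uring_setup() */
	size_t sq_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	size_t cq_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);

	*sq = mmap(0, sq_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	*cq = mmap(0, cq_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	*sqes = mmap(0, p->sq_entries * sizeof(struct io_uring_sqe),
		     PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		     fd, IORING_OFF_SQES);
	if (*sq == MAP_FAILED || *cq == MAP_FAILED || *sqes == MAP_FAILED)
		return -1;
	return 0;
}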
diff --git a/test/init-mem.c b/test/init-mem.c
new file mode 100644
index 0000000..59973c8
--- /dev/null
+++ b/test/init-mem.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Check that io_uring_queue_init_mem() doesn't underestimate
+ *		the memory required for various size rings.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netinet/udp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define PRE_RED	0x5aa55aa55aa55aa5ULL
+#define POST_RED 0xa55aa55aa55aa55aULL
+
+struct ctx {
+	struct io_uring		ring;
+	void			*ring_mem;
+	void			*mem;
+	unsigned long long	*pre;
+	unsigned long long	*post;
+};
+
+struct q_entries {
+	unsigned int sqes;
+	unsigned int cqes;
+};
+
+static int setup_ctx(struct ctx *ctx, struct q_entries *q)
+{
+	struct io_uring_params p = { };
+	int ret;
+
+	if (posix_memalign(&ctx->mem, 4096, 2*1024*1024))
+		return T_EXIT_FAIL;
+
+	ctx->pre = ctx->mem + 4096 - sizeof(unsigned long long);
+	*ctx->pre = PRE_RED;
+
+	ctx->ring_mem = ctx->mem + 4096;
+	p.flags |= IORING_SETUP_CQSIZE | IORING_SETUP_NO_SQARRAY;
+	p.sq_entries = q->sqes;
+	p.cq_entries = q->cqes;
+
+	ret = io_uring_queue_init_mem(q->sqes, &ctx->ring, &p,
+					ctx->ring_mem, 2*1024*1024);
+
+	if (ret < 0) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "queue init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ctx->post = ctx->ring_mem + ret;
+	*ctx->post = POST_RED;
+	return 0;
+}
+
+static void clean_ctx(struct ctx *ctx)
+{
+	io_uring_queue_exit(&ctx->ring);
+	free(ctx->mem);
+}
+
+static int check_red(struct ctx *ctx, unsigned long i)
+{
+	int fail = 0;
+
+	if (*ctx->pre != PRE_RED) {
+		printf("pre redzone=%llx at i=%lu\n", *ctx->pre, i);
+		fail = 1;
+	}
+	if (*ctx->post != POST_RED) {
+		printf("post redzone=%llx at i=%lu\n", *ctx->post, i);
+		fail = 1;
+	}
+	return fail;
+}
+
+static int test(struct q_entries *q)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct ctx ctx = { };
+	unsigned long i, ud;
+	int j, ret, batch;
+
+	ret = setup_ctx(&ctx, q);
+	if (ret == T_EXIT_SKIP) {
+		clean_ctx(&ctx);
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		return ret;
+	}
+
+	batch = 64;
+	if (batch > q->sqes)
+		batch = q->sqes;
+
+	i = ud = 0;
+	while (i < q->cqes * 2) {
+		if (check_red(&ctx, i))
+			return T_EXIT_FAIL;
+		for (j = 0; j < batch; j++) {
+			sqe = io_uring_get_sqe(&ctx.ring);
+			io_uring_prep_nop(sqe);
+			sqe->user_data = j + (unsigned long) i;
+		}
+		io_uring_submit(&ctx.ring);
+		for (j = 0; j < batch; j++) {
+			ret = io_uring_wait_cqe(&ctx.ring, &cqe);
+			if (ret)
+				goto err;
+			if (cqe->user_data != ud) {
+				fprintf(stderr, "ud=%lu, wanted %lu\n", (unsigned long) cqe->user_data, ud);
+				goto err;
+			}
+			ud++;
+			io_uring_cqe_seen(&ctx.ring, cqe);
+		}
+		i += batch;
+	}
+
+	clean_ctx(&ctx);
+	return T_EXIT_PASS;
+err:
+	clean_ctx(&ctx);
+	return T_EXIT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct q_entries q_entries[] = {
+		{ 256, 16384 },
+		{ 32, 4096 },
+		{ 128, 8192 },
+		{ 4096, 32768 },
+		{ 1, 8 },
+		{ 2, 1024 },
+	};
+	int i, ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	for (i = 0; i < ARRAY_SIZE(q_entries); i++) {
+		ret = test(&q_entries[i]);
+		if (ret == T_EXIT_SKIP) {
+			return T_EXIT_SKIP;
+		} else if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "Failed at %d/%d\n", q_entries[i].sqes,
+							q_entries[i].cqes);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	return T_EXIT_PASS;
+}
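
A minimal usage sketch for io_uring_queue_init_mem(), which the test above is sizing: the caller supplies pre-allocated memory and, on success, the return value is the number of bytes of that region actually consumed (the basis for the post-redzone placement above). On kernels without IORING_SETUP_NO_MMAP support it fails with -EINVAL, which the test treats as a skip.

/* Sketch: set up a ring inside caller-provided memory. */
#include <stdio.h>
#include <stdlib.h>

#include "liburing.h"

int main(void)
{
	struct io_uring_params p = { };
	struct io_uring ring;
	size_t mem_size = 2 * 1024 * 1024;
	void *mem;
	int ret;

	if (posix_memalign(&mem, 4096, mem_size))
		return 1;

	ret = io_uring_queue_init_mem(8, &ring, &p, mem, mem_size);
	if (ret < 0) {
		/* -EINVAL here typically means no IORING_SETUP_NO_MMAP support */
		fprintf(stderr, "init_mem: %d\n", ret);
		free(mem);
		return 1;
	}

	printf("ring consumed %d of %zu provided bytes\n", ret, mem_size);
	io_uring_queue_exit(&ring);
	free(mem);
	return 0;
}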
diff --git a/test/io-cancel.c b/test/io-cancel.c
index d5e3ae9..7ac4664 100644
--- a/test/io-cancel.c
+++ b/test/io-cancel.c
@@ -22,30 +22,6 @@
 
 static struct iovec *vecs;
 
-static unsigned long long utime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000000;
-	return sec + usec;
-}
-
-static unsigned long long utime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return utime_since(tv, &end);
-}
-
 static int start_io(struct io_uring *ring, int fd, int do_write)
 {
 	struct io_uring_sqe *sqe;
@@ -93,7 +69,7 @@
 		if (do_partial && cqe->user_data) {
 			if (!(cqe->user_data & 1)) {
 				if (cqe->res != BS) {
-					fprintf(stderr, "IO %d wasn't cancelled but got error %d\n", (unsigned) cqe->user_data, cqe->res);
+					fprintf(stderr, "IO %d wasn't canceled but got error %d\n", (unsigned) cqe->user_data, cqe->res);
 					goto err;
 				}
 			}
@@ -147,7 +123,7 @@
 
 /*
  * Test cancels. If 'do_partial' is set, then we only attempt to cancel half of
- * the submitted IO. This is done to verify that cancelling one piece of IO doesn't
+ * the submitted IO. This is done to verify that canceling one piece of IO doesn't
  * impact others.
  */
 static int test_io_cancel(const char *file, int do_write, int do_partial,
@@ -161,6 +137,8 @@
 
 	fd = open(file, O_RDWR | O_DIRECT);
 	if (fd < 0) {
+		if (errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("file open");
 		goto err;
 	}
@@ -269,7 +247,7 @@
 
 	ret = io_uring_wait_cqe_timeout(&ring1, &cqe, &ts);
 	if (ret != -ETIME) {
-		fprintf(stderr, "read got cancelled or wait failed\n");
+		fprintf(stderr, "read got canceled or wait failed\n");
 		return 1;
 	}
 	io_uring_cqe_seen(&ring1, cqe);
@@ -345,18 +323,21 @@
 			case 1:
 				if (cqe->res != -EINTR &&
 				    cqe->res != -ECANCELED) {
-					fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res);
+					fprintf(stderr, "user_data %i res %i\n",
+						(unsigned)cqe->user_data, cqe->res);
 					exit(1);
 				}
 				break;
 			case 2:
 				if (cqe->res != -EALREADY && cqe->res) {
-					fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res);
+					fprintf(stderr, "user_data %i res %i\n",
+						(unsigned)cqe->user_data, cqe->res);
 					exit(1);
 				}
 				break;
 			default:
-				fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res);
+				fprintf(stderr, "user_data %i res %i\n",
+					(unsigned)cqe->user_data, cqe->res);
 				exit(1);
 			}
 
@@ -365,8 +346,13 @@
 		exit(0);
 	} else {
 		int wstatus;
+		pid_t childpid;
 
-		if (waitpid(p, &wstatus, 0) == (pid_t)-1) {
+		do {
+			childpid = waitpid(p, &wstatus, 0);
+		} while (childpid == (pid_t)-1 && errno == EINTR);
+
+		if (childpid == (pid_t)-1) {
 			perror("waitpid()");
 			return 1;
 		}
@@ -444,7 +430,8 @@
 		if ((cqe->user_data == 1 && cqe->res != -ECANCELED) ||
 		    (cqe->user_data == 2 && cqe->res != -ECANCELED) ||
 		    (cqe->user_data == 3 && cqe->res != -ETIME)) {
-			fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res);
+			fprintf(stderr, "user_data %i res %i\n",
+				(unsigned)cqe->user_data, cqe->res);
 			return 1;
 		}
 		io_uring_cqe_seen(&ring, cqe);
@@ -490,7 +477,7 @@
 	sleep(1);
 	io_uring_queue_exit(&ring);
 
-	/* close the write end, so if ring is cancelled properly read() fails*/
+	/* close the write end, so if ring is canceled properly read() fails*/
 	close(fds[1]);
 	ret = read(fds[0], buffer, 10);
 	close(fds[0]);
@@ -503,26 +490,26 @@
 	int i, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	if (test_dont_cancel_another_ring()) {
 		fprintf(stderr, "test_dont_cancel_another_ring() failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (test_cancel_req_across_fork()) {
 		fprintf(stderr, "test_cancel_req_across_fork() failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (test_cancel_inflight_exit()) {
 		fprintf(stderr, "test_cancel_inflight_exit() failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	if (test_sqpoll_cancel_iowq_requests()) {
 		fprintf(stderr, "test_sqpoll_cancel_iowq_requests() failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	t_create_file(fname, FILE_SIZE);
@@ -535,7 +522,7 @@
 		int async = (i & 4) != 0;
 
 		ret = test_io_cancel(fname, write, partial, async);
-		if (ret) {
+		if (ret == T_EXIT_FAIL) {
 			fprintf(stderr, "test_io_cancel %d %d %d failed\n",
 				write, partial, async);
 			goto err;
@@ -543,8 +530,8 @@
 	}
 
 	unlink(fname);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	unlink(fname);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c
index ef00bf6..efa3254 100644
--- a/test/io_uring_enter.c
+++ b/test/io_uring_enter.c
@@ -38,14 +38,14 @@
 {
 	int ret;
 
-	ret = __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig);
-	if (ret != -1) {
-		fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
+	ret = io_uring_enter(fd, to_submit, min_complete, flags, sig);
+	if (ret >= 0) {
+		fprintf(stderr, "expected %s, but call succeeded\n", strerror(-error));
 		return 1;
 	}
 
-	if (errno != error) {
-		fprintf(stderr, "expected %d, got %d\n", error, errno);
+	if (ret != error) {
+		fprintf(stderr, "expected %d, got %d\n", error, ret);
 		return 1;
 	}
 
@@ -54,17 +54,17 @@
 
 static int try_io_uring_enter(int fd, unsigned int to_submit,
 			      unsigned int min_complete, unsigned int flags,
-			      sigset_t *sig, int expect, int error)
+			      sigset_t *sig, int expect)
 {
 	int ret;
 
-	if (expect == -1)
-		return expect_fail(fd, to_submit, min_complete,
-				   flags, sig, error);
+	if (expect < 0)
+		return expect_fail(fd, to_submit, min_complete, flags, sig,
+				   expect);
 
-	ret = __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig);
+	ret = io_uring_enter(fd, to_submit, min_complete, flags, sig);
 	if (ret != expect) {
-		fprintf(stderr, "Expected %d, got %d\n", expect, errno);
+		fprintf(stderr, "Expected %d, got %d\n", expect, ret);
 		return 1;
 	}
 
@@ -169,7 +169,7 @@
 	ret = io_uring_submit(ring);
 	unlink(template);
 	if (ret < 0) {
-		perror("io_uring_enter");
+		fprintf(stderr, "io_uring_queue_enter: %s\n", strerror(-ret));
 		exit(1);
 	}
 }
@@ -183,38 +183,41 @@
 	unsigned ktail, mask, index;
 	unsigned sq_entries;
 	unsigned completed, dropped;
+	struct io_uring_params p;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
-	ret = io_uring_queue_init(IORING_MAX_ENTRIES, &ring, 0);
+	memset(&p, 0, sizeof(p));
+	ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES, &ring, &p);
 	if (ret == -ENOMEM)
-		ret = io_uring_queue_init(IORING_MAX_ENTRIES_FALLBACK, &ring, 0);
+		ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES_FALLBACK,
+					      &ring, &p);
 	if (ret < 0) {
-		perror("io_uring_queue_init");
-		exit(1);
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+		exit(T_EXIT_FAIL);
 	}
-	mask = *sq->kring_mask;
+	mask = sq->ring_mask;
 
 	/* invalid flags */
-	status |= try_io_uring_enter(ring.ring_fd, 1, 0, ~0U, NULL, -1, EINVAL);
+	status |= try_io_uring_enter(ring.ring_fd, 1, 0, ~0U, NULL, -EINVAL);
 
 	/* invalid fd, EBADF */
-	status |= try_io_uring_enter(-1, 0, 0, 0, NULL, -1, EBADF);
+	status |= try_io_uring_enter(-1, 0, 0, 0, NULL, -EBADF);
 
 	/* valid, non-ring fd, EOPNOTSUPP */
-	status |= try_io_uring_enter(0, 0, 0, 0, NULL, -1, EOPNOTSUPP);
+	status |= try_io_uring_enter(0, 0, 0, 0, NULL, -EOPNOTSUPP);
 
 	/* to_submit: 0, flags: 0;  should get back 0. */
-	status |= try_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL, 0, 0);
+	status |= try_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL, 0);
 
 	/* fill the sq ring */
-	sq_entries = *ring.sq.kring_entries;
+	sq_entries = ring.sq.ring_entries;
 	submit_io(&ring, sq_entries);
-	ret = __sys_io_uring_enter(ring.ring_fd, 0, sq_entries,
-					IORING_ENTER_GETEVENTS, NULL);
+	ret = io_uring_enter(ring.ring_fd, 0, sq_entries,
+			     IORING_ENTER_GETEVENTS, NULL);
 	if (ret < 0) {
-		perror("io_uring_enter");
+		fprintf(stderr, "io_uring_enter: %s\n", strerror(-ret));
 		status = 1;
 	} else {
 		/*
@@ -235,7 +238,7 @@
 	 * Add an invalid index to the submission queue.  This should
 	 * result in the dropped counter increasing.
 	 */
-	index = *sq->kring_entries + 1; // invalid index
+	index = sq->ring_entries + 1; // invalid index
 	dropped = *sq->kdropped;
 	ktail = *sq->ktail;
 	sq->array[ktail & mask] = index;
@@ -246,7 +249,7 @@
 	 */
 	io_uring_smp_store_release(sq->ktail, ktail);
 
-	ret = __sys_io_uring_enter(ring.ring_fd, 1, 0, 0, NULL);
+	ret = io_uring_enter(ring.ring_fd, 1, 0, 0, NULL);
 	/* now check to see if our sqe was dropped */
 	if (*sq->kdropped == dropped) {
 		fprintf(stderr, "dropped counter did not increase\n");
@@ -254,8 +257,8 @@
 	}
 
 	if (!status)
-		return 0;
+		return T_EXIT_PASS;
 
 	fprintf(stderr, "FAIL\n");
-	return -1;
+	return T_EXIT_FAIL;
 }
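Note the convention this file now follows throughout: the liburing
wrappers (io_uring_enter(), io_uring_setup(), io_uring_register()) return
the negated errno directly, where the removed __sys_*-based code reported
failure as -1 with errno set. A minimal before/after sketch of the same
check:

	/* old style: -1 plus errno */
	ret = __sys_io_uring_enter(fd, 1, 0, ~0U, NULL);
	if (ret == -1 && errno == EINVAL)
		/* expected failure */;

	/* new style: negated errno as the return value */
	ret = io_uring_enter(fd, 1, 0, ~0U, NULL);
	if (ret == -EINVAL)
		/* expected failure */;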
diff --git a/test/io_uring_passthrough.c b/test/io_uring_passthrough.c
new file mode 100644
index 0000000..f18a186
--- /dev/null
+++ b/test/io_uring_passthrough.c
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: basic read/write tests for io_uring passthrough commands
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+#include "nvme.h"
+
+#define FILE_SIZE	(256 * 1024)
+#define BS		8192
+#define BUFFERS		(FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int no_pt;
+
+/*
+ * Every sizeof(int)-sized slot in the file stores the value
+ * ((test_case / 2) * FILE_SIZE) + (offset / sizeof(int)).
+ */
+static int verify_buf(int tc, void *buf, off_t off)
+{
+	int i, u_in_buf = BS / sizeof(unsigned int);
+	unsigned int *ptr;
+
+	off /= sizeof(unsigned int);
+	off += (tc / 2) * FILE_SIZE;
+	ptr = buf;
+	for (i = 0; i < u_in_buf; i++) {
+		if (off != *ptr) {
+			fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
+					(unsigned long long) off);
+			return 1;
+		}
+		ptr++;
+		off++;
+	}
+
+	return 0;
+}
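+/*
+ * (Worked example of the scheme above: with FILE_SIZE = 256k, BS = 8192
+ * and tc = 2, the unsigned int at file offset 8192 is expected to hold
+ * (2 / 2) * 262144 + 8192 / 4 = 262144 + 2048 = 264192.)
+ */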
+
+static int fill_pattern(int tc)
+{
+	unsigned int val, *ptr;
+	int i, j;
+	int u_in_buf = BS / sizeof(val);
+
+	val = (tc / 2) * FILE_SIZE;
+	for (i = 0; i < BUFFERS; i++) {
+		ptr = vecs[i].iov_base;
+		for (j = 0; j < u_in_buf; j++) {
+			*ptr = val;
+			val++;
+			ptr++;
+		}
+	}
+
+	return 0;
+}
+
+static int __test_io(const char *file, struct io_uring *ring, int tc, int read,
+		     int sqthread, int fixed, int nonvec)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct nvme_uring_cmd *cmd;
+	int open_flags;
+	int do_fixed;
+	int i, ret, fd = -1;
+	off_t offset;
+	__u64 slba;
+	__u32 nlb;
+
+	if (read)
+		open_flags = O_RDONLY;
+	else
+		open_flags = O_WRONLY;
+
+	if (fixed) {
+		ret = t_register_buffers(ring, vecs, BUFFERS);
+		if (ret == T_SETUP_SKIP)
+			return 0;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		if (errno == EACCES || errno == EPERM)
+			return T_EXIT_SKIP;
+		perror("file open");
+		goto err;
+	}
+
+	if (sqthread) {
+		ret = io_uring_register_files(ring, &fd, 1);
+		if (ret) {
+			fprintf(stderr, "file reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	if (!read)
+		fill_pattern(tc);
+
+	offset = 0;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (read) {
+			int use_fd = fd;
+
+			do_fixed = fixed;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else if (nonvec) {
+				io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else {
+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+								offset);
+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+			}
+		} else {
+			int use_fd = fd;
+
+			do_fixed = fixed;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else if (nonvec) {
+				io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else {
+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+								offset);
+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+			}
+		}
+		sqe->opcode = IORING_OP_URING_CMD;
+		if (do_fixed)
+			sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
+		sqe->user_data = ((uint64_t)offset << 32) | i;
+		if (sqthread)
+			sqe->flags |= IOSQE_FIXED_FILE;
+
+		cmd = (struct nvme_uring_cmd *)sqe->cmd;
+		memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+		cmd->opcode = read ? nvme_cmd_read : nvme_cmd_write;
+
+		slba = offset >> lba_shift;
+		nlb = (BS >> lba_shift) - 1;
+
+		/* cdw10 and cdw11 hold the starting LBA */
+		cmd->cdw10 = slba & 0xffffffff;
+		cmd->cdw11 = slba >> 32;
+		/* cdw12 holds the number of LBAs to read/write */
+		cmd->cdw12 = nlb;
+		if (do_fixed || nonvec) {
+			cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
+			cmd->data_len = vecs[i].iov_len;
+		} else {
+			cmd->addr = (__u64)(uintptr_t)&vecs[i];
+			cmd->data_len = 1;
+		}
+		cmd->nsid = nsid;
+
+		offset += BS;
+	}
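+	/*
+	 * (Illustration of the encodings above, assuming a 512-byte LBA
+	 * format, i.e. lba_shift = 9 -- the real value comes from
+	 * nvme_get_info(). The sqe for i = 1 has offset = 8192, so
+	 * slba = 8192 >> 9 = 16 and nlb = (8192 >> 9) - 1 = 15, a
+	 * zero-based block count. user_data packs the file offset in the
+	 * high 32 bits and the buffer index in the low 32 bits, which the
+	 * completion loop below unpacks again.)
+	 */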
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (cqe->res != 0) {
+			if (!no_pt) {
+				no_pt = 1;
+				goto skip;
+			}
+			fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+		if (read) {
+			int index = cqe->user_data & 0xffffffff;
+			void *buf = vecs[index].iov_base;
+			off_t voff = cqe->user_data >> 32;
+
+			if (verify_buf(tc, buf, voff))
+				goto err;
+		}
+	}
+
+	if (fixed) {
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+	if (sqthread) {
+		ret = io_uring_unregister_files(ring);
+		if (ret) {
+			fprintf(stderr, "file unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+skip:
+	close(fd);
+	return 0;
+err:
+	if (fd != -1)
+		close(fd);
+	return 1;
+}
+
+static int test_io(const char *file, int tc, int read, int sqthread,
+		   int fixed, int nonvec)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+
+	ring_flags |= IORING_SETUP_SQE128;
+	ring_flags |= IORING_SETUP_CQE32;
+
+	if (sqthread)
+		ring_flags |= IORING_SETUP_SQPOLL;
+
+	ret = t_create_ring(64, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		if (ret == -EINVAL) {
+			no_pt = 1;
+			return T_SETUP_SKIP;
+		}
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = __test_io(file, &ring, tc, read, sqthread, fixed, nonvec);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+/*
+ * Send a passthrough command that nvme will fail during submission.
+ * This comes in handy for testing error handling.
+ */
+static int test_invalid_passthru_submit(const char *file)
+{
+	struct io_uring ring;
+	int fd, ret, ring_flags, open_flags;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct nvme_uring_cmd *cmd;
+
+	ring_flags = IORING_SETUP_CQE32 | IORING_SETUP_SQE128;
+
+	ret = t_create_ring(1, &ring, ring_flags);
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	open_flags = O_RDONLY;
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		perror("file open");
+		goto err;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, vecs[0].iov_base, vecs[0].iov_len, 0);
+	sqe->cmd_op = NVME_URING_CMD_IO;
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->user_data = 1;
+	cmd = (struct nvme_uring_cmd *)sqe->cmd;
+	memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+	cmd->opcode = nvme_cmd_read;
+	cmd->addr = (__u64)(uintptr_t)vecs[0].iov_base;
+	cmd->data_len = vecs[0].iov_len;
+	/* populate wrong nsid to force failure */
+	cmd->nsid = nsid + 1;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
+		goto err;
+	}
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe=%d\n", ret);
+		goto err;
+	}
+	if (cqe->res == 0) {
+		fprintf(stderr, "cqe res %d, wanted failure\n", cqe->res);
+		goto err;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+	close(fd);
+	io_uring_queue_exit(&ring);
+	return 0;
+err:
+	if (fd != -1)
+		close(fd);
+	io_uring_queue_exit(&ring);
+	return 1;
+}
+
+/*
+ * If we are polling, io_uring_submit() always needs to enter the
+ * kernel to fetch events.
+ */
+static int test_io_uring_submit_enters(const char *file)
+{
+	struct io_uring ring;
+	int fd, i, ret, ring_flags, open_flags;
+	unsigned head;
+	struct io_uring_cqe *cqe;
+	struct nvme_uring_cmd *cmd;
+	struct io_uring_sqe *sqe;
+
+	ring_flags = IORING_SETUP_IOPOLL;
+	ring_flags |= IORING_SETUP_SQE128;
+	ring_flags |= IORING_SETUP_CQE32;
+
+	ret = io_uring_queue_init(64, &ring, ring_flags);
+	if (ret) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	open_flags = O_WRONLY;
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		perror("file open");
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		off_t offset = BS * (rand() % BUFFERS);
+		__u64 slba;
+		__u32 nlb;
+
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
+		sqe->user_data = i;
+		sqe->opcode = IORING_OP_URING_CMD;
+		sqe->cmd_op = NVME_URING_CMD_IO;
+		cmd = (struct nvme_uring_cmd *)sqe->cmd;
+		memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+		slba = offset >> lba_shift;
+		nlb = (BS >> lba_shift) - 1;
+
+		cmd->opcode = nvme_cmd_read;
+		cmd->cdw10 = slba & 0xffffffff;
+		cmd->cdw11 = slba >> 32;
+		cmd->cdw12 = nlb;
+		cmd->addr = (__u64)(uintptr_t)&vecs[i];
+		cmd->data_len = 1;
+		cmd->nsid = nsid;
+	}
+
+	/* submit manually to avoid adding IORING_ENTER_GETEVENTS */
+	ret = __sys_io_uring_enter(ring.ring_fd, __io_uring_flush_sq(&ring), 0,
+						0, NULL);
+	if (ret < 0)
+		goto err;
+
+	for (i = 0; i < 500; i++) {
+		ret = io_uring_submit(&ring);
+		if (ret != 0) {
+			fprintf(stderr, "still had %d sqes to submit\n", ret);
+			goto err;
+		}
+
+		io_uring_for_each_cqe(&ring, head, cqe) {
+			if (cqe->res == -EOPNOTSUPP)
+				fprintf(stdout, "Device doesn't support polled IO\n");
+			goto ok;
+		}
+		usleep(10000);
+	}
+err:
+	ret = 1;
+	if (fd != -1)
+		close(fd);
+
+ok:
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, ret;
+	char *fname;
+
+	if (argc < 2)
+		return T_EXIT_SKIP;
+
+	fname = argv[1];
+	ret = nvme_get_info(fname);
+
+	if (ret)
+		return T_EXIT_SKIP;
+
+	vecs = t_create_buffers(BUFFERS, BS);
+
+	for (i = 0; i < 16; i++) {
+		int read = (i & 1) != 0;
+		int sqthread = (i & 2) != 0;
+		int fixed = (i & 4) != 0;
+		int nonvec = (i & 8) != 0;
+
+		ret = test_io(fname, i, read, sqthread, fixed, nonvec);
+		if (no_pt)
+			break;
+		if (ret) {
+			fprintf(stderr, "test_io failed %d/%d/%d/%d\n",
+				read, sqthread, fixed, nonvec);
+			goto err;
+		}
+	}
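+	/*
+	 * (Example of the bit decoding above: i = 13 (0b1101) runs the
+	 * read + fixed + nonvec combination without an SQPOLL thread.)
+	 */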
+
+	if (no_pt)
+		return T_EXIT_SKIP;
+
+	ret = test_io_uring_submit_enters(fname);
+	if (ret) {
+		fprintf(stderr, "test_io_uring_submit_enters failed\n");
+		goto err;
+	}
+
+	ret = test_invalid_passthru_submit(fname);
+	if (ret) {
+		fprintf(stderr, "test_invalid_passthru_submit failed\n");
+		goto err;
+	}
+
+	return T_EXIT_PASS;
+err:
+	return T_EXIT_FAIL;
+}
diff --git a/test/io_uring_register.c b/test/io_uring_register.c
index e639f05..b53a67d 100644
--- a/test/io_uring_register.c
+++ b/test/io_uring_register.c
@@ -21,6 +21,7 @@
 #include <linux/mman.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <sys/vfs.h>
 #include <limits.h>
 
 #include "helpers.h"
@@ -31,33 +32,22 @@
 static rlim_t mlock_limit;
 static int devnull;
 
-#if !defined(CONFIG_HAVE_MEMFD_CREATE)
-#include <sys/syscall.h>
-#include <linux/memfd.h>
-
-static int memfd_create(const char *name, unsigned int flags)
-{
-	return (int)syscall(SYS_memfd_create, name, flags);
-}
-#endif
-
-
 static int expect_fail(int fd, unsigned int opcode, void *arg,
-	    unsigned int nr_args, int error)
+		       unsigned int nr_args, int error, int error2)
 {
 	int ret;
 
-	ret = __sys_io_uring_register(fd, opcode, arg, nr_args);
-	if (ret != -1) {
+	ret = io_uring_register(fd, opcode, arg, nr_args);
+	if (ret >= 0) {
 		int ret2 = 0;
 
 		fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
 		if (opcode == IORING_REGISTER_BUFFERS) {
-			ret2 = __sys_io_uring_register(fd,
-					IORING_UNREGISTER_BUFFERS, 0, 0);
+			ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
+						 0, 0);
 		} else if (opcode == IORING_REGISTER_FILES) {
-			ret2 = __sys_io_uring_register(fd,
-					IORING_UNREGISTER_FILES, 0, 0);
+			ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES, 0,
+						 0);
 		}
 		if (ret2) {
 			fprintf(stderr, "internal error: failed to unregister\n");
@@ -66,8 +56,8 @@
 		return 1;
 	}
 
-	if (errno != error) {
-		fprintf(stderr, "expected %d, got %d\n", error, errno);
+	if (ret != error && (!error2 || ret != error2)) {
+		fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
 		return 1;
 	}
 	return 0;
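The extra error2 parameter lets a call site accept either of two errnos
when the kernel's answer has changed across releases; passing 0 means
there is no alternative. A usage sketch taken from the file-backed
registration case later in this patch:

	/* older kernels return -EOPNOTSUPP here, newer ones -EFAULT */
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1,
			      -EFAULT, -EOPNOTSUPP);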
@@ -77,7 +67,7 @@
 {
 	int fd;
 
-	fd = __sys_io_uring_setup(entries, p);
+	fd = io_uring_setup(entries, p);
 	if (fd < 0) {
 		perror("io_uring_setup");
 		exit(1);
@@ -87,8 +77,13 @@
 
 #define MAXFDS (UINT_MAX * sizeof(int))
 
+#define OFS_MAGIC	0x794c7630
+#define TMPFS_MAGIC	0x01021994
+#define RAMFS_MAGIC	0x858458f6
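+/* these match OVERLAYFS_SUPER_MAGIC, TMPFS_MAGIC and RAMFS_MAGIC from
+ * linux/magic.h; statfs(2) reports them in f_type */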
+
 static void *map_filebacked(size_t size)
 {
+	struct statfs buf;
 	int fd, ret;
 	void *addr;
 	char template[32] = "io_uring_register-test-XXXXXXXX";
@@ -98,8 +93,21 @@
 		perror("mkstemp");
 		return NULL;
 	}
+	if (statfs(template, &buf) < 0) {
+		perror("statfs");
+		unlink(template);
+		close(fd);
+		return NULL;
+	}
 	unlink(template);
 
+	/* virtual file systems may not present as file-backed mappings */
+	if (buf.f_type == OFS_MAGIC || buf.f_type == RAMFS_MAGIC ||
+	    buf.f_type == TMPFS_MAGIC) {
+		close(fd);
+		return NULL;
+	}
+
 	ret = ftruncate(fd, size);
 	if (ret < 0) {
 		perror("ftruncate");
@@ -197,18 +205,16 @@
 	 */
 	nr_fds = UINT_MAX;
 	while (nr_fds) {
-		ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES,
-						fd_as, nr_fds);
+		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
+					nr_fds);
 		if (ret != 0) {
 			nr_fds /= 2;
 			continue;
 		}
 		status = 0;
-		ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES,
-						0, 0);
+		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
 		if (ret < 0) {
-			ret = errno;
-			errno = ret;
+			errno = -ret;
 			perror("io_uring_register UNREGISTER_FILES");
 			exit(1);
 		}
@@ -241,24 +247,21 @@
 	iov.iov_base = buf;
 
 	while (iov.iov_len) {
-		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
-		if (ret < 0) {
-			if (errno == ENOMEM) {
-				iov.iov_len /= 2;
-				continue;
-			}
-			if (errno == EFAULT) {
-				free(buf);
-				return 0;
-			}
-			fprintf(stderr, "expected success or EFAULT, got %d\n", errno);
+		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
+		if (ret == -ENOMEM) {
+			iov.iov_len /= 2;
+			continue;
+		} else if (ret == -EFAULT) {
+			free(buf);
+			return 0;
+		} else if (ret) {
+			fprintf(stderr, "expected success or EFAULT, got %d\n", ret);
 			free(buf);
 			return 1;
 		}
-		ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
-						NULL, 0);
+		ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
 		if (ret != 0) {
-			fprintf(stderr, "error: unregister failed with %d\n", errno);
+			fprintf(stderr, "error: unregister failed with %d\n", ret);
 			free(buf);
 			return 1;
 		}
@@ -290,18 +293,18 @@
 		iovs[i].iov_len = pagesize;
 	}
 
-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL);
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0);
 
 	/* reduce to UIO_MAXIOV */
 	nr = UIO_MAXIOV;
-	ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
-	if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
+	ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
+	if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) {
 		fprintf(stderr, "can't register large iovec for regular users, skip\n");
 	} else if (ret != 0) {
-		fprintf(stderr, "expected success, got %d\n", errno);
+		fprintf(stderr, "expected success, got %d\n", ret);
 		status = 1;
 	} else {
-		__sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
+		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
 	}
 	free(buf);
 	free(iovs);
@@ -321,12 +324,12 @@
 	/* NULL pointer for base */
 	iov.iov_base = 0;
 	iov.iov_len = 4096;
-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
 
 	/* valid base, 0 length */
 	iov.iov_base = &buf;
 	iov.iov_len = 0;
-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
 
 	/* valid base, length exceeds size */
 	/* this requires an unmapped page directly after buf */
@@ -337,7 +340,7 @@
 	assert(ret == 0);
 	iov.iov_base = buf;
 	iov.iov_len = 2 * pagesize;
-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
 	munmap(buf, pagesize);
 
 	/* huge page */
@@ -355,22 +358,23 @@
 		 */
 		iov.iov_base = buf;
 		iov.iov_len = 2*1024*1024;
-		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
+		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
 		if (ret < 0) {
-			if (errno == ENOMEM)
+			if (ret == -ENOMEM)
 				printf("Unable to test registering of a huge "
 				       "page.  Try increasing the "
 				       "RLIMIT_MEMLOCK resource limit by at "
 				       "least 2MB.");
 			else {
-				fprintf(stderr, "expected success, got %d\n", errno);
+				fprintf(stderr, "expected success, got %d\n", ret);
 				status = 1;
 			}
 		} else {
-			ret = __sys_io_uring_register(fd,
-					IORING_UNREGISTER_BUFFERS, 0, 0);
+			ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
+						0, 0);
 			if (ret < 0) {
-				perror("io_uring_unregister");
+				fprintf(stderr, "io_uring_unregister: %s\n",
+					strerror(-ret));
 				status = 1;
 			}
 		}
@@ -380,12 +384,12 @@
 
 	/* file-backed buffers -- not supported */
 	buf = map_filebacked(2*1024*1024);
-	if (!buf)
-		status = 1;
-	iov.iov_base = buf;
-	iov.iov_len = 2*1024*1024;
-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP);
-	munmap(buf, 2*1024*1024);
+	if (buf) {
+		iov.iov_base = buf;
+		iov.iov_len = 2*1024*1024;
+		status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
+		munmap(buf, 2*1024*1024);
+	}
 
 	/* bump up against the soft limit and make sure we get EFAULT
 	 * or whatever we're supposed to get.  NOTE: this requires
@@ -412,7 +416,7 @@
 
 	ret = io_uring_submit(ring);
 	if (ret != 1) {
-		fprintf(stderr, "failed to submit poll sqe: %d.\n", errno);
+		fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
 		return 1;
 	}
 
@@ -422,7 +426,7 @@
 		return 1;
 	}
 	ret = 0;
-	if (cqe->res != POLLOUT) {
+	if (!(cqe->res & POLLOUT)) {
 		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
 		       POLLOUT, cqe->res);
 		ret = 1;
@@ -432,14 +436,14 @@
 	return ret;
 }
 
-static int test_poll_ringfd(void)
+static int __test_poll_ringfd(int ring_flags)
 {
 	int status = 0;
 	int ret;
 	int fd;
 	struct io_uring ring;
 
-	ret = io_uring_queue_init(1, &ring, 0);
+	ret = io_uring_queue_init(2, &ring, ring_flags);
 	if (ret) {
 		perror("io_uring_queue_init");
 		return 1;
@@ -454,7 +458,7 @@
 	 * fail, because the kernel does not allow registering of the
 	 * ring_fd.
 	 */
-	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF);
+	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);
 
 	/* tear down queue */
 	io_uring_queue_exit(&ring);
@@ -462,111 +466,15 @@
 	return status;
 }
 
-static int test_shmem(void)
+static int test_poll_ringfd(void)
 {
-	const char pattern = 0xEA;
-	const int len = 4096;
-	struct io_uring_sqe *sqe;
-	struct io_uring_cqe *cqe;
-	struct io_uring ring;
-	struct iovec iov;
-	int memfd, ret, i;
-	char *mem;
-	int pipefd[2] = {-1, -1};
+	int ret;
 
-	ret = io_uring_queue_init(8, &ring, 0);
+	ret = __test_poll_ringfd(0);
 	if (ret)
-		return 1;
+		return ret;
 
-	if (pipe(pipefd)) {
-		perror("pipe");
-		return 1;
-	}
-	memfd = memfd_create("uring-shmem-test", 0);
-	if (memfd < 0) {
-		fprintf(stderr, "memfd_create() failed %i\n", -errno);
-		return 1;
-	}
-	if (ftruncate(memfd, len)) {
-		fprintf(stderr, "can't truncate memfd\n");
-		return 1;
-	}
-	mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
-	if (!mem) {
-		fprintf(stderr, "mmap failed\n");
-		return 1;
-	}
-	for (i = 0; i < len; i++)
-		mem[i] = pattern;
-
-	iov.iov_base = mem;
-	iov.iov_len = len;
-	ret = io_uring_register_buffers(&ring, &iov, 1);
-	if (ret) {
-		if (ret == -EOPNOTSUPP) {
-			fprintf(stdout, "memfd registration isn't supported, "
-					"skip\n");
-			goto out;
-		}
-
-		fprintf(stderr, "buffer reg failed: %d\n", ret);
-		return 1;
-	}
-
-	/* check that we can read and write from/to shmem reg buffer */
-	sqe = io_uring_get_sqe(&ring);
-	io_uring_prep_write_fixed(sqe, pipefd[1], mem, 512, 0, 0);
-	sqe->user_data = 1;
-
-	ret = io_uring_submit(&ring);
-	if (ret != 1) {
-		fprintf(stderr, "submit write failed\n");
-		return 1;
-	}
-	ret = io_uring_wait_cqe(&ring, &cqe);
-	if (ret < 0 || cqe->user_data != 1 || cqe->res != 512) {
-		fprintf(stderr, "reading from shmem failed\n");
-		return 1;
-	}
-	io_uring_cqe_seen(&ring, cqe);
-
-	/* clean it, should be populated with the pattern back from the pipe */
-	memset(mem, 0, 512);
-	sqe = io_uring_get_sqe(&ring);
-	io_uring_prep_read_fixed(sqe, pipefd[0], mem, 512, 0, 0);
-	sqe->user_data = 2;
-
-	ret = io_uring_submit(&ring);
-	if (ret != 1) {
-		fprintf(stderr, "submit write failed\n");
-		return 1;
-	}
-	ret = io_uring_wait_cqe(&ring, &cqe);
-	if (ret < 0 || cqe->user_data != 2 || cqe->res != 512) {
-		fprintf(stderr, "reading from shmem failed\n");
-		return 1;
-	}
-	io_uring_cqe_seen(&ring, cqe);
-
-	for (i = 0; i < 512; i++) {
-		if (mem[i] != pattern) {
-			fprintf(stderr, "data integrity fail\n");
-			return 1;
-		}
-	}
-
-	ret = io_uring_unregister_buffers(&ring);
-	if (ret) {
-		fprintf(stderr, "buffer unreg failed: %d\n", ret);
-		return 1;
-	}
-out:
-	io_uring_queue_exit(&ring);
-	close(pipefd[0]);
-	close(pipefd[1]);
-	munmap(mem, len);
-	close(memfd);
-	return 0;
+	return __test_poll_ringfd(IORING_SETUP_SQPOLL);
 }
 
 int main(int argc, char **argv)
@@ -577,31 +485,31 @@
 	struct rlimit rlim;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	/* setup globals */
 	pagesize = getpagesize();
 	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
 	if (ret < 0) {
 		perror("getrlimit");
-		return 1;
+		return T_EXIT_PASS;
 	}
 	mlock_limit = rlim.rlim_cur;
 	devnull = open("/dev/null", O_RDWR);
 	if (devnull < 0) {
 		perror("open /dev/null");
-		exit(1);
+		exit(T_EXIT_FAIL);
 	}
 
 	/* invalid fd */
-	status |= expect_fail(-1, 0, NULL, 0, EBADF);
+	status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
 	/* valid fd that is not an io_uring fd */
-	status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP);
+	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);
 
 	/* invalid opcode */
 	memset(&p, 0, sizeof(p));
 	fd = new_io_uring(1, &p);
-	ret = expect_fail(fd, ~0U, NULL, 0, EINVAL);
+	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
 	if (ret) {
 		/* if this succeeds, tear down the io_uring instance
 		 * and start clean for the next test. */
@@ -621,11 +529,5 @@
 	if (status)
 		fprintf(stderr, "FAIL\n");
 
-	ret = test_shmem();
-	if (ret) {
-		fprintf(stderr, "test_shmem() failed\n");
-		status |= 1;
-	}
-
 	return status;
 }
diff --git a/test/io_uring_setup.c b/test/io_uring_setup.c
index 7752c97..22bc401 100644
--- a/test/io_uring_setup.c
+++ b/test/io_uring_setup.c
@@ -15,93 +15,18 @@
 #include <errno.h>
 #include <sys/sysinfo.h>
 #include "liburing.h"
+#include "helpers.h"
 
-#include "../syscall.h"
-
-char *features_string(struct io_uring_params *p)
-{
-	static char flagstr[64];
-
-	if (!p || !p->features)
-		return "none";
-
-	if (p->features & ~IORING_FEAT_SINGLE_MMAP) {
-		snprintf(flagstr, 64, "0x%.8x", p->features);
-		return flagstr;
-	}
-
-	if (p->features & IORING_FEAT_SINGLE_MMAP)
-		strncat(flagstr, "IORING_FEAT_SINGLE_MMAP", 64 - strlen(flagstr));
-
-	return flagstr;
-}
-
-/*
- * Attempt the call with the given args.  Return 0 when expect matches
- * the return value of the system call, 1 otherwise.
- */
-char *
-flags_string(struct io_uring_params *p)
-{
-	static char flagstr[64];
-	int add_pipe = 0;
-
-	memset(flagstr, 0, sizeof(flagstr));
-
-	if (!p || p->flags == 0)
-		return "none";
-
-	/*
-	 * If unsupported flags are present, just print the bitmask.
-	 */
-	if (p->flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
-			 IORING_SETUP_SQ_AFF)) {
-		snprintf(flagstr, 64, "0x%.8x", p->flags);
-		return flagstr;
-	}
-
-	if (p->flags & IORING_SETUP_IOPOLL) {
-		strncat(flagstr, "IORING_SETUP_IOPOLL", 64 - strlen(flagstr));
-		add_pipe = 1;
-	}
-	if (p->flags & IORING_SETUP_SQPOLL) {
-		if (add_pipe)
-			strncat(flagstr, "|", 64 - strlen(flagstr));
-		else
-			add_pipe = 1;
-		strncat(flagstr, "IORING_SETUP_SQPOLL", 64 - strlen(flagstr));
-	}
-	if (p->flags & IORING_SETUP_SQ_AFF) {
-		if (add_pipe)
-			strncat(flagstr, "|", 64 - strlen(flagstr));
-		strncat(flagstr, "IORING_SETUP_SQ_AFF", 64 - strlen(flagstr));
-	}
-
-	return flagstr;
-}
-
-char *
-dump_resv(struct io_uring_params *p)
-{
-	static char resvstr[4096];
-
-	if (!p)
-		return "";
-
-	sprintf(resvstr, "0x%.8x 0x%.8x 0x%.8x", p->resv[0],
-		p->resv[1], p->resv[2]);
-
-	return resvstr;
-}
+#include "../src/syscall.h"
 
 /* bogus: setup returns a valid fd on success... except we can't predict the
    fd we'll get, so this really only takes one parameter: the expected error */
-int
-try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect, int error)
+static int try_io_uring_setup(unsigned entries, struct io_uring_params *p,
+			      int expect)
 {
-	int ret, err;
+	int ret;
 
-	ret = __sys_io_uring_setup(entries, p);
+	ret = io_uring_setup(entries, p);
 	if (ret != expect) {
 		fprintf(stderr, "expected %d, got %d\n", expect, ret);
 		/* if we got a valid uring, close it */
@@ -109,53 +34,52 @@
 			close(ret);
 		return 1;
 	}
-	err = errno;
-	if (expect == -1 && error != err) {
-		if (err == EPERM && geteuid() != 0) {
-			printf("Needs root, not flagging as an error\n");
-			return 0;
-		}
-		fprintf(stderr, "expected errno %d, got %d\n", error, err);
-		return 1;
-	}
 
 	return 0;
 }
 
-int
-main(int argc, char **argv)
+int main(int argc, char **argv)
 {
 	int fd;
 	unsigned int status = 0;
 	struct io_uring_params p;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	memset(&p, 0, sizeof(p));
-	status |= try_io_uring_setup(0, &p, -1, EINVAL);
-	status |= try_io_uring_setup(1, NULL, -1, EFAULT);
+	status |= try_io_uring_setup(0, &p, -EINVAL);
+	status |= try_io_uring_setup(1, NULL, -EFAULT);
 
 	/* resv array is non-zero */
 	memset(&p, 0, sizeof(p));
 	p.resv[0] = p.resv[1] = p.resv[2] = 1;
-	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+	status |= try_io_uring_setup(1, &p, -EINVAL);
 
 	/* invalid flags */
 	memset(&p, 0, sizeof(p));
 	p.flags = ~0U;
-	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+	status |= try_io_uring_setup(1, &p, -EINVAL);
 
 	/* IORING_SETUP_SQ_AFF set but not IORING_SETUP_SQPOLL */
 	memset(&p, 0, sizeof(p));
 	p.flags = IORING_SETUP_SQ_AFF;
-	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+	status |= try_io_uring_setup(1, &p, -EINVAL);
 
 	/* attempt to bind to invalid cpu */
 	memset(&p, 0, sizeof(p));
 	p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
 	p.sq_thread_cpu = get_nprocs_conf();
-	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+	status |= try_io_uring_setup(1, &p, -EINVAL);
 
 	/* I think we can limit a process to a set of cpus.  I assume
 	 * we shouldn't be able to set up a kernel thread outside of that.
@@ -163,10 +87,10 @@
 
 	/* read/write on io_uring_fd */
 	memset(&p, 0, sizeof(p));
-	fd = __sys_io_uring_setup(1, &p);
+	fd = io_uring_setup(1, &p);
 	if (fd < 0) {
 		fprintf(stderr, "io_uring_setup failed with %d, expected success\n",
-		       errno);
+		       -fd);
 		status = 1;
 	} else {
 		char buf[4096];
@@ -179,8 +103,8 @@
 	}
 
 	if (!status)
-		return 0;
+		return T_EXIT_PASS;
 
 	fprintf(stderr, "FAIL\n");
-	return -1;
+	return T_EXIT_FAIL;
 }
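With the separate error parameter gone, the single expect value in
try_io_uring_setup() now does double duty: a negative value is the
expected -errno, anything else the expected return value. For example,
from the call sites above:

	status |= try_io_uring_setup(0, &p, -EINVAL);	/* zero entries */
	status |= try_io_uring_setup(1, NULL, -EFAULT);	/* NULL params */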
diff --git a/test/iopoll-leak.c b/test/iopoll-leak.c
new file mode 100644
index 0000000..e0faf53
--- /dev/null
+++ b/test/iopoll-leak.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test a mem leak with IOPOLL
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "helpers.h"
+#include "liburing.h"
+
+#define FILE_SIZE	(128 * 1024)
+#define BS		4096
+#define BUFFERS		(FILE_SIZE / BS)
+
+static int do_iopoll(const char *fname)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	struct iovec *iov;
+	int fd;
+
+	fd = open(fname, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
+		perror("open");
+		return T_EXIT_SKIP;
+	}
+
+	iov = t_create_buffers(1, 4096);
+
+	t_create_ring(2, &ring, IORING_SETUP_IOPOLL);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, 0);
+	io_uring_submit(&ring);
+
+	close(fd);
+	free(iov->iov_base);
+	free(iov);
+	return T_EXIT_PASS;
+}
+
+static int test(const char *fname)
+{
+	if (fork()) {
+		int stat;
+
+		wait(&stat);
+		return WEXITSTATUS(stat);
+	} else {
+		int ret;
+
+		ret = do_iopoll(fname);
+		exit(ret);
+	}
+}
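+/*
+ * (The child exits with its T_EXIT_* code, which the parent recovers via
+ * WEXITSTATUS() above, so skip/fail propagate across the fork; the
+ * in-flight IOPOLL read is deliberately left unreaped when the child
+ * exits, which is the teardown path being leak-checked.)
+ */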
+
+int main(int argc, char *argv[])
+{
+	char buf[256];
+	char *fname;
+	int i, ret;
+
+	if (argc > 1) {
+		fname = argv[1];
+	} else {
+		srand((unsigned)time(NULL));
+		snprintf(buf, sizeof(buf), ".iopoll-leak-%u-%u",
+			(unsigned)rand(), (unsigned)getpid());
+		fname = buf;
+		t_create_file(fname, FILE_SIZE);
+	}
+
+	for (i = 0; i < 16; i++) {
+		ret = test(fname);
+		if (ret == T_EXIT_SKIP || ret == T_EXIT_FAIL)
+			break;
+	}
+
+	if (fname != argv[1])
+		unlink(fname);
+	return ret;
+}
diff --git a/test/iopoll-overflow.c b/test/iopoll-overflow.c
new file mode 100644
index 0000000..52f237a
--- /dev/null
+++ b/test/iopoll-overflow.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: IOPOLL with overflow test case
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+
+#define FILE_SIZE	(128 * 1024)
+#define BS		4096
+#define BUFFERS		(FILE_SIZE / BS)
+
+static struct iovec *vecs;
+
+static int test(struct io_uring *ring, int fd)
+{
+	struct io_uring_sqe *sqe;
+	int i, j, ret;
+	loff_t off;
+
+	off = FILE_SIZE - BS;
+	for (j = 0; j < 8; j++) {
+		for (i = 0; i < BUFFERS; i++) {
+			sqe = io_uring_get_sqe(ring);
+			io_uring_prep_read(sqe, fd, vecs[i].iov_base,
+						vecs[i].iov_len, off);
+			if (!off)
+				off = FILE_SIZE - BS;
+			else
+				off -= BS;
+		}
+		ret = io_uring_submit(ring);
+		if (ret != BUFFERS) {
+			fprintf(stderr, "submitted %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	sleep(1);
+
+	ret = __sys_io_uring_enter(ring->ring_fd, 0, BUFFERS * 8,
+					IORING_ENTER_GETEVENTS, NULL);
+
+	for (i = 0; i < BUFFERS * 8; i++) {
+		struct io_uring_cqe *cqe;
+
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait=%d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	char buf[256];
+	char *fname;
+	int ret, fd;
+
+	p.flags = IORING_SETUP_IOPOLL | IORING_SETUP_CQSIZE;
+	p.cq_entries = 64;
+	ret = t_create_ring_params(64, &ring, &p);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	if (argc > 1) {
+		fname = argv[1];
+	} else {
+		srand((unsigned)time(NULL));
+		snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+			(unsigned)rand(), (unsigned)getpid());
+		fname = buf;
+		t_create_file(fname, FILE_SIZE);
+	}
+
+	fd = open(fname, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EACCES || errno == EPERM) {
+			if (fname != argv[1])
+				unlink(fname);
+			return T_EXIT_SKIP;
+		}
+		perror("open");
+		goto err;
+	}
+
+	vecs = t_create_buffers(BUFFERS, BS);
+
+	ret = test(&ring, fd);
+
+	if (fname != argv[1])
+		unlink(fname);
+	return ret;
+err:
+	if (fname != argv[1])
+		unlink(fname);
+	return T_EXIT_FAIL;
+}
diff --git a/test/iopoll.c b/test/iopoll.c
index f3c22d6..2e0f7ea 100644
--- a/test/iopoll.c
+++ b/test/iopoll.c
@@ -87,6 +87,8 @@
 	}
 	fd = open(file, open_flags);
 	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return 0;
 		perror("file open");
 		goto err;
 	}
@@ -201,7 +203,80 @@
 	return 1;
 }
 
-extern int __io_uring_flush_sq(struct io_uring *ring);
+static void sig_alrm(int sig)
+{
+	fprintf(stderr, "Ran out of time for peek test!\n");
+	exit(T_EXIT_FAIL);
+}
+
+/*
+ * if we are polling, io_uring_cqe_peek() always needs to enter the kernel
+ */
+static int test_io_uring_cqe_peek(const char *file)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	struct sigaction act;
+	int fd, i, ret = T_EXIT_FAIL;
+
+	if (no_iopoll)
+		return 0;
+
+	ret = io_uring_queue_init(64, &ring, IORING_SETUP_IOPOLL);
+	if (ret) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	fd = open(file, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES) {
+			io_uring_queue_exit(&ring);
+			return T_EXIT_SKIP;
+		}
+		perror("file open");
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		struct io_uring_sqe *sqe;
+		off_t offset = BS * (rand() % BUFFERS);
+
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
+		sqe->user_data = 1;
+	}
+
+	/*
+	 * Set alarm for 5 seconds, we should be done way before that
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_handler = sig_alrm;
+	sigaction(SIGALRM, &act, NULL);
+	alarm(5);
+
+	ret = io_uring_submit(&ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit=%d\n", ret);
+		goto err;
+	}
+
+	ret = T_EXIT_PASS;
+	i = 0;
+	do {
+		ret = io_uring_peek_cqe(&ring, &cqe);
+		if (ret)
+			continue;
+		io_uring_cqe_seen(&ring, cqe);
+		i++;
+	} while (i < BUFFERS);
+
+err:
+	if (fd != -1)
+		close(fd);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
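+/*
+ * (io_uring_peek_cqe() returns -EAGAIN while the CQ ring is empty, so the
+ * loop above keeps re-peeking until all BUFFERS completions are seen;
+ * SIGALRM aborts the test if that takes longer than 5 seconds.)
+ */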
 
 /*
  * if we are polling, io_uring_submit needs to always enter the
@@ -227,6 +302,8 @@
 	open_flags = O_WRONLY | O_DIRECT;
 	fd = open(file, open_flags);
 	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("file open");
 		goto err;
 	}
@@ -274,7 +351,7 @@
 }
 
 static int test_io(const char *file, int write, int sqthread, int fixed,
-		   int buf_select)
+		   int buf_select, int defer)
 {
 	struct io_uring ring;
 	int ret, ring_flags = IORING_SETUP_IOPOLL;
@@ -282,6 +359,10 @@
 	if (no_iopoll)
 		return 0;
 
+	if (defer)
+		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
+			      IORING_SETUP_DEFER_TASKRUN;
+
 	ret = t_create_ring(64, &ring, ring_flags);
 	if (ret == T_SETUP_SKIP)
 		return 0;
@@ -323,7 +404,7 @@
 	char *fname;
 
 	if (probe_buf_select())
-		return 1;
+		return T_EXIT_FAIL;
 
 	if (argc > 1) {
 		fname = argv[1];
@@ -337,19 +418,22 @@
 
 	vecs = t_create_buffers(BUFFERS, BS);
 
-	nr = 16;
+	nr = 32;
 	if (no_buf_select)
 		nr = 8;
+	else if (!t_probe_defer_taskrun())
+		nr = 16;
 	for (i = 0; i < nr; i++) {
 		int write = (i & 1) != 0;
 		int sqthread = (i & 2) != 0;
 		int fixed = (i & 4) != 0;
 		int buf_select = (i & 8) != 0;
+		int defer = (i & 16) != 0;
 
-		ret = test_io(fname, write, sqthread, fixed, buf_select);
+		ret = test_io(fname, write, sqthread, fixed, buf_select, defer);
 		if (ret) {
-			fprintf(stderr, "test_io failed %d/%d/%d/%d\n",
-				write, sqthread, fixed, buf_select);
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
+				write, sqthread, fixed, buf_select, defer);
 			goto err;
 		}
 		if (no_iopoll)
@@ -357,16 +441,25 @@
 	}
 
 	ret = test_io_uring_submit_enters(fname);
-	if (ret) {
-	    fprintf(stderr, "test_io_uring_submit_enters failed\n");
-	    goto err;
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_io_uring_submit_enters failed\n");
+		goto err;
+	}
+
+	/*
+	 * Keep this last, it exits on failure
+	 */
+	ret = test_io_uring_cqe_peek(fname);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_io_uring_cqe_peek failed\n");
+		goto err;
 	}
 
 	if (fname != argv[1])
 		unlink(fname);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	if (fname != argv[1])
 		unlink(fname);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/kallsyms.c b/test/kallsyms.c
new file mode 100644
index 0000000..e528766
--- /dev/null
+++ b/test/kallsyms.c
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: read /proc/kallsyms. Mostly just here so that fops->read() can
+ *		get exercised, with and without registered buffers
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+
+#include "helpers.h"
+#include "liburing.h"
+
+#define FILE_SIZE	(8 * 1024)
+#define BS		8192
+#define BUFFERS		(FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int warned;
+
+static int __test_io(const char *file, struct io_uring *ring, int fixed, int nonvec)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int open_flags;
+	int i, fd = -1, ret;
+	off_t offset;
+
+	open_flags = O_RDONLY;
+	if (fixed) {
+		ret = t_register_buffers(ring, vecs, BUFFERS);
+		if (ret == T_SETUP_SKIP)
+			return 0;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == ENOENT)
+			return 0;
+		perror("file open");
+		goto err;
+	}
+
+	offset = 0;
+	for (i = 0; i < BUFFERS; i++) {
+		int do_fixed = fixed;
+
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (fixed && (i & 1))
+			do_fixed = 0;
+		if (do_fixed) {
+			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
+						vecs[i].iov_len, offset, i);
+		} else if (nonvec) {
+			io_uring_prep_read(sqe, fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+		} else {
+			io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
+		}
+		sqe->user_data = i;
+		offset += BS;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (cqe->res == -EINVAL && nonvec) {
+			if (!warned) {
+				fprintf(stdout, "Non-vectored IO not "
+					"supported, skipping\n");
+				warned = 1;
+			}
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (fixed) {
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	close(fd);
+	return 0;
+err:
+	if (fd != -1)
+		close(fd);
+	return 1;
+}
+
+static int test_io(const char *file, int fixed, int nonvec)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+
+	ret = t_create_ring(64, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = __test_io(file, &ring, fixed, nonvec);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+static int has_nonvec_read(void)
+{
+	struct io_uring_probe *p;
+	struct io_uring ring;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		exit(ret);
+	}
+
+	p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
+	ret = io_uring_register_probe(&ring, p, 256);
+	/* if we don't have PROBE_REGISTER, we don't have OP_READ/WRITE */
+	if (ret == -EINVAL) {
+out:
+		io_uring_queue_exit(&ring);
+		free(p);
+		return 0;
+	} else if (ret) {
+		fprintf(stderr, "register_probe: %d\n", ret);
+		goto out;
+	}
+
+	if (p->ops_len <= IORING_OP_READ)
+		goto out;
+	if (!(p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED))
+		goto out;
+	io_uring_queue_exit(&ring);
+	free(p);
+	return 1;
+}
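+/*
+ * (The probe allocation above sizes for 256 trailing io_uring_probe_op
+ * entries because struct io_uring_probe ends in a flexible array member;
+ * ops_len then tells how many entries the kernel filled in.)
+ */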
+
+int main(int argc, char *argv[])
+{
+	int ret, nonvec;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	vecs = t_create_buffers(BUFFERS, BS);
+
+	/* if we don't have nonvec read, skip testing that */
+	nonvec = has_nonvec_read();
+
+	if (nonvec) {
+		ret = test_io("/proc/kallsyms", 0, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = test_io("/proc/kallsyms", 0, 1);
+	if (ret)
+		goto err;
+
+	if (nonvec) {
+		ret = test_io("/proc/kallsyms", 1, 0);
+		if (ret)
+			goto err;
+	}
+
+	ret = test_io("/proc/kallsyms", 1, 1);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	fprintf(stderr, "Reading kallsyms failed\n");
+	return 1;
+}
diff --git a/test/lfs-openat-write.c b/test/lfs-openat-write.c
index 6bbf78d..bae51b0 100644
--- a/test/lfs-openat-write.c
+++ b/test/lfs-openat-write.c
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#define _LARGEFILE_SOURCE
-#define _FILE_OFFSET_BITS 64
-
-#include <liburing.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -14,14 +10,18 @@
 #include <sys/resource.h>
 #include <unistd.h>
 
+#include "liburing.h"
+#include "helpers.h"
+
 static const int RSIZE = 2;
-static const int OPEN_FLAGS = O_RDWR | O_CREAT;
+static const int OPEN_FLAGS = O_RDWR | O_CREAT | O_LARGEFILE;
 static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;
 
-#define DIE(...) do {\
-		fprintf(stderr, __VA_ARGS__);\
-		abort();\
-	} while(0);
+#define DIE(...)				\
+	do {					\
+		fprintf(stderr, __VA_ARGS__);	\
+		abort();			\
+	} while(0)
 
 static int do_write(struct io_uring *ring, int fd, off_t offset)
 {
@@ -100,7 +100,7 @@
 	int dfd, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
 	if (dfd < 0)
diff --git a/test/lfs-openat.c b/test/lfs-openat.c
index 4823cc4..2601968 100644
--- a/test/lfs-openat.c
+++ b/test/lfs-openat.c
@@ -1,8 +1,5 @@
 /* SPDX-License-Identifier: MIT */
 
-#define _LARGEFILE_SOURCE
-#define _FILE_OFFSET_BITS 64
-
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -15,13 +12,14 @@
 
 #include "liburing.h"
 
-#define DIE(...) do {\
-		fprintf(stderr, __VA_ARGS__);\
-		abort();\
-	} while(0);
+#define DIE(...)				\
+	do {					\
+		fprintf(stderr, __VA_ARGS__);	\
+		abort();			\
+	} while(0)
 
 static const int RSIZE = 2;
-static const int OPEN_FLAGS = O_RDWR | O_CREAT;
+static const int OPEN_FLAGS = O_RDWR | O_CREAT | O_LARGEFILE;
 static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;
 
 static int open_io_uring(struct io_uring *ring, int dfd, const char *fn)
diff --git a/test/link-timeout.c b/test/link-timeout.c
index ad638e9..63de29e 100644
--- a/test/link-timeout.c
+++ b/test/link-timeout.c
@@ -10,8 +10,10 @@
 #include <string.h>
 #include <fcntl.h>
 #include <poll.h>
+#include <sys/time.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int test_fail_lone_link_timeouts(struct io_uring *ring)
 {
@@ -536,6 +538,139 @@
 	return 1;
 }
 
+static int test_link_timeout_update(struct io_uring *ring, int async)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct timeval start;
+	unsigned long msec;
+	int fds[2], ret, i;
+	struct iovec iov;
+	char buffer[128];
+
+	if (pipe(fds)) {
+		perror("pipe");
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		printf("get sqe failed\n");
+		goto err;
+	}
+	iov.iov_base = buffer;
+	iov.iov_len = sizeof(buffer);
+	io_uring_prep_readv(sqe, fds[0], &iov, 1, 0);
+	sqe->flags |= IOSQE_IO_LINK;
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		printf("get sqe failed\n");
+		goto err;
+	}
+	ts.tv_sec = 5;
+	ts.tv_nsec = 0;
+	io_uring_prep_link_timeout(sqe, &ts, 0);
+	sqe->user_data = 2;
+
+	ret = io_uring_submit(ring);
+	if (ret != 2) {
+		printf("sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = 100000000LL;
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_timeout_update(sqe, &ts, 2, IORING_LINK_TIMEOUT_UPDATE);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	sqe->user_data = 3;
+
+	io_uring_submit(ring);
+
+	gettimeofday(&start, NULL);
+	for (i = 0; i < 3; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			printf("wait completion %d\n", ret);
+			goto err;
+		}
+		switch (cqe->user_data) {
+		case 1:
+			if (cqe->res != -EINTR && cqe->res != -ECANCELED) {
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
+						cqe->res);
+				goto err;
+			}
+			break;
+		case 2:
+			/* FASTPOLL kernels can cancel successfully */
+			if (cqe->res != -EALREADY && cqe->res != -ETIME) {
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
+						cqe->res);
+				goto err;
+			}
+			break;
+		case 3:
+			if (cqe->res) {
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
+						cqe->res);
+				goto err;
+			}
+			break;
+		}
+
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	msec = mtime_since_now(&start);
+	if (msec < 10 || msec > 200) {
+		fprintf(stderr, "Timeout appears incorrect: %lu\n", msec);
+		goto err;
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	return 0;
+err:
+	return 1;
+}
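+/*
+ * (Timing check above: the link timeout was armed at 5s and then updated
+ * to 100 msec, so all three completions should land roughly 100 msec
+ * after submission; the 10..200 msec window leaves slack for scheduling.)
+ */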
+
+static int test_link_timeout_update_invalid(struct io_uring *ring, int async)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = 100000000LL;
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_timeout_update(sqe, &ts, 2, IORING_LINK_TIMEOUT_UPDATE);
+	sqe->user_data = 0xcafe0000;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		printf("wait completion %d\n", ret);
+		goto err;
+	}
+	if (cqe->res != -ENOENT) {
+		fprintf(stderr, "bad timeout update: %d\n", cqe->res);
+		goto err;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+err:
+	return 1;
+}
+
 static int test_timeout_link_chain1(struct io_uring *ring)
 {
 	struct __kernel_timespec ts;
@@ -1011,12 +1146,12 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_timeout_link_chain1(&ring);
@@ -1103,5 +1238,30 @@
 		return ret;
 	}
 
-	return 0;
+	ret = test_link_timeout_update(&ring, 0);
+	if (ret) {
+		printf("test_link_timeout_update 0 failed\n");
+		return ret;
+	}
+
+	ret = test_link_timeout_update(&ring, 1);
+	if (ret) {
+		printf("test_link_timeout_update 1 failed\n");
+		return ret;
+	}
+
+	ret = test_link_timeout_update_invalid(&ring, 0);
+	if (ret) {
+		printf("test_link_timeout_update_invalid 0 failed\n");
+		return ret;
+	}
+
+	ret = test_link_timeout_update_invalid(&ring, 1);
+	if (ret) {
+		printf("test_link_timeout_update_invalid 1 failed\n");
+		return ret;
+	}
+
+	return T_EXIT_PASS;
 }
diff --git a/test/link.c b/test/link.c
index 41d3899..3c8d991 100644
--- a/test/link.c
+++ b/test/link.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int no_hardlink;
 
@@ -435,19 +436,19 @@
 	int ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 
 	}
 
 	ret = io_uring_queue_init(8, &poll_ring, IORING_SETUP_IOPOLL);
 	if (ret) {
 		printf("poll_ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_single_link(&ring);
@@ -492,5 +493,5 @@
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
diff --git a/test/link_drain.c b/test/link_drain.c
index b95168d..86b0aa8 100644
--- a/test/link_drain.c
+++ b/test/link_drain.c
@@ -96,7 +96,7 @@
 	return 1;
 }
 
-int test_link_drain_multi(struct io_uring *ring)
+static int test_link_drain_multi(struct io_uring *ring)
 {
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe[9];
@@ -198,15 +198,17 @@
 
 }
 
-int main(int argc, char *argv[])
+static int test_drain(bool defer)
 {
 	struct io_uring ring;
 	int i, ret;
+	unsigned int flags = 0;
 
-	if (argc > 1)
-		return 0;
+	if (defer)
+		flags = IORING_SETUP_SINGLE_ISSUER |
+			IORING_SETUP_DEFER_TASKRUN;
 
-	ret = io_uring_queue_init(100, &ring, 0);
+	ret = io_uring_queue_init(100, &ring, flags);
 	if (ret) {
 		printf("ring setup failed\n");
 		return 1;
@@ -227,3 +229,27 @@
 
 	return ret;
 }
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test_drain(false);
+	if (ret) {
+		fprintf(stderr, "test_drain(false) failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	if (t_probe_defer_taskrun()) {
+		ret = test_drain(true);
+		if (ret) {
+			fprintf(stderr, "test_drain(true) failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	return T_EXIT_PASS;
+}
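t_probe_defer_taskrun() gates the second drain run on kernel support. A
plausible sketch of such a probe (illustrative only; the real helper
lives in the test helpers, and probe_defer_taskrun_sketch() is a
hypothetical name):

	static int probe_defer_taskrun_sketch(void)
	{
		struct io_uring ring;
		int ret;

		/* kernels without support reject this flag pair */
		ret = io_uring_queue_init(1, &ring,
					  IORING_SETUP_SINGLE_ISSUER |
					  IORING_SETUP_DEFER_TASKRUN);
		if (ret)
			return 0;
		io_uring_queue_exit(&ring);
		return 1;
	}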
diff --git a/test/linked-defer-close.c b/test/linked-defer-close.c
new file mode 100644
index 0000000..ca8e62f
--- /dev/null
+++ b/test/linked-defer-close.c
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test that the final close of a file does indeed get it closed, if the
+ * ring is set up with DEFER_TASKRUN and the task is waiting in cqring_wait()
+ * at the time. Also see:
+ *
+ * https://github.com/axboe/liburing/issues/1235
+ *
+ * for a bug report, and the zig code on which this test program is based.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+enum {
+	IS_ACCEPT = 0,
+	IS_SEND = 0x100,
+	IS_SEND2 = 0x101,
+	IS_SEND3 = 0x102,
+	IS_CLOSE = 0x200,
+};
+
+struct thread_data {
+	int parent_pid;
+};
+
+static void *thread_fn(void *__data)
+{
+	struct thread_data *data = __data;
+	struct sockaddr_in saddr;
+	int sockfd, ret;
+	char msg[64];
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = htons(9999);
+	inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr);
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		goto done;
+	}
+
+	ret = connect(sockfd, (struct sockaddr *) &saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("connect");
+		close(sockfd);
+		goto done;
+	}
+
+	do {
+		memset(msg, 0, sizeof(msg));
+		ret = recv(sockfd, msg, sizeof(msg), 0);
+	} while (ret > 0);
+
+	close(sockfd);
+done:
+	kill(data->parent_pid, SIGUSR1);
+	return NULL;
+}
+
+/* we got SIGUSR1, exit normally */
+static void sig_usr1(int sig)
+{
+	exit(T_EXIT_PASS);
+}
+
+/* timed out, failure */
+static void sig_timeout(int sig)
+{
+	exit(T_EXIT_FAIL);
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct sockaddr_in saddr;
+	char *msg1 = "message number 1\n";
+	char *msg2 = "message number 2\n";
+	char *msg3 = "message number 3\n";
+	int val, send_fd, ret, sockfd;
+	struct sigaction act[2] = { };
+	struct thread_data td;
+	pthread_t thread;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+	saddr.sin_port = htons(9999);
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		return T_EXIT_FAIL;
+	}
+
+	val = 1;
+	setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
+	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+
+	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("bind");
+		close(sockfd);
+		return T_EXIT_FAIL;
+	}
+
+	ret = listen(sockfd, 1);
+	if (ret < 0) {
+		perror("listen");
+		close(sockfd);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret == -EINVAL) {
+		close(sockfd);
+		return T_EXIT_SKIP;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_multishot_accept(sqe, sockfd, NULL, NULL, 0);
+	sqe->user_data = IS_ACCEPT;
+	io_uring_submit(&ring);
+
+	/* check for no multishot accept */
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret && cqe->res == -EINVAL) {
+		close(sockfd);
+		return T_EXIT_SKIP;
+	}
+
+	/* expected exit */
+	act[0].sa_handler = sig_usr1;
+	sigaction(SIGUSR1, &act[0], NULL);
+
+	/* if this hits, we have failed */
+	act[1].sa_handler = sig_timeout;
+	sigaction(SIGALRM, &act[1], NULL);
+	alarm(5);
+
+	/* start receiver */
+	td.parent_pid = getpid();
+	pthread_create(&thread, NULL, thread_fn, &td);
+
+	do {
+		ret = io_uring_submit_and_wait(&ring, 1);
+		if (ret < 0) {
+			fprintf(stderr, "submit: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		ret = io_uring_peek_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "peek: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		switch (cqe->user_data) {
+		case IS_ACCEPT:
+			send_fd = cqe->res;
+			io_uring_cqe_seen(&ring, cqe);
+
+			/*
+			 * prep two sends, with the 2nd linked to a close
+			 * operation. Once the close has been completed, that
+			 * will terminate the receiving thread and that will
+			 * in turn send this task a SIGUSR1 signal. If the
+			 * kernel is buggy, then we never get SIGUSR1 and we
+			 * will sit forever waiting and be timed out.
+			 */
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_send(sqe, send_fd, msg1, strlen(msg1), 0);
+			sqe->user_data = IS_SEND;
+			sqe->flags = IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_LINK;
+
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_send(sqe, send_fd, msg2, strlen(msg2), 0);
+			sqe->user_data = IS_SEND2;
+			sqe->flags = IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_LINK;
+
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_send(sqe, send_fd, msg3, strlen(msg3), 0);
+			sqe->user_data = IS_SEND3;
+			sqe->flags = IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_LINK;
+
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_close(sqe, send_fd);
+			sqe->user_data = IS_CLOSE;
+			sqe->flags = IOSQE_CQE_SKIP_SUCCESS;
+			break;
+		case IS_SEND:
+		case IS_SEND2:
+		case IS_SEND3:
+			fprintf(stderr, "Should not see send response\n");
+			io_uring_cqe_seen(&ring, cqe);
+			return T_EXIT_FAIL;
+		case IS_CLOSE:
+			fprintf(stderr, "Should not see close response\n");
+			io_uring_cqe_seen(&ring, cqe);
+			return T_EXIT_FAIL;
+		default:
+			fprintf(stderr, "got unknown cqe\n");
+			return T_EXIT_FAIL;
+		}
+	} while (1);
+
+	/* will never get here */
+	return T_EXIT_FAIL;
+}
diff --git a/test/madvise.c b/test/madvise.c
index b85aba8..dd46151 100644
--- a/test/madvise.c
+++ b/test/madvise.c
@@ -20,30 +20,6 @@
 #define LOOPS		100
 #define MIN_LOOPS	10
 
-static unsigned long long utime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000000;
-	return sec + usec;
-}
-
-static unsigned long long utime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return utime_since(tv, &end);
-}
-
 static int do_madvise(struct io_uring *ring, void *addr, off_t len, int advice)
 {
 	struct io_uring_sqe *sqe;
@@ -100,6 +76,8 @@
 
 	fd = open(filename, O_RDONLY);
 	if (fd < 0) {
+		if (errno == EACCES || errno == EPERM)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -143,9 +121,12 @@
 		return 1;
 
 	if (cached_read < uncached_read &&
-	    cached_read2 < uncached_read)
+	    cached_read2 < uncached_read) {
+		free(buf);
 		return 0;
+	}
 
+	free(buf);
 	return 2;
 }
 
@@ -170,6 +151,8 @@
 	good = bad = 0;
 	for (i = 0; i < LOOPS; i++) {
 		ret = test_madvise(&ring, fname);
+		if (ret == T_EXIT_SKIP)
+			goto skip;
 		if (ret == 1) {
 			fprintf(stderr, "test_madvise failed\n");
 			goto err;
@@ -182,14 +165,18 @@
 	}
 
 	/* too hard to reliably test, just ignore */
-	if (0 && bad > good)
+	if ((0) && bad > good)
 		fprintf(stderr, "Suspicious timings (%u > %u)\n", bad, good);
 	if (fname != argv[1])
 		unlink(fname);
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
 err:
 	if (fname != argv[1])
 		unlink(fname);
-	return 1;
+	return T_EXIT_FAIL;
+skip:
+	if (fname != argv[1])
+		unlink(fname);
+	return T_EXIT_SKIP;
 }
diff --git a/test/min-timeout-wait.c b/test/min-timeout-wait.c
new file mode 100644
index 0000000..477a68e
--- /dev/null
+++ b/test/min-timeout-wait.c
@@ -0,0 +1,329 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various min_timeout tests
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+struct data {
+	pthread_barrier_t startup;
+	unsigned long usec_sleep;
+	int out_fds[8];
+	int nr_fds;
+};
+
+static int time_pass(struct timeval *start, unsigned long min_t,
+		     unsigned long max_t, const char *name)
+{
+	unsigned long elapsed;
+
+	elapsed = mtime_since_now(start);
+	if (elapsed < min_t || elapsed > max_t) {
+		fprintf(stderr, "%s fails time check\n", name);
+		fprintf(stderr, " elapsed=%lu, min=%lu, max=%lu\n", elapsed,
+				min_t, max_t);
+		return T_EXIT_FAIL;
+	}
+	return T_EXIT_PASS;
+}
+
+static void *pipe_write(void *data)
+{
+	struct data *d = data;
+	char buf[32];
+	int i;
+
+	memset(buf, 0x55, sizeof(buf));
+
+	pthread_barrier_wait(&d->startup);
+
+	if (d->usec_sleep)
+		usleep(d->usec_sleep);
+
+	for (i = 0; i < d->nr_fds; i++) {
+		int ret;
+
+		ret = write(d->out_fds[i], buf, sizeof(buf));
+		if (ret < 0) {
+			perror("write");
+			return NULL;
+		}
+	}
+
+	return NULL;
+}
+
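+/*
+ * Submit reads on npipes pipes; a thread writes to them after usec_sleep.
+ * Wait for 4 CQEs with a min_wait of usec_wait (usec) inside a 1 second
+ * overall timeout, and require the elapsed time to land in [min_t, max_t]
+ * msec.
+ */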
+static int __test_writes(struct io_uring *ring, int npipes, int usec_sleep,
+			 int usec_wait, int min_t, int max_t, const char *name)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct timeval tv;
+	int ret, i, fds[4][2];
+	pthread_t thread;
+	struct data d;
+	char buf[32];
+	void *tret;
+
+	for (i = 0; i < npipes; i++) {
+		if (pipe(fds[i]) < 0) {
+			perror("pipe");
+			return T_EXIT_FAIL;
+		}
+		d.out_fds[i] = fds[i][1];
+	}
+	d.nr_fds = npipes;
+
+	pthread_barrier_init(&d.startup, NULL, 2);
+	d.usec_sleep = usec_sleep;
+
+	pthread_create(&thread, NULL, pipe_write, &d);
+	pthread_barrier_wait(&d.startup);
+
+	for (i = 0; i < npipes; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_read(sqe, fds[i][0], buf, sizeof(buf), 0);
+	}
+
+	io_uring_submit(ring);
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+	gettimeofday(&tv, NULL);
+	ret = io_uring_wait_cqes_min_timeout(ring, &cqe, 4, &ts, usec_wait, NULL);
+	if (ret) {
+		fprintf(stderr, "wait_cqes: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = time_pass(&tv, min_t, max_t, name);
+
+	io_uring_cq_advance(ring, npipes);
+
+	pthread_join(thread, &tret);
+	for (i = 0; i < npipes; i++) {
+		close(fds[i][0]);
+		close(fds[i][1]);
+	}
+	return ret;
+}
+
+/*
+ * Test doing min_wait for N events, where 0 events are already available
+ * on wait enter but N/2 are posted within the min_wait window. We'll expect to
+ * return when the min_wait window expires.
+ */
+static int test_some_wait(struct io_uring *ring)
+{
+	return __test_writes(ring, 2, 1000, 100000, 95, 120, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, where 0 events are already available
+ * on wait enter but N are posted within the min_wait window. We'll expect to
+ * return upon arrival of the N events, not the full min_wait window.
+ */
+static int test_post_wait(struct io_uring *ring)
+{
+	return __test_writes(ring, 4, 10000, 200000, 9, 12, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, where 0 events are already available
+ * on wait enter and one is posted after the min_wait timeout has expired.
+ * That first event should cause wait to abort, even if the task has asked
+ * for more to wait on.
+ */
+static int test_late(struct io_uring *ring)
+{
+	return __test_writes(ring, 1, 100000, 10000, 95, 120, __FUNCTION__);
+}
+
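+/*
+ * Queue nr_nops nops (if any) and wait for 4 CQEs with a 50 msec min_wait
+ * inside a long_wait (usec) timeout; elapsed time must fall in
+ * [min_t, max_t] msec. With no nops queued, the wait must end in -ETIME.
+ */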
+static int __test_nop(struct io_uring *ring, int nr_nops, int min_t, int max_t,
+		      unsigned long long_wait, const char *name)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	struct timeval tv;
+	int i, ret;
+
+	for (i = 0; i < nr_nops; i++) {
+		struct io_uring_sqe *sqe;
+
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_nop(sqe);
+	}
+
+	if (nr_nops)
+		io_uring_submit(ring);
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = long_wait * 1000;
+	gettimeofday(&tv, NULL);
+	ret = io_uring_wait_cqes_min_timeout(ring, &cqe, 4, &ts, 50000, NULL);
+	io_uring_cq_advance(ring, nr_nops);
+	if (nr_nops) {
+		if (ret) {
+			fprintf(stderr, "wait_cqes: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	} else {
+		if (ret != -ETIME) {
+			fprintf(stderr, "wait_cqes: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	return time_pass(&tv, min_t, max_t, name);
+}
+
+/*
+ * Test doing min_wait for N events, where N/2 events are already available
+ * on wait enter. This should abort waiting after min_wait, not do the full
+ * wait.
+ */
+static int test_some(struct io_uring *ring)
+{
+	return __test_nop(ring, 2, 45, 55, 100000, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, where N events are already available
+ * on wait enter.
+ */
+static int test_already(struct io_uring *ring)
+{
+	return __test_nop(ring, 4, 0, 1, 100000, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, and nothing ever gets posted. We'd
+ * expect the time to be the normal wait time, not the min_wait time.
+ */
+static int test_nothing(struct io_uring *ring)
+{
+	return __test_nop(ring, 0, 95, 110, 100000, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, and nothing ever gets posted, and use
+ * a min_wait time that's bigger than the total wait. We only expect the
+ * min_wait to elapse.
+ */
+static int test_min_wait_biggest(struct io_uring *ring)
+{
+	return __test_nop(ring, 0, 45, 55, 20000, __FUNCTION__);
+}
+
+/*
+ * Test doing min_wait for N events, and nothing ever gets posted, and use
+ * a min_wait time that's roughly equal to the total wait. We only expect the
+ * min_wait to elapse.
+ */
+static int test_min_wait_equal(struct io_uring *ring)
+{
+	return __test_nop(ring, 0, 45, 55, 50001, __FUNCTION__);
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring1, ring2;
+	struct io_uring_params p = { };
+	int ret;
+
+	if (argc > 1)
+		return 0;
+
+	ret = t_create_ring_params(8, &ring1, &p);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret != T_SETUP_OK)
+		return ret;
+	if (!(p.features & IORING_FEAT_MIN_TIMEOUT))
+		return T_EXIT_SKIP;
+
+	p.flags = IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN;
+	ret = t_create_ring_params(8, &ring2, &p);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret != T_SETUP_OK)
+		return ret;
+
+	ret = test_already(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_already(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_some(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_some(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_late(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_late(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_post_wait(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_post_wait(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_some_wait(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_some_wait(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_nothing(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_nothing(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_min_wait_biggest(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_min_wait_biggest(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test_min_wait_equal(&ring1);
+	if (ret == T_EXIT_FAIL || ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test_min_wait_equal(&ring2);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	io_uring_queue_exit(&ring1);
+	io_uring_queue_exit(&ring2);
+	return T_EXIT_PASS;
+}
diff --git a/test/min-timeout.c b/test/min-timeout.c
new file mode 100644
index 0000000..7f090a2
--- /dev/null
+++ b/test/min-timeout.c
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test min timeout handling
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define NPIPES	8
+#define NWRITES	6
+
+#define WAIT_USEC	(250000)
+
+static int no_min_timeout;
+
+struct d {
+	int fd[NPIPES];
+	long delay;
+};
+
+static void *thread_fn(void *data)
+{
+	struct d *d = data;
+	char buf[32];
+	int i;
+
+	memset(buf, 0x55, sizeof(buf));
+
+	for (i = 0; i < NWRITES; i++) {
+		int ret;
+
+		usleep(d->delay);
+		ret = write(d->fd[i], buf, sizeof(buf));
+		if (ret != sizeof(buf)) {
+			fprintf(stderr, "bad write %d\n", ret);
+			break;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Allow 25% tolerance
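+ * (e.g. a 250 msec target accepts 187..312 msec)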
+ */
+static int within_range(unsigned int target, unsigned int msec)
+{
+	unsigned int high, low;
+
+	low = (target * 3) / 4;
+	high = (target * 5) / 4;
+	return (msec >= low && msec <= high);
+}
+
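+/*
+ * Kick off NWRITES delayed pipe writes, then wait for 8 CQEs with the
+ * given min_wait (usec) inside a WAIT_USEC timeout. Checks both the
+ * elapsed time against msec_target (msec) and how many voluntary context
+ * switches the wait incurred against expected_ctx.
+ */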
+static int test(int flags, int expected_ctx, int min_wait, int write_delay,
+		int nr_cqes, int msec_target)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	struct __kernel_timespec ts;
+	struct rusage s, e;
+	pthread_t thread;
+	struct d d;
+	struct io_uring_params p = { .flags = flags, };
+	int ret, fds[NPIPES][2], i;
+	struct timeval start_time;
+	char buf[32];
+	void *tret;
+	long ttime;
+
+	ret = io_uring_queue_init_params(NPIPES, &ring, &p);
+	if (ret == -EINVAL)
+		return T_EXIT_SKIP;
+	if (!(p.features & IORING_FEAT_MIN_TIMEOUT)) {
+		no_min_timeout = 1;
+		io_uring_queue_exit(&ring);
+		return T_EXIT_SKIP;
+	}
+
+	for (i = 0; i < NPIPES; i++) {
+		if (pipe(fds[i]) < 0) {
+			perror("pipe");
+			return 1;
+		}
+		d.fd[i] = fds[i][1];
+	}
+
+	d.delay = write_delay;
+	pthread_create(&thread, NULL, thread_fn, &d);
+
+	for (i = 0; i < NPIPES; i++) {
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_read(sqe, fds[i][0], buf, sizeof(buf), 0);
+	}
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = WAIT_USEC * 1000LL;
+
+	gettimeofday(&start_time, NULL);
+	getrusage(RUSAGE_THREAD, &s);
+	ret = io_uring_submit_and_wait_min_timeout(&ring, &cqe, 8, &ts, min_wait, NULL);
+	if (ret != NPIPES)
+		fprintf(stderr, "submit_and_wait=%d\n", ret);
+
+	getrusage(RUSAGE_THREAD, &e);
+	e.ru_nvcsw -= s.ru_nvcsw;
+	ttime = mtime_since_now(&start_time);
+	if (!within_range(msec_target, ttime)) {
+		fprintf(stderr, "Expected %d msec, got %ld msec\n", msec_target,
+								ttime);
+		fprintf(stderr, "flags=%x, min_wait=%d, write_delay=%d\n",
+				flags, min_wait, write_delay);
+	}
+	/* will usually be accurate, but allow for offset of 1 */
+	if (e.ru_nvcsw != expected_ctx &&
+	    (e.ru_nvcsw - expected_ctx > 1))
+		fprintf(stderr, "%ld ctx switches, expected %d\n", e.ru_nvcsw,
+								expected_ctx);
+
+	for (i = 0; i < NPIPES; i++) {
+		ret = io_uring_peek_cqe(&ring, &cqe);
+		if (ret)
+			break;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	if (i != nr_cqes)
+		fprintf(stderr, "Got %d CQEs, expected %d\n", i, nr_cqes);
+
+	pthread_join(thread, &tret);
+
+	for (i = 0; i < NPIPES; i++) {
+		close(fds[i][0]);
+		close(fds[i][1]);
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0, NWRITES + 1, 0, 2000, NWRITES, WAIT_USEC / 1000);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+	if (no_min_timeout)
+		return T_EXIT_SKIP;
+
+	ret = test(0, NWRITES + 1, 50000, 2000, NWRITES, 50);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test(0, NWRITES + 1, 500000, 2000, NWRITES, 500);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	/* no writes within min timeout, but it's given. expect 1 cqe */
+	ret = test(0, 1, 10000, 20000, 1, 20);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	/* same as above, but no min timeout. should time out and we get 6 */
+	ret = test(0, NWRITES + 1, 0, 20000, NWRITES, WAIT_USEC / 1000);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1,
+			0, 2000, NWRITES, WAIT_USEC / 1000);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1,
+			50000, 2000, NWRITES, 50);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1,
+			500000, 2000, NWRITES, 500);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	/* no writes within min timeout, but it's given. expect 1 cqe */
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1,
+			10000, 20000, 1, 20);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	/* same as above, but no min timeout. should time out and we get 6 */
+	ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1,
+			0, 20000, NWRITES, WAIT_USEC / 1000);
+	if (ret == T_EXIT_FAIL)
+		return T_EXIT_FAIL;
+
+	return ret;
+}
diff --git a/test/mkdir.c b/test/mkdir.c
index 363fe1e..a160668 100644
--- a/test/mkdir.c
+++ b/test/mkdir.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int do_mkdirat(struct io_uring *ring, const char *fn)
 {
@@ -59,7 +60,7 @@
 	struct io_uring ring;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
@@ -71,7 +72,7 @@
 	if (ret < 0) {
 		if (ret == -EBADF || ret == -EINVAL) {
 			fprintf(stdout, "mkdirat not supported, skipping\n");
-			goto out;
+			goto skip;
 		}
 		fprintf(stderr, "mkdirat: %s\n", strerror(-ret));
 		goto err;
@@ -96,13 +97,22 @@
 		goto err1;
 	}
 
-out:
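+	/* a bogus path pointer must fail with -EFAULT, not crash */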
+	ret = do_mkdirat(&ring, (const char *) (uintptr_t) 0x1234);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "do_mkdirat bad address: %d\n", ret);
+		goto err1;
+	}
+
 	unlinkat(AT_FDCWD, fn, AT_REMOVEDIR);
 	io_uring_queue_exit(&ring);
-	return 0;
+	return T_EXIT_PASS;
+skip:
+	unlinkat(AT_FDCWD, fn, AT_REMOVEDIR);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_SKIP;
 err1:
 	unlinkat(AT_FDCWD, fn, AT_REMOVEDIR);
 err:
 	io_uring_queue_exit(&ring);
-	return 1;
+	return T_EXIT_FAIL;
 }
diff --git a/test/msg-ring-fd.c b/test/msg-ring-fd.c
new file mode 100644
index 0000000..1e15f8a
--- /dev/null
+++ b/test/msg-ring-fd.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test fd passing with MSG_RING
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int no_msg;
+static int no_sparse;
+static int no_fd_pass;
+
+struct data {
+	pthread_t thread;
+	pthread_barrier_t barrier;
+	int ring_flags;
+	int ring_fd;
+	char buf[32];
+};
+
+static void *thread_fn(void *__data)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct data *d = __data;
+	struct io_uring ring;
+	int ret, fd = -1;
+
+	io_uring_queue_init(8, &ring, d->ring_flags);
+	ret = io_uring_register_files(&ring, &fd, 1);
+	if (ret) {
+		if (ret != -EINVAL && ret != -EBADF)
+			fprintf(stderr, "thread file register: %d\n", ret);
+		no_sparse = 1;
+		pthread_barrier_wait(&d->barrier);
+		return NULL;
+	}
+
+	d->ring_fd = ring.ring_fd;
+	pthread_barrier_wait(&d->barrier);
+
+	/* wait for MSG */
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe dst: %d\n", ret);
+		return NULL;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe error dst: %d\n", cqe->res);
+		return NULL;
+	}
+
+	fd = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, d->buf, sizeof(d->buf), 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+	io_uring_submit(&ring);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe dst: %d\n", ret);
+		return NULL;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe error dst: %d\n", cqe->res);
+		return NULL;
+	}
+
+	io_uring_queue_exit(&ring);
+	return NULL;
+}
+
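+/*
+ * Pass a registered pipe fd to a ring owned by another thread via
+ * IORING_OP_MSG_RING, then verify the thread can read the written data
+ * through its new fixed file.
+ */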
+static int test_remote(struct io_uring *src, int ring_flags)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int fds[2], fd, ret;
+	struct data d;
+	char buf[32];
+	void *tret;
+	int i;
+
+	if (no_fd_pass)
+		return 0;
+
+	pthread_barrier_init(&d.barrier, NULL, 2);
+	d.ring_flags = ring_flags;
+	pthread_create(&d.thread, NULL, thread_fn, &d);
+	pthread_barrier_wait(&d.barrier);
+	memset(d.buf, 0, sizeof(d.buf));
+
+	if (no_sparse)
+		return 0;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	fd = fds[0];
+	ret = io_uring_register_files(src, &fd, 1);
+	if (ret) {
+		fprintf(stderr, "register files failed: %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(buf); i++)
+		buf[i] = rand();
+
+	sqe = io_uring_get_sqe(src);
+	io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0);
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(src);
+	io_uring_prep_msg_ring_fd(sqe, d.ring_fd, 0, 0, 0, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(src);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(src, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe: %d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "cqe res %d\n", cqe->res);
+			return 1;
+		}
+		if (cqe->user_data == 1 && cqe->res != sizeof(buf)) {
+			fprintf(stderr, "short write %d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(src, cqe);
+	}
+
+	pthread_join(d.thread, &tret);
+
+	if (memcmp(buf, d.buf, sizeof(buf))) {
+		fprintf(stderr, "buffers differ\n");
+		return 1;
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_unregister_files(src);
+	return 0;
+}
+
+static int test_local(struct io_uring *src, struct io_uring *dst)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int fds[2], fd, ret;
+	char buf[32], dst_buf[32];
+	int i;
+
+	if (no_fd_pass)
+		return 0;
+
+	fd = -1;
+	ret = io_uring_register_files(dst, &fd, 1);
+	if (ret) {
+		if (ret == -EBADF || ret == -EINVAL)
+			return 0;
+		fprintf(stderr, "register files failed: %d\n", ret);
+		return 1;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	fd = fds[0];
+	ret = io_uring_register_files(src, &fd, 1);
+	if (ret) {
+		fprintf(stderr, "register files failed: %d\n", ret);
+		return 1;
+	}
+
+	memset(dst_buf, 0, sizeof(dst_buf));
+	for (i = 0; i < ARRAY_SIZE(buf); i++)
+		buf[i] = rand();
+
+	sqe = io_uring_get_sqe(src);
+	io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0);
+	sqe->user_data = 1;
+
+	sqe = io_uring_get_sqe(src);
+	io_uring_prep_msg_ring_fd(sqe, dst->ring_fd, 0, 0, 10, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(src);
+
+	fd = -1;
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(src, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe: %d\n", ret);
+			return 1;
+		}
+		if (cqe->user_data == 2 && cqe->res == -EINVAL) {
+			no_fd_pass = 1;
+		} else if (cqe->res < 0) {
+			fprintf(stderr, "cqe res %d\n", cqe->res);
+			return 1;
+		}
+		if (cqe->user_data == 1 && cqe->res != sizeof(buf)) {
+			fprintf(stderr, "short write %d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(src, cqe);
+	}
+
+	if (no_fd_pass)
+		goto out;
+
+	ret = io_uring_wait_cqe(dst, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe dst: %d\n", ret);
+		return 1;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe error dst: %d\n", cqe->res);
+		return 1;
+	}
+
+	fd = cqe->res;
+	io_uring_cqe_seen(dst, cqe);
+	sqe = io_uring_get_sqe(dst);
+	io_uring_prep_read(sqe, fd, dst_buf, sizeof(dst_buf), 0);
+	sqe->flags |= IOSQE_FIXED_FILE;
+	sqe->user_data = 3;
+	io_uring_submit(dst);
+
+	ret = io_uring_wait_cqe(dst, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe dst: %d\n", ret);
+		return 1;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe error dst: %d\n", cqe->res);
+		return 1;
+	}
+	if (cqe->res != sizeof(dst_buf)) {
+		fprintf(stderr, "short read %d\n", cqe->res);
+		return 1;
+	}
+	if (memcmp(buf, dst_buf, sizeof(buf))) {
+		fprintf(stderr, "buffers differ\n");
+		return 1;
+	}
+
+out:
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_unregister_files(src);
+	io_uring_unregister_files(dst);
+	return 0;
+}
+
+static int test(int ring_flags)
+{
+	struct io_uring ring, ring2;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, ring_flags);
+	if (ret) {
+		if (ret == -EINVAL)
+			return 0;
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(8, &ring2, ring_flags);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_local(&ring, &ring2);
+	if (ret) {
+		fprintf(stderr, "test local failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_msg)
+		return T_EXIT_SKIP;
+
+	ret = test_remote(&ring, ring_flags);
+	if (ret) {
+		fprintf(stderr, "test_remote failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+	io_uring_queue_exit(&ring2);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0);
+	if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "ring flags 0 failed\n");
+		return ret;
+	}
+	if (no_msg)
+		return T_EXIT_SKIP;
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN);
+	if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "ring flags defer failed\n");
+		return ret;
+	}
+
+	return ret;
+}
diff --git a/test/msg-ring-flags.c b/test/msg-ring-flags.c
new file mode 100644
index 0000000..ebf848c
--- /dev/null
+++ b/test/msg-ring-flags.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test ring messaging with flags command
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define CUSTOM_FLAG 0x42
+#define USER_DATA 0x5aa5
+#define LEN 0x20
+#define ID 0x1
+
+struct data {
+	pthread_barrier_t barrier;
+	int fd;
+};
+
+static int recv_msg(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->user_data != USER_DATA) {
+		fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != LEN) {
+		fprintf(stderr, "len %x\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->flags != CUSTOM_FLAG) {
+		fprintf(stderr, "flags %x\n", cqe->flags);
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int send_msg(struct io_uring *ring, int target_fd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		return T_EXIT_FAIL;
+	}
+
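+	/*
+	 * Post a CQE directly into the target ring: res carries LEN,
+	 * user_data carries USER_DATA, and cqe->flags the CUSTOM_FLAG
+	 * value that recv_msg() checks on the other side.
+	 */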
+	io_uring_prep_msg_ring_cqe_flags(sqe, target_fd, LEN, USER_DATA,
+					 0, CUSTOM_FLAG);
+	sqe->user_data = ID;
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != 0) {
+		if (cqe->res == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->user_data != ID) {
+		fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return T_EXIT_PASS;
+}
+
+static void *thread_fn(void *data)
+{
+	struct data *d = data;
+	struct io_uring ring;
+	int ret;
+
+	ret = io_uring_queue_init(2, &ring, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER);
+	if (ret) {
+		fprintf(stderr, "ring init failed %d\n", ret);
+		pthread_barrier_wait(&d->barrier);
+		return NULL;
+	}
+
+	d->fd = ring.ring_fd;
+	pthread_barrier_wait(&d->barrier);
+
+	if (recv_msg(&ring))
+		return (void *) 1;
+
+	return NULL;
+}
+
+static int test(int ring_flags)
+{
+	struct io_uring ring, ring2;
+	pthread_t thread;
+	struct data d;
+	void *ret2;
+	int ret, i;
+
+	ret = io_uring_queue_init(2, &ring, ring_flags);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init failed for ring1: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_queue_init(2, &ring2, ring_flags);
+	if (ret) {
+		fprintf(stderr, "io_uring_queue_init failed for ring2: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = send_msg(&ring, ring2.ring_fd);
+	if (ret) {
+		if (ret != T_EXIT_SKIP)
+			fprintf(stderr, "send_msg failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = recv_msg(&ring2);
+	if (ret) {
+		fprintf(stderr, "recv_msg failed: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < 8; i++) {
+		ret = send_msg(&ring, ring2.ring_fd);
+		if (ret) {
+			if (ret != T_EXIT_SKIP)
+				fprintf(stderr, "send_msg failed: %d\n", ret);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < 8; i++) {
+		ret = recv_msg(&ring2);
+		if (ret) {
+			fprintf(stderr, "recv_msg failed: %d\n", ret);
+			return ret;
+		}
+	}
+
+	pthread_barrier_init(&d.barrier, NULL, 2);
+	d.fd = -1;
+	pthread_create(&thread, NULL, thread_fn, &d);
+	pthread_barrier_wait(&d.barrier);
+	if (d.fd == -1)
+		return T_EXIT_FAIL;
+
+	ret = send_msg(&ring, d.fd);
+	if (ret) {
+		fprintf(stderr, "send msg failed: %d\n", ret);
+		return ret;
+	}
+	pthread_join(thread, &ret2);
+	if (ret2) {
+		fprintf(stderr, "Remote test failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+	io_uring_queue_exit(&ring2);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test ring_flags 0 failed\n");
+		return ret;
+	} else if (ret == T_EXIT_SKIP)
+		return ret;
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test ring_flags defer failed\n");
+		return ret;
+	}
+
+	return ret;
+}
diff --git a/test/msg-ring-overflow.c b/test/msg-ring-overflow.c
new file mode 100644
index 0000000..8dcdf89
--- /dev/null
+++ b/test/msg-ring-overflow.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test ring messaging command
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int no_msg;
+
+static int test(struct io_uring *ring, unsigned dst_flags)
+{
+	struct io_uring_params p = { };
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring dst;
+	int ret, i, err_ret = T_EXIT_FAIL;
+
+	p.flags = dst_flags | IORING_SETUP_CQSIZE;
+	p.cq_entries = 4;
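+	/* a 4-entry CQ receiving 8 messages forces the target into overflow */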
+	ret = io_uring_queue_init_params(4, &dst, &p);
+	if (ret) {
+		fprintf(stderr, "Destination ring create failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < 8; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "get sqe failed\n");
+			goto err;
+		}
+
+		io_uring_prep_msg_ring(sqe, dst.ring_fd, 0x10, 0x1234, 0);
+		sqe->user_data = i + 1;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != 8) {
+		/*
+		 * Likely an old kernel that doesn't support the opcode,
+		 * just skip the test.
+		 */
+		if (ret == 1) {
+			err_ret = T_EXIT_SKIP;
+			no_msg = 1;
+			goto err;
+		}
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	for (i = 0; i < 8; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "wait completion %d\n", ret);
+			goto err;
+		}
+		switch (cqe->user_data) {
+		case 1 ... 8:
+			if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) {
+				no_msg = 1;
+				goto out;
+			}
+			if (cqe->res != 0) {
+				fprintf(stderr, "cqe res %d\n", cqe->res);
+				goto err;
+			}
+			break;
+		case 0x1234:
+			if (cqe->res != 0x10) {
+				fprintf(stderr, "invalid len %x\n", cqe->res);
+				goto err;
+			}
+			break;
+		default:
+			fprintf(stderr, "Invalid user_data\n");
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	for (i = 0; i < 8; i++) {
+		ret = io_uring_wait_cqe(&dst, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "wait completion %d\n", ret);
+			goto err;
+		}
+		switch (cqe->user_data) {
+		case 0x1234:
+			if (cqe->res != 0x10) {
+				fprintf(stderr, "invalid len %x\n", cqe->res);
+				goto err;
+			}
+			break;
+		default:
+			fprintf(stderr, "Invalid user_data\n");
+			goto err;
+		}
+		io_uring_cqe_seen(&dst, cqe);
+	}
+
+out:
+	io_uring_queue_exit(&dst);
+	return no_msg ? T_EXIT_SKIP : T_EXIT_PASS;
+err:
+	io_uring_queue_exit(&dst);
+	return err_ret;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring src;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &src, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(&src, 0);
+	if (ret && !no_msg) {
+		fprintf(stderr, "test failed\n");
+		return ret;
+	}
+	if (no_msg)
+		return T_EXIT_SKIP;
+
+	ret = test(&src, IORING_SETUP_IOPOLL);
+	if (ret) {
+		fprintf(stderr, "test IOPOLL failed\n");
+		return ret;
+	}
+
+	ret = test(&src, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER);
+	if (ret) {
+		fprintf(stderr, "test defer failed\n");
+		return ret;
+	}
+
+	ret = test(&src, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_IOPOLL);
+	if (ret) {
+		fprintf(stderr, "test defer IOPOLL failed\n");
+		return ret;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/msg-ring.c b/test/msg-ring.c
index 48c4a64..e861ddf 100644
--- a/test/msg-ring.c
+++ b/test/msg-ring.c
@@ -12,6 +12,7 @@
 #include <pthread.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static int no_msg;
 
@@ -71,13 +72,27 @@
 	return 1;
 }
 
-static void *wait_cqe_fn(void *data)
+struct data {
+	struct io_uring *ring;
+	unsigned int flags;
+	pthread_barrier_t startup;
+	pthread_barrier_t barrier;
+};
+
+static void *wait_cqe_fn(void *__data)
 {
-	struct io_uring *ring = data;
+	struct data *d = __data;
 	struct io_uring_cqe *cqe;
+	struct io_uring ring;
 	int ret;
 
-	ret = io_uring_wait_cqe(ring, &cqe);
+	io_uring_queue_init(4, &ring, d->flags);
+	d->ring = &ring;
+	pthread_barrier_wait(&d->startup);
+
+	pthread_barrier_wait(&d->barrier);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
 	if (ret) {
 		fprintf(stderr, "wait cqe %d\n", ret);
 		goto err;
@@ -92,17 +107,33 @@
 		goto err;
 	}
 
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
 	return NULL;
 err:
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
 	return (void *) (unsigned long) 1;
 }
 
-static int test_remote(struct io_uring *ring, struct io_uring *target)
+static int test_remote(struct io_uring *ring, unsigned int ring_flags)
 {
+	struct io_uring *target;
+	pthread_t thread;
+	void *tret;
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe;
+	struct data d;
 	int ret;
 
+	d.flags = ring_flags;
+	pthread_barrier_init(&d.barrier, NULL, 2);
+	pthread_barrier_init(&d.startup, NULL, 2);
+	pthread_create(&thread, NULL, wait_cqe_fn, &d);
+
+	pthread_barrier_wait(&d.startup);
+	target = d.ring;
+
 	sqe = io_uring_get_sqe(ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
@@ -118,6 +149,8 @@
 		goto err;
 	}
 
+	pthread_barrier_wait(&d.barrier);
+
 	ret = io_uring_wait_cqe(ring, &cqe);
 	if (ret < 0) {
 		fprintf(stderr, "wait completion %d\n", ret);
@@ -125,32 +158,119 @@
 	}
 	if (cqe->res != 0) {
 		fprintf(stderr, "cqe res %d\n", cqe->res);
+		io_uring_cqe_seen(ring, cqe);
 		return -1;
 	}
 	if (cqe->user_data != 1) {
 		fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data);
+		io_uring_cqe_seen(ring, cqe);
 		return -1;
 	}
 
 	io_uring_cqe_seen(ring, cqe);
+	pthread_join(thread, &tret);
 	return 0;
 err:
 	return 1;
 }
 
-static int test_invalid(struct io_uring *ring)
+static void *remote_submit_fn(void *data)
 {
-	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring *target = data;
+	struct io_uring ring;
 	int ret;
 
-	sqe = io_uring_get_sqe(ring);
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "thread ring setup failed: %d\n", ret);
+		goto err;
+	}
+	sqe = io_uring_get_sqe(&ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
 		goto err;
 	}
 
-	io_uring_prep_msg_ring(sqe, 1, 0, 0x8989, 0);
+	io_uring_prep_msg_ring(sqe, target->ring_fd, 0x20, 0x5aa5, 0);
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(&ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	if (cqe->res != 0 || cqe->user_data != 1) {
+		fprintf(stderr, "invalid cqe\n");
+		goto err;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
+	return NULL;
+err:
+	return (void *) (unsigned long) 1;
+}
+
+static int test_remote_submit(struct io_uring *target)
+{
+	struct io_uring_cqe *cqe;
+	pthread_t thread;
+	void *tret;
+	int ret;
+
+	pthread_create(&thread, NULL, remote_submit_fn, target);
+
+	ret = io_uring_wait_cqe(target, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	if (cqe->res != 0x20) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return -1;
+	}
+	if (cqe->user_data != 0x5aa5) {
+		fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data);
+		return -1;
+	}
+	io_uring_cqe_seen(target, cqe);
+	pthread_join(thread, &tret);
+	return 0;
+err:
+	return 1;
+}
+
+static int test_invalid(struct io_uring *ring, bool fixed)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret, fd = 1;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		return 1;
+	}
+
+	if (fixed) {
+		ret = io_uring_register_files(ring, &fd, 1);
+		if (ret) {
+			fprintf(stderr, "file register %d\n", ret);
+			return 1;
+		}
+		io_uring_prep_msg_ring(sqe, 0, 0, 0x8989, 0);
+		sqe->flags |= IOSQE_FIXED_FILE;
+	} else {
+		io_uring_prep_msg_ring(sqe, 1, 0, 0x8989, 0);
+	}
+
 	sqe->user_data = 1;
 
 	ret = io_uring_submit(ring);
@@ -170,67 +290,184 @@
 	}
 
 	io_uring_cqe_seen(ring, cqe);
+	if (fixed)
+		io_uring_unregister_files(ring);
 	return 0;
 err:
+	if (fixed)
+		io_uring_unregister_files(ring);
 	return 1;
 }
 
-int main(int argc, char *argv[])
+static int test_disabled_ring(struct io_uring *ring, int flags)
 {
-	struct io_uring ring, ring2, pring;
-	pthread_t thread;
-	void *tret;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring disabled_ring;
 	int ret;
 
-	if (argc > 1)
-		return 0;
+	flags |= IORING_SETUP_R_DISABLED;
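+	/*
+	 * The target ring starts disabled; depending on the kernel, the
+	 * MSG_RING below either succeeds or fails with -EBADFD. Both
+	 * outcomes are accepted.
+	 */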
+	ret = io_uring_queue_init(8, &disabled_ring, flags);
+	if (ret) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
 
-	ret = io_uring_queue_init(8, &ring, 0);
-	if (ret) {
-		fprintf(stderr, "ring setup failed: %d\n", ret);
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_msg_ring(sqe, disabled_ring.ring_fd, 0x10, 0x1234, 0);
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
 		return 1;
 	}
-	ret = io_uring_queue_init(8, &ring2, 0);
-	if (ret) {
-		fprintf(stderr, "ring setup failed: %d\n", ret);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
 		return 1;
 	}
-	ret = io_uring_queue_init(8, &pring, IORING_SETUP_IOPOLL);
+	if (cqe->res != 0 && cqe->res != -EBADFD) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return 1;
+	}
+	if (cqe->user_data != 1) {
+		fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data);
+		return 1;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	io_uring_queue_exit(&disabled_ring);
+	return 0;
+}
+
+static int test(int ring_flags)
+{
+	struct io_uring ring, ring2, pring;
+	int ret, i;
+
+	ret = io_uring_queue_init(8, &ring, ring_flags);
+	if (ret) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(8, &ring2, ring_flags);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(8, &pring, ring_flags | IORING_SETUP_IOPOLL);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
 	}
 
 	ret = test_own(&ring);
 	if (ret) {
 		fprintf(stderr, "test_own failed\n");
-		return ret;
+		return T_EXIT_FAIL;
 	}
-	if (no_msg) {
-		fprintf(stdout, "Skipped\n");
-		return 0;
-	}
+	if (no_msg)
+		return T_EXIT_SKIP;
 	ret = test_own(&pring);
 	if (ret) {
 		fprintf(stderr, "test_own iopoll failed\n");
-		return ret;
+		return T_EXIT_FAIL;
 	}
 
-	ret = test_invalid(&ring);
+	ret = test_invalid(&ring, 0);
 	if (ret) {
 		fprintf(stderr, "test_invalid failed\n");
-		return ret;
+		return T_EXIT_FAIL;
 	}
 
-	pthread_create(&thread, NULL, wait_cqe_fn, &ring2);
+	for (i = 0; i < 2; i++) {
+		ret = test_invalid(&ring, 1);
+		if (ret) {
+			fprintf(stderr, "test_invalid fixed failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
 
-	ret = test_remote(&ring, &ring2);
+	ret = test_remote(&ring, ring_flags);
 	if (ret) {
 		fprintf(stderr, "test_remote failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+	io_uring_queue_exit(&pring);
+
+	if (t_probe_defer_taskrun()) {
+		ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+						    IORING_SETUP_DEFER_TASKRUN);
+		if (ret) {
+			fprintf(stderr, "deferred ring setup failed: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_own(&ring);
+		if (ret) {
+			fprintf(stderr, "test_own deferred failed\n");
+			return T_EXIT_FAIL;
+		}
+
+		for (i = 0; i < 2; i++) {
+			ret = test_invalid(&ring, i);
+			if (ret) {
+				fprintf(stderr, "test_invalid(0) deferred failed\n");
+				return T_EXIT_FAIL;
+			}
+		}
+
+		ret = test_remote_submit(&ring);
+		if (ret) {
+			fprintf(stderr, "test_remote_submit failed\n");
+			return T_EXIT_FAIL;
+		}
+		io_uring_queue_exit(&ring);
+
+		if (test_disabled_ring(&ring2, 0)) {
+			fprintf(stderr, "test_disabled_ring failed\n");
+			return T_EXIT_FAIL;
+		}
+
+		if (test_disabled_ring(&ring2, IORING_SETUP_SINGLE_ISSUER |
+						IORING_SETUP_DEFER_TASKRUN)) {
+			fprintf(stderr, "test_disabled_ring defer failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	io_uring_queue_exit(&ring2);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "ring flags 0 failed\n");
+		return ret;
+	} else if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	}
+
+	ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "ring flags defer failed\n");
 		return ret;
 	}
 
-	pthread_join(thread, &tret);
-
-	return 0;
+	return ret;
 }
diff --git a/test/multicqes_drain.c b/test/multicqes_drain.c
index b16dc52..4ed8345 100644
--- a/test/multicqes_drain.c
+++ b/test/multicqes_drain.c
@@ -17,6 +17,7 @@
 #include <poll.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 enum {
 	multi,
@@ -37,16 +38,20 @@
  * sqe_flags: combination of sqe flags
  * multi_sqes: record the user_data/index of all the multishot sqes
  * cnt: how many entries there are in multi_sqes
- * we can leverage multi_sqes array for cancellation: we randomly pick
- * up an entry in multi_sqes when form a cancellation sqe.
+ * we can leverage multi_sqes array for cancelation: we randomly pick
+ * up an entry in multi_sqes when forming a cancelation sqe.
  * multi_cap: limitation of number of multishot sqes
  */
-const unsigned sqe_flags[4] = {0, IOSQE_IO_LINK, IOSQE_IO_DRAIN,
-	IOSQE_IO_LINK | IOSQE_IO_DRAIN};
-int multi_sqes[max_entry], cnt = 0;
-int multi_cap = max_entry / 5;
+static const unsigned sqe_flags[4] = {
+	0,
+	IOSQE_IO_LINK,
+	IOSQE_IO_DRAIN,
+	IOSQE_IO_LINK | IOSQE_IO_DRAIN
+};
+static int multi_sqes[max_entry], cnt = 0;
+static int multi_cap = max_entry / 5;
 
-int write_pipe(int pipe, char *str)
+static int write_pipe(int pipe, char *str)
 {
 	int ret;
 	do {
@@ -56,7 +61,7 @@
 	return ret;
 }
 
-void read_pipe(int pipe)
+static void read_pipe(int pipe)
 {
 	char str[4] = {0};
 	int ret;
@@ -66,18 +71,21 @@
 		perror("read");
 }
 
-int trigger_event(int p[])
+static int trigger_event(struct io_uring *ring, int p[])
 {
 	int ret;
 	if ((ret = write_pipe(p[1], "foo")) != 3) {
 		fprintf(stderr, "bad write return %d\n", ret);
 		return 1;
 	}
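+	/* with DEFER_TASKRUN, completions only post once events are reaped */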
+	usleep(1000);
+	io_uring_get_events(ring);
 	read_pipe(p[0]);
 	return 0;
 }
 
-void io_uring_sqe_prep(int op, struct io_uring_sqe *sqe, unsigned sqe_flags, int arg)
+static void io_uring_sqe_prep(int op, struct io_uring_sqe *sqe,
+			      unsigned sqe_flags, int arg)
 {
 	switch (op) {
 		case multi:
@@ -97,11 +105,11 @@
 	sqe->flags = sqe_flags;
 }
 
-__u8 generate_flags(int sqe_op)
+static __u8 generate_flags(int sqe_op)
 {
 	__u8 flags = 0;
 	/*
-	 * drain sqe must be put after multishot sqes cancelled
+	 * a drain sqe must be put after the multishot sqes are canceled
 	 */
 	do {
 		flags = sqe_flags[rand() % 4];
@@ -116,7 +124,7 @@
 	/*
 	 * avoid below case:
 	 * sqe0(multishot, link)->sqe1(nop, link)->sqe2(nop)->sqe3(cancel_sqe0)
-	 * sqe3 may excute before sqe0 so that sqe0 isn't cancelled
+	 * sqe3 may execute before sqe0 so that sqe0 isn't canceled
 	 */
 	if (sqe_op == multi)
 		flags &= ~IOSQE_IO_LINK;
@@ -135,7 +143,7 @@
  * - ensure number of multishot sqes doesn't exceed multi_cap
  * - don't generate multishot sqes after high watermark
  */
-int generate_opcode(int i, int pre_flags)
+static int generate_opcode(int i, int pre_flags)
 {
 	int sqe_op;
 	int high_watermark = max_entry - max_entry / 5;
@@ -162,7 +170,7 @@
 	multi_sqes[cnt++] = index;
 }
 
-int remove_multishot_sqe()
+static int remove_multishot_sqe(void)
 {
 	int ret;
 
@@ -225,12 +233,12 @@
 	}
 
 	sleep(1);
-	// TODO: randomize event triggerring order
+	// TODO: randomize event triggering order
 	for (i = 0; i < max_entry; i++) {
 		if (si[i].op != multi && si[i].op != single)
 			continue;
 
-		if (trigger_event(pipes[i]))
+		if (trigger_event(ring, pipes[i]))
 			goto err;
 	}
 	sleep(1);
@@ -245,7 +253,7 @@
 	 * compl_bits is a bit map to record completions.
 	 * eg. sqe[0], sqe[1], sqe[2] fully completed
 	 * then compl_bits is 000...00111b
-	 * 
+	 *
 	 */
 	unsigned long long compl_bits = 0;
 	for (j = 0; j < i; j++) {
@@ -257,7 +265,7 @@
 			}
 		}
 		/*
-		 * for multishot sqes, record them only when it is cancelled
+		 * for multishot sqes, record them only when it is canceled
 		 */
 		if ((si[index].op != multi) || (cqe_res[j] == -ECANCELED))
 			compl_bits |= (1ULL << index);
@@ -294,7 +302,12 @@
 	io_uring_prep_poll_add(sqe[1], pipe2[0], POLLIN);
 	sqe[1]->user_data = 1;
 
-	ret = io_uring_submit(ring);
+	/*
+	 * This test relies on multishot poll to trigger events continually.
+	 * However, with IORING_SETUP_DEFER_TASKRUN that only happens when
+	 * they are reaped with a get_events. Hence we sprinkle get_events
+	 * wherever there might be work to process, to get the same result.
+	 */
+	ret = io_uring_submit_and_get_events(ring);
 	if (ret < 0) {
 		printf("sqe submit failed\n");
 		goto err;
@@ -304,11 +317,11 @@
 	}
 
 	for (i = 0; i < 2; i++) {
-		if (trigger_event(pipe1))
+		if (trigger_event(ring, pipe1))
 			goto err;
 	}
-	if (trigger_event(pipe2))
-			goto err;
+	if (trigger_event(ring, pipe2))
+		goto err;
 
 	for (i = 0; i < 2; i++) {
 		sqe[i] = io_uring_get_sqe(ring);
@@ -354,25 +367,27 @@
 	return 1;
 }
 
-int main(int argc, char *argv[])
+static int test(bool defer_taskrun)
 {
 	struct io_uring ring;
 	int i, ret;
+	unsigned int flags = 0;
 
-	if (argc > 1)
-		return 0;
+	if (defer_taskrun)
+		flags = IORING_SETUP_SINGLE_ISSUER |
+			IORING_SETUP_DEFER_TASKRUN;
 
-	ret = io_uring_queue_init(1024, &ring, 0);
+	ret = io_uring_queue_init(1024, &ring, flags);
 	if (ret) {
 		printf("ring setup failed\n");
-		return 1;
+		return T_EXIT_FAIL;
 	}
 
 	for (i = 0; i < 5; i++) {
 		ret = test_simple_drain(&ring);
 		if (ret) {
 			fprintf(stderr, "test_simple_drain failed\n");
-			break;
+			return T_EXIT_FAIL;
 		}
 	}
 
@@ -380,8 +395,35 @@
 		ret = test_generic_drain(&ring);
 		if (ret) {
 			fprintf(stderr, "test_generic_drain failed\n");
-			break;
+			return T_EXIT_FAIL;
 		}
 	}
+
+	io_uring_queue_exit(&ring);
+
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(false);
+	if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "%s: test(false) failed\n", argv[0]);
+		return ret;
+	}
+
+	if (t_probe_defer_taskrun()) {
+		ret = test(true);
+		if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "%s: test(true) failed\n", argv[0]);
+			return ret;
+		}
+	}
+
 	return ret;
 }
diff --git a/test/napi-test.c b/test/napi-test.c
new file mode 100644
index 0000000..f359669
--- /dev/null
+++ b/test/napi-test.c
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run NAPI receive test. Meant to be run from the associated
+ *		script, napi-test.sh. That will invoke this test program
+ *		as either a sender or receiver, with the queue flags passed
+ *		in for testing.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static const char receiver_address[] = "10.10.10.20";
+static const int port = 9999;
+#define BUF_SIZE 4096
+
+static char buffer[BUF_SIZE];
+static unsigned current_byte = 0;
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	int ret = setsockopt(fd, level, optname, &val, sizeof(val));
+
+	assert(ret == 0);
+}
+
+static int sender(int queue_flags)
+{
+	unsigned long long written = 0;
+	struct sockaddr_in addr;
+	struct io_uring ring;
+	int i, ret, fd;
+
+	/*
+	 * Sender doesn't use the ring, but try to set one up with the same
+	 * flags that the receiver will use. If that fails, we know the
+	 * receiver will have failed too - just skip the test in that case.
+	 */
+	ret = io_uring_queue_init(1, &ring, queue_flags);
+	if (ret)
+		return T_EXIT_SKIP;
+	io_uring_queue_exit(&ring);
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(port);
+	ret = inet_pton(AF_INET, receiver_address, &addr.sin_addr);
+	assert(ret == 1);
+
+	fd = socket(PF_INET, SOCK_STREAM, 0);
+	assert(fd >= 0);
+
+	/* don't race with receiver, give it 1 second to connect */
+	i = 0;
+	do {
+		ret = connect(fd, (void *)&addr, sizeof(addr));
+		if (!ret)
+			break;
+		if (ret == -1 && errno == ECONNREFUSED) {
+			if (++i >= 10000) {
+				fprintf(stderr, "Gave up trying to connect\n");
+				return 1;
+			}
+			usleep(100);
+			continue;
+		}
+		perror("connect");
+		return 1;
+	} while (1);
+
+	while (written < 8 * 1024 * 1024) {
+		for (i = 0; i < BUF_SIZE; i++)
+			buffer[i] = current_byte + i;
+
+		ret = write(fd, buffer, BUF_SIZE);
+		if (ret <= 0) {
+			if (!ret || errno == ECONNRESET)
+				break;
+			fprintf(stderr, "write failed %i %i\n", ret, errno);
+			return 1;
+		}
+		written += ret;
+		current_byte += ret;
+	}
+
+	close(fd);
+	return 0;
+}
+
+static int receiver(int queue_flags)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	struct io_uring_napi napi = { };
+	struct sockaddr_in addr;
+	int fd, listen_fd;
+	int i, ret;
+
+	ret = io_uring_queue_init(8, &ring, queue_flags);
+	if (ret < 0) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
+		return 1;
+	}
+
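+	/* enable NAPI busy polling, preferred over irq, 50 usec budget */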
+	napi.prefer_busy_poll = 1;
+	napi.busy_poll_to = 50;
+	io_uring_register_napi(&ring, &napi);
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(port);
+	addr.sin_addr.s_addr = INADDR_ANY;
+
+	listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+	assert(listen_fd >= 0);
+
+	do_setsockopt(listen_fd, SOL_SOCKET, SO_REUSEPORT, 1);
+	ret = bind(listen_fd, (void *)&addr, sizeof(addr));
+	if (ret) {
+		fprintf(stderr, "bind failed %i %i\n", ret, errno);
+		return 1;
+	}
+
+	ret = listen(listen_fd, 8);
+	assert(ret == 0);
+
+	fd = accept(listen_fd, NULL, NULL);
+	assert(fd >= 0);
+
+	while (1) {
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_recv(sqe, fd, buffer, BUF_SIZE, 0);
+
+		ret = io_uring_submit(&ring);
+		if (ret != 1) {
+			fprintf(stderr, "io_uring_submit: %i\n", ret);
+			return 1;
+		}
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "io_uring_wait_cqe: %i\n", ret);
+			return 1;
+		}
+
+		ret = cqe->res;
+		if (ret <= 0) {
+			if (!ret)
+				break;
+			fprintf(stderr, "recv failed %i %i\n", ret, errno);
+			return 1;
+		}
+
+		for (i = 0; i < ret; i++) {
+			char expected = current_byte + i;
+
+			if (buffer[i] != expected) {
+				fprintf(stderr, "data mismatch: idx %i, %c vs %c\n",
+					i, buffer[i], expected);
+				return 1;
+			}
+		}
+
+		current_byte += ret;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	close(fd);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int queue_flags;
+	int is_rx;
+
+	if (geteuid()) {
+		fprintf(stdout, "NAPI test requires root\n");
+		return T_EXIT_SKIP;
+	}
+
+	if (argc == 1) {
+		struct stat sb;
+
+		if (!stat("napi-test.sh", &sb)) {
+			return system("bash napi-test.sh");
+		} else if (!stat("test/napi-test.sh", &sb)) {
+			return system("bash test/napi-test.sh");
+		} else {
+			fprintf(stderr, "Can't find napi-test.sh\n");
+			return T_EXIT_SKIP;
+		}
+	} else if (argc == 2) {
+		return T_EXIT_SKIP;
+	} else if (argc != 3) {
+		return T_EXIT_SKIP;
+	}
+
+	if (!strcmp(argv[1], "receive"))
+		is_rx = 1;
+	else if (!strcmp(argv[1], "send"))
+		is_rx = 0;
+	else
+		return T_EXIT_FAIL;
+
+	queue_flags = strtoul(argv[2], NULL, 16);
+
+	if (is_rx)
+		return receiver(queue_flags);
+
+	return sender(queue_flags);
+}
diff --git a/test/napi-test.sh b/test/napi-test.sh
new file mode 100644
index 0000000..86ef218
--- /dev/null
+++ b/test/napi-test.sh
@@ -0,0 +1,48 @@
+#! /usr/bin/env bash
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "Need ip installed"
+	exit 77
+fi
+if [ ! -x "$(command -v ethtool)" ]; then
+	echo "Need ethool installed"
+	exit 77
+fi
+
+function clean_namespaces {
+	ip netns del iou-nscl
+	ip netns del iou-nsserv
+}
+trap clean_namespaces EXIT
+
+ip link add iou-ptp-cl type veth peer name iou-ptp-serv
+
+ip netns add iou-nscl
+ip link set iou-ptp-cl netns iou-nscl
+ip netns exec iou-nscl ip addr add '10.10.10.10/24' dev iou-ptp-cl
+ip netns exec iou-nscl ethtool -K iou-ptp-cl tcp-segmentation-offload off
+ip netns exec iou-nscl ethtool -K iou-ptp-cl generic-receive-offload on
+ip netns exec iou-nscl ip link set dev iou-ptp-cl up
+
+ip netns add iou-nsserv
+ip link set iou-ptp-serv netns iou-nsserv
+ip netns exec iou-nsserv ip addr add '10.10.10.20/24' dev iou-ptp-serv
+ip netns exec iou-nsserv ethtool -K iou-ptp-serv tcp-segmentation-offload off
+ip netns exec iou-nsserv ethtool -K iou-ptp-serv generic-receive-offload on
+ip netns exec iou-nsserv ip link set dev iou-ptp-serv up
+
+# test basic init, defer_taskrun, and sqpoll
+QUEUE_FLAGS="0x0 0x3000 0x2"
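+# 0x3000 = SINGLE_ISSUER|DEFER_TASKRUN, 0x2 = SQPOLL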
+for flags in $QUEUE_FLAGS; do
+	if [ -f "napi-test.t" ]; then
+		NAPI_TEST="./napi-test.t"
+	elif [ -f "test/napi-test.t" ]; then
+		NAPI_TEST="test/napi-test.t"
+	else
+		echo "Can't find napi-test.t"
+		exit 77
+	fi
+	ip netns exec iou-nsserv $NAPI_TEST receive $flags &
+	ip netns exec iou-nscl $NAPI_TEST send $flags
+	wait
+done
diff --git a/test/no-mmap-inval.c b/test/no-mmap-inval.c
new file mode 100644
index 0000000..22f322a
--- /dev/null
+++ b/test/no-mmap-inval.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that using SETUP_NO_MMAP with an invalid SQ ring
+ *		address fails.
+ *
+ */
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_params p = {
+		.sq_entries	= 2,
+		.cq_entries	= 4,
+		.flags		= IORING_SETUP_NO_MMAP,
+	};
+	struct io_uring ring;
+	void *addr;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
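+	/*
+	 * With SETUP_NO_MMAP the application provides the ring memory up
+	 * front: cq_off.user_addr carries the SQ/CQ ring allocation and
+	 * sq_off.user_addr the SQE array. Only the former is filled in
+	 * here, so setup must reject the missing SQE address.
+	 */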
+	t_posix_memalign(&addr, sysconf(_SC_PAGESIZE), 8192);
+	p.cq_off.user_addr = (unsigned long long) (uintptr_t) addr;
+
+	ret = io_uring_queue_init_params(2, &ring, &p);
+	if (ret == -EINVAL) {
+		/* kernel doesn't support SETUP_NO_MMAP */
+		free(addr);
+		return T_EXIT_SKIP;
+	} else if (ret && (ret != -EFAULT && ret != -ENOMEM)) {
+		fprintf(stderr, "Got %d, wanted -EFAULT\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	free(addr);
+	return T_EXIT_PASS;
+}
diff --git a/test/nolibc.c b/test/nolibc.c
new file mode 100644
index 0000000..3b0c4c0
--- /dev/null
+++ b/test/nolibc.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test liburing nolibc functionality.
+ *
+ * Currently, supported architectures are:
+ *   1) x86
+ *   2) x86-64
+ *   3) aarch64
+ *   4) riscv64
+ *
+ */
+#include "helpers.h"
+
+#if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && !(defined(__riscv) && __riscv_xlen == 64)
+
+/*
+ * This arch doesn't support nolibc.
+ */
+int main(void)
+{
+	return T_EXIT_SKIP;
+}
+
+#else /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && !(defined(__riscv) && __riscv_xlen == 64) */
+
+#ifndef CONFIG_NOLIBC
+#define CONFIG_NOLIBC
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include "../src/lib.h"
+
+static int test_get_page_size(void)
+{
+	long a, b;
+
+	a = sysconf(_SC_PAGESIZE);
+	b = get_page_size();
+	if (a != b) {
+		fprintf(stderr, "get_page_size() fails, %ld != %ld\n", a, b);
+		return -1;
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test_get_page_size();
+	if (ret)
+		return T_EXIT_FAIL;
+
+	return T_EXIT_PASS;
+}
+
+#endif /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && !(defined(__riscv) && __riscv_xlen == 64) */
diff --git a/test/nop.c b/test/nop.c
index 1aa88fc..d3ab766 100644
--- a/test/nop.c
+++ b/test/nop.c
@@ -15,6 +15,45 @@
 
 static int seq;
 
+static int test_nop_inject(struct io_uring *ring, unsigned req_flags)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+
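+	/*
+	 * IORING_NOP_INJECT_RESULT completes the NOP with sqe->len as the
+	 * result, so the -EFAULT stored below should come back in cqe->res.
+	 * Kernels without the flag fail the request with -EINVAL instead.
+	 */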
+	io_uring_prep_nop(sqe);
+	sqe->user_data = ++seq;
+	sqe->nop_flags = IORING_NOP_INJECT_RESULT;
+	sqe->flags |= req_flags;
+	sqe->len = -EFAULT;
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	if (cqe->res != -EINVAL && cqe->res != -EFAULT) {
+		fprintf(stderr, "expected injected result, got %d\n", cqe->res);
+		goto err;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+err:
+	return 1;
+}
+
 static int test_single_nop(struct io_uring *ring, unsigned req_flags)
 {
 	struct io_uring_cqe *cqe;
@@ -151,6 +190,11 @@
 			fprintf(stderr, "test_barrier_nop failed\n");
 			goto err;
 		}
+		ret = test_nop_inject(&ring, req_flags);
+		if (ret) {
+			fprintf(stderr, "test_nop_inject failed\n");
+			goto err;
+		}
 	}
 err:
 	io_uring_queue_exit(&ring);
diff --git a/test/nvme.h b/test/nvme.h
new file mode 100644
index 0000000..1254b92
--- /dev/null
+++ b/test/nvme.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Helpers for NVMe uring passthrough commands
+ */
+#ifndef LIBURING_NVME_H
+#define LIBURING_NVME_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/ioctl.h>
+#include <linux/nvme_ioctl.h>
+
+/*
+ * If the uapi headers installed on the system lack nvme uring command
+ * support, use the local definitions to prevent compilation issues.
+ */
+#ifndef CONFIG_HAVE_NVME_URING
+struct nvme_uring_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
+	__u64	addr;
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32   rsvd2;
+};
+
+#define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC	_IOWR('N', 0x81, struct nvme_uring_cmd)
+#endif /* CONFIG_HAVE_NVME_URING */
+
+#define NVME_DEFAULT_IOCTL_TIMEOUT 0
+#define NVME_IDENTIFY_DATA_SIZE 4096
+#define NVME_IDENTIFY_CSI_SHIFT 24
+#define NVME_IDENTIFY_CNS_NS 0
+#define NVME_CSI_NVM 0
+
+enum nvme_admin_opcode {
+	nvme_admin_identify		= 0x06,
+};
+
+enum nvme_io_opcode {
+	nvme_cmd_write			= 0x01,
+	nvme_cmd_read			= 0x02,
+};
+
+static int nsid;
+static __u32 lba_shift;
+
+struct nvme_lbaf {
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ns {
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			nmic;
+	__u8			rescap;
+	__u8			fpi;
+	__u8			dlfeat;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__le16			nabsn;
+	__le16			nabo;
+	__le16			nabspf;
+	__le16			noiob;
+	__u8			nvmcap[16];
+	__le16			npwg;
+	__le16			npwa;
+	__le16			npdg;
+	__le16			npda;
+	__le16			nows;
+	__le16			mssrl;
+	__le32			mcl;
+	__u8			msrc;
+	__u8			rsvd81[11];
+	__le32			anagrpid;
+	__u8			rsvd96[3];
+	__u8			nsattr;
+	__le16			nvmsetid;
+	__le16			endgid;
+	__u8			nguid[16];
+	__u8			eui64[8];
+	struct nvme_lbaf	lbaf[16];
+	__u8			rsvd192[192];
+	__u8			vs[3712];
+};
+
+static inline int ilog2(uint32_t i)
+{
+	int log = -1;
+
+	while (i) {
+		i >>= 1;
+		log++;
+	}
+	return log;
+}
+
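+/*
+ * Query the namespace ID and Identify Namespace data (CNS 0) to compute the
+ * LBA size; lba_shift lets the passthrough tests convert byte offsets and
+ * lengths into LBA units.
+ */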
+__attribute__((__unused__))
+static int nvme_get_info(const char *file)
+{
+	struct nvme_id_ns ns;
+	int fd, err;
+	__u32 lba_size;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		perror("file open");
+		return -errno;
+	}
+
+	nsid = ioctl(fd, NVME_IOCTL_ID);
+	if (nsid < 0) {
+		close(fd);
+		return -errno;
+	}
+
+	struct nvme_passthru_cmd cmd = {
+		.opcode         = nvme_admin_identify,
+		.nsid           = nsid,
+		.addr           = (__u64)(uintptr_t)&ns,
+		.data_len       = NVME_IDENTIFY_DATA_SIZE,
+		.cdw10          = NVME_IDENTIFY_CNS_NS,
+		.cdw11          = NVME_CSI_NVM << NVME_IDENTIFY_CSI_SHIFT,
+		.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+	};
+
+	err = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+	if (err) {
+		close(fd);
+		return err;
+	}
+
+	lba_size = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds;
+	lba_shift = ilog2(lba_size);
+
+	close(fd);
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/test/ooo-file-unreg.c b/test/ooo-file-unreg.c
new file mode 100644
index 0000000..dd6ea55
--- /dev/null
+++ b/test/ooo-file-unreg.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Test that out-of-order file updates with inflight requests
+ *		work as expected.
+ *
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <poll.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_sqe *sqe;
+	int res, fds[2], sockid;
+	struct io_uring ring;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	res = io_uring_queue_init(1, &ring, 0);
+	if (res) {
+		fprintf(stderr, "queue_init: %d\n", res);
+		return T_EXIT_FAIL;
+	}
+
+	res = io_uring_register_files_sparse(&ring, 2);
+	if (res) {
+		if (res == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "sparse reg: %d\n", res);
+		return T_EXIT_FAIL;
+	}
+
+	fds[0] = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fds[0] < 0) {
+		perror("socket");
+		return T_EXIT_FAIL;
+	}
+	fds[1] = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fds[1] < 0) {
+		perror("socket");
+		return T_EXIT_FAIL;
+	}
+
+	res = io_uring_register_files_update(&ring, 0, fds, 2);
+	if (res != 2) {
+		fprintf(stderr, "file update: %d\n", res);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_poll_add(sqe, 0, POLLIN);
+	sqe->flags = IOSQE_FIXED_FILE;
+	io_uring_submit(&ring);
+
+	close(fds[0]);
+	close(fds[1]);
+
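+	/*
+	 * Clear the slots out of order: slot 1 first, then slot 0, which
+	 * still has the poll request inflight. The updates must not disturb
+	 * the inflight request's file reference.
+	 */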
+	sockid = -1;
+	res = io_uring_register_files_update(&ring, 1, &sockid, 1);
+	if (res != 1) {
+		fprintf(stderr, "file update: %d\n", res);
+		return T_EXIT_FAIL;
+	}
+
+	sockid = -1;
+	res = io_uring_register_files_update(&ring, 0, &sockid, 1);
+	if (res != 1) {
+		fprintf(stderr, "file update: %d\n", res);
+		return T_EXIT_FAIL;
+	}
+
+	sleep(1);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
diff --git a/test/open-close.c b/test/open-close.c
index d5c116b..9a85e3f 100644
--- a/test/open-close.c
+++ b/test/open-close.c
@@ -35,6 +35,36 @@
 	return ret;
 }
 
+static int test_close_flush(void)
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	char buf[128];
+	int ret, fd;
+
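+	/*
+	 * Closing trace_pipe_raw goes through a flush-on-close path that
+	 * io_uring must handle. Quietly skip if tracefs isn't accessible.
+	 */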
+	sprintf(buf, "/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw");
+	fd = open(buf, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return -1;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_close(sqe, fd);
+	ret = submit_wait(&ring);
+	if (ret) {
+		fprintf(stderr, "close failed %i\n", ret);
+		return -1;
+	}
+
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
 static inline int try_close(struct io_uring *ring, int fd, int slot)
 {
 	struct io_uring_sqe *sqe;
@@ -220,12 +250,16 @@
 			fprintf(stdout, "Open not supported, skipping\n");
 			goto done;
 		}
+		if (ret == -EPERM || ret == -EACCES)
+			return T_EXIT_SKIP;
 		fprintf(stderr, "test_openat absolute failed: %d\n", ret);
 		goto err;
 	}
 
 	ret = test_openat(&ring, path_rel, AT_FDCWD);
 	if (ret < 0) {
+		if (ret == -EPERM || ret == -EACCES)
+			return T_EXIT_SKIP;
 		fprintf(stderr, "test_openat relative failed: %d\n", ret);
 		goto err;
 	}
@@ -248,6 +282,12 @@
 		goto err;
 	}
 
+	ret = test_close_flush();
+	if (ret) {
+		fprintf(stderr, "test_close_flush failed\n");
+		goto err;
+	}
+
 done:
 	unlink(path);
 	if (do_unlink)
diff --git a/test/open-direct-pick.c b/test/open-direct-pick.c
index b1597e7..a1edf84 100644
--- a/test/open-direct-pick.c
+++ b/test/open-direct-pick.c
@@ -158,7 +158,7 @@
 		return 0;
 	}
 
-	path = "/tmp/.open.close";
+	path = "/tmp/.open.direct.pick";
 	t_create_file(path, 4096);
 
 	ret = test_openat(&ring, path);
diff --git a/test/openat2.c b/test/openat2.c
index 34c0f85..889b6b3 100644
--- a/test/openat2.c
+++ b/test/openat2.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Description: run various openat(2) tests
+ * Description: run various openat2(2) tests
  *
  */
 #include <errno.h>
@@ -15,25 +15,32 @@
 #include "liburing.h"
 
 static int test_openat2(struct io_uring *ring, const char *path, int dfd,
-			bool direct, int fixed_index)
+			bool direct, int fixed_index, int bad_how)
 {
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe;
-	struct open_how how;
+	struct open_how __how, *how;
 	int ret;
 
+	if (bad_how)
+		how = (struct open_how *) (uintptr_t) 0x1234;
+	else
+		how = &__how;
+
 	sqe = io_uring_get_sqe(ring);
 	if (!sqe) {
 		fprintf(stderr, "get sqe failed\n");
 		return -1;
 	}
-	memset(&how, 0, sizeof(how));
-	how.flags = O_RDWR;
+	if (!bad_how) {
+		memset(how, 0, sizeof(*how));
+		how->flags = O_RDWR;
+	}
 
 	if (!direct)
-		io_uring_prep_openat2(sqe, dfd, path, &how);
+		io_uring_prep_openat2(sqe, dfd, path, how);
 	else
-		io_uring_prep_openat2_direct(sqe, dfd, path, &how, fixed_index);
+		io_uring_prep_openat2_direct(sqe, dfd, path, how, fixed_index);
 
 	ret = io_uring_submit(ring);
 	if (ret <= 0) {
@@ -72,11 +79,13 @@
 	}
 	ret = io_uring_register_files(&ring, &fd, 1);
 	if (ret) {
+		if (ret == -EINVAL || ret == -EBADF)
+			return 0;
 		fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret);
 		return -1;
 	}
 
-	ret = test_openat2(&ring, path, dfd, true, 0);
+	ret = test_openat2(&ring, path, dfd, true, 0, 0);
 	if (ret == -EINVAL) {
 		printf("fixed open isn't supported\n");
 		return 1;
@@ -133,7 +142,7 @@
 		return -1;
 	}
 
-	ret = test_openat2(&ring, path, dfd, true, 0);
+	ret = test_openat2(&ring, path, dfd, true, 0, 0);
 	if (ret != -ENXIO) {
 		fprintf(stderr, "install into not existing table, %i\n", ret);
 		return 1;
@@ -141,23 +150,25 @@
 
 	ret = io_uring_register_files(&ring, &fd, 1);
 	if (ret) {
+		if (ret == -EINVAL || ret == -EBADF)
+			return 0;
 		fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret);
 		return -1;
 	}
 
-	ret = test_openat2(&ring, path, dfd, true, 1);
+	ret = test_openat2(&ring, path, dfd, true, 1, 0);
 	if (ret != -EINVAL) {
 		fprintf(stderr, "install out of bounds, %i\n", ret);
 		return -1;
 	}
 
-	ret = test_openat2(&ring, path, dfd, true, (1u << 16));
+	ret = test_openat2(&ring, path, dfd, true, (1u << 16), 0);
 	if (ret != -EINVAL) {
 		fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret);
 		return -1;
 	}
 
-	ret = test_openat2(&ring, path, dfd, true, (1u << 16) + 1);
+	ret = test_openat2(&ring, path, dfd, true, (1u << 16) + 1, 0);
 	if (ret != -EINVAL) {
 		fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret);
 		return -1;
@@ -192,7 +203,7 @@
 	}
 
 	/* reinstall into the second slot */
-	ret = test_openat2(&ring, path, dfd, true, 1);
+	ret = test_openat2(&ring, path, dfd, true, 1, 0);
 	if (ret != 0) {
 		fprintf(stderr, "reinstall failed, %i\n", ret);
 		return -1;
@@ -260,18 +271,22 @@
 	if (do_unlink)
 		t_create_file(path_rel, 4096);
 
-	ret = test_openat2(&ring, path, -1, false, 0);
+	ret = test_openat2(&ring, path, -1, false, 0, 0);
 	if (ret < 0) {
 		if (ret == -EINVAL) {
 			fprintf(stdout, "openat2 not supported, skipping\n");
 			goto done;
 		}
+		if (ret == -EPERM || ret == -EACCES)
+			return T_EXIT_SKIP;
 		fprintf(stderr, "test_openat2 absolute failed: %d\n", ret);
 		goto err;
 	}
 
-	ret = test_openat2(&ring, path_rel, AT_FDCWD, false, 0);
+	ret = test_openat2(&ring, path_rel, AT_FDCWD, false, 0, 0);
 	if (ret < 0) {
+		if (ret == -EPERM || ret == -EACCES)
+			return T_EXIT_SKIP;
 		fprintf(stderr, "test_openat2 relative failed: %d\n", ret);
 		goto err;
 	}
@@ -295,6 +310,18 @@
 		goto err;
 	}
 
+	ret = test_openat2(&ring, (const char *) (uintptr_t) 0x1234, AT_FDCWD, false, 0, 0);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_openat2 bad address failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = test_openat2(&ring, path_rel, AT_FDCWD, false, 0, 1);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_openat2 bad how failed: %d\n", ret);
+		goto err;
+	}
+
 done:
 	unlink(path);
 	if (do_unlink)
diff --git a/test/pipe-bug.c b/test/pipe-bug.c
new file mode 100644
index 0000000..49e4a94
--- /dev/null
+++ b/test/pipe-bug.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Description: tests bug fixed in
+ * "io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL"
+ *
+ * See: https://github.com/axboe/liburing/issues/665
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "helpers.h"
+#include "liburing.h"
+
+#define CHECK(x)								\
+do {										\
+	if (!(x)) {								\
+		fprintf(stderr, "%s:%d %s failed\n", __FILE__, __LINE__, #x);	\
+		return -1;							\
+	}									\
+} while (0)
+
+static int pipe_bug(void)
+{
+	struct io_uring_params p;
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	char buf[1024];
+	int fds[2];
+	struct __kernel_timespec to = {
+		.tv_sec = 1
+	};
+
+	CHECK(pipe(fds) == 0);
+
+	memset(&p, 0, sizeof(p));
+	CHECK(t_create_ring_params(8, &ring, &p) == 0);
+
+	/* WRITE */
+	sqe = io_uring_get_sqe(&ring);
+	CHECK(sqe);
+	io_uring_prep_write(sqe, fds[1], "foobar", strlen("foobar"), 0); /* or -1 */
+	CHECK(io_uring_submit(&ring) == 1);
+	CHECK(io_uring_wait_cqe(&ring, &cqe) == 0);
+
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* CLOSE */
+	sqe = io_uring_get_sqe(&ring);
+	CHECK(sqe);
+	io_uring_prep_close(sqe, fds[1]);
+	CHECK(io_uring_submit(&ring) == 1);
+	CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0);
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* READ */
+	sqe = io_uring_get_sqe(&ring);
+	CHECK(sqe);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); /* or -1 */
+	CHECK(io_uring_submit(&ring) == 1);
+	CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0);
+	io_uring_cqe_seen(&ring, cqe);
+	memset(buf, 0, sizeof(buf));
+
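+	/*
+	 * The write side was closed above, so this read should promptly
+	 * return 0 (EOF). With the original bug the completion's task_work
+	 * was never run and the timed wait would expire instead.
+	 */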
+	/* READ */
+	sqe = io_uring_get_sqe(&ring);
+	CHECK(sqe);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); /* or -1 */
+	CHECK(io_uring_submit(&ring) == 1);
+	CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0);
+	io_uring_cqe_seen(&ring, cqe);
+
+	close(fds[0]);
+	io_uring_queue_exit(&ring);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	for (i = 0; i < 10000; i++) {
+		if (pipe_bug())
+			return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/poll-cancel-all.c b/test/poll-cancel-all.c
index 35116f5..a89e5d5 100644
--- a/test/poll-cancel-all.c
+++ b/test/poll-cancel-all.c
@@ -14,11 +14,22 @@
 
 static int no_cancel_flags;
 
-static int test1(struct io_uring *ring, int *fd)
+static int test1(struct io_uring *ring, int *fd, int fixed)
 {
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
-	int ret, i;
+	int ret, i, __fd = fd[0];
+
+	if (fixed) {
+		__fd = 0;
+		ret = io_uring_register_files(ring, fd, 1);
+		if (ret) {
+			fprintf(stderr, "failed file register %d\n", ret);
+			return 1;
+		}
+	}
 
 	for (i = 0; i < 8; i++) {
 		sqe = io_uring_get_sqe(ring);
@@ -27,8 +38,10 @@
 			return 1;
 		}
 
-		io_uring_prep_poll_add(sqe, fd[0], POLLIN);
+		io_uring_prep_poll_add(sqe, __fd, POLLIN);
 		sqe->user_data = i + 1;
+		if (fixed)
+			sqe->flags |= IOSQE_FIXED_FILE;
 	}
 
 	ret = io_uring_submit(ring);
@@ -51,7 +64,9 @@
 	 */
 	io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL);
 	sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD;
-	sqe->fd = fd[0];
+	if (fixed)
+		sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD_FIXED;
+	sqe->fd = __fd;
 	sqe->user_data = 100;
 
 	ret = io_uring_submit(ring);
@@ -93,6 +108,9 @@
 		io_uring_cqe_seen(ring, cqe);
 	}
 
+	if (fixed)
+		io_uring_unregister_files(ring);
+
 	return 0;
 }
 
@@ -442,7 +460,7 @@
 		return 1;
 	}
 
-	ret = test1(&ring, fd);
+	ret = test1(&ring, fd, 0);
 	if (ret) {
 		fprintf(stderr, "test1 failed\n");
 		return ret;
@@ -450,6 +468,12 @@
 	if (no_cancel_flags)
 		return 0;
 
+	ret = test1(&ring, fd, 1);
+	if (ret) {
+		fprintf(stderr, "test1 fixed failed\n");
+		return ret;
+	}
+
 	ret = test2(&ring, fd);
 	if (ret) {
 		fprintf(stderr, "test2 failed\n");
diff --git a/test/poll-cancel.c b/test/poll-cancel.c
index 0714a80..2298d91 100644
--- a/test/poll-cancel.c
+++ b/test/poll-cancel.c
@@ -154,11 +154,11 @@
 		return 1;
 	}
 
-	/* test timeout-offset triggering path during cancellation */
+	/* test timeout-offset triggering path during cancelation */
 	sqe = io_uring_get_sqe(&ring);
 	io_uring_prep_timeout(sqe, &ts, off_nr, 0);
 
-	/* poll ring2 to trigger cancellation on exit() */
+	/* poll ring2 to trigger cancelation on exit() */
 	sqe = io_uring_get_sqe(&ring);
 	io_uring_prep_poll_add(sqe, ring2.ring_fd, POLLIN);
 	sqe->flags |= IOSQE_IO_LINK;
diff --git a/test/poll-link.c b/test/poll-link.c
index 197ad77..c0b1cf5 100644
--- a/test/poll-link.c
+++ b/test/poll-link.c
@@ -13,10 +13,11 @@
 #include <poll.h>
 #include <arpa/inet.h>
 
+#include "helpers.h"
 #include "liburing.h"
 
-pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
 
 static int recv_thread_ready = 0;
 static int recv_thread_done = 0;
@@ -50,15 +51,15 @@
 
 static void *send_thread(void *arg)
 {
+	struct sockaddr_in addr;
 	struct data *data = arg;
+	int s0;
 
 	wait_for_var(&recv_thread_ready);
 
-	int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 	assert(s0 != -1);
 
-	struct sockaddr_in addr;
-
 	addr.sin_family = AF_INET;
 	addr.sin_port = data->port;
 	addr.sin_addr.s_addr = data->addr;
@@ -70,8 +71,9 @@
 	return 0;
 }
 
-void *recv_thread(void *arg)
+static void *recv_thread(void *arg)
 {
+	struct sockaddr_in addr = { };
 	struct data *data = arg;
 	struct io_uring_sqe *sqe;
 	struct io_uring ring;
@@ -89,27 +91,17 @@
 	ret = setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
 	assert(ret != -1);
 
-	struct sockaddr_in addr;
-
 	addr.sin_family = AF_INET;
 	data->addr = inet_addr("127.0.0.1");
 	addr.sin_addr.s_addr = data->addr;
 
-	i = 0;
-	do {
-		data->port = htons(1025 + (rand() % 64510));
-		addr.sin_port = data->port;
-
-		if (bind(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1)
-			break;
-	} while (++i < 100);
-
-	if (i >= 100) {
-		fprintf(stderr, "Can't find good port, skipped\n");
+	if (t_bind_ephemeral_port(s0, &addr)) {
+		perror("bind");
 		data->stop = 1;
 		signal_var(&recv_thread_ready);
-		goto out;
+		goto err;
 	}
+	data->port = addr.sin_port;
 
 	ret = listen(s0, 128);
 	assert(ret != -1);
@@ -158,7 +150,6 @@
 		io_uring_cqe_seen(&ring, cqe);
 	}
 
-out:
 	signal_var(&recv_thread_done);
 	close(s0);
 	io_uring_queue_exit(&ring);
diff --git a/test/poll-many.c b/test/poll-many.c
index dfbeeab..4f9ce02 100644
--- a/test/poll-many.c
+++ b/test/poll-many.c
@@ -14,6 +14,7 @@
 #include <fcntl.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 #define	NFILES	5000
 #define BATCH	500
@@ -21,6 +22,8 @@
 
 #define RING_SIZE	512
 
+static int nfiles = NFILES;
+
 struct p {
 	int fd[2];
 	int triggered;
@@ -90,7 +93,7 @@
 		int off;
 
 		do {
-			off = rand() % NFILES;
+			off = rand() % nfiles;
 			if (!p[off].triggered)
 				break;
 		} while (1);
@@ -108,7 +111,7 @@
 
 static int arm_polls(struct io_uring *ring)
 {
-	int ret, to_arm = NFILES, i, off;
+	int ret, to_arm = nfiles, i, off;
 
 	off = 0;
 	while (to_arm) {
@@ -137,6 +140,21 @@
 	return 0;
 }
 
+static int do_test(struct io_uring *ring)
+{
+	int i;
+
+	if (arm_polls(ring))
+		return 1;
+
+	for (i = 0; i < NLOOPS; i++) {
+		trigger_polls();
+		if (reap_polls(ring))
+			return 1;
+	}
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	struct io_uring ring;
@@ -145,28 +163,32 @@
 	int i, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_SKIP;
 
 	if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
 		perror("getrlimit");
-		goto err_noring;
+		return T_EXIT_FAIL;
 	}
 
 	if (rlim.rlim_cur < (2 * NFILES + 5)) {
-		rlim.rlim_cur = (2 * NFILES + 5);
-		rlim.rlim_max = rlim.rlim_cur;
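+		/* raise the soft limit to the hard limit and scale the test down */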
+		rlim.rlim_cur = rlim.rlim_max;
+		nfiles = (rlim.rlim_cur / 2) - 5;
+		if (nfiles > NFILES)
+			nfiles = NFILES;
+		if (nfiles <= 0)
+			goto err_nofail;
 		if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
 			if (errno == EPERM)
 				goto err_nofail;
 			perror("setrlimit");
-			goto err_noring;
+			return T_EXIT_FAIL;
 		}
 	}
 
-	for (i = 0; i < NFILES; i++) {
+	for (i = 0; i < nfiles; i++) {
 		if (pipe(p[i].fd) < 0) {
 			perror("pipe");
-			goto err_noring;
+			return T_EXIT_FAIL;
 		}
 	}
 
@@ -176,33 +198,39 @@
 	if (ret) {
 		if (ret == -EINVAL) {
 			fprintf(stdout, "No CQSIZE, trying without\n");
-			ret = io_uring_queue_init(RING_SIZE, &ring, 0);
+
+			params.flags &= ~IORING_SETUP_CQSIZE;
+			params.cq_entries = 0;
+			ret = io_uring_queue_init_params(RING_SIZE, &ring, &params);
 			if (ret) {
 				fprintf(stderr, "ring setup failed: %d\n", ret);
-				return 1;
+				return T_EXIT_FAIL;
 			}
-		}
+		} else {
+			fprintf(stderr, "CQSIZE ring setup failed: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
 	}
 
-	if (arm_polls(&ring))
-		goto err;
-
-	for (i = 0; i < NLOOPS; i++) {
-		trigger_polls();
-		ret = reap_polls(&ring);
-		if (ret)
-			goto err;
+	if (do_test(&ring)) {
+		fprintf(stderr, "test (normal) failed\n");
+		return T_EXIT_FAIL;
 	}
+	io_uring_queue_exit(&ring);
 
-	io_uring_queue_exit(&ring);
+	if (t_probe_defer_taskrun()) {
+		params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+		ret = io_uring_queue_init_params(RING_SIZE, &ring, &params);
+		if (ret) {
+			fprintf(stderr, "ring DEFER setup failed: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (do_test(&ring)) {
+			fprintf(stderr, "test (DEFER) failed\n");
+			return T_EXIT_FAIL;
+		}
+		io_uring_queue_exit(&ring);
+	}
 	return 0;
-err:
-	io_uring_queue_exit(&ring);
-err_noring:
-	fprintf(stderr, "poll-many failed\n");
-	return 1;
 err_nofail:
 	fprintf(stderr, "poll-many: not enough files available (and not root), "
 			"skipped\n");
-	return 0;
+	return T_EXIT_SKIP;
 }
diff --git a/test/poll-mshot-overflow.c b/test/poll-mshot-overflow.c
new file mode 100644
index 0000000..041e872
--- /dev/null
+++ b/test/poll-mshot-overflow.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: MIT
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <poll.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int check_final_cqe(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int count = 0;
+	bool signalled_no_more = false;
+
+	while (!io_uring_peek_cqe(ring, &cqe)) {
+		if (cqe->user_data == 1) {
+			count++;
+			if (signalled_no_more) {
+				fprintf(stderr, "signalled no more!\n");
+				return T_EXIT_FAIL;
+			}
+			if (!(cqe->flags & IORING_CQE_F_MORE))
+				signalled_no_more = true;
+		} else if (cqe->user_data != 3) {
+			fprintf(stderr, "%d: got unexpected %d\n", count, (int)cqe->user_data);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (!count) {
+		fprintf(stderr, "no cqe\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int test(bool defer_taskrun)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int pipe1[2];
+	int ret, i;
+
+	if (pipe(pipe1) != 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	struct io_uring_params params = {
+		/*
+		 * Use SINGLE_ISSUER as a proxy: kernels that support it also
+		 * have the updated overflow behaviour this test relies on.
+		 */
+		.flags = IORING_SETUP_CQSIZE | IORING_SETUP_SINGLE_ISSUER,
+		.cq_entries = 2
+	};
+
+	if (defer_taskrun)
+		params.flags |= IORING_SETUP_SINGLE_ISSUER |
+				IORING_SETUP_DEFER_TASKRUN;
+
+	ret = io_uring_queue_init_params(2, &ring, &params);
+	if (ret)
+		return T_EXIT_SKIP;
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		return T_EXIT_FAIL;
+	}
+	io_uring_prep_poll_multishot(sqe, pipe1[0], POLLIN);
+	io_uring_sqe_set_data64(sqe, 1);
+
+	if (io_uring_cq_ready(&ring)) {
+		fprintf(stderr, "unexpected cqe\n");
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < 2; i++) {
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_nop(sqe);
+		io_uring_sqe_set_data64(sqe, 2);
+		io_uring_submit(&ring);
+	}
+
+	do {
+		errno = 0;
+		ret = write(pipe1[1], "foo", 3);
+	} while (ret == -1 && errno == EINTR);
+
+	if (ret <= 0) {
+		fprintf(stderr, "write failed: %d\n", errno);
+		return T_EXIT_FAIL;
+	}
+
+	/* should have 2 cqe + 1 overflow now, so take out two cqes */
+	for (i = 0; i < 2; i++) {
+		if (io_uring_peek_cqe(&ring, &cqe)) {
+			fprintf(stderr, "unexpectedly no cqe\n");
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data != 2) {
+			fprintf(stderr, "unexpected user_data\n");
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	/* make sure everything is processed */
+	io_uring_get_events(&ring);
+
+	/* now remove the poll */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_poll_remove(sqe, 1);
+	io_uring_sqe_set_data64(sqe, 3);
+	ret = io_uring_submit(&ring);
+
+	if (ret != 1) {
+		fprintf(stderr, "bad poll remove\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = check_final_cqe(&ring);
+
+	close(pipe1[0]);
+	close(pipe1[1]);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+static int test_downgrade(bool support_defer)
+{
+	struct io_uring_cqe cqes[128];
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int fds[2];
+	int ret, i, cqe_count, tmp = 0, more_cqe_count;
+
+	if (pipe(fds) != 0) {
+		perror("pipe");
+		return -1;
+	}
+
+	struct io_uring_params params = {
+		.flags = IORING_SETUP_CQSIZE,
+		.cq_entries = 2
+	};
+
+	ret = io_uring_queue_init_params(2, &ring, &params);
+	if (ret) {
+		fprintf(stderr, "queue init: %d\n", ret);
+		return -1;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		return -1;
+	}
+	io_uring_prep_poll_multishot(sqe, fds[0], POLLIN);
+	io_uring_sqe_set_data64(sqe, 1);
+	io_uring_submit(&ring);
+
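+	/*
+	 * Each write/read pair triggers the multishot poll once; with only
+	 * two CQ entries the later completions must overflow, which should
+	 * downgrade the multishot (clear IORING_CQE_F_MORE) on kernels that
+	 * support DEFER_TASKRUN.
+	 */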
+	for (i = 0; i < 8; i++) {
+		ret = write(fds[1], &tmp, sizeof(tmp));
+		if (ret != sizeof(tmp)) {
+			perror("write");
+			return -1;
+		}
+		ret = read(fds[0], &tmp, sizeof(tmp));
+		if (ret != sizeof(tmp)) {
+			perror("read");
+			return -1;
+		}
+	}
+
+	cqe_count = 0;
+	while (!io_uring_peek_cqe(&ring, &cqe)) {
+		cqes[cqe_count++] = *cqe;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	/*
+	 * Some kernels may let multishot poll keep overflowing; if they
+	 * don't, the final CQE must have IORING_CQE_F_MORE cleared.
+	 */
+	if (cqe_count < 3) {
+		fprintf(stderr, "too few cqes: %d\n", cqe_count);
+		return -1;
+	} else if (cqe_count == 8) {
+		more_cqe_count = cqe_count;
+		/* downgrade only available since support_defer */
+		if (support_defer) {
+			fprintf(stderr, "did not downgrade on overflow\n");
+			return -1;
+		}
+	} else {
+		more_cqe_count = cqe_count - 1;
+		cqe = &cqes[cqe_count - 1];
+		if (cqe->flags & IORING_CQE_F_MORE) {
+			fprintf(stderr, "incorrect MORE flag %x\n", cqe->flags);
+			return -1;
+		}
+	}
+
+	for (i = 0; i < more_cqe_count; i++) {
+		cqe = &cqes[i];
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "missing MORE flag\n");
+			return -1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "bad res: %d\n", cqe->res);
+			return -1;
+		}
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+	bool support_defer;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	support_defer = t_probe_defer_taskrun();
+	ret = test_downgrade(support_defer);
+	if (ret) {
+		fprintf(stderr, "%s: test_downgrade(%d) failed\n", argv[0], support_defer);
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(false);
+	if (ret == T_EXIT_SKIP)
+		return ret;
+	if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "%s: test(false) failed\n", argv[0]);
+		return ret;
+	}
+
+	if (support_defer) {
+		ret = test(true);
+		if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "%s: test(true) failed\n", argv[0]);
+			return ret;
+		}
+	}
+
+	return ret;
+}
diff --git a/test/poll-mshot-update.c b/test/poll-mshot-update.c
index caedb6f..082e507 100644
--- a/test/poll-mshot-update.c
+++ b/test/poll-mshot-update.c
@@ -15,11 +15,14 @@
 #include <pthread.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 #define	NFILES	5000
 #define BATCH	500
 #define NLOOPS	1000
 
+static int nfiles = NFILES;
+
 #define RING_SIZE	512
 
 struct p {
@@ -75,6 +78,20 @@
 	return 0;
 }
 
+static int submit_arm_poll(struct io_uring *ring, int off)
+{
+	int ret;
+
+	ret = arm_poll(ring, off);
+	if (ret)
+		return ret;
+
+	ret = io_uring_submit(ring);
+	if (ret < 0)
+		return ret;
+	return ret == 1 ? 0 : -1;
+}
+
 static int reap_polls(struct io_uring *ring)
 {
 	struct io_uring_cqe *cqe;
@@ -106,6 +123,18 @@
 		off = cqe->user_data;
 		if (off == 0x12345678)
 			goto seen;
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			/* need to re-arm poll */
+			ret = submit_arm_poll(ring, off);
+			if (ret)
+				break;
+			if (cqe->res <= 0) {
+				/* retry this one */
+				i--;
+				goto seen;
+			}
+		}
+
 		ret = read(p[off].fd[0], &c, 1);
 		if (ret != 1) {
 			if (ret == -1 && errno == EAGAIN)
@@ -134,7 +163,7 @@
 		int off;
 
 		do {
-			off = rand() % NFILES;
+			off = rand() % nfiles;
 			if (!p[off].triggered)
 				break;
 		} while (1);
@@ -158,7 +187,7 @@
 
 static int arm_polls(struct io_uring *ring)
 {
-	int ret, to_arm = NFILES, i, off;
+	int ret, to_arm = nfiles, i, off;
 
 	off = 0;
 	while (to_arm) {
@@ -187,52 +216,23 @@
 	return 0;
 }
 
-int main(int argc, char *argv[])
+static int run(int cqe)
 {
 	struct io_uring ring;
 	struct io_uring_params params = { };
-	struct rlimit rlim;
 	pthread_t thread;
 	int i, j, ret;
 
-	if (argc > 1)
-		return 0;
-
-	ret = has_poll_update();
-	if (ret < 0) {
-		fprintf(stderr, "poll update check failed %i\n", ret);
-		return -1;
-	} else if (!ret) {
-		fprintf(stderr, "no poll update, skip\n");
-		return 0;
-	}
-
-	if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
-		perror("getrlimit");
-		goto err_noring;
-	}
-
-	if (rlim.rlim_cur < (2 * NFILES + 5)) {
-		rlim.rlim_cur = (2 * NFILES + 5);
-		rlim.rlim_max = rlim.rlim_cur;
-		if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
-			if (errno == EPERM)
-				goto err_nofail;
-			perror("setrlimit");
-			goto err_noring;
-		}
-	}
-
-	for (i = 0; i < NFILES; i++) {
+	for (i = 0; i < nfiles; i++) {
 		if (pipe(p[i].fd) < 0) {
 			perror("pipe");
-			goto err_noring;
+			return 1;
 		}
 		fcntl(p[i].fd[0], F_SETFL, O_NONBLOCK);
 	}
 
 	params.flags = IORING_SETUP_CQSIZE;
-	params.cq_entries = 4096;
+	params.cq_entries = cqe;
 	ret = io_uring_queue_init_params(RING_SIZE, &ring, &params);
 	if (ret) {
 		if (ret == -EINVAL) {
@@ -255,19 +255,76 @@
 			goto err;
 		pthread_join(thread, NULL);
 
-		for (j = 0; j < NFILES; j++)
+		for (j = 0; j < nfiles; j++)
 			p[j].triggered = 0;
 	}
 
 	io_uring_queue_exit(&ring);
+	for (i = 0; i < nfiles; i++) {
+		close(p[i].fd[0]);
+		close(p[i].fd[1]);
+	}
 	return 0;
 err:
 	io_uring_queue_exit(&ring);
-err_noring:
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct rlimit rlim;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = has_poll_update();
+	if (ret < 0) {
+		fprintf(stderr, "poll update check failed %i\n", ret);
+		return -1;
+	} else if (!ret) {
+		fprintf(stderr, "no poll update, skip\n");
+		return 0;
+	}
+
+	if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
+		perror("getrlimit");
+		goto err;
+	}
+
+	if (rlim.rlim_cur < (2 * NFILES + 5)) {
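+		/* raise the soft limit to the hard limit and scale the test down */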
+		rlim.rlim_cur = rlim.rlim_max;
+		nfiles = (rlim.rlim_cur / 2) - 5;
+		if (nfiles > NFILES)
+			nfiles = NFILES;
+		if (nfiles <= 0)
+			goto err_nofail;
+		if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
+			if (errno == EPERM)
+				goto err_nofail;
+			perror("setrlimit");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	ret = run(1024);
+	if (ret) {
+		fprintf(stderr, "run(1024) failed\n");
+		goto err;
+	}
+
+	ret = run(8192);
+	if (ret) {
+		fprintf(stderr, "run(8192) failed\n");
+		goto err;
+	}
+
+	return 0;
+err:
 	fprintf(stderr, "poll-many failed\n");
 	return 1;
 err_nofail:
 	fprintf(stderr, "poll-many: not enough files available (and not root), "
 			"skipped\n");
-	return 0;
+	return T_EXIT_SKIP;
 }
diff --git a/test/poll-race-mshot.c b/test/poll-race-mshot.c
new file mode 100644
index 0000000..34a6a18
--- /dev/null
+++ b/test/poll-race-mshot.c
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: check that racing wakeups don't re-issue a multishot poll,
+ *		which can leak ring provided buffers. Also test that ring
+ *		provided buffers for regular receive don't leak when we hit
+ *		a poll race.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define NREQS		64
+#define BUF_SIZE	64
+
+static int no_buf_ring;
+
+struct data {
+	pthread_barrier_t barrier;
+	int fd;
+};
+
+static void *thread(void *data)
+{
+	struct data *d = data;
+	char buf[BUF_SIZE];
+	int ret, i, fd;
+
+	memset(buf, 0x5a, BUF_SIZE);
+	pthread_barrier_wait(&d->barrier);
+	fd = d->fd;
+	for (i = 0; i < NREQS; i++) {
+		ret = write(fd, buf, sizeof(buf));
+		if (ret != BUF_SIZE) {
+			if (ret < 0) {
+				perror("write");
+				printf("bad fd %d\n", fd);
+			} else
+				fprintf(stderr, "wrote short %d\n", ret);
+		}
+	}
+	return NULL;
+}
+
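+/*
+ * Queue NREQS buffer-select receives against one end of a socketpair while
+ * a thread writes NREQS chunks into the other end; every completion must
+ * carry a valid buffer ID, or a ring provided buffer was leaked.
+ */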
+static int test(struct io_uring *ring, struct data *d)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int fd[2], ret, i;
+	pthread_t t;
+	void *buf, *ptr;
+	void *ret2;
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) {
+		perror("socketpair");
+		return T_EXIT_FAIL;
+	}
+
+	d->fd = fd[1];
+
+	if (posix_memalign((void **) &buf, 16384, BUF_SIZE * NREQS))
+		return T_EXIT_FAIL;
+
+	br = io_uring_setup_buf_ring(ring, NREQS, 1, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			no_buf_ring = 1;
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "buf ring reg %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ptr = buf;
+	for (i = 0; i < NREQS; i++) {
+		io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1,
+				io_uring_buf_ring_mask(NREQS), i);
+		ptr += BUF_SIZE;
+	}
+	io_uring_buf_ring_advance(br, NREQS);
+
+	pthread_create(&t, NULL, thread, d);
+
+	for (i = 0; i < NREQS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_recv(sqe, fd[0], NULL, 0, 0);
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = 1;
+	}
+
+	pthread_barrier_wait(&d->barrier);
+
+	ret = io_uring_submit(ring);
+	if (ret != NREQS) {
+		fprintf(stderr, "submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	i = 0;
+	do {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		i++;
+		if (cqe->res != BUF_SIZE) {
+			fprintf(stderr, "Bad cqe res %d\n", cqe->res);
+			break;
+		}
+		if (cqe->flags & IORING_CQE_F_BUFFER) {
+			int bid = cqe->flags >> 16;
+
+			if (bid > NREQS) {
+				fprintf(stderr, "Bad BID %d\n", bid);
+				return T_EXIT_FAIL;
+			}
+		} else {
+			fprintf(stderr, "No BID set!\n");
+			printf("ret=%d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+		if (i > NREQS) {
+			fprintf(stderr, "Got too many requests?\n");
+			return T_EXIT_FAIL;
+		}
+	} while (i < NREQS);
+
+	pthread_join(t, &ret2);
+	free(buf);
+	io_uring_free_buf_ring(ring, br, NREQS, 1);
+	close(fd[0]);
+	close(fd[1]);
+	return T_EXIT_PASS;
+}
+
+static int test_mshot(struct io_uring *ring, struct data *d)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int fd[2], ret, i;
+	pthread_t t;
+	void *buf, *ptr;
+	void *ret2;
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) {
+		perror("socketpair");
+		return T_EXIT_FAIL;
+	}
+
+	d->fd = fd[1];
+
+	if (posix_memalign((void **) &buf, 16384, BUF_SIZE * NREQS))
+		return T_EXIT_FAIL;
+
+	br = io_uring_setup_buf_ring(ring, NREQS, 1, 0, &ret);
+	if (!br) {
+		fprintf(stderr, "buf ring reg %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ptr = buf;
+	for (i = 0; i < NREQS; i++) {
+		io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1,
+				io_uring_buf_ring_mask(NREQS), i);
+		ptr += BUF_SIZE;
+	}
+	io_uring_buf_ring_advance(br, NREQS);
+
+	pthread_create(&t, NULL, thread, d);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_recv_multishot(sqe, fd[0], NULL, 0, 0);
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = 1;
+
+	pthread_barrier_wait(&d->barrier);
+
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	i = 0;
+	do {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		i++;
+		if (!(cqe->flags & IORING_CQE_F_MORE))
+			break;
+		if (cqe->res != BUF_SIZE) {
+			fprintf(stderr, "Bad cqe res %d\n", cqe->res);
+			break;
+		}
+		if (cqe->flags & IORING_CQE_F_BUFFER) {
+			int bid = cqe->flags >> 16;
+
+			if (bid > NREQS) {
+				fprintf(stderr, "Bad BID %d\n", bid);
+				return T_EXIT_FAIL;
+			}
+		} else {
+			fprintf(stderr, "No BID set!\n");
+			printf("ret=%d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+		if (i > NREQS) {
+			fprintf(stderr, "Got too many requests?\n");
+			return T_EXIT_FAIL;
+		}
+	} while (1);
+
+	if (i != NREQS + 1) {
+		fprintf(stderr, "Only got %d requests\n", i);
+		return T_EXIT_FAIL;
+	}
+
+	pthread_join(t, &ret2);
+	io_uring_free_buf_ring(ring, br, NREQS, 1);
+	free(buf);
+	close(fd[0]);
+	close(fd[1]);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct data d;
+	int i, ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	pthread_barrier_init(&d.barrier, NULL, 2);
+
+	for (i = 0; i < 1000; i++) {
+		io_uring_queue_init(NREQS, &ring, 0);
+		ret = test(&ring, &d);
+		if (ret != T_EXIT_PASS) {
+			if (no_buf_ring)
+				break;
+			fprintf(stderr, "Test failed loop %d\n", i);
+			return T_EXIT_FAIL;
+		}
+		io_uring_queue_exit(&ring);
+	}
+
+	if (no_buf_ring)
+		return T_EXIT_SKIP;
+
+	for (i = 0; i < 1000; i++) {
+		io_uring_queue_init(NREQS, &ring, 0);
+		ret = test_mshot(&ring, &d);
+		if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "Test mshot failed loop %d\n", i);
+			return T_EXIT_FAIL;
+		}
+		io_uring_queue_exit(&ring);
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/poll-race.c b/test/poll-race.c
new file mode 100644
index 0000000..a601085
--- /dev/null
+++ b/test/poll-race.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: check that multiple receives on the same socket don't get
+ *		stalled if multiple wakers race with the socket readiness.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/socket.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define NREQS	64
+
+struct data {
+	pthread_barrier_t barrier;
+	int fd;
+};
+
+static void *thread(void *data)
+{
+	struct data *d = data;
+	char buf[64];
+	int ret, i;
+
+	pthread_barrier_wait(&d->barrier);
+	for (i = 0; i < NREQS; i++) {
+		ret = write(d->fd, buf, sizeof(buf));
+		if (ret != 64)
+			fprintf(stderr, "wrote short %d\n", ret);
+	}
+	return NULL;
+}
+
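+/*
+ * Arm NREQS receives on the same socket and let a thread feed it; if a
+ * racing wakeup loses a poll reference, one of the waits below stalls.
+ */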
+static int test(struct io_uring *ring, struct data *d)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int fd[2], ret, i;
+	char buf[64];
+	pthread_t t;
+	void *ret2;
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) {
+		perror("socketpair");
+		return T_EXIT_FAIL;
+	}
+
+	d->fd = fd[1];
+
+	pthread_create(&t, NULL, thread, d);
+
+	for (i = 0; i < NREQS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_recv(sqe, fd[0], buf, sizeof(buf), 0);
+	}
+
+	pthread_barrier_wait(&d->barrier);
+
+	ret = io_uring_submit(ring);
+	if (ret != NREQS) {
+		fprintf(stderr, "submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < NREQS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	close(fd[0]);
+	close(fd[1]);
+	pthread_join(t, &ret2);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct data d;
+	int i, ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	pthread_barrier_init(&d.barrier, NULL, 2);
+
+	io_uring_queue_init(NREQS, &ring, 0);
+
+	for (i = 0; i < 1000; i++) {
+		ret = test(&ring, &d);
+		if (ret != T_EXIT_PASS) {
+			fprintf(stderr, "Test failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/poll-v-poll.c b/test/poll-v-poll.c
index 1b277db..f2ab594 100644
--- a/test/poll-v-poll.c
+++ b/test/poll-v-poll.c
@@ -17,6 +17,7 @@
 #include <sys/epoll.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 struct thread_data {
 	struct io_uring *ring;
@@ -174,6 +175,8 @@
 
 	fd = open(fname, O_RDONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -331,19 +334,19 @@
 		fname = argv[0];
 
 	ret = do_fd_test(&ring, fname, POLLIN);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "fd test IN failed\n");
 		return ret;
 	}
 
 	ret = do_fd_test(&ring, fname, POLLOUT);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "fd test OUT failed\n");
 		return ret;
 	}
 
 	ret = do_fd_test(&ring, fname, POLLOUT | POLLIN);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "fd test IN|OUT failed\n");
 		return ret;
 	}
diff --git a/test/poll.c b/test/poll.c
index 1cd57ba..7d1f5a4 100644
--- a/test/poll.c
+++ b/test/poll.c
@@ -11,16 +11,27 @@
 #include <signal.h>
 #include <poll.h>
 #include <sys/wait.h>
+#include <assert.h>
 
+#include "helpers.h"
 #include "liburing.h"
 
-static void sig_alrm(int sig)
+static void do_setsockopt(int fd, int level, int optname, int val)
 {
-	fprintf(stderr, "Timed out!\n");
-	exit(1);
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		t_error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
 }
 
-int main(int argc, char *argv[])
+static bool check_cq_empty(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe = NULL;
+	int ret;
+
+	ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
+	return ret == -EAGAIN;
+}
+
+static int test_basic(void)
 {
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe;
@@ -29,34 +40,22 @@
 	pid_t p;
 	int ret;
 
-	if (argc > 1)
-		return 0;
-
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
 		return 1;
 	}
 
 	p = fork();
-	switch (p) {
-	case -1:
+	if (p == -1) {
 		perror("fork");
 		exit(2);
-	case 0: {
-		struct sigaction act;
-
+	} else if (p == 0) {
 		ret = io_uring_queue_init(1, &ring, 0);
 		if (ret) {
 			fprintf(stderr, "child: ring setup failed: %d\n", ret);
 			return 1;
 		}
 
-		memset(&act, 0, sizeof(act));
-		act.sa_handler = sig_alrm;
-		act.sa_flags = SA_RESTART;
-		sigaction(SIGALRM, &act, NULL);
-		alarm(1);
-
 		sqe = io_uring_get_sqe(&ring);
 		if (!sqe) {
 			fprintf(stderr, "get sqe failed\n");
@@ -92,18 +91,237 @@
 							(long) cqe->res);
 			return 1;
 		}
-		exit(0);
-		}
-	default:
-		do {
-			errno = 0;
-			ret = write(pipe1[1], "foo", 3);
-		} while (ret == -1 && errno == EINTR);
 
-		if (ret != 3) {
-			fprintf(stderr, "parent: bad write return %d\n", ret);
+		io_uring_queue_exit(&ring);
+		exit(0);
+	}
+
+	do {
+		errno = 0;
+		ret = write(pipe1[1], "foo", 3);
+	} while (ret == -1 && errno == EINTR);
+
+	if (ret != 3) {
+		fprintf(stderr, "parent: bad write return %d\n", ret);
+		return 1;
+	}
+	close(pipe1[0]);
+	close(pipe1[1]);
+	return 0;
+}
+
+static int test_missing_events(void)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int i, ret, sp[2];
+	char buf[2] = {};
+	int res_mask = 0;
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_DEFER_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) {
+		perror("Failed to create Unix-domain socket pair");
+		return 1;
+	}
+	do_setsockopt(sp[0], SOL_SOCKET, SO_SNDBUF, 1);
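+	/* shrink the send buffer; the send below fills it so sp[0] starts
+	 * out not writable
+	 */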
+	ret = send(sp[0], buf, sizeof(buf), 0);
+	if (ret != sizeof(buf)) {
+		perror("send failed");
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_poll_multishot(sqe, sp[0], POLLIN|POLLOUT);
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		return 1;
+	}
+
+	/* trigger EPOLLIN */
+	ret = send(sp[1], buf, sizeof(buf), 0);
+	if (ret != sizeof(buf)) {
+		fprintf(stderr, "send sp[1] failed %i %i\n", ret, errno);
+		return 1;
+	}
+
+	/* trigger EPOLLOUT */
+	ret = recv(sp[1], buf, sizeof(buf), 0);
+	if (ret != sizeof(buf)) {
+		perror("recv failed");
+		return 1;
+	}
+
+	for (i = 0; ; i++) {
+		if (i == 0)
+			ret = io_uring_wait_cqe(&ring, &cqe);
+		else
+			ret = io_uring_peek_cqe(&ring, &cqe);
+
+		if (i != 0 && ret == -EAGAIN) {
+			break;
+		}
+		if (ret) {
+			fprintf(stderr, "wait completion %d, %i\n", ret, i);
 			return 1;
 		}
-		return 0;
+		res_mask |= cqe->res;
+		io_uring_cqe_seen(&ring, cqe);
 	}
+
+	if ((res_mask & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
+		fprintf(stderr, "missing poll events %i\n", res_mask);
+		return 1;
+	}
+	io_uring_queue_exit(&ring);
+	close(sp[0]);
+	close(sp[1]);
+	return 0;
+}
+
+#define NR_SQES		2048
+
+static int test_self_poll(void)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int ret, i, j;
+
+	ret = io_uring_queue_init(NR_SQES, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	for (j = 0; j < 32; j++) {
+		for (i = 0; i < NR_SQES; i++) {
+			sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN);
+		}
+
+		ret = io_uring_submit(&ring);
+		assert(ret == NR_SQES);
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_nop(sqe);
+	ret = io_uring_submit(&ring);
+	assert(ret == 1);
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	io_uring_cqe_seen(&ring, cqe);
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
+
+static int test_disabled_ring_lazy_polling(int early_poll)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring, ring2;
+	unsigned head;
+	int ret, i = 0;
+
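+	/*
+	 * DEFER_TASKRUN rings activate their poll waitqueue lazily; verify
+	 * that polling the ring fd works whether the poll was armed before
+	 * or after the disabled ring is enabled.
+	 */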
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					     IORING_SETUP_DEFER_TASKRUN |
+					     IORING_SETUP_R_DISABLED);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+	ret = io_uring_queue_init(8, &ring2, 0);
+	if (ret) {
+		fprintf(stderr, "ring2 setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (early_poll) {
+		/* start polling disabled DEFER_TASKRUN ring */
+		sqe = io_uring_get_sqe(&ring2);
+		io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN);
+		ret = io_uring_submit(&ring2);
+		assert(ret == 1);
+		assert(check_cq_empty(&ring2));
+	}
+
+	/* enable rings, which should also activate pollwq */
+	ret = io_uring_enable_rings(&ring);
+	assert(ret >= 0);
+
+	if (!early_poll) {
+		/* start polling enabled DEFER_TASKRUN ring */
+		sqe = io_uring_get_sqe(&ring2);
+		io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN);
+		ret = io_uring_submit(&ring2);
+		assert(ret == 1);
+		assert(check_cq_empty(&ring2));
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_nop(sqe);
+	ret = io_uring_submit(&ring);
+	assert(ret == 1);
+
+	io_uring_for_each_cqe(&ring2, head, cqe) {
+		i++;
+	}
+	if (i != 1) {
+		fprintf(stderr, "fail, polling stuck\n");
+		return 1;
+	}
+	io_uring_queue_exit(&ring);
+	io_uring_queue_exit(&ring2);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return 0;
+
+	ret = test_basic();
+	if (ret) {
+		fprintf(stderr, "test_basic() failed %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+
+	if (t_probe_defer_taskrun()) {
+		ret = test_missing_events();
+		if (ret) {
+			fprintf(stderr, "test_missing_events() failed %i\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_disabled_ring_lazy_polling(false);
+		if (ret) {
+			fprintf(stderr, "test_disabled_ring_lazy_polling(false) failed %i\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_disabled_ring_lazy_polling(true);
+		if (ret) {
+			fprintf(stderr, "test_disabled_ring_lazy_polling(true) failed %i\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	ret = test_self_poll();
+	if (ret) {
+		fprintf(stderr, "test_self_poll failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return 0;
 }
diff --git a/test/pollfree.c b/test/pollfree.c
index d753ffe..8325ac0 100644
--- a/test/pollfree.c
+++ b/test/pollfree.c
@@ -1,426 +1,148 @@
 /* SPDX-License-Identifier: MIT */
-// https://syzkaller.appspot.com/bug?id=5f5a44abb4cba056fe24255c4fcb7e7bbe13de7a
-// autogenerated by syzkaller (https://github.com/google/syzkaller)
-
-#include <dirent.h>
-#include <endian.h>
-#include <errno.h>
+/*
+ * Description: test pollfree wakeups
+ */
 #include <fcntl.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
 #include <sys/stat.h>
-#include <sys/syscall.h>
 #include <sys/types.h>
-#include <sys/wait.h>
-#include <time.h>
+#include <sys/signalfd.h>
 #include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <errno.h>
 
-#include <linux/futex.h>
+#include "liburing.h"
+#include "helpers.h"
 
-#ifdef __NR_futex
+static int no_signalfd;
 
-static void sleep_ms(uint64_t ms)
+static int child(int flags)
 {
-  usleep(ms * 1000);
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	struct signalfd_siginfo si;
+	static unsigned long index;
+	sigset_t mask;
+	int ret, fd;
+
+	ret = io_uring_queue_init(4, &ring, flags);
+	if (ret) {
+		if (ret == -EINVAL)
+			return 0;
+		fprintf(stderr, "queue init failed %d\n", ret);
+		return ret;
+	}
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGINT);
+
+	fd = signalfd(-1, &mask, SFD_NONBLOCK);
+	if (fd < 0) {
+		no_signalfd = 1;
+		perror("signalfd");
+		return 1;
+	}
+
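+	/*
+	 * Arm reads on a signalfd that never becomes readable and exit with
+	 * them still pending; the child's teardown then exercises the
+	 * pollfree wakeup path.
+	 */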
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, &si, sizeof(si), 0);
+	sqe->user_data = 1;
+	io_uring_submit(&ring);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, &si, sizeof(si), 0);
+	sqe->user_data = 2;
+	sqe->flags |= IOSQE_ASYNC;
+	io_uring_submit(&ring);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, &si, sizeof(si), 0);
+	sqe->user_data = 3;
+	io_uring_submit(&ring);
+
+	if (!(++index & 7))
+		usleep(100);
+
+	return 0;
 }
 
-static uint64_t current_time_ms(void)
+static int run_test(int flags)
 {
-  struct timespec ts;
-  if (clock_gettime(CLOCK_MONOTONIC, &ts))
-    exit(1);
-  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
+	pid_t pid;
+	int ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		return 1;
+	} else if (!pid) {
+		ret = child(flags);
+		_exit(ret);
+	} else {
+		int wstatus;
+		pid_t childpid;
+
+		do {
+			childpid = waitpid(pid, &wstatus, 0);
+		} while (childpid == (pid_t) -1 && (errno == EINTR));
+
+		if (errno == ECHILD)
+			wstatus = 0;
+		return wstatus;
+	}
 }
 
-static void thread_start(void* (*fn)(void*), void* arg)
+static int test(int flags)
 {
-  pthread_t th;
-  pthread_attr_t attr;
-  pthread_attr_init(&attr);
-  pthread_attr_setstacksize(&attr, 128 << 10);
-  int i = 0;
-  for (; i < 100; i++) {
-    if (pthread_create(&th, &attr, fn, arg) == 0) {
-      pthread_attr_destroy(&attr);
-      return;
-    }
-    if (errno == EAGAIN) {
-      usleep(50);
-      continue;
-    }
-    break;
-  }
-  exit(1);
-}
+	struct timeval start;
+	int ret;
 
-typedef struct {
-  int state;
-} event_t;
+	gettimeofday(&start, NULL);
+	do {
+		ret = run_test(flags);
+		if (ret) {
+			fprintf(stderr, "test failed with flags %x\n", flags);
+			return 1;
+		}
+		if (no_signalfd)
+			break;
+	} while (mtime_since_now(&start) < 2500);
 
-static void event_init(event_t* ev)
-{
-  ev->state = 0;
-}
-
-static void event_reset(event_t* ev)
-{
-  ev->state = 0;
-}
-
-static void event_set(event_t* ev)
-{
-  if (ev->state)
-    exit(1);
-  __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
-  syscall(__NR_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
-}
-
-static void event_wait(event_t* ev)
-{
-  while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
-    syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
-}
-
-static int event_isset(event_t* ev)
-{
-  return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
-}
-
-static int event_timedwait(event_t* ev, uint64_t timeout)
-{
-  uint64_t start = current_time_ms();
-  uint64_t now = start;
-  for (;;) {
-    uint64_t remain = timeout - (now - start);
-    struct timespec ts;
-    ts.tv_sec = remain / 1000;
-    ts.tv_nsec = (remain % 1000) * 1000 * 1000;
-    syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
-    if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
-      return 1;
-    now = current_time_ms();
-    if (now - start > timeout)
-      return 0;
-  }
-}
-
-#define SIZEOF_IO_URING_SQE 64
-#define SIZEOF_IO_URING_CQE 16
-#define SQ_HEAD_OFFSET 0
-#define SQ_TAIL_OFFSET 64
-#define SQ_RING_MASK_OFFSET 256
-#define SQ_RING_ENTRIES_OFFSET 264
-#define SQ_FLAGS_OFFSET 276
-#define SQ_DROPPED_OFFSET 272
-#define CQ_HEAD_OFFSET 128
-#define CQ_TAIL_OFFSET 192
-#define CQ_RING_MASK_OFFSET 260
-#define CQ_RING_ENTRIES_OFFSET 268
-#define CQ_RING_OVERFLOW_OFFSET 284
-#define CQ_FLAGS_OFFSET 280
-#define CQ_CQES_OFFSET 320
-
-struct io_sqring_offsets {
-  uint32_t head;
-  uint32_t tail;
-  uint32_t ring_mask;
-  uint32_t ring_entries;
-  uint32_t flags;
-  uint32_t dropped;
-  uint32_t array;
-  uint32_t resv1;
-  uint64_t resv2;
-};
-
-struct io_cqring_offsets {
-  uint32_t head;
-  uint32_t tail;
-  uint32_t ring_mask;
-  uint32_t ring_entries;
-  uint32_t overflow;
-  uint32_t cqes;
-  uint64_t resv[2];
-};
-
-struct io_uring_params {
-  uint32_t sq_entries;
-  uint32_t cq_entries;
-  uint32_t flags;
-  uint32_t sq_thread_cpu;
-  uint32_t sq_thread_idle;
-  uint32_t features;
-  uint32_t resv[4];
-  struct io_sqring_offsets sq_off;
-  struct io_cqring_offsets cq_off;
-};
-
-#define IORING_OFF_SQ_RING 0
-#define IORING_OFF_SQES 0x10000000ULL
-
-#define sys_io_uring_setup 425
-static long syz_io_uring_setup(volatile long a0, volatile long a1,
-                               volatile long a2, volatile long a3,
-                               volatile long a4, volatile long a5)
-{
-  uint32_t entries = (uint32_t)a0;
-  struct io_uring_params* setup_params = (struct io_uring_params*)a1;
-  void* vma1 = (void*)a2;
-  void* vma2 = (void*)a3;
-  void** ring_ptr_out = (void**)a4;
-  void** sqes_ptr_out = (void**)a5;
-  uint32_t fd_io_uring = syscall(sys_io_uring_setup, entries, setup_params);
-  uint32_t sq_ring_sz =
-      setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
-  uint32_t cq_ring_sz = setup_params->cq_off.cqes +
-                        setup_params->cq_entries * SIZEOF_IO_URING_CQE;
-  uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
-  *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE,
-                       MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring,
-                       IORING_OFF_SQ_RING);
-  uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
-  *sqes_ptr_out =
-      mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE,
-           MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
-  return fd_io_uring;
-}
-
-static long syz_io_uring_submit(volatile long a0, volatile long a1,
-                                volatile long a2, volatile long a3)
-{
-  char* ring_ptr = (char*)a0;
-  char* sqes_ptr = (char*)a1;
-  char* sqe = (char*)a2;
-  uint32_t sqes_index = (uint32_t)a3;
-  uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
-  uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
-  uint32_t sq_array_off =
-      (CQ_CQES_OFFSET + cq_ring_entries * SIZEOF_IO_URING_CQE + 63) & ~63;
-  if (sq_ring_entries)
-    sqes_index %= sq_ring_entries;
-  char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
-  memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
-  uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET);
-  uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET);
-  uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask;
-  uint32_t sq_tail_next = *sq_tail_ptr + 1;
-  uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off);
-  *(sq_array + sq_tail) = sqes_index;
-  __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
-  return 0;
-}
-
-static void kill_and_wait(int pid, int* status)
-{
-  kill(-pid, SIGKILL);
-  kill(pid, SIGKILL);
-  for (int i = 0; i < 100; i++) {
-    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
-      return;
-    usleep(1000);
-  }
-  DIR* dir = opendir("/sys/fs/fuse/connections");
-  if (dir) {
-    for (;;) {
-      struct dirent* ent = readdir(dir);
-      if (!ent)
-        break;
-      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
-        continue;
-      char abort[300];
-      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
-               ent->d_name);
-      int fd = open(abort, O_WRONLY);
-      if (fd == -1) {
-        continue;
-      }
-      if (write(fd, abort, 1) < 0) {
-      }
-      close(fd);
-    }
-    closedir(dir);
-  } else {
-  }
-  while (waitpid(-1, status, __WALL) != pid) {
-  }
-}
-
-static void setup_test()
-{
-  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
-  setpgrp();
-}
-
-struct thread_t {
-  int created, call;
-  event_t ready, done;
-};
-
-static struct thread_t threads[16];
-static void execute_call(int call);
-static int running;
-
-static void* thr(void* arg)
-{
-  struct thread_t* th = (struct thread_t*)arg;
-  for (;;) {
-    event_wait(&th->ready);
-    event_reset(&th->ready);
-    execute_call(th->call);
-    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
-    event_set(&th->done);
-  }
-  return 0;
-}
-
-static void execute_one(void)
-{
-  int i, call, thread;
-  for (call = 0; call < 4; call++) {
-    for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
-         thread++) {
-      struct thread_t* th = &threads[thread];
-      if (!th->created) {
-        th->created = 1;
-        event_init(&th->ready);
-        event_init(&th->done);
-        event_set(&th->done);
-        thread_start(thr, th);
-      }
-      if (!event_isset(&th->done))
-        continue;
-      event_reset(&th->done);
-      th->call = call;
-      __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
-      event_set(&th->ready);
-      event_timedwait(&th->done, 50);
-      break;
-    }
-  }
-  for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
-    sleep_ms(1);
-}
-
-static void execute_one(void);
-
-#define WAIT_FLAGS __WALL
-
-static void loop(void)
-{
-  int iter = 0;
-  for (; iter < 5000; iter++) {
-    int pid = fork();
-    if (pid < 0)
-      exit(1);
-    if (pid == 0) {
-      setup_test();
-      execute_one();
-      exit(0);
-    }
-    int status = 0;
-    uint64_t start = current_time_ms();
-    for (;;) {
-      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
-        break;
-      sleep_ms(1);
-      if (current_time_ms() - start < 5000)
-        continue;
-      kill_and_wait(pid, &status);
-      break;
-    }
-  }
-}
-
-#ifndef __NR_io_uring_enter
-#define __NR_io_uring_enter 426
-#endif
-
-uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0x0, 0x0};
-
-void execute_call(int call)
-{
-  intptr_t res = 0;
-  switch (call) {
-  case 0:
-    *(uint64_t*)0x200000c0 = 0;
-    res = syscall(__NR_signalfd4, -1, 0x200000c0ul, 8ul, 0ul);
-    if (res != -1)
-      r[0] = res;
-    break;
-  case 1:
-    *(uint32_t*)0x20000a84 = 0;
-    *(uint32_t*)0x20000a88 = 0;
-    *(uint32_t*)0x20000a8c = 0;
-    *(uint32_t*)0x20000a90 = 0;
-    *(uint32_t*)0x20000a98 = -1;
-    memset((void*)0x20000a9c, 0, 12);
-    res = -1;
-    res = syz_io_uring_setup(0x87, 0x20000a80, 0x206d6000, 0x206d7000,
-                             0x20000000, 0x20000040);
-    if (res != -1) {
-      r[1] = res;
-      r[2] = *(uint64_t*)0x20000000;
-      r[3] = *(uint64_t*)0x20000040;
-    }
-    break;
-  case 2:
-    *(uint8_t*)0x20002240 = 6;
-    *(uint8_t*)0x20002241 = 0;
-    *(uint16_t*)0x20002242 = 0;
-    *(uint32_t*)0x20002244 = r[0];
-    *(uint64_t*)0x20002248 = 0;
-    *(uint64_t*)0x20002250 = 0;
-    *(uint32_t*)0x20002258 = 0;
-    *(uint16_t*)0x2000225c = 0;
-    *(uint16_t*)0x2000225e = 0;
-    *(uint64_t*)0x20002260 = 0;
-    *(uint16_t*)0x20002268 = 0;
-    *(uint16_t*)0x2000226a = 0;
-    memset((void*)0x2000226c, 0, 20);
-    syz_io_uring_submit(r[2], r[3], 0x20002240, 0);
-    break;
-  case 3:
-    syscall(__NR_io_uring_enter, r[1], 0x1523a, 0, 0ul, 0ul, 0xaul);
-    break;
-  }
+	return 0;
 }
 
 int main(int argc, char *argv[])
 {
-  void *ret;
+	int ret;
 
-#if !defined(__i386) && !defined(__x86_64__)
-  return 0;
-#endif
+	if (argc > 1)
+		return T_EXIT_SKIP;
 
-  if (argc > 1)
-    return 0;
+	ret = test(0);
+	if (ret) {
+		fprintf(stderr, "test 0 failed: %d\n", ret);
+		return ret;
+	}
 
-  ret = mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
-  if (ret == MAP_FAILED)
-    return 0;
-  ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
-  if (ret == MAP_FAILED)
-    return 0;
-  ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
-  if (ret == MAP_FAILED)
-    return 0;
-  loop();
-  return 0;
+	if (no_signalfd)
+		return T_EXIT_SKIP;
+
+	ret = test(IORING_SETUP_SQPOLL);
+	if (ret) {
+		fprintf(stderr, "test SQPOLL failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = test(IORING_SETUP_COOP_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "test COOP failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = test(IORING_SETUP_DEFER_TASKRUN|IORING_SETUP_SINGLE_ISSUER);
+	if (ret) {
+		fprintf(stderr, "test DEFER failed: %d\n", ret);
+		return ret;
+	}
+
+	return T_EXIT_PASS;
 }
-
-#else /* __NR_futex */
-
-int main(int argc, char *argv[])
-{
-  return 0;
-}
-
-#endif /* __NR_futex */
diff --git a/test/read-before-exit.c b/test/read-before-exit.c
index be36bd4..a539352 100644
--- a/test/read-before-exit.c
+++ b/test/read-before-exit.c
@@ -14,6 +14,8 @@
 #include "liburing.h"
 #include "helpers.h"
 
+static int no_iopoll;
+
 struct data {
         struct io_uring *ring;
         int timer_fd1;
@@ -22,7 +24,7 @@
         uint64_t buf2;
 };
 
-void *submit(void *data)
+static void *submit(void *data)
 {
 	struct io_uring_sqe *sqe;
 	struct data *d = data;
@@ -35,8 +37,21 @@
 	io_uring_prep_read(sqe, d->timer_fd2, &d->buf2, sizeof(d->buf2), 0);
 
 	ret = io_uring_submit(d->ring);
-	if (ret != 2)
+	if (ret != 2) {
+		struct io_uring_cqe *cqe;
+
+		/*
+		 * Kernels without submit-all-on-error behavior will
+		 * fail to submit all requests; check if that's the
+		 * case and don't treat it as an error.
+		 */
+		ret = io_uring_peek_cqe(d->ring, &cqe);
+		if (!ret && cqe->res == -EOPNOTSUPP) {
+			no_iopoll = 1;
+			return NULL;
+		}
 		return (void *) (uintptr_t) 1;
+	}
 
 	/* Exit suddenly. */
 	return NULL;
@@ -95,9 +110,11 @@
 	for (i = 0; i < 1000; i++) {
 		ret = test(IORING_SETUP_IOPOLL);
 		if (ret) {
-			fprintf(stderr, "Test IOPOLL failed\n");
+			fprintf(stderr, "Test IOPOLL failed: %d\n", ret);
 			return ret;
 		}
+		if (no_iopoll)
+			break;
 	}
 
 	for (i = 0; i < 100; i++) {
diff --git a/test/read-mshot-empty.c b/test/read-mshot-empty.c
new file mode 100644
index 0000000..1405548
--- /dev/null
+++ b/test/read-mshot-empty.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that multishot read correctly keeps reading until all
+ *		data has been emptied. The original implementation failed
+ *		to do so if the provided buffer size was smaller than the
+ *		amount of data available, hence requiring multiple reads
+ *		to empty the file buffer.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/time.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BGID		17
+#define NR_BUFS		4
+#define BR_MASK		(NR_BUFS - 1)
+#define BUF_SIZE	32
+
+static int do_write(int fd, void *buf, int buf_size)
+{
+	int ret;
+
+	ret = write(fd, buf, buf_size);
+	if (ret < 0) {
+		perror("write");
+		return 0;
+	} else if (ret != buf_size) {
+		fprintf(stderr, "bad write size %d\n", ret);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void *thread_fn(void *data)
+{
+	char w1[BUF_SIZE], w2[BUF_SIZE];
+	int *fds = data;
+
+	memset(w1, 0x11, BUF_SIZE);
+	memset(w2, 0x22, BUF_SIZE);
+
+	if (!do_write(fds[1], w1, BUF_SIZE))
+		return NULL;
+	if (!do_write(fds[1], w2, BUF_SIZE))
+		return NULL;
+
+	usleep(100000);
+
+	if (!do_write(fds[1], w1, BUF_SIZE))
+		return NULL;
+	if (!do_write(fds[1], w2, BUF_SIZE))
+		return NULL;
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	pthread_t thread;
+	int i, ret, fds[2];
+	void *buf, *tret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "failed buffer ring %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	buf = malloc(NR_BUFS * BUF_SIZE);
+	for (i = 0; i < NR_BUFS; i++) {
+		void *this_buf = buf + i * BUF_SIZE;
+
+		io_uring_buf_ring_add(br, this_buf, BUF_SIZE, i, BR_MASK, i);
+	}
+	io_uring_buf_ring_advance(br, NR_BUFS);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BGID);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "bad submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/*
+	 * If multishot read is not available, the failure will already
+	 * have been posted as a CQE by the time submission returns;
+	 * check for that and skip the test if so.
+	 */
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		if (cqe->res == -EINVAL || cqe->res == -EBADF) {
+			free(buf);
+			return T_EXIT_SKIP;
+		}
+	}
+
+	pthread_create(&thread, NULL, thread_fn, fds);
+
+	for (i = 0; i < 4; i++) {
+		int buf_index;
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait %d\n", ret);
+			break;
+		}
+
+		if (cqe->res != BUF_SIZE) {
+			fprintf(stderr, "size %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "buffer not set\n");
+			return T_EXIT_FAIL;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "more not set\n");
+			return T_EXIT_FAIL;
+		}
+		buf_index = cqe->flags >> 16;
+		assert(buf_index >= 0 && buf_index < NR_BUFS);
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	pthread_join(thread, &tret);
+	io_uring_free_buf_ring(&ring, br, NR_BUFS, BGID);
+	io_uring_queue_exit(&ring);
+	free(buf);
+	return T_EXIT_PASS;
+}
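
Distilled, the pattern this test exercises is: arm one IORING_OP_READ_MULTISHOT
against a provided-buffer ring, then drain CQEs until IORING_CQE_F_MORE is
cleared. A minimal sketch using the same liburing calls as the test above;
fd stands in for the pipe read end and all error handling is omitted:

    struct io_uring ring;
    struct io_uring_buf_ring *br;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    static char bufs[NR_BUFS][BUF_SIZE];
    int ret, i;

    io_uring_queue_init(8, &ring, 0);
    br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret);
    for (i = 0; i < NR_BUFS; i++)
        io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i, BR_MASK, i);
    io_uring_buf_ring_advance(br, NR_BUFS);

    sqe = io_uring_get_sqe(&ring);
    /* len == 0 means each completion uses the full selected buffer */
    io_uring_prep_read_multishot(sqe, fd, 0, 0, BGID);
    io_uring_submit(&ring);

    do {
        io_uring_wait_cqe(&ring, &cqe);
        /* the selected buffer id lives in the upper CQE flag bits */
        i = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
        ret = cqe->flags & IORING_CQE_F_MORE;
        io_uring_cqe_seen(&ring, cqe);
    } while (ret);	/* MORE cleared: the request has terminated */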
diff --git a/test/read-mshot-stdin.c b/test/read-mshot-stdin.c
new file mode 100644
index 0000000..797ba56
--- /dev/null
+++ b/test/read-mshot-stdin.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test multishot read on stdin. Note that this REQUIRES input
+ *		to be received on stdin, and hence if invoked with no
+ *		arguments, or without the single argument being 'stdin',
+ *		the test will just return SKIPPED. Can't be run from the
+ *		standard test harness, as it's interactive.
+ *
+ * To run, execute "test/read-mshot-stdin.t stdin" and then input text on
+ * the console, followed by enter / line feed. If it works as it should,
+ * it'll output the received CQE data. If an error is detected, it'll
+ * abort with an error.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BUF_SIZE	32
+#define NR_BUFS		64
+#define BUF_BGID	1
+
+#define BR_MASK		(NR_BUFS - 1)
+
+static int test_stdin(void)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_params p = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret, i, last_bid;
+	char *buf, *ptr;
+
+	p.flags = IORING_SETUP_CQSIZE;
+	p.cq_entries = NR_BUFS;
+	ret = io_uring_queue_init_params(1, &ring, &p);
+	if (ret) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (posix_memalign((void **) &buf, 4096, NR_BUFS * BUF_SIZE))
+		return T_EXIT_FAIL;
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ptr = buf;
+	for (i = 0; i < NR_BUFS; i++) {
+		io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, BR_MASK, i);
+		ptr += BUF_SIZE;
+	}
+	io_uring_buf_ring_advance(br, NR_BUFS);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read_multishot(sqe, STDIN_FILENO, 0, 0, BUF_BGID);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	last_bid = -1;
+	do {
+		int bid;
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait cqe failed %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->res && !(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "BUF flag not set %x\n", cqe->flags);
+			return T_EXIT_FAIL;
+		}
+		bid = cqe->flags >> 16;
+		printf("CQE res %d, bid %d, flags %x\n", cqe->res, bid, cqe->flags);
+		if (cqe->res > 0 && last_bid != -1 && last_bid + 1 != bid) {
+			fprintf(stderr, "Got bid %d, wanted %d\n", bid, last_bid + 1);
+			return T_EXIT_FAIL;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			io_uring_cqe_seen(&ring, cqe);
+			break;
+		}
+
+		last_bid = bid;
+		io_uring_cqe_seen(&ring, cqe);
+	} while (1);
+
+	io_uring_free_buf_ring(&ring, br, NR_BUFS, BUF_BGID);
+	io_uring_queue_exit(&ring);
+	free(buf);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc == 1)
+		return T_EXIT_SKIP;
+	else if (argc > 2)
+		return T_EXIT_SKIP;
+	if (!strcmp(argv[1], "stdin"))
+		return test_stdin();
+	return T_EXIT_SKIP;
+}
diff --git a/test/read-mshot.c b/test/read-mshot.c
new file mode 100644
index 0000000..ae47e6b
--- /dev/null
+++ b/test/read-mshot.c
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test multishot read (IORING_OP_READ_MULTISHOT) on pipes,
+ *		using ring provided buffers
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BUF_SIZE	32
+#define BUF_SIZE_FIRST	17
+#define NR_BUFS		64
+#define BUF_BGID	1
+
+#define BR_MASK		(NR_BUFS - 1)
+
+#define NR_OVERFLOW	(NR_BUFS / 4)
+
+static int no_buf_ring, no_read_mshot, no_buf_ring_inc;
+
+static void arm_read(struct io_uring *ring, int fd, int use_mshot)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(ring);
+	if (use_mshot) {
+		io_uring_prep_read_multishot(sqe, fd, 0, 0, BUF_BGID);
+	} else {
+		io_uring_prep_read(sqe, fd, NULL, 0, 0);
+		sqe->flags = IOSQE_BUFFER_SELECT;
+		sqe->buf_group = BUF_BGID;
+	}
+
+	io_uring_submit(ring);
+}
+
+static int test_inc(int use_mshot, int flags)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_params p = { };
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int nbytes = 65536;
+	int ret, fds[2], i;
+	char tmp[31];
+	char *buf;
+	void *ptr;
+	int bid = -1;
+	int bid_bytes;
+
+	if (no_buf_ring)
+		return 0;
+
+	p.flags = flags;
+	ret = io_uring_queue_init_params(64, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	if (posix_memalign((void **) &buf, 4096, 65536))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, 32, BUF_BGID, IOU_PBUF_RING_INC, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			no_buf_ring_inc = 1;
+			free(buf);
+			return 0;
+		}
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	ptr = buf;
+	buf = ptr + 65536 - 2048;
+	for (i = 0; i < 32; i++) {
+		io_uring_buf_ring_add(br, buf, 2048, i, 31, i);
+		buf -= 2048;
+	}
+	io_uring_buf_ring_advance(br, 32);
+
+	memset(tmp, 0x5a, sizeof(tmp));
+
+	arm_read(&ring, fds[0], use_mshot);
+
+	bid_bytes = 0;
+	do {
+		int write_size = sizeof(tmp);
+
+		if (write_size > nbytes)
+			write_size = nbytes;
+
+		io_uring_get_events(&ring);
+		ret = io_uring_peek_cqe(&ring, &cqe);
+		if (!ret) {
+			int this_bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+			if (bid == -1) {
+				bid = this_bid;
+			} else if (bid != this_bid) {
+				if (bid_bytes != 2048) {
+					fprintf(stderr, "unexpected bid bytes %d\n",
+						bid_bytes);
+					return 1;
+				}
+				bid = this_bid;
+				bid_bytes = 0;
+			}
+			bid_bytes += cqe->res;
+			nbytes -= cqe->res;
+			if (!(cqe->flags & IORING_CQE_F_MORE))
+				arm_read(&ring, fds[0], use_mshot);
+			io_uring_cqe_seen(&ring, cqe);
+			if (!nbytes)
+				break;
+		}
+		usleep(1000);
+		ret = write(fds[1], tmp, write_size);
+		if (ret < 0) {
+			perror("write");
+			return 1;
+		} else if (ret != write_size) {
+			printf("short write %d\n", ret);
+			return 1;
+		}
+	} while (nbytes);
+
+	if (bid_bytes) {
+		if (bid_bytes != 2048) {
+			fprintf(stderr, "unexpected bid bytes %d\n", bid_bytes);
+			return 1;
+		}
+	}
+
+	io_uring_free_buf_ring(&ring, br, 32, BUF_BGID);
+	io_uring_queue_exit(&ring);
+	free(ptr);
+	close(fds[0]);
+	close(fds[1]);
+	return 0;
+}
+
+static int test_clamp(void)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_params p = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret, fds[2], i;
+	char tmp[32];
+	char *buf;
+	void *ptr;
+
+	ret = io_uring_queue_init_params(4, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	if (posix_memalign((void **) &buf, 4096, NR_BUFS * BUF_SIZE))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			no_buf_ring = 1;
+			return 0;
+		}
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	ptr = buf;
+	io_uring_buf_ring_add(br, buf, 16, 1, BR_MASK, 0);
+	buf += 16;
+	io_uring_buf_ring_add(br, buf, 32, 2, BR_MASK, 1);
+	buf += 32;
+	io_uring_buf_ring_add(br, buf, 32, 3, BR_MASK, 2);
+	buf += 32;
+	io_uring_buf_ring_add(br, buf, 32, 4, BR_MASK, 3);
+	buf += 32;
+	io_uring_buf_ring_advance(br, 4);
+
+	memset(tmp, 0xaa, sizeof(tmp));
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return 1;
+	}
+
+	/* prevent pipe buffer merging */
+	usleep(1000);
+	ret = write(fds[1], tmp, 16);
+
+	usleep(1000);
+	ret = write(fds[1], tmp, sizeof(tmp));
+
+	/* prevent pipe buffer merging */
+	usleep(1000);
+	ret = write(fds[1], tmp, 16);
+
+	usleep(1000);
+	ret = write(fds[1], tmp, sizeof(tmp));
+
+	/*
+	 * We should see a 16 byte completion, then a 32 byte, then a 16 byte,
+	 * and finally a 32 byte again.
+	 */
+	for (i = 0; i < 4; i++) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait cqe failed %d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "cqe res: %d\n", cqe->res);
+			return 1;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "no more cqes\n");
+			return 1;
+		}
+		if (i == 0 || i == 2) {
+			if (cqe->res != 16) {
+				fprintf(stderr, "%d cqe got %d\n", i, cqe->res);
+				return 1;
+			}
+		} else if (i == 1 || i == 3) {
+			if (cqe->res != 32) {
+				fprintf(stderr, "%d cqe got %d\n", i, cqe->res);
+				return 1;
+			}
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	io_uring_free_buf_ring(&ring, br, NR_BUFS, BUF_BGID);
+	io_uring_queue_exit(&ring);
+	free(ptr);
+	return 0;
+}
+
+static int test(int first_good, int async, int overflow, int incremental)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_params p = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret, fds[2], i, start_msg = 0;
+	int br_flags = 0;
+	char tmp[32];
+	void *ptr[NR_BUFS];
+	char *inc_index;
+
+	p.flags = IORING_SETUP_CQSIZE;
+	if (!overflow)
+		p.cq_entries = NR_BUFS + 1;
+	else
+		p.cq_entries = NR_OVERFLOW;
+	ret = io_uring_queue_init_params(1, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (incremental) {
+		if (no_buf_ring_inc)
+			return 0;
+		br_flags |= IOU_PBUF_RING_INC;
+	}
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, br_flags, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			if (incremental) {
+				no_buf_ring_inc = 1;
+				return 0;
+			}
+			no_buf_ring = 1;
+			return 0;
+		}
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	if (!incremental) {
+		for (i = 0; i < NR_BUFS; i++) {
+			unsigned size = i <= 1 ? BUF_SIZE_FIRST : BUF_SIZE;
+			ptr[i] = malloc(size);
+			if (!ptr[i])
+				return 1;
+			io_uring_buf_ring_add(br, ptr[i], size, i + 1, BR_MASK, i);
+		}
+		inc_index = NULL;
+		io_uring_buf_ring_advance(br, NR_BUFS);
+	} else {
+		inc_index = ptr[0] = malloc(NR_BUFS * BUF_SIZE);
+		if (!ptr[0])
+			return 1;
+		memset(inc_index, 0, NR_BUFS * BUF_SIZE);
+		io_uring_buf_ring_add(br, ptr[0], NR_BUFS * BUF_SIZE, 1, BR_MASK, 0);
+		io_uring_buf_ring_advance(br, 1);
+	}
+
+	if (first_good) {
+		sprintf(tmp, "this is buffer %d\n", start_msg++);
+		ret = write(fds[1], tmp, strlen(tmp));
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	/* len == 0 means just use the defined provided buffer length */
+	io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return 1;
+	}
+
+	/* write NR_BUFS + 1, or if first_good is set, NR_BUFS */
+	for (i = 0; i < NR_BUFS + !first_good; i++) {
+		/* prevent pipe buffer merging */
+		usleep(1000);
+		sprintf(tmp, "this is buffer %d\n", i + start_msg);
+		ret = write(fds[1], tmp, strlen(tmp));
+		if (ret != strlen(tmp)) {
+			fprintf(stderr, "write ret %d\n", ret);
+			return 1;
+		}
+	}
+
+	for (i = 0; i < NR_BUFS + 1; i++) {
+		int bid;
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait cqe failed %d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			/* expected failure as we try to read one too many */
+			if (cqe->res == -ENOBUFS && i == NR_BUFS)
+				break;
+			if (!i && cqe->res == -EINVAL) {
+				no_read_mshot = 1;
+				break;
+			}
+			fprintf(stderr, "%d: cqe res %d\n", i, cqe->res);
+			return 1;
+		} else if (i > 9 && cqe->res <= 17) {
+			fprintf(stderr, "truncated message %d %d\n", i, cqe->res);
+			return 1;
+		}
+
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "no buffer selected\n");
+			return 1;
+		}
+		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+		if (incremental && bid != 1) {
+			fprintf(stderr, "bid %d for incremental\n", bid);
+			return 1;
+		}
+		if (incremental && !first_good) {
+			char out_buf[64];
+			sprintf(out_buf, "this is buffer %d\n", i + start_msg);
+			if (strncmp(inc_index, out_buf, strlen(out_buf)))
+				return 1;
+			inc_index += cqe->res;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			/* we expect this on overflow */
+			if (overflow && i >= NR_OVERFLOW)
+				break;
+			fprintf(stderr, "no more cqes\n");
+			return 1;
+		}
+		/* should've overflowed! */
+		if (overflow && i > NR_OVERFLOW) {
+			fprintf(stderr, "Expected overflow!\n");
+			return 1;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	io_uring_free_buf_ring(&ring, br, NR_BUFS, BUF_BGID);
+	io_uring_queue_exit(&ring);
+	if (incremental) {
+		free(ptr[0]);
+	} else {
+		for (i = 0; i < NR_BUFS; i++)
+			free(ptr[i]);
+	}
+	return 0;
+}
+
+static int test_invalid(int async)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_params p = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	char fname[32] = ".mshot.%d.XXXXXX";
+	int ret, fd;
+	char *buf;
+
+	p.flags = IORING_SETUP_CQSIZE;
+	p.cq_entries = NR_BUFS;
+	ret = io_uring_queue_init_params(1, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	fd = mkstemp(fname);
+	if (fd < 0) {
+		perror("mkstemp");
+		return 1;
+	}
+	unlink(fname);
+
+	if (posix_memalign((void **) &buf, 4096, BUF_SIZE))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, 1, BUF_BGID, 0, &ret);
+	if (!br) {
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	io_uring_buf_ring_add(br, buf, BUF_SIZE, 1, BR_MASK, 0);
+	io_uring_buf_ring_advance(br, 1);
+
+	sqe = io_uring_get_sqe(&ring);
+	/* len == 0 means just use the defined provided buffer length */
+	io_uring_prep_read_multishot(sqe, fd, 0, 0, BUF_BGID);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return 1;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return 1;
+	}
+	if (cqe->flags & IORING_CQE_F_MORE) {
+		fprintf(stderr, "MORE flag set unexpected %d\n", cqe->flags);
+		return 1;
+	}
+	if (cqe->res != -EBADFD) {
+		fprintf(stderr, "Got cqe res %d, wanted -EBADFD\n", cqe->res);
+		return 1;
+	}
+
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_free_buf_ring(&ring, br, 1, BUF_BGID);
+	io_uring_queue_exit(&ring);
+	free(buf);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test 0 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_buf_ring || no_read_mshot) {
+		printf("skip\n");
+		return T_EXIT_SKIP;
+	}
+
+	ret = test(0, 1, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test 0 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test 1 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test 0 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 1, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test 0 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test 1 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test 1 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 1, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test 1 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 0 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 0, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 0 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 1, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 1 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(0, 1, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test 0 1 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test 1 0 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test 1 0 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 1, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test 1 1 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 1, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test 1 1 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_invalid(0);
+	if (ret) {
+		fprintf(stderr, "test_invalid 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_invalid(1);
+	if (ret) {
+		fprintf(stderr, "test_invalid 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_clamp();
+	if (ret) {
+		fprintf(stderr, "test_clamp failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(0, 0);
+	if (ret) {
+		fprintf(stderr, "test_inc 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(0, IORING_SETUP_SQPOLL);
+	if (ret) {
+		fprintf(stderr, "test_inc 0 sqpoll failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(0, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "test_inc 0 defer failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(1, 0);
+	if (ret) {
+		fprintf(stderr, "test_inc 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(1, IORING_SETUP_SQPOLL);
+	if (ret) {
+		fprintf(stderr, "test_inc 1 sqpoll failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_inc(1, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN);
+	if (ret) {
+		fprintf(stderr, "test_inc 1 defer failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
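
Beyond the happy path, the cases test() drives are the termination conditions
of a multishot read: completions carry IORING_CQE_F_MORE while the request
stays armed, the request ends with -ENOBUFS once the buffer ring runs dry,
and older kernels may also end it early on CQ overflow. A consumer therefore
re-arms whenever MORE is missing. A sketch of that loop, reusing arm_read()
from the file above, with ring and fd assumed set up as in the test:

    struct io_uring_cqe *cqe;
    int more;

    for (;;) {
        if (io_uring_wait_cqe(&ring, &cqe))
            break;
        if (cqe->res == -ENOBUFS) {
            /* buffer ring ran dry: replenish buffers before re-arming */
        } else if (cqe->res < 0) {
            break;			/* hard error */
        }
        more = cqe->flags & IORING_CQE_F_MORE;
        io_uring_cqe_seen(&ring, cqe);
        if (!more)
            arm_read(&ring, fd, 1);	/* terminated: arm a new request */
    }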
diff --git a/test/read-write.c b/test/read-write.c
index 3951a64..23f1d58 100644
--- a/test/read-write.c
+++ b/test/read-write.c
@@ -15,6 +15,7 @@
 
 #include "helpers.h"
 #include "liburing.h"
+#include "../src/syscall.h"
 
 #define FILE_SIZE	(256 * 1024)
 #define BS		8192
@@ -23,6 +24,7 @@
 static struct iovec *vecs;
 static int no_read;
 static int no_buf_select;
+static int no_buf_copy;
 static int warned;
 
 static int create_nonaligned_buffers(void)
@@ -42,9 +44,9 @@
 	return 0;
 }
 
-static int __test_io(const char *file, struct io_uring *ring, int write,
-		     int buffered, int sqthread, int fixed, int nonvec,
-		     int buf_select, int seq, int exp_len)
+static int _test_io(const char *file, struct io_uring *ring, int write,
+		    int buffered, int sqthread, int fixed, int nonvec,
+		    int buf_select, int seq, int exp_len)
 {
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
@@ -64,7 +66,7 @@
 	if (!buffered)
 		open_flags |= O_DIRECT;
 
-	if (fixed) {
+	if (fixed == 1) {
 		ret = t_register_buffers(ring, vecs, BUFFERS);
 		if (ret == T_SETUP_SKIP)
 			return 0;
@@ -76,6 +78,8 @@
 
 	fd = open(file, open_flags);
 	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return 0;
 		perror("file open");
 		goto err;
 	}
@@ -199,13 +203,6 @@
 		io_uring_cqe_seen(ring, cqe);
 	}
 
-	if (fixed) {
-		ret = io_uring_unregister_buffers(ring);
-		if (ret) {
-			fprintf(stderr, "buffer unreg failed: %d\n", ret);
-			goto err;
-		}
-	}
 	if (sqthread) {
 		ret = io_uring_unregister_files(ring);
 		if (ret) {
@@ -227,6 +224,65 @@
 		close(fd);
 	return 1;
 }
+
+static int __test_io(const char *file, struct io_uring *ring, int write,
+		     int buffered, int sqthread, int fixed, int nonvec,
+		     int buf_select, int seq, int exp_len)
+{
+	int ret;
+
+	ret = _test_io(file, ring, write, buffered, sqthread, fixed, nonvec,
+		       buf_select, seq, exp_len);
+	if (ret)
+		return ret;
+
+	if (fixed) {
+		struct io_uring ring2;
+		int ring_flags = 0;
+
+		if (no_buf_copy)
+			return 0;
+		if (sqthread)
+			ring_flags = IORING_SETUP_SQPOLL;
+		ret = t_create_ring(64, &ring2, ring_flags);
+		if (ret == T_SETUP_SKIP)
+			return 0;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "ring create failed: %d\n", ret);
+			return 1;
+		}
+
+		ret = io_uring_clone_buffers(&ring2, ring);
+		if (ret) {
+			if (ret == -EINVAL) {
+				no_buf_copy = 1;
+				io_uring_queue_exit(&ring2);
+				return 0;
+			}
+			fprintf(stderr, "copy buffers: %d\n", ret);
+			return ret;
+		}
+		ret = _test_io(file, &ring2, write, buffered, sqthread, 2,
+			       nonvec, buf_select, seq, exp_len);
+		if (ret)
+			return ret;
+
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			return ret;
+		}
+		ret = io_uring_unregister_buffers(&ring2);
+		if (ret) {
+			fprintf(stderr, "buffer copy unreg failed: %d\n", ret);
+			return ret;
+		}
+		io_uring_queue_exit(&ring2);
+	}
+
+	return ret;
+}
+
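
__test_io() now runs each fixed-buffer case twice: once on the original ring
and once on a second ring that shares the registered buffer table through
io_uring_clone_buffers(), new in liburing 2.8. Stripped of the test plumbing,
the pattern is roughly this sketch, where src is assumed to already have
buffers registered (e.g. via t_register_buffers() as above):

    struct io_uring dst;
    int ret;

    io_uring_queue_init(8, &dst, 0);
    ret = io_uring_clone_buffers(&dst, &src);
    if (ret == -EINVAL) {
        /* kernel without clone support: fall back or skip */
    }
    /* fixed I/O (read_fixed/write_fixed) now works on either ring */
    io_uring_unregister_buffers(&src);
    io_uring_unregister_buffers(&dst);
    io_uring_queue_exit(&dst);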
 static int test_io(const char *file, int write, int buffered, int sqthread,
 		   int fixed, int nonvec, int exp_len)
 {
@@ -264,6 +320,8 @@
 
 	fd = open(file, O_WRONLY);
 	if (fd < 0) {
+		if (errno == EACCES || errno == EPERM)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -336,6 +394,7 @@
 	if (!(p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED))
 		goto out;
 	io_uring_queue_exit(&ring);
+	free(p);
 	return 1;
 }
 
@@ -462,6 +521,79 @@
 	return 0;
 }
 
+static int test_buf_select_pipe(void)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret, i;
+	int fds[2];
+
+	if (no_buf_select)
+		return 0;
+
+	ret = io_uring_queue_init(64, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = provide_buffers_iovec(&ring, 0);
+	if (ret) {
+		fprintf(stderr, "provide buffers failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = pipe(fds);
+	if (ret) {
+		fprintf(stderr, "pipe failed: %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < 5; i++) {
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_read(sqe, fds[0], NULL, 1 /* max read 1 per go */, -1);
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = 0;
+	}
+	io_uring_submit(&ring);
+
+	ret = write(fds[1], "01234", 5);
+	if (ret != 5) {
+		fprintf(stderr, "pipe write failed %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < 5; i++) {
+		const char *buff;
+
+		if (io_uring_wait_cqe(&ring, &cqe)) {
+			fprintf(stderr, "bad wait %d\n", i);
+			return 1;
+		}
+		if (cqe->res != 1) {
+			fprintf(stderr, "expected read %d\n", cqe->res);
+			return 1;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "no buffer %d\n", cqe->res);
+			return 1;
+		}
+		buff = vecs[cqe->flags >> 16].iov_base;
+		if (*buff != '0' + i) {
+			fprintf(stderr, "%d: expected %c, got %c\n", i, '0' + i, *buff);
+			return 1;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
 static int test_buf_select(const char *filename, int nonvec)
 {
 	struct io_uring_probe *p;
@@ -562,6 +694,53 @@
 	return ret;
 }
 
+static int test_rem_buf_single(int to_rem)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret, expected;
+	int bgid = 1;
+
+	if (no_buf_select)
+		return 0;
+
+	ret = io_uring_queue_init(64, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = provide_buffers_iovec(&ring, bgid);
+	if (ret)
+		return ret;
+
+	expected = (to_rem > BUFFERS) ? BUFFERS : to_rem;
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_remove_buffers(sqe, to_rem, bgid);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return -1;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe=%d\n", ret);
+		return 1;
+	}
+	if (cqe->res != expected) {
+		fprintf(stderr, "cqe->res=%d, expected=%d\n", cqe->res, expected);
+		return 1;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
 static int test_io_link(const char *file)
 {
 	const int nr_links = 100;
@@ -574,6 +753,8 @@
 
 	fd = open(file, O_WRONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return 0;
 		perror("file open");
 		goto err;
 	}
@@ -686,6 +867,7 @@
 			goto err;
 		}
 		io_uring_prep_writev(sqe, fd, &vecs[i], 1, off);
+		io_uring_sqe_set_data64(sqe, i);
 		off += BS;
 	}
 
@@ -701,7 +883,7 @@
 			fprintf(stderr, "wait_cqe=%d\n", ret);
 			goto err;
 		}
-		if (i < 16) {
+		if (cqe->user_data < 16) {
 			if (cqe->res != BS) {
 				fprintf(stderr, "bad write: %d\n", cqe->res);
 				goto err;
@@ -746,6 +928,8 @@
 		t_create_file(fname, FILE_SIZE);
 	}
 
+	signal(SIGXFSZ, SIG_IGN);
+
 	vecs = t_create_buffers(BUFFERS, BS);
 
 	/* if we don't have nonvec read, skip testing that */
@@ -791,6 +975,12 @@
 		goto err;
 	}
 
+	ret = test_buf_select_pipe();
+	if (ret) {
+		fprintf(stderr, "test_buf_select_pipe failed\n");
+		goto err;
+	}
+
 	ret = test_eventfd_read();
 	if (ret) {
 		fprintf(stderr, "test_eventfd_read failed\n");
@@ -798,7 +988,7 @@
 	}
 
 	ret = read_poll_link(fname);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "read_poll_link failed\n");
 		goto err;
 	}
@@ -839,6 +1029,12 @@
 		goto err;
 	}
 
+	if (vecs != NULL) {
+		for (i = 0; i < BUFFERS; i++)
+			free(vecs[i].iov_base);
+	}
+	free(vecs);
+
 	srand((unsigned)time(NULL));
 	if (create_nonaligned_buffers()) {
 		fprintf(stderr, "file creation failed\n");
@@ -866,6 +1062,12 @@
 		}
 	}
 
+	ret = test_rem_buf_single(BUFFERS + 1);
+	if (ret) {
+		fprintf(stderr, "test_rem_buf_single(BUFFERS + 1) failed\n");
+		goto err;
+	}
+
 	if (fname != argv[1])
 		unlink(fname);
 	return 0;
diff --git a/test/recv-msgall-stream.c b/test/recv-msgall-stream.c
index a188cc1..ff9fd2a 100644
--- a/test/recv-msgall-stream.c
+++ b/test/recv-msgall-stream.c
@@ -3,6 +3,7 @@
  * Test MSG_WAITALL for recv/recvmsg and include normal sync versions just
  * for comparison.
  */
+#include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -19,13 +20,11 @@
 
 #define MAX_MSG	128
 
-static int port = 31200;
-
 struct recv_data {
 	pthread_mutex_t mutex;
 	int use_recvmsg;
 	int use_sync;
-	int port;
+	__be16 port;
 };
 
 static int get_conn_sock(struct recv_data *rd, int *sockout)
@@ -36,7 +35,6 @@
 	memset(&saddr, 0, sizeof(saddr));
 	saddr.sin_family = AF_INET;
 	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
-	saddr.sin_port = htons(rd->port);
 
 	sockfd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
 	if (sockfd < 0) {
@@ -48,11 +46,11 @@
 	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
 	setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val));
 
-	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
-	if (ret < 0) {
+	if (t_bind_ephemeral_port(sockfd, &saddr)) {
 		perror("bind");
 		goto err;
 	}
+	rd->port = saddr.sin_port;
 
 	ret = listen(sockfd, 16);
 	if (ret < 0) {
@@ -279,11 +277,6 @@
 	for (i = 0; i < MAX_MSG; i++)
 		buf[i] = i;
 
-	memset(&saddr, 0, sizeof(saddr));
-	saddr.sin_family = AF_INET;
-	saddr.sin_port = htons(rd->port);
-	inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr);
-
 	sockfd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
 	if (sockfd < 0) {
 		perror("socket");
@@ -291,6 +284,11 @@
 	}
 
 	pthread_mutex_lock(&rd->mutex);
+	assert(rd->port != 0);
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = rd->port;
+	inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr);
 
 	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
 	if (ret < 0) {
@@ -319,6 +317,7 @@
 		if (cqe->res == -EINVAL) {
 			fprintf(stdout, "send not supported, skipping\n");
 			close(sockfd);
+			free(buf);
 			return 0;
 		}
 		if (cqe->res != iov.iov_len) {
@@ -330,10 +329,12 @@
 
 	shutdown(sockfd, SHUT_RDWR);
 	close(sockfd);
+	free(buf);
 	return 0;
 err:
 	shutdown(sockfd, SHUT_RDWR);
 	close(sockfd);
+	free(buf);
 	return 1;
 }
 
@@ -351,7 +352,7 @@
 	pthread_mutex_lock(&rd.mutex);
 	rd.use_recvmsg = use_recvmsg;
 	rd.use_sync = use_sync;
-	rd.port = port++;
+	rd.port = 0;
 
 	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
 	if (ret) {
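
t_bind_ephemeral_port() replaces the fixed, incremented port numbers,
eliminating collisions between parallel test runs; the chosen port is
reported back through saddr.sin_port. The conventional implementation of
such a helper is bind-to-zero plus getsockname, sketched here under that
assumption rather than as the helper's actual body:

    socklen_t len = sizeof(saddr);

    saddr.sin_port = 0;		/* let the kernel pick a free port */
    if (bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
        return 1;
    if (getsockname(sockfd, (struct sockaddr *)&saddr, &len) < 0)
        return 1;
    /* saddr.sin_port now holds the chosen port, in network byte order */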
diff --git a/test/recv-msgall.c b/test/recv-msgall.c
index a6f7cfc..0461c15 100644
--- a/test/recv-msgall.c
+++ b/test/recv-msgall.c
@@ -16,22 +16,24 @@
 #include "helpers.h"
 
 #define MAX_MSG	128
-
-#define PORT	10201
 #define HOST	"127.0.0.1"
+static __be16 bind_port;
+struct recv_data {
+	pthread_barrier_t barrier;
+	int use_recvmsg;
+	struct msghdr msg;
+};
 
 static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock,
-		     int use_recvmsg)
+		     struct recv_data *rd)
 {
 	struct sockaddr_in saddr;
 	struct io_uring_sqe *sqe;
 	int sockfd, ret, val;
-	struct msghdr msg = { };
 
 	memset(&saddr, 0, sizeof(saddr));
 	saddr.sin_family = AF_INET;
 	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
-	saddr.sin_port = htons(PORT);
 
 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
 	if (sockfd < 0) {
@@ -42,21 +44,24 @@
 	val = 1;
 	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
 
-	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
-	if (ret < 0) {
+	if (t_bind_ephemeral_port(sockfd, &saddr)) {
 		perror("bind");
 		goto err;
 	}
+	bind_port = saddr.sin_port;
 
 	sqe = io_uring_get_sqe(ring);
-	if (!use_recvmsg) {
+	if (!rd->use_recvmsg) {
 		io_uring_prep_recv(sqe, sockfd, iov->iov_base, iov->iov_len,
 					MSG_WAITALL);
 	} else {
-		msg.msg_namelen = sizeof(struct sockaddr_in);
-		msg.msg_iov = iov;
-		msg.msg_iovlen = 1;
-		io_uring_prep_recvmsg(sqe, sockfd, &msg, MSG_WAITALL);
+		struct msghdr *msg = &rd->msg;
+
+		memset(msg, 0, sizeof(*msg));
+		msg->msg_namelen = sizeof(struct sockaddr_in);
+		msg->msg_iov = iov;
+		msg->msg_iovlen = 1;
+		io_uring_prep_recvmsg(sqe, sockfd, msg, MSG_WAITALL);
 	}
 
 	sqe->user_data = 2;
@@ -103,11 +108,6 @@
 	return 1;
 }
 
-struct recv_data {
-	pthread_mutex_t mutex;
-	int use_recvmsg;
-};
-
 static void *recv_fn(void *data)
 {
 	struct recv_data *rd = data;
@@ -122,20 +122,20 @@
 
 	ret = t_create_ring_params(1, &ring, &p);
 	if (ret == T_SETUP_SKIP) {
-		pthread_mutex_unlock(&rd->mutex);
+		pthread_barrier_wait(&rd->barrier);
 		ret = 0;
 		goto err;
 	} else if (ret < 0) {
-		pthread_mutex_unlock(&rd->mutex);
+		pthread_barrier_wait(&rd->barrier);
 		goto err;
 	}
 
-	ret = recv_prep(&ring, &iov, &sock, rd->use_recvmsg);
+	ret = recv_prep(&ring, &iov, &sock, rd);
 	if (ret) {
 		fprintf(stderr, "recv_prep failed: %d\n", ret);
 		goto err;
 	}
-	pthread_mutex_unlock(&rd->mutex);
+	pthread_barrier_wait(&rd->barrier);
 	ret = do_recv(&ring);
 	close(sock);
 	io_uring_queue_exit(&ring);
@@ -165,18 +165,20 @@
 
 	memset(&saddr, 0, sizeof(saddr));
 	saddr.sin_family = AF_INET;
-	saddr.sin_port = htons(PORT);
+	saddr.sin_port = bind_port;
 	inet_pton(AF_INET, HOST, &saddr.sin_addr);
 
 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
 	if (sockfd < 0) {
 		perror("socket");
+		free(buf);
 		return 1;
 	}
 
 	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
 	if (ret < 0) {
 		perror("connect");
+		free(buf);
 		return 1;
 	}
 
@@ -201,6 +203,7 @@
 		if (cqe->res == -EINVAL) {
 			fprintf(stdout, "send not supported, skipping\n");
 			close(sockfd);
+			free(buf);
 			return 0;
 		}
 		if (cqe->res != iov.iov_len) {
@@ -211,36 +214,34 @@
 	}
 
 	close(sockfd);
+	free(buf);
 	return 0;
 err:
 	close(sockfd);
+	free(buf);
 	return 1;
 }
 
 static int test(int use_recvmsg)
 {
-	pthread_mutexattr_t attr;
 	pthread_t recv_thread;
 	struct recv_data rd;
 	int ret;
 	void *retval;
 
-	pthread_mutexattr_init(&attr);
-	pthread_mutexattr_setpshared(&attr, 1);
-	pthread_mutex_init(&rd.mutex, &attr);
-	pthread_mutex_lock(&rd.mutex);
+	pthread_barrier_init(&rd.barrier, NULL, 2);
 	rd.use_recvmsg = use_recvmsg;
 
 	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
 	if (ret) {
 		fprintf(stderr, "Thread create failed: %d\n", ret);
-		pthread_mutex_unlock(&rd.mutex);
 		return 1;
 	}
 
-	pthread_mutex_lock(&rd.mutex);
+	pthread_barrier_wait(&rd.barrier);
 	do_send();
 	pthread_join(recv_thread, &retval);
+	pthread_barrier_destroy(&rd.barrier);
 	return (intptr_t)retval;
 }
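
The mutex handshake is swapped for a pthread barrier: unlocking a normal
mutex from a thread that does not own it is undefined behavior, while a
two-party barrier expresses the rendezvous directly. In isolation the
synchronization looks like this, with recv_fn, recv_data and do_send as in
the file above:

    struct recv_data rd = { .use_recvmsg = 0 };
    pthread_t thread;

    pthread_barrier_init(&rd.barrier, NULL, 2);	/* two participants */
    pthread_create(&thread, NULL, recv_fn, &rd);
    /* returns only once recv_fn has also waited, i.e. recv is armed */
    pthread_barrier_wait(&rd.barrier);
    do_send();
    pthread_join(thread, NULL);
    pthread_barrier_destroy(&rd.barrier);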
 
diff --git a/test/recv-multishot.c b/test/recv-multishot.c
new file mode 100644
index 0000000..39983e8
--- /dev/null
+++ b/test/recv-multishot.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: MIT
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+#include <assert.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define ENORECVMULTISHOT 9999
+
+enum early_error_t {
+	ERROR_NONE  = 0,
+	ERROR_NOT_ENOUGH_BUFFERS,
+	ERROR_EARLY_CLOSE_SENDER,
+	ERROR_EARLY_CLOSE_RECEIVER,
+	ERROR_EARLY_OVERFLOW,
+	ERROR_EARLY_LAST
+};
+
+struct args {
+	bool stream;
+	bool wait_each;
+	bool recvmsg;
+	enum early_error_t early_error;
+	bool defer;
+};
+
+static int check_sockaddr(struct sockaddr_in *in)
+{
+	struct in_addr expected;
+
+	inet_pton(AF_INET, "127.0.0.1", &expected);
+	if (in->sin_family != AF_INET) {
+		fprintf(stderr, "bad family %d\n", (int)in->sin_family);
+		return -1;
+	}
+	if (memcmp(&expected, &in->sin_addr, sizeof(in->sin_addr))) {
+		char buff[256];
+		const char *addr = inet_ntop(AF_INET, &in->sin_addr, buff, sizeof(buff));
+
+		fprintf(stderr, "unexpected address %s\n", addr ? addr : "INVALID");
+		return -1;
+	}
+	return 0;
+}
+
+static int test(struct args *args)
+{
+	int const N = 8;
+	int const N_BUFFS = N * 64;
+	int const N_CQE_OVERFLOW = 4;
+	int const min_cqes = args->early_error ? 2 : 8;
+	int const NAME_LEN = sizeof(struct sockaddr_storage);
+	int const CONTROL_LEN = CMSG_ALIGN(sizeof(struct sockaddr_storage))
+					+ sizeof(struct cmsghdr);
+	struct io_uring ring;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int fds[2], ret, i, j;
+	int total_sent_bytes = 0, total_recv_bytes = 0, total_dropped_bytes = 0;
+	int send_buff[256];
+	int *sent_buffs[N_BUFFS];
+	int *recv_buffs[N_BUFFS];
+	int *at;
+	struct io_uring_cqe recv_cqe[N_BUFFS];
+	int recv_cqes = 0;
+	bool early_error = false;
+	bool early_error_started = false;
+	struct __kernel_timespec timeout = {
+		.tv_sec = 1,
+	};
+	struct msghdr msg;
+	struct io_uring_params params = { };
+	int n_sqe = 32;
+
+	memset(recv_buffs, 0, sizeof(recv_buffs));
+
+	if (args->defer)
+		params.flags |= IORING_SETUP_SINGLE_ISSUER |
+				IORING_SETUP_DEFER_TASKRUN;
+
+	if (args->early_error == ERROR_EARLY_OVERFLOW) {
+		params.flags |= IORING_SETUP_CQSIZE;
+		params.cq_entries = N_CQE_OVERFLOW;
+		n_sqe = N_CQE_OVERFLOW;
+	}
+
+	ret = io_uring_queue_init_params(n_sqe, &ring, &params);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = t_create_socket_pair(fds, args->stream);
+	if (ret) {
+		fprintf(stderr, "t_create_socket_pair failed: %d\n", ret);
+		return ret;
+	}
+
+	if (!args->stream) {
+		bool val = true;
+
+		/* force some cmsgs to come back to us */
+		ret = setsockopt(fds[0], IPPROTO_IP, IP_RECVORIGDSTADDR, &val,
+				 sizeof(val));
+		if (ret) {
+			fprintf(stderr, "setsockopt failed %d\n", errno);
+			goto cleanup;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(send_buff); i++)
+		send_buff[i] = i;
+
+	for (i = 0; i < ARRAY_SIZE(recv_buffs); i++) {
+		/* prepare some different sized buffers */
+		int buffer_size = (i % 2 == 0 && (args->stream || args->recvmsg)) ? 1 : N;
+
+		buffer_size *= sizeof(int);
+		if (args->recvmsg) {
+			buffer_size +=
+				sizeof(struct io_uring_recvmsg_out) +
+				NAME_LEN +
+				CONTROL_LEN;
+		}
+
+		recv_buffs[i] = malloc(buffer_size);
+
+		if (i > 2 && args->early_error == ERROR_NOT_ENOUGH_BUFFERS)
+			continue;
+
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_provide_buffers(sqe, recv_buffs[i],
+					buffer_size, 1, 7, i);
+		io_uring_sqe_set_data64(sqe, 0x999);
+		memset(recv_buffs[i], 0xcc, buffer_size);
+		ret = io_uring_submit_and_wait_timeout(&ring, &cqe, 1, &timeout, NULL);
+		if (ret < 0) {
+			fprintf(stderr, "provide buffers failed: %d\n", ret);
+			ret = -1;
+			goto cleanup;
+		}
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	if (args->recvmsg) {
+		unsigned int flags = 0;
+
+		if (!args->stream)
+			flags |= MSG_TRUNC;
+
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_namelen = NAME_LEN;
+		msg.msg_controllen = CONTROL_LEN;
+		io_uring_prep_recvmsg_multishot(sqe, fds[0], &msg, flags);
+	} else {
+		io_uring_prep_recv_multishot(sqe, fds[0], NULL, 0, 0);
+	}
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = 7;
+	io_uring_sqe_set_data64(sqe, 1234);
+	io_uring_submit(&ring);
+
+	at = &send_buff[0];
+	total_sent_bytes = 0;
+	for (i = 0; i < N; i++) {
+		int to_send = sizeof(*at) * (i+1);
+
+		total_sent_bytes += to_send;
+		sent_buffs[i] = at;
+		if (send(fds[1], at, to_send, 0) != to_send) {
+			if (early_error_started)
+				break;
+			fprintf(stderr, "send failed %d\n", errno);
+			ret = -1;
+			goto cleanup;
+		}
+
+		if (i == 2) {
+			if (args->early_error == ERROR_EARLY_CLOSE_RECEIVER) {
+				/* allow previous sends to complete */
+				usleep(1000);
+				io_uring_get_events(&ring);
+
+				sqe = io_uring_get_sqe(&ring);
+				io_uring_prep_recv(sqe, fds[0], NULL, 0, 0);
+				io_uring_prep_cancel64(sqe, 1234, 0);
+				io_uring_sqe_set_data64(sqe, 0x888);
+				sqe->flags |= IOSQE_CQE_SKIP_SUCCESS;
+				io_uring_submit(&ring);
+				early_error_started = true;
+
+				/* allow the cancel to complete */
+				usleep(1000);
+				io_uring_get_events(&ring);
+			}
+			if (args->early_error == ERROR_EARLY_CLOSE_SENDER) {
+				early_error_started = true;
+				shutdown(fds[1], SHUT_RDWR);
+				close(fds[1]);
+			}
+		}
+		at += (i+1);
+
+		if (args->wait_each) {
+			ret = io_uring_wait_cqes(&ring, &cqe, 1, &timeout, NULL);
+			if (ret) {
+				fprintf(stderr, "wait_each failed: %d\n", ret);
+				ret = -1;
+				goto cleanup;
+			}
+			while (io_uring_peek_cqe(&ring, &cqe) == 0) {
+				recv_cqe[recv_cqes++] = *cqe;
+				if (cqe->flags & IORING_CQE_F_MORE) {
+					io_uring_cqe_seen(&ring, cqe);
+				} else {
+					early_error = true;
+					io_uring_cqe_seen(&ring, cqe);
+				}
+			}
+			if (early_error)
+				break;
+		}
+	}
+
+	close(fds[1]);
+
+	/* allow sends to finish */
+	usleep(1000);
+
+	if ((args->stream && !early_error) || recv_cqes < min_cqes) {
+		unsigned int to_wait = 1;
+
+		if (recv_cqes < min_cqes)
+			to_wait = min_cqes - recv_cqes;
+		ret = io_uring_wait_cqes(&ring, &cqe, to_wait, &timeout, NULL);
+		if (ret && ret != -ETIME) {
+			fprintf(stderr, "wait final failed: %d\n", ret);
+			ret = -1;
+			goto cleanup;
+		}
+	}
+
+	while (io_uring_peek_cqe(&ring, &cqe) == 0) {
+		recv_cqe[recv_cqes++] = *cqe;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	ret = -1;
+	at = &send_buff[0];
+	if (recv_cqes < min_cqes) {
+		/* multishot recv not supported, or kernel doesn't check
+		 * ->ioprio; skip either way */
+		ret = -ENORECVMULTISHOT;
+		goto cleanup;
+	}
+	for (i = 0; i < recv_cqes; i++) {
+		cqe = &recv_cqe[i];
+
+		bool const is_last = i == recv_cqes - 1;
+
+		/*
+		 * Older kernels could terminate multishot early due to overflow,
+		 * but later ones will not. So discriminate based on the MORE flag.
+		 */
+		bool const early_last = args->early_error == ERROR_EARLY_OVERFLOW &&
+					!args->wait_each &&
+					i >= N_CQE_OVERFLOW &&
+					!(cqe->flags & IORING_CQE_F_MORE);
+
+		bool const should_be_last =
+			(cqe->res <= 0) ||
+			(args->stream && is_last) ||
+			early_last;
+		int *this_recv;
+		int orig_payload_size = cqe->res;
+
+		if (should_be_last) {
+			int used_res = cqe->res;
+
+			if (!is_last) {
+				fprintf(stderr, "not last cqe had error %d\n", i);
+				goto cleanup;
+			}
+
+			switch (args->early_error) {
+			case ERROR_NOT_ENOUGH_BUFFERS:
+				if (cqe->res != -ENOBUFS) {
+					fprintf(stderr,
+						"ERROR_NOT_ENOUGH_BUFFERS: res %d\n", cqe->res);
+					goto cleanup;
+				}
+				break;
+			case ERROR_EARLY_OVERFLOW:
+				if (cqe->res < 0) {
+					fprintf(stderr,
+						"ERROR_EARLY_OVERFLOW: res %d\n", cqe->res);
+					goto cleanup;
+				}
+				break;
+			case ERROR_EARLY_CLOSE_RECEIVER:
+				if (cqe->res != -ECANCELED) {
+					fprintf(stderr,
+						"ERROR_EARLY_CLOSE_RECEIVER: res %d\n", cqe->res);
+					goto cleanup;
+				}
+				break;
+			case ERROR_NONE:
+			case ERROR_EARLY_CLOSE_SENDER:
+				if (args->recvmsg && (cqe->flags & IORING_CQE_F_BUFFER)) {
+					void *buff = recv_buffs[cqe->flags >> 16];
+					struct io_uring_recvmsg_out *o =
+						io_uring_recvmsg_validate(buff, cqe->res, &msg);
+
+					if (!o) {
+						fprintf(stderr, "invalid buff\n");
+						goto cleanup;
+					}
+					if (o->payloadlen != 0) {
+						fprintf(stderr, "expected 0 payloadlen, got %u\n",
+							o->payloadlen);
+						goto cleanup;
+					}
+					used_res = 0;
+				} else if (cqe->res != 0) {
+					fprintf(stderr, "early error: res %d\n", cqe->res);
+					goto cleanup;
+				}
+				break;
+			case ERROR_EARLY_LAST:
+				fprintf(stderr, "bad error_early\n");
+				goto cleanup;
+			}
+
+			if (cqe->res <= 0 && cqe->flags & IORING_CQE_F_BUFFER) {
+				fprintf(stderr, "final BUFFER flag set\n");
+				goto cleanup;
+			}
+
+			if (cqe->flags & IORING_CQE_F_MORE) {
+				fprintf(stderr, "final MORE flag set\n");
+				goto cleanup;
+			}
+
+			if (used_res <= 0)
+				continue;
+		} else {
+			if (!(cqe->flags & IORING_CQE_F_MORE)) {
+				fprintf(stderr, "MORE flag not set\n");
+				goto cleanup;
+			}
+		}
+
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "BUFFER flag not set\n");
+			goto cleanup;
+		}
+
+		this_recv = recv_buffs[cqe->flags >> 16];
+
+		if (args->recvmsg) {
+			struct io_uring_recvmsg_out *o = io_uring_recvmsg_validate(
+				this_recv, cqe->res, &msg);
+
+			if (!o) {
+				fprintf(stderr, "bad recvmsg\n");
+				goto cleanup;
+			}
+			orig_payload_size = o->payloadlen;
+
+			if (!args->stream) {
+				struct cmsghdr *cmsg;
+
+				if (o->namelen < sizeof(struct sockaddr_in)) {
+					fprintf(stderr, "bad addr len %d\n",
+						o->namelen);
+					goto cleanup;
+				}
+				if (check_sockaddr((struct sockaddr_in *)io_uring_recvmsg_name(o)))
+					goto cleanup;
+
+				cmsg = io_uring_recvmsg_cmsg_firsthdr(o, &msg);
+				if (!cmsg ||
+				    cmsg->cmsg_level != IPPROTO_IP ||
+				    cmsg->cmsg_type != IP_RECVORIGDSTADDR) {
+					fprintf(stderr, "bad cmsg\n");
+					goto cleanup;
+				}
+				if (check_sockaddr((struct sockaddr_in *)CMSG_DATA(cmsg)))
+					goto cleanup;
+				cmsg = io_uring_recvmsg_cmsg_nexthdr(o, &msg, cmsg);
+				if (cmsg) {
+					fprintf(stderr, "unexpected extra cmsg\n");
+					goto cleanup;
+				}
+
+			}
+
+			this_recv = (int *)io_uring_recvmsg_payload(o, &msg);
+			cqe->res = io_uring_recvmsg_payload_length(o, cqe->res, &msg);
+			if (o->payloadlen != cqe->res) {
+				if (!(o->flags & MSG_TRUNC)) {
+					fprintf(stderr, "expected truncated flag\n");
+					goto cleanup;
+				}
+				total_dropped_bytes += (o->payloadlen - cqe->res);
+			}
+		}
+
+		total_recv_bytes += cqe->res;
+
+		if (cqe->res % 4 != 0) {
+			/*
+			 * doesn't seem to happen in practice, would need some
+			 * work to remove this requirement
+			 */
+			fprintf(stderr, "unexpectedly aligned buffer cqe->res=%d\n", cqe->res);
+			goto cleanup;
+		}
+
+		/*
+		 * for tcp: check buffer arrived in order
+		 * for tcp: check that buffers arrived in order
+		 * for udp: look up the matching sent buffer based on size
+		if (!args->stream) {
+			int sent_idx = orig_payload_size / sizeof(*at) - 1;
+
+			if (sent_idx < 0 || sent_idx > N) {
+				fprintf(stderr, "Bad sent idx: %d\n", sent_idx);
+				goto cleanup;
+			}
+			at = sent_buffs[sent_idx];
+		}
+		for (j = 0; j < cqe->res / 4; j++) {
+			int sent = *at++;
+			int recv = *this_recv++;
+
+			if (sent != recv) {
+				fprintf(stderr, "recv=%d sent=%d\n", recv, sent);
+				goto cleanup;
+			}
+		}
+	}
+
+	if (args->early_error == ERROR_NONE &&
+	    total_recv_bytes + total_dropped_bytes < total_sent_bytes) {
+		fprintf(stderr,
+			"missing recv: recv=%d dropped=%d sent=%d\n",
+			total_recv_bytes, total_dropped_bytes, total_sent_bytes);
+		goto cleanup;
+	}
+
+	ret = 0;
+cleanup:
+	for (i = 0; i < ARRAY_SIZE(recv_buffs); i++)
+		free(recv_buffs[i]);
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+static int test_enobuf(void)
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqes[16];
+	char buffs[256];
+	int ret, i, fds[2];
+
+	if (t_create_ring(8, &ring, 0) != T_SETUP_OK) {
+		fprintf(stderr, "ring create\n");
+		return -1;
+	}
+
+	ret = t_create_socket_pair(fds, false);
+	if (ret) {
+		fprintf(stderr, "t_create_socket_pair\n");
+		return ret;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	assert(sqe);
+	/* deliberately only 2 provided buffers */
+	io_uring_prep_provide_buffers(sqe, &buffs[0], 1, 2, 0, 0);
+	io_uring_sqe_set_data64(sqe, 0);
+
+	sqe = io_uring_get_sqe(&ring);
+	assert(sqe);
+	io_uring_prep_recv_multishot(sqe, fds[0], NULL, 0, 0);
+	io_uring_sqe_set_data64(sqe, 1);
+	sqe->buf_group = 0;
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 2) {
+		fprintf(stderr, "bad submit %d\n", ret);
+		return -1;
+	}
+	for (i = 0; i < 3; i++) {
+		do {
+			ret = write(fds[1], "?", 1);
+		} while (ret == -1 && errno == EINTR);
+	}
+
+	ret = io_uring_wait_cqes(&ring, &cqes[0], 4, NULL, NULL);
+	if (ret) {
+		fprintf(stderr, "wait cqes\n");
+		return ret;
+	}
+
+	ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 4);
+	if (ret != 4) {
+		fprintf(stderr, "peek batch cqes\n");
+		return -1;
+	}
+
+	/* provide buffers */
+	assert(cqes[0]->user_data == 0);
+	assert(cqes[0]->res == 0);
+
+	/* valid recv */
+	assert(cqes[1]->user_data == 1);
+	assert(cqes[2]->user_data == 1);
+	assert(cqes[1]->res == 1);
+	assert(cqes[2]->res == 1);
+	assert(cqes[1]->flags & (IORING_CQE_F_BUFFER | IORING_CQE_F_MORE));
+	assert(cqes[2]->flags & (IORING_CQE_F_BUFFER | IORING_CQE_F_MORE));
+
+	/* missing buffer */
+	assert(cqes[3]->user_data == 1);
+	assert(cqes[3]->res == -ENOBUFS);
+	assert(!(cqes[3]->flags & (IORING_CQE_F_BUFFER | IORING_CQE_F_MORE)));
+
+	close(fds[0]);
+	close(fds[1]);
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+	int loop;
+	int early_error = 0;
+	bool has_defer;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	has_defer = t_probe_defer_taskrun();
+
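+	/*
+	 * Walk all 16 combinations of the four test knobs, one bit of
+	 * 'loop' per knob: stream vs datagram socket, waiting for each
+	 * completion, recvmsg vs recv, and DEFER_TASKRUN.
+	 */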
+	for (loop = 0; loop < 16; loop++) {
+		struct args a = {
+			.stream = loop & 0x01,
+			.wait_each = loop & 0x2,
+			.recvmsg = loop & 0x04,
+			.defer = loop & 0x08,
+		};
+		if (a.defer && !has_defer)
+			continue;
+		for (early_error = 0; early_error < ERROR_EARLY_LAST; early_error++) {
+			a.early_error = (enum early_error_t)early_error;
+			ret = test(&a);
+			if (ret) {
+				if (ret == -ENORECVMULTISHOT) {
+					if (loop == 0)
+						return T_EXIT_SKIP;
+					fprintf(stderr,
+						"ENORECVMULTISHOT received but loop>0\n");
+				}
+				fprintf(stderr,
+					"test stream=%d wait_each=%d recvmsg=%d early_error=%d "
+					" defer=%d failed\n",
+					a.stream, a.wait_each, a.recvmsg, a.early_error, a.defer);
+				return T_EXIT_FAIL;
+			}
+		}
+	}
+
+	ret = test_enobuf();
+	if (ret) {
+		fprintf(stderr, "test_enobuf() failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/recvsend_bundle-inc.c b/test/recvsend_bundle-inc.c
new file mode 100644
index 0000000..f48e26f
--- /dev/null
+++ b/test/recvsend_bundle-inc.c
@@ -0,0 +1,680 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Simple test case exercising send and recv bundles with incremental
+ * buffer ring usage
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define MSG_SIZE 128
+#define NR_MIN_MSGS	4
+#define NR_MAX_MSGS	32
+#define SEQ_SIZE	(MSG_SIZE / sizeof(unsigned long))
+
+static int nr_msgs;
+
+#define RECV_BIDS	8192
+#define RECV_BID_MASK	(RECV_BIDS - 1)
+
+#include <liburing.h>
+
+enum t_test_result {
+	T_EXIT_PASS   = 0,
+	T_EXIT_FAIL   = 1,
+	T_EXIT_SKIP   = 77,
+};
+
+#define PORT	10202
+#define HOST	"127.0.0.1"
+
+static int use_port = PORT;
+
+#define SEND_BGID	7
+#define RECV_BGID	8
+
+static int no_send_mshot;
+
+struct recv_data {
+	pthread_barrier_t connect;
+	pthread_barrier_t startup;
+	pthread_barrier_t barrier;
+	pthread_barrier_t finish;
+	unsigned long seq;
+	int recv_bytes;
+	int accept_fd;
+	int abort;
+	unsigned int max_sends;
+	int to_eagain;
+	void *recv_buf;
+
+	int send_bundle;
+	int recv_bundle;
+};
+
+static int arm_recv(struct io_uring *ring, struct recv_data *rd)
+{
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_recv_multishot(sqe, rd->accept_fd, NULL, 0, 0);
+	if (rd->recv_bundle)
+		sqe->ioprio |= IORING_RECVSEND_BUNDLE;
+	sqe->buf_group = RECV_BGID;
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->user_data = 2;
+
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit failed: %d\n", ret);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int recv_prep(struct io_uring *ring, struct recv_data *rd, int *sock)
+{
+	struct sockaddr_in saddr;
+	int sockfd, ret, val, use_fd;
+	socklen_t socklen;
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+	saddr.sin_port = htons(use_port);
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	val = 1;
+	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+
+	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("bind");
+		goto err;
+	}
+
+	ret = listen(sockfd, 1);
+	if (ret < 0) {
+		perror("listen");
+		goto err;
+	}
+
+	pthread_barrier_wait(&rd->connect);
+
+	socklen = sizeof(saddr);
+	use_fd = accept(sockfd, (struct sockaddr *)&saddr, &socklen);
+	if (use_fd < 0) {
+		perror("accept");
+		goto err;
+	}
+
+	rd->accept_fd = use_fd;
+	pthread_barrier_wait(&rd->startup);
+	pthread_barrier_wait(&rd->barrier);
+
+	if (arm_recv(ring, rd))
+		goto err;
+
+	*sock = sockfd;
+	return 0;
+err:
+	close(sockfd);
+	return 1;
+}
+
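+/*
+ * The sender fills every message with a monotonically increasing sequence
+ * of unsigned longs, so verification is a straight walk over the received
+ * bytes against a running sequence counter.
+ */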
+static int verify_seq(struct recv_data *rd, void *verify_ptr, int verify_sz,
+		      int start_bid)
+{
+	unsigned long *seqp;
+	int seq_size = verify_sz / sizeof(unsigned long);
+	int i;
+
+	seqp = verify_ptr;
+	for (i = 0; i < seq_size; i++) {
+		if (rd->seq != *seqp) {
+			fprintf(stderr, "bid=%d, got seq %lu, wanted %lu, offset %d\n", start_bid, *seqp, rd->seq, i);
+			return 0;
+		}
+		seqp++;
+		rd->seq++;
+	}
+
+	return 1;
+}
+
+static int recv_get_cqe(struct io_uring *ring, struct recv_data *rd,
+			struct io_uring_cqe **cqe)
+{
+	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000LL };
+	int ret;
+
+	do {
+		ret = io_uring_wait_cqe_timeout(ring, cqe, &ts);
+		if (!ret)
+			return 0;
+		if (ret == -ETIME) {
+			if (rd->abort)
+				break;
+			continue;
+		}
+		fprintf(stderr, "wait recv: %d\n", ret);
+		break;
+	} while (1);
+
+	return 1;
+}
+
+static int do_recv(struct io_uring *ring, struct recv_data *rd)
+{
+	struct io_uring_cqe *cqe;
+	void *verify_ptr;
+	int verify_sz = 0;
+	int verify_bid = 0;
+	int bid;
+
+	verify_ptr = malloc(rd->recv_bytes);
+
+	do {
+		if (recv_get_cqe(ring, rd, &cqe))
+			break;
+		if (cqe->res == -EINVAL) {
+			fprintf(stdout, "recv not supported, skipping\n");
+			return 0;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "failed recv cqe: %d\n", cqe->res);
+			goto err;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "no buffer set in recv\n");
+			goto err;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUF_MORE)) {
+			fprintf(stderr, "CQE_F_BUF_MORE not set\n");
+			goto err;
+		}
+		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+		if (bid != 0) {
+			fprintf(stderr, "got bid %d\n", bid);
+			goto err;
+		}
+		if (!(verify_sz % MSG_SIZE)) {
+			if (!verify_seq(rd, verify_ptr, verify_sz, verify_bid))
+				goto err;
+			verify_bid += verify_sz / MSG_SIZE;
+			verify_bid &= RECV_BID_MASK;
+			verify_sz = 0;
+		} else {
+			memcpy(verify_ptr + verify_sz, rd->recv_buf + (bid * MSG_SIZE), cqe->res);
+			verify_sz += cqe->res;
+		}
+		rd->recv_bytes -= cqe->res;
+		io_uring_cqe_seen(ring, cqe);
+		if (!(cqe->flags & IORING_CQE_F_MORE) && rd->recv_bytes) {
+			if (arm_recv(ring, rd))
+				goto err;
+		}
+	} while (rd->recv_bytes);
+
+	if (verify_sz && !(verify_sz % MSG_SIZE) &&
+	    !verify_seq(rd, verify_ptr, verify_sz, verify_bid))
+		goto err;
+
+	pthread_barrier_wait(&rd->finish);
+	return 0;
+err:
+	pthread_barrier_wait(&rd->finish);
+	return 1;
+}
+
+static void *recv_fn(void *data)
+{
+	struct recv_data *rd = data;
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	struct io_uring_buf_ring *br;
+	void *buf, *ptr;
+	int ret, sock;
+
+	p.cq_entries = 4096;
+	p.flags = IORING_SETUP_CQSIZE;
+	io_uring_queue_init_params(16, &ring, &p);
+
+	ret = 0;
+	if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS))
+		goto err;
+
+	br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID, IOU_PBUF_RING_INC, &ret);
+	if (!br) {
+		fprintf(stderr, "failed setting up recv ring %d\n", ret);
+		goto err;
+	}
+
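+	/*
+	 * Incremental mode: add a single large buffer covering all
+	 * RECV_BIDS messages, which the kernel hands out piecemeal
+	 * rather than needing one ring entry per message.
+	 */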
+	ptr = buf;
+	io_uring_buf_ring_add(br, ptr, MSG_SIZE * RECV_BIDS, 0, RECV_BID_MASK, 0);
+	io_uring_buf_ring_advance(br, 1);
+	rd->recv_buf = buf;
+
+	ret = recv_prep(&ring, rd, &sock);
+	if (ret) {
+		fprintf(stderr, "recv_prep failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = do_recv(&ring, rd);
+
+	close(sock);
+	close(rd->accept_fd);
+	free(buf);
+	io_uring_queue_exit(&ring);
+err:
+	return (void *)(intptr_t)ret;
+}
+
+static int __do_send_bundle(struct recv_data *rd, struct io_uring *ring, int sockfd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int bytes_needed = MSG_SIZE * nr_msgs;
+	int i, ret;
+
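+	/*
+	 * A single bundle send can pick multiple provided buffers from
+	 * SEND_BGID in one go; the loop below accounts for completed
+	 * bytes until the whole payload has been sent.
+	 */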
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send_bundle(sqe, sockfd, 0, 0);
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = SEND_BGID;
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(ring);
+	if (ret != 1)
+		return 1;
+
+	pthread_barrier_wait(&rd->barrier);
+
+	for (i = 0; i < nr_msgs; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait send: %d\n", ret);
+			return 1;
+		}
+		if (!i && cqe->res == -EINVAL) {
+			rd->abort = 1;
+			no_send_mshot = 1;
+			break;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
+			return 1;
+		}
+		bytes_needed -= cqe->res;
+		if (!bytes_needed) {
+			io_uring_cqe_seen(ring, cqe);
+			break;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "expected more, but MORE not set\n");
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int __do_send(struct recv_data *rd, struct io_uring *ring, int sockfd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int bytes_needed = MSG_SIZE * nr_msgs;
+	int i, ret;
+
+	for (i = 0; i < nr_msgs; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
+		sqe->user_data = 10 + i;
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = SEND_BGID;
+
+		ret = io_uring_submit(ring);
+		if (ret != 1)
+			return 1;
+
+		if (!i)
+			pthread_barrier_wait(&rd->barrier);
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "send wait cqe %d\n", ret);
+			return 1;
+		}
+
+		if (!i && cqe->res == -EINVAL) {
+			rd->abort = 1;
+			no_send_mshot = 1;
+			break;
+		}
+		if (cqe->res != MSG_SIZE) {
+			fprintf(stderr, "send failed cqe: %d\n", cqe->res);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
+			return 1;
+		}
+		bytes_needed -= cqe->res;
+		io_uring_cqe_seen(ring, cqe);
+		if (!bytes_needed)
+			break;
+	}
+
+	return 0;
+}
+
+static int do_send(struct recv_data *rd)
+{
+	struct sockaddr_in saddr;
+	struct io_uring ring;
+	unsigned long seq_buf[SEQ_SIZE], send_seq;
+	struct io_uring_params p = { };
+	struct io_uring_buf_ring *br;
+	int sockfd, ret, len, i;
+	socklen_t optlen;
+	void *buf, *ptr;
+
+	ret = io_uring_queue_init_params(16, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return 1;
+	}
+	if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) {
+		no_send_mshot = 1;
+		return 0;
+	}
+
+	if (posix_memalign(&buf, 4096, MSG_SIZE * nr_msgs))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, nr_msgs, SEND_BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			fprintf(stderr, "einval on br setup\n");
+			return 0;
+		}
+		fprintf(stderr, "failed setting up send ring %d\n", ret);
+		return 1;
+	}
+
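+	/* queue one MSG_SIZE provided buffer per message in the send group */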
+	ptr = buf;
+	for (i = 0; i < nr_msgs; i++) {
+		io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, nr_msgs - 1, i);
+		ptr += MSG_SIZE;
+	}
+	io_uring_buf_ring_advance(br, nr_msgs);
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = htons(use_port);
+	inet_pton(AF_INET, HOST, &saddr.sin_addr);
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		goto err2;
+	}
+
+	pthread_barrier_wait(&rd->connect);
+
+	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("connect");
+		goto err;
+	}
+
+	pthread_barrier_wait(&rd->startup);
+
+	optlen = sizeof(len);
+	len = 1024 * MSG_SIZE;
+	setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &len, optlen);
+
+	/* almost fill queue, leave room for one message */
+	send_seq = 0;
+	rd->to_eagain = 0;
+	while (rd->max_sends && rd->max_sends--) {
+		for (i = 0; i < SEQ_SIZE; i++)
+			seq_buf[i] = send_seq++;
+
+		ret = send(sockfd, seq_buf, sizeof(seq_buf), MSG_DONTWAIT);
+		if (ret < 0) {
+			if (errno == EAGAIN) {
+				send_seq -= SEQ_SIZE;
+				break;
+			}
+			perror("send");
+			return 1;
+		} else if (ret != sizeof(seq_buf)) {
+			fprintf(stderr, "short %d send\n", ret);
+			return 1;
+		}
+
+		rd->to_eagain++;
+		rd->recv_bytes += sizeof(seq_buf);
+	}
+
+	ptr = buf;
+	for (i = 0; i < nr_msgs; i++) {
+		unsigned long *pseq = ptr;
+		int j;
+
+		for (j = 0; j < SEQ_SIZE; j++)
+			pseq[j] = send_seq++;
+		ptr += MSG_SIZE;
+	}
+
+	/* prepare more messages, sending with bundle */
+	rd->recv_bytes += (nr_msgs * MSG_SIZE);
+	if (rd->send_bundle)
+		ret = __do_send_bundle(rd, &ring, sockfd);
+	else
+		ret = __do_send(rd, &ring, sockfd);
+	if (ret)
+		goto err;
+
+	pthread_barrier_wait(&rd->finish);
+
+	close(sockfd);
+	free(buf);
+	io_uring_queue_exit(&ring);
+	return 0;
+
+err:
+	close(sockfd);
+err2:
+	io_uring_queue_exit(&ring);
+	pthread_barrier_wait(&rd->finish);
+	return 1;
+}
+
+static int test(int backlog, unsigned int max_sends, int *to_eagain,
+		int send_bundle, int recv_bundle)
+{
+	pthread_t recv_thread;
+	struct recv_data rd;
+	int ret;
+	void *retval;
+
+	memset(&rd, 0, sizeof(rd));
+	pthread_barrier_init(&rd.connect, NULL, 2);
+	pthread_barrier_init(&rd.startup, NULL, 2);
+	pthread_barrier_init(&rd.barrier, NULL, 2);
+	pthread_barrier_init(&rd.finish, NULL, 2);
+	rd.max_sends = max_sends;
+	if (to_eagain)
+		*to_eagain = 0;
+
+	rd.send_bundle = send_bundle;
+	rd.recv_bundle = recv_bundle;
+
+	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
+	if (ret) {
+		fprintf(stderr, "Thread create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = do_send(&rd);
+	if (no_send_mshot)
+		return 0;
+
+	if (ret)
+		return ret;
+
+	pthread_join(recv_thread, &retval);
+	if (to_eagain)
+		*to_eagain = rd.to_eagain;
+	return (intptr_t)retval;
+}
+
+static int run_tests(void)
+{
+	int ret, eagain_hit;
+
+	nr_msgs = NR_MIN_MSGS;
+
+	/* test basic send bundle first */
+	ret = test(0, 0, NULL, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test a failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_send_mshot)
+		return T_EXIT_SKIP;
+
+	/* test recv bundle */
+	ret = test(0, 0, NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test b failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling recv and send */
+	ret = test(0, 0, NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test c failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling with full socket */
+	ret = test(1, 1000000, &eagain_hit, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test d failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test e failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test recv bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test f failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test send bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test g failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* now repeat the last three tests, but with > UIO_FASTIOV segments */
+	nr_msgs = NR_MAX_MSGS;
+
+	/* test bundling with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test h failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test recv bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test i failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test send bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test j failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int test_tcp(void)
+{
+	int ret;
+
+	ret = run_tests();
+	if (ret == T_EXIT_FAIL)
+		fprintf(stderr, "TCP test case failed\n");
+	return ret;
+}
+
+static bool has_pbuf_ring_inc(void)
+{
+	struct io_uring_buf_ring *br;
+	bool has_pbuf_inc = false;
+	struct io_uring ring;
+	void *buf;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret)
+		return false;
+
+	if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS))
+		return false;
+
+	br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID, IOU_PBUF_RING_INC, &ret);
+	if (br) {
+		has_pbuf_inc = true;
+		io_uring_unregister_buf_ring(&ring, RECV_BGID);
+	}
+	io_uring_queue_exit(&ring);
+	free(buf);
+	return has_pbuf_inc;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+	if (!has_pbuf_ring_inc())
+		return T_EXIT_SKIP;
+
+	ret = test_tcp();
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	return T_EXIT_PASS;
+}
diff --git a/test/recvsend_bundle.c b/test/recvsend_bundle.c
new file mode 100644
index 0000000..6caee75
--- /dev/null
+++ b/test/recvsend_bundle.c
@@ -0,0 +1,754 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Simple test case exercising send and recv bundles
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define MSG_SIZE 128
+#define NR_MIN_MSGS	4
+#define NR_MAX_MSGS	32
+#define SEQ_SIZE	(MSG_SIZE / sizeof(unsigned long))
+
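+/*
+ * Each test pass is run over both TCP and UDP, and with both ring-mapped
+ * and classic provided buffers; these globals select the active variant.
+ */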
+static int nr_msgs;
+static int use_tcp;
+static int classic_buffers;
+
+#define RECV_BIDS	8192
+#define RECV_BID_MASK	(RECV_BIDS - 1)
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define PORT	10202
+#define HOST	"127.0.0.1"
+
+static int use_port = PORT;
+
+#define SEND_BGID	7
+#define RECV_BGID	8
+
+static int no_send_mshot;
+
+struct recv_data {
+	pthread_barrier_t connect;
+	pthread_barrier_t startup;
+	pthread_barrier_t barrier;
+	pthread_barrier_t finish;
+	unsigned long seq;
+	int recv_bytes;
+	int accept_fd;
+	int abort;
+	unsigned int max_sends;
+	int to_eagain;
+	void *recv_buf;
+
+	int send_bundle;
+	int recv_bundle;
+};
+
+static int arm_recv(struct io_uring *ring, struct recv_data *rd)
+{
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_recv_multishot(sqe, rd->accept_fd, NULL, 0, 0);
+	if (rd->recv_bundle && use_tcp)
+		sqe->ioprio |= IORING_RECVSEND_BUNDLE;
+	sqe->buf_group = RECV_BGID;
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->user_data = 2;
+
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit failed: %d\n", ret);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int recv_prep(struct io_uring *ring, struct recv_data *rd, int *sock)
+{
+	struct sockaddr_in saddr;
+	int sockfd, ret, val, use_fd;
+	socklen_t socklen;
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+	saddr.sin_port = htons(use_port);
+
+	if (use_tcp)
+		sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	else
+		sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	val = 1;
+	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+
+	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("bind");
+		goto err;
+	}
+
+	if (use_tcp) {
+		ret = listen(sockfd, 1);
+		if (ret < 0) {
+			perror("listen");
+			goto err;
+		}
+
+		pthread_barrier_wait(&rd->connect);
+
+		if (rd->abort)
+			goto err;
+
+		socklen = sizeof(saddr);
+		use_fd = accept(sockfd, (struct sockaddr *)&saddr, &socklen);
+		if (use_fd < 0) {
+			perror("accept");
+			goto err;
+		}
+	} else {
+		use_fd = sockfd;
+		pthread_barrier_wait(&rd->connect);
+	}
+
+	rd->accept_fd = use_fd;
+	pthread_barrier_wait(&rd->startup);
+	pthread_barrier_wait(&rd->barrier);
+
+	if (arm_recv(ring, rd))
+		goto err;
+
+	*sock = sockfd;
+	return 0;
+err:
+	close(sockfd);
+	return 1;
+}
+
+static int verify_seq(struct recv_data *rd, void *verify_ptr, int verify_sz,
+		      int start_bid)
+{
+	unsigned long *seqp;
+	int seq_size = verify_sz / sizeof(unsigned long);
+	int i;
+
+	seqp = verify_ptr;
+	for (i = 0; i < seq_size; i++) {
+		if (rd->seq != *seqp) {
+			fprintf(stderr, "bid=%d, got seq %lu, wanted %lu, offset %d\n", start_bid, *seqp, rd->seq, i);
+			return 0;
+		}
+		seqp++;
+		rd->seq++;
+	}
+
+	return 1;
+}
+
+static int recv_get_cqe(struct io_uring *ring, struct recv_data *rd,
+			struct io_uring_cqe **cqe)
+{
+	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000LL };
+	int ret;
+
+	do {
+		ret = io_uring_wait_cqe_timeout(ring, cqe, &ts);
+		if (!ret)
+			return 0;
+		if (ret == -ETIME) {
+			if (rd->abort)
+				break;
+			continue;
+		}
+		fprintf(stderr, "wait recv: %d\n", ret);
+		break;
+	} while (1);
+
+	return 1;
+}
+
+static int do_recv(struct io_uring *ring, struct recv_data *rd)
+{
+	struct io_uring_cqe *cqe;
+	int bid, next_bid = 0;
+	void *verify_ptr;
+	int verify_sz = 0;
+	int verify_bid = 0;
+
+	verify_ptr = malloc(rd->recv_bytes);
+
+	do {
+		if (recv_get_cqe(ring, rd, &cqe))
+			break;
+		if (cqe->res == -EINVAL) {
+			fprintf(stdout, "recv not supported, skipping\n");
+			return 0;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "failed recv cqe: %d\n", cqe->res);
+			goto err;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "no buffer set in recv\n");
+			goto err;
+		}
+		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+		if (bid != next_bid) {
+			fprintf(stderr, "got bid %d, wanted %d\n", bid, next_bid);
+			goto err;
+		}
+		if (!rd->recv_bundle && cqe->res > MSG_SIZE) {
+			fprintf(stderr, "recv got wrong length: %d\n", cqe->res);
+			goto err;
+		}
+		if (!(verify_sz % MSG_SIZE)) {
+			if (!verify_seq(rd, verify_ptr, verify_sz, verify_bid))
+				goto err;
+			verify_bid += verify_sz / MSG_SIZE;
+			verify_bid &= RECV_BID_MASK;
+			verify_sz = 0;
+		} else {
+			memcpy(verify_ptr + verify_sz, rd->recv_buf + (bid * MSG_SIZE), cqe->res);
+			verify_sz += cqe->res;
+		}
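+		/*
+		 * A bundle may span several buffers: advance the expected
+		 * bid by however many MSG_SIZE buffers this completion
+		 * consumed, wrapping at the ring size.
+		 */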
+		next_bid = bid + ((cqe->res + MSG_SIZE - 1) / MSG_SIZE);
+		next_bid &= RECV_BID_MASK;
+		rd->recv_bytes -= cqe->res;
+		io_uring_cqe_seen(ring, cqe);
+		if (!(cqe->flags & IORING_CQE_F_MORE) && rd->recv_bytes) {
+			if (arm_recv(ring, rd))
+				goto err;
+		}
+	} while (rd->recv_bytes);
+
+	if (verify_sz && !(verify_sz % MSG_SIZE) &&
+	    !verify_seq(rd, verify_ptr, verify_sz, verify_bid))
+		goto err;
+
+	pthread_barrier_wait(&rd->finish);
+	return 0;
+err:
+	pthread_barrier_wait(&rd->finish);
+	return 1;
+}
+
+static int provide_classic_buffers(struct io_uring *ring, void *buf, int nbufs, int bgid)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_provide_buffers(sqe, buf, MSG_SIZE, nbufs, bgid, 0);
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "provide buffer wait: %d\n", ret);
+		return 1;
+	}
+	if (cqe->res) {
+		fprintf(stderr, "provide buffers fail: %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+}
+
+static void *recv_fn(void *data)
+{
+	struct recv_data *rd = data;
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	struct io_uring_buf_ring *br;
+	void *buf = NULL, *ptr;
+	int ret, sock, i;
+
+	p.cq_entries = 4096;
+	p.flags = IORING_SETUP_CQSIZE;
+	ret = t_create_ring_params(16, &ring, &p);
+	if (ret == T_SETUP_SKIP) {
+		ret = 0;
+		goto err;
+	} else if (ret < 0) {
+		goto err;
+	}
+
+	if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS))
+		goto err;
+
+	if (!classic_buffers) {
+		br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID, 0, &ret);
+		if (!br) {
+			if (ret != -EINVAL)
+				fprintf(stderr, "failed setting up recv ring %d\n", ret);
+			goto err;
+		}
+
+		ptr = buf;
+		for (i = 0; i < RECV_BIDS; i++) {
+			io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, RECV_BID_MASK, i);
+			ptr += MSG_SIZE;
+		}
+		io_uring_buf_ring_advance(br, RECV_BIDS);
+		rd->recv_buf = buf;
+	} else {
+		ret = provide_classic_buffers(&ring, buf, RECV_BIDS, RECV_BGID);
+		if (ret) {
+			fprintf(stderr, "failed providing classic buffers\n");
+			goto err;
+		}
+	}
+
+	ret = recv_prep(&ring, rd, &sock);
+	if (ret) {
+		fprintf(stderr, "recv_prep failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = do_recv(&ring, rd);
+
+	close(sock);
+	close(rd->accept_fd);
+	io_uring_queue_exit(&ring);
+err:
+	free(buf);
+	return (void *)(intptr_t)ret;
+}
+
+static int __do_send_bundle(struct recv_data *rd, struct io_uring *ring, int sockfd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int bytes_needed = MSG_SIZE * nr_msgs;
+	int i, ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send_bundle(sqe, sockfd, 0, 0);
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = SEND_BGID;
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(ring);
+	if (ret != 1)
+		return 1;
+
+	pthread_barrier_wait(&rd->barrier);
+
+	for (i = 0; i < nr_msgs; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait send: %d\n", ret);
+			return 1;
+		}
+		if (!i && cqe->res == -EINVAL) {
+			rd->abort = 1;
+			no_send_mshot = 1;
+			break;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
+			return 1;
+		}
+		bytes_needed -= cqe->res;
+		if (!bytes_needed) {
+			io_uring_cqe_seen(ring, cqe);
+			break;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "expected more, but MORE not set\n");
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int __do_send(struct recv_data *rd, struct io_uring *ring, int sockfd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int bytes_needed = MSG_SIZE * nr_msgs;
+	int i, ret;
+
+	for (i = 0; i < nr_msgs; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
+		sqe->user_data = 10 + i;
+		sqe->flags |= IOSQE_BUFFER_SELECT;
+		sqe->buf_group = SEND_BGID;
+
+		ret = io_uring_submit(ring);
+		if (ret != 1)
+			return 1;
+
+		if (!i)
+			pthread_barrier_wait(&rd->barrier);
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "send wait cqe %d\n", ret);
+			return 1;
+		}
+
+		if (!i && cqe->res == -EINVAL) {
+			rd->abort = 1;
+			no_send_mshot = 1;
+			break;
+		}
+		if (cqe->res != MSG_SIZE) {
+			fprintf(stderr, "send failed cqe: %d\n", cqe->res);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
+			return 1;
+		}
+		bytes_needed -= cqe->res;
+		io_uring_cqe_seen(ring, cqe);
+		if (!bytes_needed)
+			break;
+	}
+
+	return 0;
+}
+
+static int do_send(struct recv_data *rd)
+{
+	struct sockaddr_in saddr;
+	struct io_uring ring;
+	unsigned long seq_buf[SEQ_SIZE], send_seq;
+	struct io_uring_params p = { };
+	struct io_uring_buf_ring *br;
+	int sockfd, ret, len, i;
+	socklen_t optlen;
+	void *buf = NULL, *ptr;
+
+	ret = io_uring_queue_init_params(16, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return 1;
+	}
+	if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) {
+		rd->abort = 1;
+		no_send_mshot = 1;
+		pthread_barrier_wait(&rd->connect);
+		return 0;
+	}
+
+	if (posix_memalign(&buf, 4096, MSG_SIZE * nr_msgs))
+		return 1;
+
+	if (!classic_buffers) {
+		br = io_uring_setup_buf_ring(&ring, nr_msgs, SEND_BGID, 0, &ret);
+		if (!br) {
+			if (ret == -EINVAL) {
+				fprintf(stderr, "einval on br setup\n");
+				return 0;
+			}
+			fprintf(stderr, "failed setting up send ring %d\n", ret);
+			return 1;
+		}
+
+		ptr = buf;
+		for (i = 0; i < nr_msgs; i++) {
+			io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, nr_msgs - 1, i);
+			ptr += MSG_SIZE;
+		}
+		io_uring_buf_ring_advance(br, nr_msgs);
+	} else {
+		ret = provide_classic_buffers(&ring, buf, nr_msgs, SEND_BGID);
+		if (ret) {
+			fprintf(stderr, "failed providing classic buffers\n");
+			return ret;
+		}
+	}
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = htons(use_port);
+	inet_pton(AF_INET, HOST, &saddr.sin_addr);
+
+	if (use_tcp)
+		sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	else
+		sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sockfd < 0) {
+		perror("socket");
+		goto err2;
+	}
+
+	pthread_barrier_wait(&rd->connect);
+
+	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		perror("connect");
+		goto err;
+	}
+
+	pthread_barrier_wait(&rd->startup);
+
+	optlen = sizeof(len);
+	len = 1024 * MSG_SIZE;
+	setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &len, optlen);
+
+	/* almost fill queue, leave room for one message */
+	send_seq = 0;
+	rd->to_eagain = 0;
+	while (rd->max_sends && rd->max_sends--) {
+		for (i = 0; i < SEQ_SIZE; i++)
+			seq_buf[i] = send_seq++;
+
+		ret = send(sockfd, seq_buf, sizeof(seq_buf), MSG_DONTWAIT);
+		if (ret < 0) {
+			if (errno == EAGAIN) {
+				send_seq -= SEQ_SIZE;
+				break;
+			}
+			perror("send");
+			return 1;
+		} else if (ret != sizeof(seq_buf)) {
+			fprintf(stderr, "short %d send\n", ret);
+			return 1;
+		}
+
+		rd->to_eagain++;
+		rd->recv_bytes += sizeof(seq_buf);
+	}
+
+	ptr = buf;
+	for (i = 0; i < nr_msgs; i++) {
+		unsigned long *pseq = ptr;
+		int j;
+
+		for (j = 0; j < SEQ_SIZE; j++)
+			pseq[j] = send_seq++;
+		ptr += MSG_SIZE;
+	}
+
+	/* prepare more messages, sending with bundle */
+	rd->recv_bytes += (nr_msgs * MSG_SIZE);
+	if (rd->send_bundle && use_tcp)
+		ret = __do_send_bundle(rd, &ring, sockfd);
+	else
+		ret = __do_send(rd, &ring, sockfd);
+	if (ret)
+		goto err;
+
+	pthread_barrier_wait(&rd->finish);
+
+	close(sockfd);
+	io_uring_queue_exit(&ring);
+	free(buf);
+	return 0;
+
+err:
+	close(sockfd);
+err2:
+	io_uring_queue_exit(&ring);
+	pthread_barrier_wait(&rd->finish);
+	free(buf);
+	return 1;
+}
+
+static int test(int backlog, unsigned int max_sends, int *to_eagain,
+		int send_bundle, int recv_bundle)
+{
+	pthread_t recv_thread;
+	struct recv_data rd;
+	int ret;
+	void *retval;
+
+	/* backlog not reliable on UDP, skip it */
+	if ((backlog || max_sends) && !use_tcp)
+		return T_EXIT_PASS;
+
+	memset(&rd, 0, sizeof(rd));
+	pthread_barrier_init(&rd.connect, NULL, 2);
+	pthread_barrier_init(&rd.startup, NULL, 2);
+	pthread_barrier_init(&rd.barrier, NULL, 2);
+	pthread_barrier_init(&rd.finish, NULL, 2);
+	rd.max_sends = max_sends;
+	if (to_eagain)
+		*to_eagain = 0;
+
+	rd.send_bundle = send_bundle;
+	rd.recv_bundle = recv_bundle;
+
+	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
+	if (ret) {
+		fprintf(stderr, "Thread create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = do_send(&rd);
+	if (no_send_mshot) {
+		fprintf(stderr, "no_send_mshot, aborting (ignore other errors)\n");
+		rd.abort = 1;
+		pthread_join(recv_thread, &retval);
+		return 0;
+	}
+
+	if (ret)
+		return ret;
+
+	pthread_join(recv_thread, &retval);
+	if (to_eagain)
+		*to_eagain = rd.to_eagain;
+	return (intptr_t)retval;
+}
+
+static int run_tests(int is_udp)
+{
+	int ret, eagain_hit;
+
+	nr_msgs = NR_MIN_MSGS;
+
+	/* test basic send bundle first */
+	ret = test(0, 0, NULL, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test a failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_send_mshot)
+		return T_EXIT_SKIP;
+
+	/* test recv bundle */
+	ret = test(0, 0, NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test b failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling recv and send */
+	ret = test(0, 0, NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test c failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling with full socket */
+	ret = test(1, 1000000, &eagain_hit, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test d failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test bundling with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test e failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test recv bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test f failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	if (is_udp)
+		return T_EXIT_PASS;
+
+	/* test send bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test g failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* now repeat the last three tests, but with > UIO_FASTIOV segments */
+	nr_msgs = NR_MAX_MSGS;
+
+	/* test bundling with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test h failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test recv bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test i failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test send bundle with almost full socket */
+	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test j failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int test_tcp(void)
+{
+	int ret;
+
+	use_tcp = 1;
+	ret = run_tests(false);
+	if (ret == T_EXIT_FAIL)
+		fprintf(stderr, "TCP test case (classic=%d) failed\n", classic_buffers);
+	return ret;
+}
+
+static int test_udp(void)
+{
+	int ret;
+
+	use_tcp = 0;
+	use_port++;
+	ret = run_tests(true);
+	if (ret == T_EXIT_FAIL)
+		fprintf(stderr, "UDP test case (classic=%d) failed\n", classic_buffers);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test_tcp();
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	ret = test_udp();
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	classic_buffers = 1;
+
+	ret = test_tcp();
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	ret = test_udp();
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	return T_EXIT_PASS;
+}
diff --git a/test/reg-fd-only.c b/test/reg-fd-only.c
new file mode 100644
index 0000000..65b7b32
--- /dev/null
+++ b/test/reg-fd-only.c
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test io_uring_setup with IORING_SETUP_REGISTERED_FD_ONLY
+ */
+#include <stdio.h>
+
+#include "helpers.h"
+
+#define NORMAL_PAGE_ENTRIES	8
+#define HUGE_PAGE_ENTRIES	512
+
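+/*
+ * With IORING_SETUP_NO_MMAP the ring memory is allocated in userspace
+ * by liburing; HUGE_PAGE_ENTRIES is sized so that allocation needs a
+ * huge page, which is why -ENOMEM is treated as a skip below.
+ */
+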
+static int no_mmap;
+
+static int test_nops(struct io_uring *ring, int sq_size, int nr_nops)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+
+	do {
+		int todo = nr_nops;
+
+		if (todo > sq_size)
+			todo = sq_size;
+
+		for (i = 0; i < todo; i++) {
+			sqe = io_uring_get_sqe(ring);
+			io_uring_prep_nop(sqe);
+		}
+
+		ret = io_uring_submit(ring);
+		if (ret != todo) {
+			fprintf(stderr, "short submit %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		for (i = 0; i < todo; i++) {
+			ret = io_uring_wait_cqe(ring, &cqe);
+			if (ret) {
+				fprintf(stderr, "wait err %d\n", ret);
+				return T_EXIT_FAIL;
+			}
+			io_uring_cqe_seen(ring, cqe);
+		}
+		nr_nops -= todo;
+	} while (nr_nops);
+
+	return T_EXIT_PASS;
+}
+
+static int test(int nentries, int ring_flags)
+{
+	struct io_uring ring;
+	unsigned values[2];
+	int ret;
+
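+	/*
+	 * With REGISTERED_FD_ONLY the ring fd is registered (and not
+	 * exposed as a normal fd) at setup time, so the re-register and
+	 * close attempts below are expected to fail with -EEXIST and
+	 * -EBADF respectively.
+	 */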
+	ret = io_uring_queue_init(nentries, &ring,
+			IORING_SETUP_REGISTERED_FD_ONLY | IORING_SETUP_NO_MMAP |
+			ring_flags);
+	if (ret == -EINVAL) {
+		no_mmap = 1;
+		return T_EXIT_SKIP;
+	} else if (ret == -ENOMEM) {
+		fprintf(stdout, "Enable huge pages to test big rings\n");
+		return T_EXIT_SKIP;
+	} else if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret != -EEXIST) {
+		fprintf(stderr, "registering already-registered ring fd should fail\n");
+		goto err;
+	}
+
+	ret = io_uring_close_ring_fd(&ring);
+	if (ret != -EBADF) {
+		fprintf(stderr, "closing already-closed ring fd should fail\n");
+		goto err;
+	}
+
+	/*
+	 * Test a simple io_uring_register operation expected to work.
+	 * io_uring_register_iowq_max_workers is arbitrary.
+	 */
+	values[0] = values[1] = 0;
+	ret = io_uring_register_iowq_max_workers(&ring, values);
+	if (ret || (values[0] == 0 && values[1] == 0)) {
+		fprintf(stderr, "io_uring_register operation failed after closing ring fd\n");
+		goto err;
+	}
+
+	ret = test_nops(&ring, nentries, nentries * 4);
+	if (ret)
+		goto err;
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+
+err:
+	io_uring_queue_exit(&ring);
+	return T_EXIT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	/* test single normal page */
+	ret = test(NORMAL_PAGE_ENTRIES, 0);
+	if (ret == T_EXIT_SKIP || no_mmap) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 8 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test single normal page, with SQPOLL */
+	ret = test(NORMAL_PAGE_ENTRIES, IORING_SETUP_SQPOLL);
+	if (ret == T_EXIT_SKIP || no_mmap) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 8 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* test with entries requiring a huge page */
+	ret = test(HUGE_PAGE_ENTRIES, 0);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 512 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/reg-hint.c b/test/reg-hint.c
new file mode 100644
index 0000000..b01a096
--- /dev/null
+++ b/test/reg-hint.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test alloc hint sanity after unregistering the file table
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/socket.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	io_uring_queue_init(1, &ring, 0);
+
+	ret = io_uring_register_files_sparse(&ring, 16);
+	if (ret) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+
+		fprintf(stderr, "Failed to register file table: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	io_uring_unregister_files(&ring);
+
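+	/*
+	 * With the file table unregistered, a direct-alloc socket has no
+	 * slots to allocate from and should fail with -ENFILE rather than
+	 * consult a stale alloc hint.
+	 */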
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_socket_direct_alloc(sqe, AF_UNIX, SOCK_DGRAM, 0, 0);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (cqe->res != -ENFILE) {
+		fprintf(stderr, "Bad CQE res: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(&ring, cqe);
+	return T_EXIT_PASS;
+}
diff --git a/test/reg-reg-ring.c b/test/reg-reg-ring.c
new file mode 100644
index 0000000..322208a
--- /dev/null
+++ b/test/reg-reg-ring.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test io_uring_register with a registered ring (IORING_REGISTER_USE_REGISTERED_RING)
+ */
+#include <stdio.h>
+
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	unsigned values[2];
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	if (!(ring.features & IORING_FEAT_REG_REG_RING)) {
+		fprintf(stderr, "IORING_FEAT_REG_REG_RING not available in kernel\n");
+		io_uring_queue_exit(&ring);
+		return T_EXIT_SKIP;
+	}
+
+	ret = io_uring_close_ring_fd(&ring);
+	if (ret != -EINVAL) {
+		fprintf(stderr, "closing ring fd should EINVAL before register\n");
+		goto err;
+	}
+
+	ret = io_uring_unregister_ring_fd(&ring);
+	if (ret != -EINVAL) {
+		fprintf(stderr, "unregistering not-registered ring fd should fail\n");
+		goto err;
+	}
+
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "registering ring fd failed\n");
+		goto err;
+	}
+
+	ret = io_uring_register_ring_fd(&ring);
+	if (ret != -EEXIST) {
+		fprintf(stderr, "registering already-registered ring fd should fail\n");
+		goto err;
+	}
+
+	/*
+	 * Test a simple io_uring_register operation expected to work.
+	 * io_uring_register_iowq_max_workers is arbitrary.
+	 */
+	values[0] = values[1] = 0;
+	ret = io_uring_register_iowq_max_workers(&ring, values);
+	if (ret || (values[0] == 0 && values[1] == 0)) {
+		fprintf(stderr, "io_uring_register operation failed before closing ring fd\n");
+		goto err;
+	}
+
+	ret = io_uring_close_ring_fd(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "closing ring fd failed\n");
+		goto err;
+	}
+
+	values[0] = values[1] = 0;
+	ret = io_uring_register_iowq_max_workers(&ring, values);
+	if (ret || (values[0] == 0 && values[1] == 0)) {
+		fprintf(stderr, "io_uring_register operation failed after closing ring fd\n");
+		goto err;
+	}
+
+	ret = io_uring_close_ring_fd(&ring);
+	if (ret != -EBADF) {
+		fprintf(stderr, "closing already-closed ring fd should fail\n");
+		goto err;
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+
+err:
+	io_uring_queue_exit(&ring);
+	return T_EXIT_FAIL;
+}
diff --git a/test/regbuf-clone.c b/test/regbuf-clone.c
new file mode 100644
index 0000000..59b99e1
--- /dev/null
+++ b/test/regbuf-clone.c
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test buffer cloning between rings
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/uio.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define NR_VECS		64
+#define BUF_SIZE	8192
+
+static int no_buf_clone;
+
+static int test(int reg_src, int reg_dst)
+{
+	struct iovec vecs[NR_VECS];
+	struct io_uring src, dst;
+	int ret, i;
+
+	ret = io_uring_queue_init(1, &src, 0);
+	if (ret) {
+		fprintf(stderr, "ring_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(1, &dst, 0);
+	if (ret) {
+		fprintf(stderr, "ring_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (reg_src) {
+		ret = io_uring_register_ring_fd(&src);
+		if (ret < 0) {
+			if (ret == -EINVAL)
+				return T_EXIT_SKIP;
+			fprintf(stderr, "register ring: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+	if (reg_dst) {
+		ret = io_uring_register_ring_fd(&dst);
+		if (ret < 0) {
+			if (ret == -EINVAL)
+				return T_EXIT_SKIP;
+			fprintf(stderr, "register ring: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+	}
+
+	/* test fail with no buffers in src */
+	ret = io_uring_clone_buffers(&dst, &src);
+	if (ret == -EINVAL) {
+		/* no buffer copy support */
+		no_buf_clone = true;
+		return T_EXIT_SKIP;
+	} else if (ret != -ENXIO) {
+		fprintf(stderr, "empty copy: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < NR_VECS; i++) {
+		if (posix_memalign(&vecs[i].iov_base, 4096, BUF_SIZE))
+			return T_EXIT_FAIL;
+		vecs[i].iov_len = BUF_SIZE;
+	}
+
+	ret = io_uring_register_buffers(&src, vecs, NR_VECS);
+	if (ret < 0) {
+		if (ret == -ENOMEM)
+			return T_EXIT_SKIP;
+		return T_EXIT_FAIL;
+	}
+
+	/* copy should work now */
+	ret = io_uring_clone_buffers(&dst, &src);
+	if (ret) {
+		fprintf(stderr, "buffer copy: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* try copy again, should get -EBUSY */
+	ret = io_uring_clone_buffers(&dst, &src);
+	if (ret != -EBUSY) {
+		fprintf(stderr, "busy copy: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&dst);
+	if (ret) {
+		fprintf(stderr, "dst unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&dst);
+	if (ret != -ENXIO) {
+		fprintf(stderr, "dst unregister empty buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&src);
+	if (ret) {
+		fprintf(stderr, "src unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
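+	/* re-register in dst and clone the other way, dst -> src */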
+	ret = io_uring_register_buffers(&dst, vecs, NR_VECS);
+	if (ret < 0) {
+		fprintf(stderr, "register buffers dst; %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_clone_buffers(&src, &dst);
+	if (ret) {
+		fprintf(stderr, "buffer copy reverse: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&dst);
+	if (ret) {
+		fprintf(stderr, "dst unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&dst);
+	if (ret != -ENXIO) {
+		fprintf(stderr, "dst unregister empty buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&src);
+	if (ret) {
+		fprintf(stderr, "src unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&src);
+	io_uring_queue_exit(&dst);
+
+	for (i = 0; i < NR_VECS; i++)
+		free(vecs[i].iov_base);
+
+	return T_EXIT_PASS;
+}
+
+static int test_dummy(void)
+{
+	struct iovec vec = { };
+	struct io_uring src, dst;
+	int ret;
+
+	ret = io_uring_queue_init(1, &src, 0);
+	if (ret) {
+		fprintf(stderr, "ring_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = io_uring_queue_init(1, &dst, 0);
+	if (ret) {
+		fprintf(stderr, "ring_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_register_buffers(&src, &vec, 1);
+	if (ret < 0) {
+		fprintf(stderr, "failed to register dummy buffer: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_clone_buffers(&dst, &src);
+	if (ret) {
+		fprintf(stderr, "clone dummy buf: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&src);
+	if (ret) {
+		fprintf(stderr, "rsc unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_unregister_buffers(&dst);
+	if (ret) {
+		fprintf(stderr, "dst unregister buffers: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&src);
+	io_uring_queue_exit(&dst);
+
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 0);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_buf_clone)
+		return T_EXIT_SKIP;
+
+	ret = test(0, 1);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 0);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1, 1);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test 1 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_dummy();
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test_dummy failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/regbuf-merge.c b/test/regbuf-merge.c
new file mode 100644
index 0000000..9e5aa3d
--- /dev/null
+++ b/test/regbuf-merge.c
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: MIT */
+// autogenerated by syzkaller (https://github.com/google/syzkaller)
+
+#include <endian.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "helpers.h"
+
+#ifndef CONFIG_USE_SANITIZER
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register 427
+#endif
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup 425
+#endif
+
+#define SIZEOF_IO_URING_SQE 64
+#define SIZEOF_IO_URING_CQE 16
+#define SQ_HEAD_OFFSET 0
+#define SQ_TAIL_OFFSET 64
+#define SQ_RING_MASK_OFFSET 256
+#define SQ_RING_ENTRIES_OFFSET 264
+#define SQ_FLAGS_OFFSET 276
+#define SQ_DROPPED_OFFSET 272
+#define CQ_HEAD_OFFSET 128
+#define CQ_TAIL_OFFSET 192
+#define CQ_RING_MASK_OFFSET 260
+#define CQ_RING_ENTRIES_OFFSET 268
+#define CQ_RING_OVERFLOW_OFFSET 284
+#define CQ_FLAGS_OFFSET 280
+#define CQ_CQES_OFFSET 320
+
+static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5)
+{
+	uint32_t entries = (uint32_t)a0;
+	struct io_uring_params* setup_params = (struct io_uring_params*)a1;
+	void* vma1 = (void*)a2;
+	void* vma2 = (void*)a3;
+	void** ring_ptr_out = (void**)a4;
+	void** sqes_ptr_out = (void**)a5;
+	uint32_t fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
+	uint32_t sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
+	uint32_t cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE;
+	uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
+	*ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQ_RING);
+	uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
+	*sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
+	return fd_io_uring;
+}
+
+static uint64_t r[1] = {0xffffffffffffffff};
+
+int main(int argc, char *argv[])
+{
+	intptr_t res = 0;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	mmap((void *) 0x1ffff000ul, 0x1000ul, PROT_NONE,
+		MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul);
+	mmap((void *) 0x20000000ul, 0x1000000ul, PROT_READ|PROT_WRITE|PROT_EXEC,
+		MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul);
+	mmap((void *) 0x21000000ul, 0x1000ul, PROT_NONE,
+		MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul);
+
+	*(uint32_t*)0x20000684 = 0;
+	*(uint32_t*)0x20000688 = 0;
+	*(uint32_t*)0x2000068c = 0;
+	*(uint32_t*)0x20000690 = 0;
+	*(uint32_t*)0x20000698 = -1;
+	memset((void*)0x2000069c, 0, 12);
+
+	res = syz_io_uring_setup(0x2fd6, 0x20000680, 0x20ffd000, 0x20ffc000,
+					0x20000700, 0x20000740);
+	if (res != -1)
+		r[0] = res;
+
+	*(uint64_t*)0x20002840 = 0;
+	*(uint64_t*)0x20002848 = 0;
+	*(uint64_t*)0x20002850 = 0x20000840;
+	*(uint64_t*)0x20002858 = 0x1000;
+	syscall(__NR_io_uring_register, r[0], 0ul, 0x20002840ul, 2ul);
+	return T_EXIT_PASS;
+}
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/register-restrictions.c b/test/register-restrictions.c
index e1cf5bd..aa0f324 100644
--- a/test/register-restrictions.c
+++ b/test/register-restrictions.c
@@ -13,12 +13,7 @@
 #include <sys/eventfd.h>
 
 #include "liburing.h"
-
-enum {
-	TEST_OK,
-	TEST_SKIPPED,
-	TEST_FAILED
-};
+#include "helpers.h"
 
 static int test_restrictions_sqe_op(void)
 {
@@ -36,15 +31,15 @@
 
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	res[0].opcode = IORING_RESTRICTION_SQE_OP;
@@ -56,16 +51,16 @@
 	ret = io_uring_register_restrictions(&ring, res, 2);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 
 		fprintf(stderr, "failed to register restrictions: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_enable_rings(&ring);
 	if (ret) {
 		fprintf(stderr, "ring enabling failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -79,28 +74,28 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 2) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	for (int i = 0; i < 2; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return TEST_FAILED;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
 		case 1: /* writev */
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "write res: %d\n", cqe->res);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 
 			break;
 		case 2: /* readv should be denied */
 			if (cqe->res != -EACCES) {
 				fprintf(stderr, "read res: %d\n", cqe->res);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
@@ -108,7 +103,7 @@
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_register_op(void)
@@ -125,13 +120,13 @@
 
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	res[0].opcode = IORING_RESTRICTION_REGISTER_OP;
@@ -140,32 +135,32 @@
 	ret = io_uring_register_restrictions(&ring, res, 1);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 
 		fprintf(stderr, "failed to register restrictions: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_enable_rings(&ring);
 	if (ret) {
 		fprintf(stderr, "ring enabling failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_buffers(&ring, &vec, 1);
 	if (ret) {
 		fprintf(stderr, "io_uring_register_buffers failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_files(&ring, pipe1, 2);
 	if (ret != -EACCES) {
 		fprintf(stderr, "io_uring_register_files ret: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_fixed_file(void)
@@ -184,13 +179,13 @@
 
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	res[0].opcode = IORING_RESTRICTION_SQE_OP;
@@ -208,22 +203,22 @@
 	ret = io_uring_register_restrictions(&ring, res, 4);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 
 		fprintf(stderr, "failed to register restrictions: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_enable_rings(&ring);
 	if (ret) {
 		fprintf(stderr, "ring enabling failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_files(&ring, pipe1, 2);
 	if (ret) {
 		fprintf(stderr, "io_uring_register_files ret: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -243,34 +238,34 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 3) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	for (int i = 0; i < 3; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return TEST_FAILED;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
 		case 1: /* writev */
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "write res: %d\n", cqe->res);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 
 			break;
 		case 2: /* readv */
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "read res: %d\n", cqe->res);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 			break;
 		case 3: /* writev without fixed_file should be denied */
 			if (cqe->res != -EACCES) {
 				fprintf(stderr, "write res: %d\n", cqe->res);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
@@ -278,7 +273,7 @@
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_flags(void)
@@ -297,13 +292,13 @@
 
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	res[0].opcode = IORING_RESTRICTION_SQE_OP;
@@ -318,22 +313,22 @@
 	ret = io_uring_register_restrictions(&ring, res, 3);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 
 		fprintf(stderr, "failed to register restrictions: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_files(&ring, pipe1, 2);
 	if (ret) {
 		fprintf(stderr, "io_uring_register_files ret: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_enable_rings(&ring);
 	if (ret) {
 		fprintf(stderr, "ring enabling failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -354,7 +349,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 3) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -365,7 +360,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -376,7 +371,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -387,7 +382,7 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -397,14 +392,14 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	for (int i = 0; i < 7; i++) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret) {
 			fprintf(stderr, "wait: %d\n", ret);
-			return TEST_FAILED;
+			return T_EXIT_FAIL;
 		}
 
 		switch (cqe->user_data) {
@@ -414,7 +409,7 @@
 			if (cqe->res != sizeof(ptr)) {
 				fprintf(stderr, "write res: %d user_data %" PRIu64 "\n",
 					cqe->res, (uint64_t) cqe->user_data);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 
 			break;
@@ -425,7 +420,7 @@
 			if (cqe->res != -EACCES) {
 				fprintf(stderr, "write res: %d user_data %" PRIu64 "\n",
 					cqe->res, (uint64_t) cqe->user_data);
-				return TEST_FAILED;
+				return T_EXIT_FAIL;
 			}
 			break;
 		}
@@ -433,7 +428,7 @@
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_empty(void)
@@ -452,40 +447,40 @@
 
 	if (pipe(pipe1) != 0) {
 		perror("pipe");
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_restrictions(&ring, res, 0);
 	if (ret) {
 		if (ret == -EINVAL)
-			return TEST_SKIPPED;
+			return T_EXIT_SKIP;
 
 		fprintf(stderr, "failed to register restrictions: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_enable_rings(&ring);
 	if (ret) {
 		fprintf(stderr, "ring enabling failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_buffers(&ring, &vec, 1);
 	if (ret != -EACCES) {
 		fprintf(stderr, "io_uring_register_buffers ret: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_register_files(&ring, pipe1, 2);
 	if (ret != -EACCES) {
 		fprintf(stderr, "io_uring_register_files ret: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -494,24 +489,24 @@
 	ret = io_uring_submit(&ring);
 	if (ret != 1) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	ret = io_uring_wait_cqe(&ring, &cqe);
 	if (ret) {
 		fprintf(stderr, "wait: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	if (cqe->res != -EACCES) {
 		fprintf(stderr, "write res: %d\n", cqe->res);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_cqe_seen(&ring, cqe);
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_rings_not_disabled(void)
@@ -523,7 +518,7 @@
 	ret = io_uring_queue_init(8, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	res[0].opcode = IORING_RESTRICTION_SQE_OP;
@@ -533,11 +528,11 @@
 	if (ret != -EBADFD) {
 		fprintf(stderr, "io_uring_register_restrictions ret: %d\n",
 			ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 static int test_restrictions_rings_disabled(void)
@@ -549,7 +544,7 @@
 	ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED);
 	if (ret) {
 		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -558,11 +553,11 @@
 	ret = io_uring_submit(&ring);
 	if (ret != -EBADFD) {
 		fprintf(stderr, "submit: %d\n", ret);
-		return TEST_FAILED;
+		return T_EXIT_FAIL;
 	}
 
 	io_uring_queue_exit(&ring);
-	return TEST_OK;
+	return T_EXIT_PASS;
 }
 
 int main(int argc, char *argv[])
@@ -573,61 +568,67 @@
 		return 0;
 
 	ret = test_restrictions_sqe_op();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_sqe_op: skipped\n");
-		return 0;
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_sqe_op failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_register_op();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_register_op: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_register_op failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_fixed_file();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_fixed_file: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_fixed_file failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_flags();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_flags: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_flags failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_empty();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_empty: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_empty failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_rings_not_disabled();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_rings_not_disabled: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_rings_not_disabled failed\n");
 		return ret;
 	}
 
 	ret = test_restrictions_rings_disabled();
-	if (ret == TEST_SKIPPED) {
+	if (ret == T_EXIT_SKIP) {
 		printf("test_restrictions_rings_disabled: skipped\n");
-	} else if (ret == TEST_FAILED) {
+		return T_EXIT_SKIP;
+	} else if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "test_restrictions_rings_disabled failed\n");
 		return ret;
 	}
 
-	return 0;
+	return T_EXIT_PASS;
 }
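
The churn in this file is mechanical: the test's private TEST_OK/TEST_FAILED/TEST_SKIPPED values are replaced by the shared T_EXIT_* codes from test/helpers.h, so the harness can tell a skip from a pass. A minimal sketch of the convention being assumed (the authoritative definition lives in helpers.h; 77 is the automake SKIP value that the runtests.sh hunk further down begins checking for):

    enum {
        T_EXIT_PASS = 0,    /* test ran and passed */
        T_EXIT_FAIL = 1,    /* test ran and failed */
        T_EXIT_SKIP = 77,   /* feature missing; not a failure */
    };
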
diff --git a/test/rename.c b/test/rename.c
index 67d4e9c..97b538d 100644
--- a/test/rename.c
+++ b/test/rename.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Description: run various nop tests
+ * Description: run various rename tests
  *
  */
 #include <errno.h>
@@ -13,6 +13,51 @@
 
 #include "liburing.h"
 
+/* test using a bad address for either old or new path */
+static int test_rename_badaddr(struct io_uring *ring, bool bad_old)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	const char *path = ".foo.bar";
+	const char *old, *new;
+	int ret;
+
+	if (bad_old) {
+		old = (void *) (uintptr_t) 0x1234;
+		new = path;
+	} else {
+		old = path;
+		new = (void *) (uintptr_t) 0x1234;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+
+	memset(sqe, 0, sizeof(*sqe));
+
+	io_uring_prep_rename(sqe, old, new);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+err:
+	return 1;
+}
+
 static int test_rename(struct io_uring *ring, const char *old, const char *new)
 {
 	struct io_uring_cqe *cqe;
@@ -28,7 +73,7 @@
 	memset(sqe, 0, sizeof(*sqe));
 
 	io_uring_prep_rename(sqe, old, new);
-	
+
 	ret = io_uring_submit(ring);
 	if (ret <= 0) {
 		fprintf(stderr, "sqe submit failed: %d\n", ret);
@@ -122,6 +167,19 @@
 		fprintf(stderr, "test_rename invalid failed: %d\n", ret);
 		return ret;
 	}
+
+	ret = test_rename_badaddr(&ring, 0);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_badaddr 0 failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = test_rename_badaddr(&ring, 1);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_badaddr 1 failed: %d\n", ret);
+		return ret;
+	}
+
 out:
 	unlink(dst);
 	return 0;
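
The new test_rename_badaddr() helper submits a rename with one of the two paths pointing at an unmapped address and expects the kernel to fail the path copy with -EFAULT. For context, io_uring_prep_rename() is the AT_FDCWD convenience form of the renameat variant; a sketch of the equivalence, assuming liburing's usual prep-helper layering (the helper name here is hypothetical):

    #include <fcntl.h>      /* AT_FDCWD */
    #include "liburing.h"

    /* illustration only, not part of the patch */
    static inline void prep_rename_cwd(struct io_uring_sqe *sqe,
                                       const char *oldpath, const char *newpath)
    {
        io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
    }
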
diff --git a/test/ring-leak.c b/test/ring-leak.c
index 5b739ad..c76c15f 100644
--- a/test/ring-leak.c
+++ b/test/ring-leak.c
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
 static int __io_uring_register_files(int ring_fd, int fd1, int fd2)
@@ -48,7 +49,7 @@
 	return fd;
 }
 
-static void send_fd(int socket, int fd)
+static int send_fd(int socket, int fd)
 {
 	char buf[CMSG_SPACE(sizeof(fd))];
 	struct cmsghdr *cmsg;
@@ -69,8 +70,14 @@
 
 	msg.msg_controllen = CMSG_SPACE(sizeof(fd));
 
-	if (sendmsg(socket, &msg, 0) < 0)
+	if (sendmsg(socket, &msg, 0) < 0) {
+		if (errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("sendmsg");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
 }
 
 static int test_iowq_request_cancel(void)
@@ -135,6 +142,17 @@
 	return 0;
 }
 
+static void trigger_unix_gc(void)
+{
+	int fd;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0)
+		perror("socket dgram");
+	else
+		close(fd);
+}
+
 static int test_scm_cycles(bool update)
 {
 	char buffer[128];
@@ -155,7 +173,9 @@
 		perror("pipe");
 		return -1;
 	}
-	send_fd(sp[0], ring.ring_fd);
+	ret = send_fd(sp[0], ring.ring_fd);
+	if (ret != T_EXIT_PASS)
+		return ret;
 
 	/* register an empty set for updates */
 	if (update) {
@@ -193,6 +213,8 @@
 	/* should unregister files and close the write fd */
 	io_uring_queue_exit(&ring);
 
+	trigger_unix_gc();
+
 	/*
 	 * We're trying to wait for the ring to "really" exit, that will be
 	 * done async. For that rely on the registered write end to be closed
@@ -223,12 +245,13 @@
 		bool update = !!(i & 1);
 
 		ret = test_scm_cycles(update);
+		if (ret == T_EXIT_SKIP)
+			return T_EXIT_SKIP;
 		if (ret) {
 			fprintf(stderr, "test_scm_cycles() failed %i\n",
 				update);
 			return 1;
 		}
-		break;
 	}
 
 	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp) != 0) {
@@ -247,8 +270,11 @@
 	}
 
 	pid = fork();
-	if (pid)
-		send_fd(sp[0], ring_fd);
+	if (pid) {
+		ret = send_fd(sp[0], ring_fd);
+		if (ret != T_EXIT_PASS)
+			return ret;
+	}
 
 	close(ring_fd);
 	close(sp[0]);
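
send_fd() now propagates sendmsg() failures instead of only logging them: EINVAL maps to a skip, presumably to accommodate kernels that refuse to pass io_uring file descriptors over AF_UNIX sockets, and trigger_unix_gc() nudges the unix socket garbage collector by creating and closing a throwaway AF_UNIX socket after the ring exits. The receiving side of the SCM_RIGHTS transfer is not shown in the hunk; a sketch of what it would look like, assuming the same single-fd cmsg layout (this recv_fd helper is hypothetical):

    static int recv_fd(int socket)
    {
        char buf[CMSG_SPACE(sizeof(int))];
        struct msghdr msg = { .msg_control = buf,
                              .msg_controllen = sizeof(buf) };
        struct cmsghdr *cmsg;
        int fd = -1;

        if (recvmsg(socket, &msg, 0) < 0)
            return -1;
        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_RIGHTS)
            memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
        return fd;
    }
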
diff --git a/test/ring-leak2.c b/test/ring-leak2.c
index a8c03fe..6e76717 100644
--- a/test/ring-leak2.c
+++ b/test/ring-leak2.c
@@ -46,7 +46,7 @@
 
 static int client_eventfd = -1;
 
-int setup_io_uring(struct io_uring *ring)
+static int setup_io_uring(struct io_uring *ring)
 {
 	struct io_uring_params p = { };
 	int ret;
diff --git a/test/ringbuf-read.c b/test/ringbuf-read.c
index 673f2de..cfcc3d8 100644
--- a/test/ringbuf-read.c
+++ b/test/ringbuf-read.c
@@ -37,7 +37,6 @@
 
 static int test(const char *filename, int dio, int async)
 {
-	struct io_uring_buf_reg reg = { };
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
 	struct io_uring ring;
@@ -52,11 +51,16 @@
 		return 1;
 	}
 
-	if (dio)
+	if (dio) {
 		fd = open(filename, O_DIRECT | O_RDONLY);
-	else
+		if (fd < 0 && errno == EINVAL)
+			return T_EXIT_SKIP;
+	} else {
 		fd = open(filename, O_RDONLY);
+	}
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return 1;
 	}
@@ -65,15 +69,9 @@
 
 	if (posix_memalign((void **) &buf, 4096, FSIZE))
 		return 1;
-	if (posix_memalign((void **) &br, 4096, 4096))
-		return 1;
 
-	reg.ring_addr = (unsigned long) br;
-	reg.ring_entries = NR_BUFS;
-	reg.bgid = 1;
-
-	ret = io_uring_register_buf_ring(&ring, &reg, 0);
-	if (ret) {
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, 1, 0, &ret);
+	if (!br) {
 		if (ret == -EINVAL) {
 			no_buf_ring = 1;
 			return 0;
@@ -127,6 +125,7 @@
 		if (verify_buffer(buf + ((bid - 1) * BUF_SIZE), ud))
 			return 1;
 	}
+	free(buf);
 
 	return 0;
 }
@@ -148,6 +147,8 @@
 
 	fd = open(fname, O_WRONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		goto err;
 	}
@@ -156,40 +157,45 @@
 		ret = write(fd, buf, BUF_SIZE);
 		if (ret != BUF_SIZE) {
 			fprintf(stderr, "bad file prep write\n");
+			close(fd);
 			goto err;
 		}
 	}
 	close(fd);
 
 	ret = test(fname, 1, 0);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "dio test failed\n");
-		return ret;
+		goto err;
 	}
 	if (no_buf_ring)
-		return 0;
+		goto pass;
 
 	ret = test(fname, 0, 0);
 	if (ret) {
 		fprintf(stderr, "buffered test failed\n");
-		return ret;
+		goto err;
 	}
 
 	ret = test(fname, 1, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "dio async test failed\n");
-		return ret;
+		goto err;
 	}
 
 	ret = test(fname, 0, 1);
-	if (ret) {
+	if (ret == T_EXIT_FAIL) {
 		fprintf(stderr, "buffered async test failed\n");
-		return ret;
+		goto err;
 	}
 
-	return 0;
+pass:
+	ret = T_EXIT_PASS;
+	goto out;
 err:
+	ret = T_EXIT_FAIL;
+out:
 	if (do_unlink)
 		unlink(fname);
-	return 1;
+	return ret;
 }
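
This file drops the open-coded posix_memalign() + io_uring_buf_reg + io_uring_register_buf_ring() sequence in favor of io_uring_setup_buf_ring(), which allocates, registers, and initializes the provided-buffer ring in one call and reports any error through its final argument. The adopted pattern, in isolation:

    struct io_uring_buf_ring *br;
    int err;

    /* one call replaces alloc + register + init; returns NULL on failure */
    br = io_uring_setup_buf_ring(&ring, NR_BUFS, 1 /* bgid */, 0, &err);
    if (!br)
        fprintf(stderr, "setup_buf_ring failed: %d\n", err);
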
diff --git a/test/ringbuf-status.c b/test/ringbuf-status.c
new file mode 100644
index 0000000..0794bfb
--- /dev/null
+++ b/test/ringbuf-status.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test reading provided ring buf head
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BUF_SIZE	32
+#define NR_BUFS		8
+#define FSIZE		(BUF_SIZE * NR_BUFS)
+
+#define BR_MASK		(NR_BUFS - 1)
+#define BGID		1
+
+static int no_buf_ring;
+static int no_buf_ring_status;
+
+static int test_max(void)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring ring;
+	int nr_bufs = 32768;
+	int ret, i;
+	char *buf;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (posix_memalign((void **) &buf, 4096, FSIZE))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, nr_bufs, BGID, 0, &ret);
+	if (!br) {
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	ret = io_uring_buf_ring_available(&ring, br, BGID);
+	if (ret) {
+		fprintf(stderr, "Bad available count %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < nr_bufs / 2; i++)
+		io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i);
+	io_uring_buf_ring_advance(br, nr_bufs / 2);
+
+	ret = io_uring_buf_ring_available(&ring, br, BGID);
+	if (ret != nr_bufs / 2) {
+		fprintf(stderr, "Bad half full available count %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < nr_bufs / 2; i++)
+		io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i);
+	io_uring_buf_ring_advance(br, nr_bufs / 2);
+
+	ret = io_uring_buf_ring_available(&ring, br, BGID);
+	if (ret != nr_bufs) {
+		fprintf(stderr, "Bad half full available count %d\n", ret);
+		return 1;
+	}
+
+	free(buf);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
+
+static int test(int invalid)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	struct io_uring_buf_ring *br;
+	int ret, i, fds[2];
+	uint16_t head;
+	char *buf;
+	void *ptr;
+	char output[16];
+
+	memset(output, 0x55, sizeof(output));
+
+	ret = io_uring_queue_init(NR_BUFS, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	if (posix_memalign((void **) &buf, 4096, FSIZE))
+		return 1;
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL) {
+			no_buf_ring = 1;
+			free(buf);
+			return 0;
+		}
+		fprintf(stderr, "Buffer ring register failed %d\n", ret);
+		return 1;
+	}
+
+	ptr = buf;
+	for (i = 0; i < NR_BUFS; i++) {
+		io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, BR_MASK, i);
+		ptr += BUF_SIZE;
+	}
+	io_uring_buf_ring_advance(br, NR_BUFS);
+
+	/* head should be zero at this point */
+	head = 1;
+	if (!invalid)
+		ret = io_uring_buf_ring_head(&ring, BGID, &head);
+	else
+		ret = io_uring_buf_ring_head(&ring, BGID + 10, &head);
+	if (ret) {
+		if (ret == -EINVAL) {
+			no_buf_ring_status = 1;
+			free(buf);
+			return T_EXIT_SKIP;
+		}
+		if (invalid && ret == -ENOENT) {
+			free(buf);
+			return T_EXIT_PASS;
+		}
+		fprintf(stderr, "buf_ring_head: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (invalid) {
+		fprintf(stderr, "lookup of bad group id succeeded\n");
+		return T_EXIT_FAIL;
+	}
+	if (head != 0) {
+		fprintf(stderr, "bad head %d\n", head);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_buf_ring_available(&ring, br, BGID);
+	if (ret != NR_BUFS) {
+		fprintf(stderr, "ring available %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fds[0], NULL, BUF_SIZE, i * BUF_SIZE);
+	sqe->buf_group = BGID;
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* head should still be zero at this point, no buffers consumed */
+	head = 1;
+	ret = io_uring_buf_ring_head(&ring, BGID, &head);
+	if (head != 0) {
+		fprintf(stderr, "bad head after submit %d\n", head);
+		return T_EXIT_FAIL;
+	}
+
+	ret = write(fds[1], output, sizeof(output));
+	if (ret != sizeof(output)) {
+		fprintf(stderr, "pipe buffer write %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe failed %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != sizeof(output)) {
+		fprintf(stderr, "cqe res %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+		fprintf(stderr, "no buffer selected\n");
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* head should now be one, we consumed a buffer */
+	ret = io_uring_buf_ring_head(&ring, BGID, &head);
+	if (head != 1) {
+		fprintf(stderr, "bad head after cqe %d\n", head);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_buf_ring_available(&ring, br, BGID);
+	if (ret != NR_BUFS - 1) {
+		fprintf(stderr, "ring available %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	close(fds[0]);
+	close(fds[1]);
+	free(buf);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	ret = test(0);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_buf_ring || no_buf_ring_status)
+		return T_EXIT_SKIP;
+
+	ret = test(1);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_max();
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_max failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
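
The new ringbuf-status test exercises two introspection helpers: io_uring_buf_ring_head(), which fetches the kernel's current head for a provided-buffer group, and io_uring_buf_ring_available(), which turns that head plus the locally tracked tail into a count of unconsumed buffers. A sketch of the arithmetic the available count boils down to, assuming the documented 16-bit head/tail indices:

    static int available_sketch(struct io_uring *ring,
                                struct io_uring_buf_ring *br, int bgid)
    {
        uint16_t head;
        int ret = io_uring_buf_ring_head(ring, bgid, &head);

        if (ret)
            return ret;
        /* entries added by the app but not yet consumed by the kernel */
        return (uint16_t) (br->tail - head);
    }
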
diff --git a/test/rsrc_tags.c b/test/rsrc_tags.c
index 2d11d2a..95f65e9 100644
--- a/test/rsrc_tags.c
+++ b/test/rsrc_tags.c
@@ -40,7 +40,7 @@
 			  const void *arg, const __u64 *tags)
 {
 	struct io_uring_rsrc_register reg;
-	int ret, reg_type;
+	int reg_type;
 
 	memset(&reg, 0, sizeof(reg));
 	reg.nr = nr;
@@ -51,9 +51,8 @@
 	if (type != TEST_IORING_RSRC_FILE)
 		reg_type = IORING_REGISTER_BUFFERS2;
 
-	ret = __sys_io_uring_register(ring->ring_fd, reg_type,
-					&reg, sizeof(reg));
-	return ret ? -errno : 0;
+	return __sys_io_uring_register(ring->ring_fd, reg_type, &reg,
+				       sizeof(reg));
 }
 
 /*
@@ -64,7 +63,7 @@
 			const void *arg, const __u64 *tags)
 {
 	struct io_uring_rsrc_update2 up;
-	int ret, up_type;
+	int up_type;
 
 	memset(&up, 0, sizeof(up));
 	up.offset = off;
@@ -75,9 +74,7 @@
 	up_type = IORING_REGISTER_FILES_UPDATE2;
 	if (type != TEST_IORING_RSRC_FILE)
 		up_type = IORING_REGISTER_BUFFERS_UPDATE;
-	ret = __sys_io_uring_register(ring->ring_fd, up_type,
-				      &up, sizeof(up));
-	return ret < 0 ? -errno : ret;
+	return __sys_io_uring_register(ring->ring_fd, up_type, &up, sizeof(up));
 }
 
 static bool has_rsrc_update(void)
@@ -185,7 +182,7 @@
 		return 1;
 	}
 
-	/* test that CQE is not emmited before we're done with a buffer */
+	/* test that CQE is not emitted before we're done with a buffer */
 	sqe = io_uring_get_sqe(&ring);
 	io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 10, 0, 0);
 	sqe->user_data = 100;
@@ -350,7 +347,16 @@
 	ret = io_uring_register_files_update(&ring, off, &fd, 1);
 	assert(ret == 1);
 	ret = io_uring_wait_cqe(&ring, &cqe);
-	assert(!ret && cqe->user_data == tags[off]);
+	if (ret) {
+		fprintf(stderr, "io_uring wait ret=%d\n", ret);
+		return 1;
+	}
+	if (cqe->user_data != tags[off]) {
+		fprintf(stderr, "data %lx != %lx\n",
+				(unsigned long) cqe->user_data,
+				(unsigned long) tags[off]);
+		return 1;
+	}
 	io_uring_cqe_seen(&ring, cqe);
 
 	/* remove removed file, shouldn't emit old tag */
@@ -404,7 +410,8 @@
 
 int main(int argc, char *argv[])
 {
-	int ring_flags[] = {0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL};
+	int ring_flags[] = {0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL,
+			    IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN};
 	int i, ret;
 
 	if (argc > 1)
@@ -426,7 +433,12 @@
 	}
 
 	for (i = 0; i < sizeof(ring_flags) / sizeof(ring_flags[0]); i++) {
-		ret = test_files(ring_flags[i]);
+		int flag = ring_flags[i];
+
+		if (flag & IORING_SETUP_DEFER_TASKRUN && !t_probe_defer_taskrun())
+			continue;
+
+		ret = test_files(flag);
 		if (ret) {
 			printf("test_tag failed, type %i\n", i);
 			return ret;
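
rsrc_tags.c gains a fourth ring flavor, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, guarded by t_probe_defer_taskrun() so older kernels skip it rather than fail; the register/update wrappers also return __sys_io_uring_register() directly, consistent with the internal syscall wrapper already encoding errors as negative values. The probe helper lives in test/helpers.c; a plausible sketch, assuming the usual try-and-tear-down pattern:

    /* hypothetical probe, for illustration; the real one is in helpers.c */
    static bool probe_defer_taskrun_sketch(void)
    {
        struct io_uring ring;
        int ret = io_uring_queue_init(1, &ring,
                                      IORING_SETUP_SINGLE_ISSUER |
                                      IORING_SETUP_DEFER_TASKRUN);

        if (ret < 0)
            return false;   /* kernel rejected the flags */
        io_uring_queue_exit(&ring);
        return true;
    }
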
diff --git a/test/runtests.sh b/test/runtests.sh
index 6d8f7af..22dbc28 100755
--- a/test/runtests.sh
+++ b/test/runtests.sh
@@ -1,13 +1,12 @@
 #!/usr/bin/env bash
 
 TESTS=("$@")
-RET=0
 TIMEOUT=60
 DMESG_FILTER="cat"
 TEST_DIR=$(dirname "$0")
-FAILED=""
-SKIPPED=""
-TIMED_OUT=""
+FAILED=()
+SKIPPED=()
+TIMED_OUT=()
 TEST_FILES=""
 declare -A TEST_MAP
 
@@ -94,7 +93,7 @@
 	# shellcheck disable=SC2181
 	if [ $? -eq 0 ]; then
 		echo "Test skipped"
-		SKIPPED="$SKIPPED <$test_string>"
+		SKIPPED+=("<$test_string>")
 		return
 	fi
 
@@ -111,15 +110,19 @@
 	# Check test status
 	if [ "$status" -eq 124 ]; then
 		echo "Test $test_name timed out (may not be a failure)"
-		TIMED_OUT="$TIMED_OUT <$test_string>"
-	elif [ "$status" -ne 0 ]; then
+		TIMED_OUT+=("<$test_string>")
+	elif [ "$status" -eq 77 ]; then
+		echo "Skipped"
+		SKIPPED+=("<$test_string>")
+	elif [[ $test_string != xfail* ]] && [ "$status" -ne 0 ]; then
 		echo "Test $test_name failed with ret $status"
-		FAILED="$FAILED <$test_string>"
-		RET=1
+		FAILED+=("<$test_string>")
+	elif [[ $test_string == xfail* ]] && [ "$status" -ne 1 ]; then
+		echo "Test $test_name expected fail status 1 but returned $status"
+		FAILED+=("<$test_string>")
 	elif ! _check_dmesg "$dmesg_marker" "$test_name"; then
 		echo "Test $test_name failed dmesg check"
-		FAILED="$FAILED <$test_string>"
-		RET=1
+		FAILED+=("<$test_string>")
 	else
 		if [ -f "output/$out_name" ]; then
 			T_PREV=$(cat "output/$out_name")
@@ -153,17 +156,27 @@
 	fi
 done
 
-if [ -n "$SKIPPED" ]; then
-	echo "Tests skipped: $SKIPPED"
+if [ "$DO_KMSG" -eq "1" ]; then
+	for dmesg_file in *.dmesg; do
+		if [ -f "$dmesg_file" ]; then
+			echo "Found dmesg file $dmesg_file, outputting:"
+			cat "$dmesg_file"
+		fi
+	done
 fi
 
-if [ -n "$TIMED_OUT" ]; then
-	echo "Tests timed out: $TIMED_OUT"
+if [ "${#TIMED_OUT[*]}" -ne 0 ]; then
+	echo "Tests timed out (${#TIMED_OUT[*]}): ${TIMED_OUT[*]}"
 fi
 
-if [ "${RET}" -ne 0 ]; then
-	echo "Tests failed: $FAILED"
-	exit $RET
+KVER=$(uname -rv)
+echo "Test run complete, kernel: $KVER"
+
+if [ "${#FAILED[*]}" -ne 0 ]; then
+	echo "Tests failed (${#FAILED[*]}): ${FAILED[*]}"
+	exit 1
+elif [ "${#SKIPPED[*]}" -ne 0 ] && [ -n "$TEST_GNU_EXITCODE" ]; then
+	exit 77
 else
 	echo "All tests passed"
 	exit 0
diff --git a/test/rw_merge_test.c b/test/rw_merge_test.c
index 03f6467..559ba3f 100644
--- a/test/rw_merge_test.c
+++ b/test/rw_merge_test.c
@@ -79,7 +79,7 @@
 	assert(ret == 1);
 
 	/*
-	 * Read may stuck because of bug there request was be incorrecly
+	 * Read may get stuck due to a bug where the request was incorrectly
 	 * merged with <REQ1> request
 	 */
 	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
diff --git a/test/send-zerocopy.c b/test/send-zerocopy.c
new file mode 100644
index 0000000..7135f57
--- /dev/null
+++ b/test/send-zerocopy.c
@@ -0,0 +1,1030 @@
+/* SPDX-License-Identifier: MIT */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <arpa/inet.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define MAX_MSG	128
+
+#define HOST	"127.0.0.1"
+#define HOSTV6	"::1"
+
+#define MAX_IOV 32
+#define CORK_REQS 5
+#define RX_TAG 10000
+#define BUFFER_OFFSET 41
+
+#ifndef ARRAY_SIZE
+	#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#endif
+
+enum {
+	BUF_T_NORMAL,
+	BUF_T_SMALL,
+	BUF_T_NONALIGNED,
+	BUF_T_LARGE,
+	BUF_T_HUGETLB,
+
+	__BUF_NR,
+};
+
+/* 32MB, should be enough to trigger a short send */
+#define LARGE_BUF_SIZE		(1U << 25)
+
+static size_t page_sz;
+static char *tx_buffer, *rx_buffer;
+static struct iovec buffers_iov[__BUF_NR];
+
+static bool has_sendzc;
+static bool has_sendmsg;
+static bool hit_enomem;
+
+static int probe_zc_support(void)
+{
+	struct io_uring ring;
+	struct io_uring_probe *p;
+	int ret;
+
+	has_sendzc = has_sendmsg = false;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret)
+		return -1;
+
+	p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
+	if (!p)
+		return -1;
+
+	ret = io_uring_register_probe(&ring, p, 256);
+	if (ret)
+		return -1;
+
+	has_sendzc = p->ops_len > IORING_OP_SEND_ZC;
+	has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC;
+	io_uring_queue_exit(&ring);
+	free(p);
+	return 0;
+}
+
+static bool check_cq_empty(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe = NULL;
+	int ret;
+
+	ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
+	return ret == -EAGAIN;
+}
+
+static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int msg_flags = 0;
+	unsigned zc_flags = 0;
+	int payload_size = 100;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
+			      msg_flags, zc_flags);
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(ring);
+	assert(ret == 1);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	assert(!ret && cqe->user_data == 1);
+	if (cqe->res != payload_size) {
+		fprintf(stderr, "send failed %i\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+
+	assert(cqe->flags & IORING_CQE_F_MORE);
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	assert(!ret);
+	assert(cqe->user_data == 1);
+	assert(cqe->flags & IORING_CQE_F_NOTIF);
+	assert(!(cqe->flags & IORING_CQE_F_MORE));
+	io_uring_cqe_seen(ring, cqe);
+	assert(check_cq_empty(ring));
+
+	ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
+	assert(ret == payload_size);
+	return T_EXIT_PASS;
+}
+
+static int test_send_faults_check(struct io_uring *ring, int expected)
+{
+	struct io_uring_cqe *cqe;
+	int ret, nr_cqes = 0;
+	bool more = true;
+
+	while (more) {
+		nr_cqes++;
+		ret = io_uring_wait_cqe(ring, &cqe);
+		assert(!ret);
+		assert(cqe->user_data == 1);
+
+		if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) {
+			fprintf(stderr, "test_send_faults_check notif came first\n");
+			return -1;
+		}
+
+		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
+			if (cqe->res != expected) {
+				fprintf(stderr, "invalid cqe res %i vs expected %i, "
+					"user_data %i\n",
+					cqe->res, expected, (int)cqe->user_data);
+				return -1;
+			}
+		} else {
+			if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
+				fprintf(stderr, "invalid notif cqe %i %i\n",
+					cqe->res, cqe->flags);
+				return -1;
+			}
+		}
+
+		more = cqe->flags & IORING_CQE_F_MORE;
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (nr_cqes > 2) {
+		fprintf(stderr, "test_send_faults_check() too many CQEs %i\n",
+				nr_cqes);
+		return -1;
+	}
+	assert(check_cq_empty(ring));
+	return 0;
+}
+
+static int test_send_faults(int sock_tx, int sock_rx)
+{
+	struct io_uring_sqe *sqe;
+	int msg_flags = 0;
+	unsigned zc_flags = 0;
+	int ret, payload_size = 100;
+	struct io_uring ring;
+
+	ret = io_uring_queue_init(32, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return -1;
+	}
+
+	/* invalid buffer */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
+			      msg_flags, zc_flags);
+	sqe->user_data = 1;
+	ret = io_uring_submit(&ring);
+	assert(ret == 1);
+
+	ret = test_send_faults_check(&ring, -EFAULT);
+	if (ret) {
+		fprintf(stderr, "test_send_faults with invalid buf failed\n");
+		return -1;
+	}
+
+	/* invalid address */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
+			      msg_flags, zc_flags);
+	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
+				    sizeof(struct sockaddr_in6));
+	sqe->user_data = 1;
+	ret = io_uring_submit(&ring);
+	assert(ret == 1);
+
+	ret = test_send_faults_check(&ring, -EFAULT);
+	if (ret) {
+		fprintf(stderr, "test_send_faults with invalid addr failed\n");
+		return -1;
+	}
+
+	/* invalid send/recv flags */
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
+			      msg_flags, ~0U);
+	sqe->user_data = 1;
+	ret = io_uring_submit(&ring);
+	assert(ret == 1);
+
+	ret = test_send_faults_check(&ring, -EINVAL);
+	if (ret) {
+		fprintf(stderr, "test_send_faults with invalid flags failed\n");
+		return -1;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int create_socketpair_ip(struct sockaddr_storage *addr,
+				int *sock_client, int *sock_server,
+				bool ipv6, bool client_connect,
+				bool msg_zc, bool tcp)
+{
+	socklen_t addr_size;
+	int family, sock, listen_sock = -1;
+	int ret;
+
+	memset(addr, 0, sizeof(*addr));
+	if (ipv6) {
+		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
+
+		family = AF_INET6;
+		saddr->sin6_family = family;
+		saddr->sin6_port = htons(0);
+		addr_size = sizeof(*saddr);
+	} else {
+		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
+
+		family = AF_INET;
+		saddr->sin_family = family;
+		saddr->sin_port = htons(0);
+		saddr->sin_addr.s_addr = htonl(INADDR_ANY);
+		addr_size = sizeof(*saddr);
+	}
+
+	/* server sock setup */
+	if (tcp) {
+		sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
+	} else {
+		sock = *sock_server = socket(family, SOCK_DGRAM, 0);
+	}
+	if (sock < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	ret = bind(sock, (struct sockaddr *)addr, addr_size);
+	if (ret < 0) {
+		perror("bind");
+		return 1;
+	}
+
+	ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
+	if (ret < 0) {
+		fprintf(stderr, "getsockname failed %i\n", errno);
+		return 1;
+	}
+
+	if (tcp) {
+		ret = listen(sock, 128);
+		assert(ret != -1);
+	}
+
+	if (ipv6) {
+		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
+
+		inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
+	} else {
+		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
+
+		inet_pton(AF_INET, HOST, &saddr->sin_addr);
+	}
+
+	/* client sock setup */
+	if (tcp) {
+		*sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
+		assert(client_connect);
+	} else {
+		*sock_client = socket(family, SOCK_DGRAM, 0);
+	}
+	if (*sock_client < 0) {
+		perror("socket");
+		return 1;
+	}
+	if (client_connect) {
+		ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
+		if (ret < 0) {
+			perror("connect");
+			return 1;
+		}
+	}
+	if (msg_zc) {
+#ifdef SO_ZEROCOPY
+		int val = 1;
+
+		/*
+		 * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc.
+		 * It's only here to test interactions with MSG_ZEROCOPY.
+		 */
+		if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+			perror("setsockopt zc");
+			return 1;
+		}
+#else
+		fprintf(stderr, "no SO_ZEROCOPY\n");
+		return 1;
+#endif
+	}
+	if (tcp) {
+		*sock_server = accept(listen_sock, NULL, NULL);
+		if (*sock_server < 0) {
+			fprintf(stderr, "can't accept\n");
+			return 1;
+		}
+		close(listen_sock);
+	}
+	return 0;
+}
+
+struct send_conf {
+	bool fixed_buf;
+	bool mix_register;
+	bool cork;
+	bool force_async;
+	bool use_sendmsg;
+	bool tcp;
+	bool zc;
+	bool iovec;
+	bool long_iovec;
+	bool poll_first;
+	int buf_index;
+	struct sockaddr_storage *addr;
+};
+
+static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
+			     struct send_conf *conf)
+{
+	struct iovec iov[MAX_IOV];
+	struct msghdr msghdr[CORK_REQS];
+	const unsigned zc_flags = 0;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int nr_reqs = conf->cork ? CORK_REQS : 1;
+	int i, ret, nr_cqes, addr_len = 0;
+	size_t send_size = buffers_iov[conf->buf_index].iov_len;
+	size_t chunk_size = send_size / nr_reqs;
+	size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
+	char *buf = buffers_iov[conf->buf_index].iov_base;
+
+	assert(MAX_IOV >= CORK_REQS);
+
+	if (conf->addr) {
+		sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;
+
+		addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
+					      sizeof(struct sockaddr_in6);
+	}
+
+	memset(rx_buffer, 0, send_size);
+
+	for (i = 0; i < nr_reqs; i++) {
+		bool real_fixed_buf = conf->fixed_buf;
+		size_t cur_size = chunk_size;
+		int msg_flags = MSG_WAITALL;
+
+		if (conf->mix_register)
+			real_fixed_buf = rand() & 1;
+
+		if (i != nr_reqs - 1)
+			msg_flags |= MSG_MORE;
+		else
+			cur_size = chunk_size_last;
+
+		sqe = io_uring_get_sqe(ring);
+
+		if (!conf->use_sendmsg) {
+			if (conf->zc) {
+				io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
+						      cur_size, msg_flags, zc_flags);
+			} else {
+				io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
+						      cur_size, msg_flags);
+			}
+
+			if (real_fixed_buf) {
+				sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+				sqe->buf_index = conf->buf_index;
+			}
+			if (conf->addr)
+				io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
+							    addr_len);
+		} else {
+			struct iovec *io;
+			int iov_len;
+
+			if (conf->zc)
+				io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
+			else
+				io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);
+
+			if (!conf->iovec) {
+				io = &iov[i];
+				iov_len = 1;
+				iov[i].iov_len = cur_size;
+				iov[i].iov_base = buf + i * chunk_size;
+			} else {
+				char *it = buf;
+				int j;
+
+				assert(nr_reqs == 1);
+				iov_len = conf->long_iovec ? MAX_IOV : 4;
+				io = iov;
+
+				for (j = 0; j < iov_len; j++)
+					io[j].iov_len = 1;
+				/* first want to be easily advanced */
+				io[0].iov_base = it;
+				it += io[0].iov_len;
+				/* this should cause retry */
+				io[1].iov_len = chunk_size - iov_len + 1;
+				io[1].iov_base = it;
+				it += io[1].iov_len;
+				/* fill the rest */
+				for (j = 2; j < iov_len; j++) {
+					io[j].iov_base = it;
+					it += io[j].iov_len;
+				}
+			}
+
+			memset(&msghdr[i], 0, sizeof(msghdr[i]));
+			msghdr[i].msg_iov = io;
+			msghdr[i].msg_iovlen = iov_len;
+			if (conf->addr) {
+				msghdr[i].msg_name = conf->addr;
+				msghdr[i].msg_namelen = addr_len;
+			}
+		}
+		sqe->user_data = i;
+		if (conf->force_async)
+			sqe->flags |= IOSQE_ASYNC;
+		if (conf->poll_first)
+			sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
+		if (i != nr_reqs - 1)
+			sqe->flags |= IOSQE_IO_LINK;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
+	sqe->user_data = RX_TAG;
+
+	ret = io_uring_submit(ring);
+	if (ret != nr_reqs + 1) {
+		fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
+		return 1;
+	}
+
+	nr_cqes = nr_reqs + 1;
+	for (i = 0; i < nr_cqes; i++) {
+		int expected = chunk_size;
+
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+			return 1;
+		}
+		if (cqe->user_data == RX_TAG) {
+			if (cqe->res != send_size) {
+				fprintf(stderr, "rx failed res: %i, expected %i\n",
+						cqe->res, (int)send_size);
+				return 1;
+			}
+			io_uring_cqe_seen(ring, cqe);
+			continue;
+		}
+		if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
+			fprintf(stderr, "unexpected cflags %i res %i\n",
+					cqe->flags, cqe->res);
+			return 1;
+		}
+		if (cqe->user_data >= nr_reqs) {
+			fprintf(stderr, "invalid user_data %lu\n",
+					(unsigned long)cqe->user_data);
+			return 1;
+		}
+		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
+			if (cqe->flags & IORING_CQE_F_MORE)
+				nr_cqes++;
+			if (cqe->user_data == nr_reqs - 1)
+				expected = chunk_size_last;
+			if (cqe->res != expected) {
+				if (cqe->res == -ENOMEM) {
+					if (!hit_enomem) {
+						fprintf(stderr, "Hit -ENOMEM. "
+							"Increase ulimit -l "
+							"limit for a complete "
+							"test run. Skipping "
+							"parts.\n");
+						hit_enomem = 1;
+					}
+					return 0;
+				}
+				fprintf(stderr, "invalid cqe->res %d expected %d\n",
+						 cqe->res, expected);
+				return 1;
+			}
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	for (i = 0; i < send_size; i++) {
+		if (buf[i] != rx_buffer[i]) {
+			fprintf(stderr, "botched data, first mismated byte %i, "
+				"%u vs %u\n", i, buf[i], rx_buffer[i]);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int test_inet_send(struct io_uring *ring)
+{
+	struct send_conf conf;
+	struct sockaddr_storage addr;
+	int sock_client = -1, sock_server = -1;
+	int ret, j, i;
+	int buf_index;
+
+	for (j = 0; j < 32; j++) {
+		bool ipv6 = j & 1;
+		bool client_connect = j & 2;
+		bool msg_zc_set = j & 4;
+		bool tcp = j & 8;
+		bool swap_sockets = j & 16;
+
+		if (tcp && !client_connect)
+			continue;
+		if (swap_sockets && !tcp)
+			continue;
+#ifndef SO_ZEROCOPY
+		if (msg_zc_set)
+			continue;
+#endif
+		ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
+				 client_connect, msg_zc_set, tcp);
+		if (ret) {
+			fprintf(stderr, "sock prep failed %d\n", ret);
+			return 1;
+		}
+		if (swap_sockets) {
+			int tmp_sock = sock_client;
+
+			sock_client = sock_server;
+			sock_server = tmp_sock;
+		}
+
+		for (i = 0; i < 1024; i++) {
+			bool regbuf;
+
+			conf.use_sendmsg = i & 1;
+			conf.poll_first = i & 2;
+			conf.fixed_buf = i & 4;
+			conf.addr = (i & 8) ? &addr : NULL;
+			conf.cork = i & 16;
+			conf.mix_register = i & 32;
+			conf.force_async = i & 64;
+			conf.zc = i & 128;
+			conf.iovec = i & 256;
+			conf.long_iovec = i & 512;
+			conf.tcp = tcp;
+			regbuf = conf.mix_register || conf.fixed_buf;
+
+			if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
+				continue;
+			if (!conf.zc) {
+				if (regbuf)
+					continue;
+				/*
+				 * Non-zerocopy send w/ addr was added together with sendmsg_zc;
+				 * skip if the kernel doesn't support it.
+				 */
+				if (conf.addr && !has_sendmsg)
+					continue;
+			}
+			if (tcp && (conf.cork || conf.addr))
+				continue;
+			if (conf.mix_register && (!conf.cork || conf.fixed_buf))
+				continue;
+			if (!client_connect && conf.addr == NULL)
+				continue;
+			if (conf.use_sendmsg && (regbuf || !has_sendmsg))
+				continue;
+			if (msg_zc_set && !conf.zc)
+				continue;
+
+			for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
+				size_t len = buffers_iov[buf_index].iov_len;
+
+				if (!buffers_iov[buf_index].iov_base)
+					continue;
+				/* UDP IPv4 max datagram size is under 64K */
+				if (!tcp && len > (1U << 15))
+					continue;
+
+				conf.buf_index = buf_index;
+				ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
+				if (ret) {
+					fprintf(stderr, "send failed fixed buf %i, "
+							"conn %i, addr %i, cork %i\n",
+						conf.fixed_buf, client_connect,
+						!!conf.addr, conf.cork);
+					return 1;
+				}
+			}
+		}
+
+		close(sock_client);
+		close(sock_server);
+	}
+	return 0;
+}
+
+static int test_async_addr(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct sockaddr_storage addr;
+	int sock_tx = -1, sock_rx = -1;
+	struct __kernel_timespec ts;
+	int ret;
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
+	if (ret) {
+		fprintf(stderr, "sock prep failed %d\n", ret);
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
+	sqe->user_data = 1;
+	sqe->flags |= IOSQE_IO_LINK;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
+	sqe->user_data = 2;
+	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
+				    sizeof(struct sockaddr_in6));
+
+	ret = io_uring_submit(ring);
+	assert(ret == 2);
+	memset(&addr, 0, sizeof(addr));
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+		return 1;
+	}
+	if (cqe->user_data != 1 || cqe->res != -ETIME) {
+		fprintf(stderr, "invalid timeout res %i %i\n",
+			(int)cqe->user_data, cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+		return 1;
+	}
+	if (cqe->user_data != 2 || cqe->res != 1) {
+		fprintf(stderr, "invalid send %i %i\n",
+			(int)cqe->user_data, cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
+	assert(ret == 1);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+		return 1;
+	}
+	assert(cqe->flags & IORING_CQE_F_NOTIF);
+	io_uring_cqe_seen(ring, cqe);
+
+	close(sock_tx);
+	close(sock_rx);
+	return 0;
+}
+
+static int test_sendzc_report(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct sockaddr_storage addr;
+	int sock_tx, sock_rx;
+	int ret;
+
+	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, true, false, true);
+	if (ret) {
+		fprintf(stderr, "sock prep failed %d\n", ret);
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0,
+				IORING_SEND_ZC_REPORT_USAGE);
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		fprintf(stderr, "io_uring_submit failed %i\n", ret);
+		return 1;
+	}
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+		return 1;
+	}
+	if (cqe->res != 1 && cqe->res != -EINVAL) {
+		fprintf(stderr, "sendzc report failed %u\n", cqe->res);
+		return 1;
+	}
+	if (!(cqe->flags & IORING_CQE_F_MORE)) {
+		fprintf(stderr, "expected notification %i\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
+		return 1;
+	}
+	if (cqe->flags & IORING_CQE_F_MORE) {
+		fprintf(stderr, "F_MORE after notification\n");
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	close(sock_tx);
+	close(sock_rx);
+	return 0;
+}
+
+/* see also send_recv.c:test_invalid */
+static int test_invalid_zc(int fds[2])
+{
+	struct io_uring ring;
+	int ret;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	bool notif = false;
+
+	if (!has_sendmsg)
+		return 0;
+
+	ret = t_create_ring(8, &ring, 0);
+	if (ret)
+		return ret;
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
+	sqe->opcode = IORING_OP_SENDMSG_ZC;
+	sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit failed %i\n", ret);
+		return ret;
+	}
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret)
+		return 1;
+	if (cqe->flags & IORING_CQE_F_MORE)
+		notif = true;
+	io_uring_cqe_seen(&ring, cqe);
+
+	if (notif) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret)
+			return 1;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+static int run_basic_tests(void)
+{
+	struct sockaddr_storage addr;
+	int ret, i, sp[2];
+
+	/* create TCP IPv6 pair */
+	ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
+	if (ret) {
+		fprintf(stderr, "sock prep failed %d\n", ret);
+		return -1;
+	}
+
+	for (i = 0; i < 2; i++) {
+		struct io_uring ring;
+		unsigned ring_flags = 0;
+
+		if (i & 1)
+			ring_flags |= IORING_SETUP_DEFER_TASKRUN;
+
+		ret = io_uring_queue_init(32, &ring, ring_flags);
+		if (ret) {
+			if (ret == -EINVAL)
+				continue;
+			fprintf(stderr, "queue init failed: %d\n", ret);
+			return -1;
+		}
+
+		ret = test_basic_send(&ring, sp[0], sp[1]);
+		if (ret) {
+			fprintf(stderr, "test_basic_send() failed\n");
+			return -1;
+		}
+
+		ret = test_send_faults(sp[0], sp[1]);
+		if (ret) {
+			fprintf(stderr, "test_send_faults() failed\n");
+			return -1;
+		}
+
+		ret = test_invalid_zc(sp);
+		if (ret) {
+			fprintf(stderr, "test_invalid_zc() failed\n");
+			return -1;
+		}
+
+		ret = test_async_addr(&ring);
+		if (ret) {
+			fprintf(stderr, "test_async_addr() failed\n");
+			return T_EXIT_FAIL;
+		}
+
+		ret = test_sendzc_report(&ring);
+		if (ret) {
+			fprintf(stderr, "test_sendzc_report() failed\n");
+			return T_EXIT_FAIL;
+		}
+
+		io_uring_queue_exit(&ring);
+	}
+
+	close(sp[0]);
+	close(sp[1]);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	size_t len;
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = probe_zc_support();
+	if (ret) {
+		printf("probe failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (!has_sendzc) {
+		printf("no IORING_OP_SEND_ZC support, skip\n");
+		return T_EXIT_SKIP;
+	}
+
+	page_sz = sysconf(_SC_PAGESIZE);
+
+	len = LARGE_BUF_SIZE;
+	tx_buffer = aligned_alloc(page_sz, len);
+	rx_buffer = aligned_alloc(page_sz, len);
+	if (tx_buffer && rx_buffer) {
+		buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
+		buffers_iov[BUF_T_LARGE].iov_len = len;
+	} else {
+		if (tx_buffer)
+			free(tx_buffer);
+		if (rx_buffer)
+			free(rx_buffer);
+
+		printf("skip large buffer tests, can't alloc\n");
+
+		len = 2 * page_sz;
+		tx_buffer = aligned_alloc(page_sz, len);
+		rx_buffer = aligned_alloc(page_sz, len);
+	}
+	if (!tx_buffer || !rx_buffer) {
+		fprintf(stderr, "can't allocate buffers\n");
+		return T_EXIT_FAIL;
+	}
+
+	srand((unsigned)time(NULL));
+	for (i = 0; i < len; i++)
+		tx_buffer[i] = i;
+	memset(rx_buffer, 0, len);
+
+	buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
+	buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
+	buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
+	buffers_iov[BUF_T_SMALL].iov_len = 137;
+	buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
+	buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;
+
+	if (len == LARGE_BUF_SIZE) {
+		void *huge_page;
+		int off = page_sz + 27;
+
+		len = 1U << 22;
+		huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
+				 MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
+				 -1, 0);
+		if (huge_page != MAP_FAILED) {
+			buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
+			buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
+		}
+	}
+
+	ret = run_basic_tests();
+	if (ret)
+		return T_EXIT_FAIL;
+
+	for (i = 0; i < 2; i++) {
+		struct io_uring ring;
+		unsigned ring_flags = 0;
+
+		if (i & 1)
+			ring_flags |= IORING_SETUP_SINGLE_ISSUER |
+				      IORING_SETUP_DEFER_TASKRUN;
+
+		ret = io_uring_queue_init(32, &ring, ring_flags);
+		if (ret) {
+			if (ret == -EINVAL)
+				continue;
+			fprintf(stderr, "queue init failed: %d\n", ret);
+			return -1;
+		}
+
+		ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
+		if (ret == T_SETUP_SKIP) {
+			fprintf(stderr, "can't register bufs, skip\n");
+			goto out;
+		} else if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer registration failed %i\n", ret);
+			return T_EXIT_FAIL;
+		}
+
+		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
+			buffers_iov[BUF_T_HUGETLB].iov_base += 13;
+			buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
+		}
+		if (buffers_iov[BUF_T_LARGE].iov_base) {
+			buffers_iov[BUF_T_LARGE].iov_base += 13;
+			buffers_iov[BUF_T_LARGE].iov_len -= 26;
+		}
+
+		ret = test_inet_send(&ring);
+		if (ret) {
+			fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n",
+					 ring_flags & IORING_SETUP_DEFER_TASKRUN);
+			return T_EXIT_FAIL;
+		}
+
+		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
+			buffers_iov[BUF_T_HUGETLB].iov_base -= 13;
+			buffers_iov[BUF_T_HUGETLB].iov_len += 26;
+		}
+		if (buffers_iov[BUF_T_LARGE].iov_base) {
+			buffers_iov[BUF_T_LARGE].iov_base -= 13;
+			buffers_iov[BUF_T_LARGE].iov_len += 26;
+		}
+out:
+		io_uring_queue_exit(&ring);
+	}
+
+	return T_EXIT_PASS;
+}
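
The new send-zerocopy test encodes the two-step completion contract of IORING_OP_SEND_ZC: the first CQE carries the byte count and has IORING_CQE_F_MORE set, and a second CQE with IORING_CQE_F_NOTIF arrives once the kernel has released the buffer; only then may the application reuse or free it. A minimal consumer sketch of that contract:

    /* drain one zero-copy send: result CQE, then its notification */
    static int reap_send_zc(struct io_uring *ring, int *res)
    {
        struct io_uring_cqe *cqe;
        int ret = io_uring_wait_cqe(ring, &cqe);

        if (ret)
            return ret;
        *res = cqe->res;                        /* bytes sent, or -errno */
        if (cqe->flags & IORING_CQE_F_MORE) {   /* notification pending */
            io_uring_cqe_seen(ring, cqe);
            ret = io_uring_wait_cqe(ring, &cqe); /* the F_NOTIF completion */
            if (ret)
                return ret;
        }
        io_uring_cqe_seen(ring, cqe);
        return 0;
    }
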
diff --git a/test/send_recv.c b/test/send_recv.c
index a7b001a..f68a158 100644
--- a/test/send_recv.c
+++ b/test/send_recv.c
@@ -23,7 +23,7 @@
 #define HOST	"127.0.0.1"
 
 static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock,
-		     int registerfiles)
+		     int registerfiles, int async, int provide)
 {
 	struct sockaddr_in saddr;
 	struct io_uring_sqe *sqe;
@@ -64,6 +64,10 @@
 	io_uring_prep_recv(sqe, use_fd, iov->iov_base, iov->iov_len, 0);
 	if (registerfiles)
 		sqe->flags |= IOSQE_FIXED_FILE;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	if (provide)
+		sqe->flags |= IOSQE_BUFFER_SELECT;
 	sqe->user_data = 2;
 
 	ret = io_uring_submit(ring);
@@ -79,7 +83,7 @@
 	return 1;
 }
 
-static int do_recv(struct io_uring *ring, struct iovec *iov)
+static int do_recv(struct io_uring *ring, struct iovec *iov, int enobufs)
 {
 	struct io_uring_cqe *cqe;
 	int ret;
@@ -87,11 +91,18 @@
 	ret = io_uring_wait_cqe(ring, &cqe);
 	if (ret) {
 		fprintf(stdout, "wait_cqe: %d\n", ret);
-		goto err;
+		return 1;
 	}
 	if (cqe->res == -EINVAL) {
 		fprintf(stdout, "recv not supported, skipping\n");
-		return 0;
+		goto out;
+	}
+	if (cqe->res == -ENOBUFS && enobufs) {
+		if (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) {
+			fprintf(stdout, "NONEMPTY set on -ENOBUFS\n");
+			goto err;
+		}
+		goto out;
 	}
 	if (cqe->res < 0) {
 		fprintf(stderr, "failed cqe: %d\n", cqe->res);
@@ -109,8 +120,11 @@
 		goto err;
 	}
 
+out:
+	io_uring_cqe_seen(ring, cqe);
 	return 0;
 err:
+	io_uring_cqe_seen(ring, cqe);
 	return 1;
 }
 
@@ -118,6 +132,8 @@
 	pthread_mutex_t mutex;
 	int use_sqthread;
 	int registerfiles;
+	int async;
+	int provide;
 };
 
 static void *recv_fn(void *data)
@@ -152,13 +168,14 @@
 		}
 	}
 
-	ret = recv_prep(&ring, &iov, &sock, rd->registerfiles);
+	ret = recv_prep(&ring, &iov, &sock, rd->registerfiles, rd->async,
+				rd->provide);
 	if (ret) {
 		fprintf(stderr, "recv_prep failed: %d\n", ret);
 		goto err;
 	}
 	pthread_mutex_unlock(&rd->mutex);
-	ret = do_recv(&ring, &iov);
+	ret = do_recv(&ring, &iov, rd->provide);
 
 	close(sock);
 	io_uring_queue_exit(&ring);
@@ -192,13 +209,13 @@
 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
 	if (sockfd < 0) {
 		perror("socket");
-		return 1;
+		goto err2;
 	}
 
 	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
 	if (ret < 0) {
 		perror("connect");
-		return 1;
+		goto err;
 	}
 
 	sqe = io_uring_get_sqe(&ring);
@@ -214,8 +231,7 @@
 	ret = io_uring_wait_cqe(&ring, &cqe);
 	if (cqe->res == -EINVAL) {
 		fprintf(stdout, "send not supported, skipping\n");
-		close(sockfd);
-		return 0;
+		goto err;
 	}
 	if (cqe->res != iov.iov_len) {
 		fprintf(stderr, "failed cqe: %d\n", cqe->res);
@@ -223,13 +239,17 @@
 	}
 
 	close(sockfd);
+	io_uring_queue_exit(&ring);
 	return 0;
+
 err:
 	close(sockfd);
+err2:
+	io_uring_queue_exit(&ring);
 	return 1;
 }
 
-static int test(int use_sqthread, int regfiles)
+static int test(int use_sqthread, int regfiles, int async, int provide)
 {
 	pthread_mutexattr_t attr;
 	pthread_t recv_thread;
@@ -243,6 +263,8 @@
 	pthread_mutex_lock(&rd.mutex);
 	rd.use_sqthread = use_sqthread;
 	rd.registerfiles = regfiles;
+	rd.async = async;
+	rd.provide = provide;
 
 	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
 	if (ret) {
@@ -257,6 +279,50 @@
 	return (intptr_t)retval;
 }
 
+static int test_invalid(void)
+{
+	struct io_uring ring;
+	int ret, i;
+	int fds[2];
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+
+	ret = t_create_ring(8, &ring, IORING_SETUP_SUBMIT_ALL);
+	if (ret) {
+		if (ret == -EINVAL)
+			return 0;
+		return ret;
+	}
+
+	ret = t_create_socket_pair(fds, true);
+	if (ret)
+		return ret;
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
+	sqe->flags |= IOSQE_ASYNC;
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_recvmsg(sqe, fds[1], NULL, 0);
+	sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit_and_wait(&ring, 2);
+	if (ret != 2)
+		return ret;
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_peek_cqe(&ring, &cqe);
+		if (ret || cqe->res != -EFAULT)
+			return -1;
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	io_uring_queue_exit(&ring);
+	close(fds[0]);
+	close(fds[1]);
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	int ret;
@@ -264,21 +330,81 @@
 	if (argc > 1)
 		return 0;
 
-	ret = test(0, 0);
+	ret = test_invalid();
 	if (ret) {
-		fprintf(stderr, "test sqthread=0 failed\n");
+		fprintf(stderr, "test_invalid failed\n");
 		return ret;
 	}
 
-	ret = test(1, 1);
+	ret = test(0, 0, 1, 1);
 	if (ret) {
-		fprintf(stderr, "test sqthread=1 reg=1 failed\n");
+		fprintf(stderr, "test sqthread=0 1 1 failed\n");
 		return ret;
 	}
 
-	ret = test(1, 0);
+	ret = test(1, 1, 1, 1);
 	if (ret) {
-		fprintf(stderr, "test sqthread=1 reg=0 failed\n");
+		fprintf(stderr, "test sqthread=1 reg=1 1 1 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 0, 1, 1);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=0 1 1 failed\n");
+		return ret;
+	}
+
+	ret = test(0, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test sqthread=0 0 1 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 1, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=1 0 1 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=0 0 1 failed\n");
+		return ret;
+	}
+
+	ret = test(0, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=0 0 1 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 1, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=1 1 0 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=0 1 0 failed\n");
+		return ret;
+	}
+
+	ret = test(0, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=0 0 0 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 1, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=1 0 0 failed\n");
+		return ret;
+	}
+
+	ret = test(1, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test sqthread=1 reg=0 0 0 failed\n");
 		return ret;
 	}
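
main() now walks the full (sqthread, regfiles, async, provide) matrix by hand: twelve calls covering the async and provided-buffer variants for each of the three sqthread/regfiles pairings (file registration is only exercised together with SQPOLL here). Expressed as a loop, the same coverage would read, as a sketch of the body above:

    /* equivalent enumeration of the twelve test() calls */
    static const int pairs[][2] = { {0, 0}, {1, 1}, {1, 0} }; /* sqthread, reg */

    for (int p = 0; p < 3; p++)
        for (int async = 0; async <= 1; async++)
            for (int provide = 0; provide <= 1; provide++) {
                ret = test(pairs[p][0], pairs[p][1], async, provide);
                if (ret) {
                    fprintf(stderr, "test %d %d %d %d failed\n",
                            pairs[p][0], pairs[p][1], async, provide);
                    return ret;
                }
            }
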
 
diff --git a/test/send_recvmsg.c b/test/send_recvmsg.c
index cce6c45..f2e1efc 100644
--- a/test/send_recvmsg.c
+++ b/test/send_recvmsg.c
@@ -184,22 +184,11 @@
 
 	if ((rd->buf_ring || rd->buf_select) && !rd->no_buf_add) {
 		if (rd->buf_ring) {
-			struct io_uring_buf_reg reg = { };
-			void *ptr;
-
-			if (posix_memalign(&ptr, 4096, 4096))
-				goto err;
-
-			reg.ring_addr = (unsigned long) ptr;
-			reg.ring_entries = 1;
-			reg.bgid = BUF_BGID;
-			if (io_uring_register_buf_ring(&ring, &reg, 0)) {
+			br = io_uring_setup_buf_ring(&ring, 1, BUF_BGID, 0, &ret);
+			if (!br) {
 				no_pbuf_ring = 1;
 				goto out;
 			}
-
-			br = ptr;
-			io_uring_buf_ring_init(br);
 			io_uring_buf_ring_add(br, buf, sizeof(buf), BUF_BID,
 					      io_uring_buf_ring_mask(1), 0);
 			io_uring_buf_ring_advance(br, 1);
@@ -246,9 +235,9 @@
 	ret = do_recvmsg(&ring, buf, rd);
 	close(sockfd);
 
-	io_uring_queue_exit(&ring);
 	if (br)
-		free(br);
+		io_uring_free_buf_ring(&ring, br, 1, BUF_BGID);
+	io_uring_queue_exit(&ring);
 err:
 	return (void *)(intptr_t)ret;
 out:
@@ -256,7 +245,7 @@
 out_no_ring:
 	pthread_mutex_unlock(mutex);
 	if (br)
-		free(br);
+		io_uring_free_buf_ring(&ring, br, 1, BUF_BGID);
 	return NULL;
 }
 
diff --git a/test/sendmsg_fs_cve.c b/test/sendmsg_fs_cve.c
deleted file mode 100644
index 2ce3114..0000000
--- a/test/sendmsg_fs_cve.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * repro-CVE-2020-29373 -- Reproducer for CVE-2020-29373.
- *
- * Copyright (c) 2021 SUSE
- * Author: Nicolai Stange <[email protected]>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include "liburing.h"
-
-/*
- * This attempts to make the kernel issue a sendmsg() to
- * path from io_uring's async io_sq_wq_submit_work().
- *
- * Unfortunately, IOSQE_ASYNC is available only from kernel version
- * 5.6 onwards. To still force io_uring to process the request
- * asynchronously from io_sq_wq_submit_work(), queue a couple of
- * auxiliary requests all failing with EAGAIN before. This is
- * implemented by writing repeatedly to an auxiliary O_NONBLOCK
- * AF_UNIX socketpair with a small SO_SNDBUF.
- */
-static int try_sendmsg_async(const char * const path)
-{
-	int snd_sock, r;
-	struct io_uring ring;
-	char sbuf[16] = {};
-	struct iovec siov = { .iov_base = &sbuf, .iov_len = sizeof(sbuf) };
-	struct sockaddr_un addr = {};
-	struct msghdr msg = {
-		.msg_name = &addr,
-		.msg_namelen = sizeof(addr),
-		.msg_iov = &siov,
-		.msg_iovlen = 1,
-	};
-	struct io_uring_cqe *cqe;
-	struct io_uring_sqe *sqe;
-
-	snd_sock = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if (snd_sock < 0) {
-		perror("socket(AF_UNIX)");
-		return -1;
-	}
-
-	addr.sun_family = AF_UNIX;
-	strcpy(addr.sun_path, path);
-
-	r = io_uring_queue_init(512, &ring, 0);
-	if (r < 0) {
-		fprintf(stderr, "ring setup failed: %d\n", r);
-		goto close_iour;
-	}
-
-	sqe = io_uring_get_sqe(&ring);
-	if (!sqe) {
-		fprintf(stderr, "get sqe failed\n");
-		r = -EFAULT;
-		goto close_iour;
-	}
-
-	/* the actual one supposed to fail with -ENOENT. */
-	io_uring_prep_sendmsg(sqe, snd_sock, &msg, 0);
-	sqe->flags = IOSQE_ASYNC;
-	sqe->user_data = 255;
-
-	r = io_uring_submit(&ring);
-	if (r != 1) {
-		fprintf(stderr, "sqe submit failed: %d\n", r);
-		r = -EFAULT;
-		goto close_iour;
-	}
-
-	r = io_uring_wait_cqe(&ring, &cqe);
-	if (r < 0) {
-		fprintf(stderr, "wait completion %d\n", r);
-		r = -EFAULT;
-		goto close_iour;
-	}
-	if (cqe->user_data != 255) {
-		fprintf(stderr, "user data %d\n", r);
-		r = -EFAULT;
-		goto close_iour;
-	}
-	if (cqe->res != -ENOENT) {
-		r = 3;
-		fprintf(stderr,
-			"error: cqe %i: res=%i, but expected -ENOENT\n",
-			(int)cqe->user_data, (int)cqe->res);
-	}
-	io_uring_cqe_seen(&ring, cqe);
-
-close_iour:
-	io_uring_queue_exit(&ring);
-	close(snd_sock);
-	return r;
-}
-
-int main(int argc, char *argv[])
-{
-	int r;
-	char tmpdir[] = "/tmp/tmp.XXXXXX";
-	int rcv_sock;
-	struct sockaddr_un addr = {};
-	pid_t c;
-	int wstatus;
-
-	if (!mkdtemp(tmpdir)) {
-		perror("mkdtemp()");
-		return 1;
-	}
-
-	rcv_sock = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if (rcv_sock < 0) {
-		perror("socket(AF_UNIX)");
-		r = 1;
-		goto rmtmpdir;
-	}
-
-	addr.sun_family = AF_UNIX;
-	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/sock", tmpdir);
-
-	r = bind(rcv_sock, (struct sockaddr *)&addr,
-		 sizeof(addr));
-	if (r < 0) {
-		perror("bind()");
-		close(rcv_sock);
-		r = 1;
-		goto rmtmpdir;
-	}
-
-	c = fork();
-	if (!c) {
-		close(rcv_sock);
-
-		r = chroot(tmpdir);
-		if (r) {
-			if (errno == EPERM) {
-				fprintf(stderr, "chroot not allowed, skip\n");
-				return 0;
-			}
-
-			perror("chroot()");
-			return 1;
-		}
-
-		r = try_sendmsg_async(addr.sun_path);
-		if (r < 0) {
-			/* system call failure */
-			r = 1;
-		} else if (r) {
-			/* test case failure */
-			r += 1;
-		}
-		return r;
-	}
-
-	if (waitpid(c, &wstatus, 0) == (pid_t)-1) {
-		perror("waitpid()");
-		r = 1;
-		goto rmsock;
-	}
-	if (!WIFEXITED(wstatus)) {
-		fprintf(stderr, "child got terminated\n");
-		r = 1;
-		goto rmsock;
-	}
-	r = WEXITSTATUS(wstatus);
-	if (r)
-		fprintf(stderr, "error: Test failed\n");
-rmsock:
-	close(rcv_sock);
-	unlink(addr.sun_path);
-rmtmpdir:
-	rmdir(tmpdir);
-	return r;
-}
diff --git a/test/short-read.c b/test/short-read.c
index a6f2620..a14abfd 100644
--- a/test/short-read.c
+++ b/test/short-read.c
@@ -71,5 +71,6 @@
 	}
 
 	io_uring_cqe_seen(&ring, cqe);
+	free(vec.iov_base);
 	return 0;
 }
diff --git a/test/shutdown.c b/test/shutdown.c
index 14c7407..9402c99 100644
--- a/test/shutdown.c
+++ b/test/shutdown.c
@@ -19,6 +19,7 @@
 #include <arpa/inet.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 static void sig_pipe(int sig)
 {
@@ -29,7 +30,7 @@
 	int p_fd[2], ret;
 	int32_t recv_s0;
 	int32_t val = 1;
-	struct sockaddr_in addr;
+	struct sockaddr_in addr = { };
 
 	if (argc > 1)
 		return 0;
@@ -44,11 +45,10 @@
 	assert(ret != -1);
 
 	addr.sin_family = AF_INET;
-	addr.sin_port = htons((rand() % 61440) + 4096);
 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
 
-	ret = bind(recv_s0, (struct sockaddr*)&addr, sizeof(addr));
-	assert(ret != -1);
+	ret = t_bind_ephemeral_port(recv_s0, &addr);
+	assert(!ret);
 	ret = listen(recv_s0, 128);
 	assert(ret != -1);
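
shutdown.c here, and the socket-rw variants further down, replace hand-rolled random-port bind loops with the t_bind_ephemeral_port() helper from test/helpers.h. The helper body is not part of this diff; a sketch of the standard technique it presumably wraps (an assumption, not a copy of the shipped code) is to bind port 0 and read the kernel-assigned port back with getsockname(2):

#include <errno.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* hypothetical stand-in for the t_bind_ephemeral_port() helper */
static int bind_ephemeral_port(int fd, struct sockaddr_in *addr)
{
	socklen_t len = sizeof(*addr);

	addr->sin_port = 0;	/* port 0 asks the kernel for a free port */
	if (bind(fd, (struct sockaddr *)addr, sizeof(*addr)))
		return -errno;
	/* recover the chosen port into addr->sin_port for later use */
	if (getsockname(fd, (struct sockaddr *)addr, &len))
		return -errno;
	return 0;
}

Either way, this removes both the rand()-based port guessing and the EADDRINUSE retry loop the deleted code needed; socket.c below additionally saves the assigned port in g_port so the sending side of the test can reach it.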
 
diff --git a/test/sigfd-deadlock.c b/test/sigfd-deadlock.c
index 277b342..1c48ea7 100644
--- a/test/sigfd-deadlock.c
+++ b/test/sigfd-deadlock.c
@@ -11,6 +11,7 @@
 #include <poll.h>
 #include <stdio.h>
 #include "liburing.h"
+#include "helpers.h"
 
 static int setup_signal(void)
 {
@@ -34,22 +35,35 @@
 	struct io_uring ring;
 	int ret;
 
-	io_uring_queue_init(32, &ring, 0);
+	ret = io_uring_queue_init(32, &ring, 0);
+	if (ret)
+		return T_EXIT_FAIL;
 
 	sqe = io_uring_get_sqe(&ring);
 	io_uring_prep_poll_add(sqe, sfd, POLLIN);
-	io_uring_submit(&ring);
+	ret = io_uring_submit(&ring);
+	if (ret < 0) {
+		ret = T_EXIT_FAIL;
+		goto err_exit;
+	}
 
 	kill(getpid(), SIGINT);
 
 	io_uring_wait_cqe(&ring, &cqe);
-	if (cqe->res & POLLIN) {
-		ret = 0;
+	if (cqe->res == -EOPNOTSUPP) {
+		fprintf(stderr, "signalfd poll not supported\n");
+		ret = T_EXIT_SKIP;
+	} else if (cqe->res < 0) {
+		fprintf(stderr, "poll failed: %d\n", cqe->res);
+		ret = T_EXIT_FAIL;
+	} else if (cqe->res & POLLIN) {
+		ret = T_EXIT_PASS;
 	} else {
 		fprintf(stderr, "Unexpected poll mask %x\n", cqe->res);
-		ret = 1;
+		ret = T_EXIT_FAIL;
 	}
 	io_uring_cqe_seen(&ring, cqe);
+err_exit:
 	io_uring_queue_exit(&ring);
 	return ret;
 }
@@ -59,14 +73,14 @@
 	int sfd, ret;
 
 	if (argc > 1)
-		return 0;
+		return T_EXIT_PASS;
 
 	sfd = setup_signal();
 	if (sfd < 0)
-		return 1;
+		return T_EXIT_FAIL;
 
 	ret = test_uring(sfd);
-	if (ret)
+	if (ret == T_EXIT_FAIL)
 		fprintf(stderr, "test_uring signalfd failed\n");
 
 	close(sfd);
diff --git a/test/single-issuer.c b/test/single-issuer.c
new file mode 100644
index 0000000..cb61a5e
--- /dev/null
+++ b/test/single-issuer.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: MIT */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "test.h"
+#include "helpers.h"
+
+static pid_t pid;
+
+static pid_t fork_t(void)
+{
+	pid = fork();
+	if (pid == -1) {
+		fprintf(stderr, "fork failed\n");
+		exit(T_EXIT_FAIL);
+	}
+	return pid;
+}
+
+static void wait_child_t(void)
+{
+	int wstatus;
+
+	if (waitpid(pid, &wstatus, 0) == (pid_t)-1) {
+		perror("waitpid()");
+		exit(T_EXIT_FAIL);
+	}
+	if (!WIFEXITED(wstatus)) {
+		fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus));
+		exit(T_EXIT_FAIL);
+	}
+	if (WEXITSTATUS(wstatus))
+		exit(T_EXIT_FAIL);
+}
+
+static int try_submit(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_nop(sqe);
+	sqe->user_data = 42;
+
+	ret = io_uring_submit(ring);
+	if (ret < 0)
+		return ret;
+
+	if (ret != 1)
+		t_error(1, ret, "submit %i", ret);
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret)
+		t_error(1, ret, "wait fail %i", ret);
+
+	if (cqe->res || cqe->user_data != 42)
+		t_error(1, ret, "invalid cqe");
+
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER);
+	if (ret == -EINVAL) {
+		return T_EXIT_SKIP;
+	} else if (ret) {
+		fprintf(stderr, "io_uring_queue_init() failed %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* test that the creator is allowed to submit */
+	ret = try_submit(&ring);
+	if (ret) {
+		fprintf(stderr, "the creator can't submit %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* test that a second submitter doesn't succeed */
+	if (!fork_t()) {
+		ret = try_submit(&ring);
+		if (ret != -EEXIST)
+			fprintf(stderr, "1: non-owner child could submit %i\n", ret);
+		return ret != -EEXIST;
+	}
+	wait_child_t();
+	io_uring_queue_exit(&ring);
+
+	/* test that the first submitter but not creator can submit */
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_R_DISABLED);
+	if (ret)
+		t_error(1, ret, "ring init (2) %i", ret);
+
+	if (!fork_t()) {
+		io_uring_enable_rings(&ring);
+		ret = try_submit(&ring);
+		if (ret)
+			fprintf(stderr, "2: first submitter could not submit %i\n", ret);
+		return !!ret;
+	}
+	wait_child_t();
+	io_uring_queue_exit(&ring);
+
+	/* test that only the first enabler can submit */
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
+					    IORING_SETUP_R_DISABLED);
+	if (ret)
+		t_error(1, ret, "ring init (3) %i", ret);
+
+	io_uring_enable_rings(&ring);
+	if (!fork_t()) {
+		ret = try_submit(&ring);
+		if (ret != -EEXIST)
+			fprintf(stderr, "3: non-owner child could submit %i\n", ret);
+		return ret != -EEXIST;
+	}
+	wait_child_t();
+	io_uring_queue_exit(&ring);
+
+	/* test that anyone can submit to a SQPOLL|SINGLE_ISSUER ring */
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_SQPOLL);
+	if (ret)
+		t_error(1, ret, "ring init (4) %i", ret);
+
+	ret = try_submit(&ring);
+	if (ret) {
+		fprintf(stderr, "SQPOLL submit failed (creator) %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (!fork_t()) {
+		ret = try_submit(&ring);
+		if (ret)
+			fprintf(stderr, "SQPOLL submit failed (child) %i\n", ret);
+		return !!ret;
+	}
+	wait_child_t();
+	io_uring_queue_exit(&ring);
+
+	/* test that IORING_ENTER_REGISTERED_RING doesn't break anything */
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER);
+	if (ret)
+		t_error(1, ret, "ring init (5) %i", ret);
+
+	if (!fork_t()) {
+		ret = try_submit(&ring);
+		if (ret != -EEXIST)
+			fprintf(stderr, "4: non-owner child could submit %i\n", ret);
+		return ret != -EEXIST;
+	}
+	wait_child_t();
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
diff --git a/test/skip-cqe.c b/test/skip-cqe.c
index 99b882b..c2869e5 100644
--- a/test/skip-cqe.c
+++ b/test/skip-cqe.c
@@ -8,6 +8,7 @@
 #include <assert.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 #define LINK_SIZE 		6
 #define TIMEOUT_USER_DATA	(-1)
@@ -323,10 +324,8 @@
 		return 1;
 	}
 
-	if (!(ring.features & IORING_FEAT_CQE_SKIP)) {
-		printf("IOSQE_CQE_SKIP_SUCCESS is not supported, skip\n");
-		return 0;
-	}
+	if (!(ring.features & IORING_FEAT_CQE_SKIP))
+		return T_EXIT_SKIP;
 
 	for (i = 0; i < 4; i++) {
 		bool skip_last = i & 1;
diff --git a/test/socket-getsetsock-cmd.c b/test/socket-getsetsock-cmd.c
new file mode 100644
index 0000000..53f04d1
--- /dev/null
+++ b/test/socket-getsetsock-cmd.c
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Check that {g,s}etsockopt CMD operations on sockets are
+ * consistent.
+ *
+ * The tests basically do the same socket operation using regular system calls
+ * and io_uring commands, and then compare the results.
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/tcp.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define USERDATA 0xff42ff
+#define MSG "foobarbaz"
+
+static int no_sock_opt;
+
+struct fds {
+	int tx;
+	int rx;
+};
+
+static struct fds create_sockets(void)
+{
+	struct fds retval;
+	int fd[2];
+
+	t_create_socket_pair(fd, true);
+
+	retval.tx = fd[0];
+	retval.rx = fd[1];
+
+	return retval;
+}
+
+static struct io_uring create_ring(void)
+{
+	struct io_uring ring;
+	int ring_flags = 0;
+	int err;
+
+	err = io_uring_queue_init(32, &ring, ring_flags);
+	assert(err == 0);
+
+	return ring;
+}
+
+static int submit_cmd_sqe(struct io_uring *ring, int32_t fd,
+			  int op, int level, int optname,
+			  void *optval, int optlen,
+			  bool async)
+{
+	struct io_uring_sqe *sqe;
+	int err;
+
+	assert(fd > 0);
+
+	sqe = io_uring_get_sqe(ring);
+	assert(sqe != NULL);
+
+	io_uring_prep_cmd_sock(sqe, op, fd, level, optname, optval, optlen);
+	sqe->user_data = USERDATA;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	/* Submitting SQE */
+	err = io_uring_submit_and_wait(ring, 1);
+	if (err != 1)
+		fprintf(stderr, "Failure: io_uring_submit_and_wait returned %d\n", err);
+
+	return err;
+}
+
+static int receive_cqe(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int err;
+
+	err = io_uring_wait_cqe(ring, &cqe);
+	assert(err == 0);
+	assert(cqe->user_data == USERDATA);
+	io_uring_cqe_seen(ring, cqe);
+
+	/* Return the result of the operation */
+	return cqe->res;
+}
+
+/*
+ * Run getsock operation using SO_RCVBUF using io_uring cmd operation and
+ * getsockopt(2) and compare the results.
+ */
+static int run_get_rcvbuf(struct io_uring *ring, struct fds *sockfds, bool async)
+{
+	int sval, uval, ulen, err;
+	unsigned int slen;
+
+	/* System call values */
+	slen = sizeof(sval);
+	/* io_uring values */
+	ulen = sizeof(uval);
+
+	/* get through io_uring cmd */
+	err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT,
+			     SOL_SOCKET, SO_RCVBUF, &uval, ulen, async);
+	assert(err == 1);
+
+	/* Wait for the CQE */
+	err = receive_cqe(ring);
+	if (err == -EOPNOTSUPP)
+		return T_EXIT_SKIP;
+	if (err < 0) {
+		fprintf(stderr, "Error received. %d\n", err);
+		return T_EXIT_FAIL;
+	}
+	/* The output of CQE->res contains the length */
+	ulen = err;
+
+	/* Execute the same operation using a system call */
+	err = getsockopt(sockfds->rx, SOL_SOCKET, SO_RCVBUF, &sval, &slen);
+	assert(err == 0);
+
+	/* Make sure the io_uring operation returns the same value as the system call */
+	assert(ulen == slen);
+	assert(uval == sval);
+
+	return T_EXIT_PASS;
+}
+
+/*
+ * Run getsock operation using SO_PEERNAME using io_uring cmd operation
+ * and getsockopt(2) and compare the results.
+ */
+static int run_get_peername(struct io_uring *ring, struct fds *sockfds, bool async)
+{
+	struct sockaddr sval, uval = {};
+	socklen_t slen = sizeof(sval);
+	socklen_t ulen = sizeof(uval);
+	int err;
+
+	/* Get values from the system call */
+	err = getsockopt(sockfds->tx, SOL_SOCKET, SO_PEERNAME, &sval, &slen);
+	assert(err == 0);
+
+	/* Getting SO_PEERNAME */
+	err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT,
+			     SOL_SOCKET, SO_PEERNAME, &uval, ulen, async);
+	assert(err == 1);
+
+	/* Wait for the CQE */
+	err = receive_cqe(ring);
+	if (err == -EOPNOTSUPP || err == -EINVAL) {
+		no_sock_opt = 1;
+		return T_EXIT_SKIP;
+	}
+
+	if (err < 0) {
+		fprintf(stderr, "%s: Error in the CQE: %d\n", __func__, err);
+		return T_EXIT_FAIL;
+	}
+
+	/* The length comes from cqe->res, which is returned from receive_cqe() */
+	ulen = err;
+
+	/* Make sure the io_uring operation returns the same values as the system call */
+	assert(sval.sa_family == uval.sa_family);
+	assert(slen == ulen);
+
+	return T_EXIT_PASS;
+}
+
+/*
+ * Run getsockopt tests, comparing io_uring output against system call results.
+ */
+static int run_getsockopt_test(struct io_uring *ring, struct fds *sockfds)
+{
+	int err;
+
+	err = run_get_peername(ring, sockfds, false);
+	if (err)
+		return err;
+
+	err = run_get_peername(ring, sockfds, true);
+	if (err)
+		return err;
+
+	err = run_get_rcvbuf(ring, sockfds, false);
+	if (err)
+		return err;
+
+	return run_get_rcvbuf(ring, sockfds, true);
+}
+
+/*
+ * Given a `val` value, set it in SO_REUSEPORT using io_uring cmd, and read using
+ * getsockopt(2), and make sure they match.
+ */
+static int run_setsockopt_reuseport(struct io_uring *ring, struct fds *sockfds,
+				    int val, bool async)
+{
+	unsigned int slen, ulen;
+	int sval, uval = val;
+	int err;
+
+	slen = sizeof(sval);
+	ulen = sizeof(uval);
+
+	/* Setting SO_REUSEPORT */
+	err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT,
+			     SOL_SOCKET, SO_REUSEPORT, &uval, ulen, async);
+	assert(err == 1);
+
+	err = receive_cqe(ring);
+	if (err == -EOPNOTSUPP)
+		return T_EXIT_SKIP;
+
+	/* Get values from the system call */
+	err = getsockopt(sockfds->rx, SOL_SOCKET, SO_REUSEPORT, &sval, &slen);
+	assert(err == 0);
+
+	/* Make sure the value set via io_uring cmd matches what the system call returns */
+	assert(uval == sval);
+	assert(ulen == slen);
+
+	return T_EXIT_PASS;
+}
+
+/*
+ * Given a `val` value, set the TCP_USER_TIMEOUT using io_uring and read using
+ * getsockopt(2). Make sure they match
+ */
+static int run_setsockopt_usertimeout(struct io_uring *ring, struct fds *sockfds,
+				      int val, bool async)
+{
+	int optname = TCP_USER_TIMEOUT;
+	int level = IPPROTO_TCP;
+	unsigned int slen, ulen;
+	int sval, uval, err;
+
+	slen = sizeof(sval);
+	ulen = sizeof(uval);
+
+	uval = val;
+
+	/* Setting timeout */
+	err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT,
+			     level, optname, &uval, ulen, async);
+	assert(err == 1);
+
+	err = receive_cqe(ring);
+	if (err == -EOPNOTSUPP)
+		return T_EXIT_SKIP;
+	if (err < 0) {
+		fprintf(stderr, "%s: Got an error: %d\n", __func__, err);
+		return T_EXIT_FAIL;
+	}
+
+	/* Get the value from the system call, to make sure it was set */
+	err = getsockopt(sockfds->rx, level, optname, &sval, &slen);
+	assert(err == 0);
+	assert(uval == sval);
+
+	return T_EXIT_PASS;
+}
+
+/* Test setsockopt() for SOL_SOCKET */
+static int run_setsockopt_test(struct io_uring *ring, struct fds *sockfds)
+{
+	int err, i;
+	int j;
+
+	for (j = 0; j < 2; j++) {
+		bool async = j & 1;
+
+		for (i = 0; i <= 1; i++) {
+			err = run_setsockopt_reuseport(ring, sockfds, i, async);
+			if (err)
+				return err;
+		}
+
+		for (i = 1; i <= 10; i++) {
+			err = run_setsockopt_usertimeout(ring, sockfds, i, async);
+			if (err)
+				return err;
+		}
+	}
+
+	return err;
+}
+
+/* Send data through the sockets */
+static void send_data(struct fds *s)
+{
+	ssize_t written_bytes;
+	/* Send data using the tx side of the socket pair */
+	written_bytes = write(s->tx, MSG, strlen(MSG));
+	assert(written_bytes == strlen(MSG));
+}
+
+int main(int argc, char *argv[])
+{
+	struct fds sockfds;
+	struct io_uring ring;
+	int err;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	/* Simple io_uring ring creation */
+	ring = create_ring();
+
+	/* Create sockets */
+	sockfds = create_sockets();
+
+	send_data(&sockfds);
+
+	err = run_getsockopt_test(&ring, &sockfds);
+	if (err) {
+		if (err == T_EXIT_SKIP) {
+			fprintf(stderr, "Skipping tests.\n");
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "Failed to run test: %d\n", err);
+		return err;
+	}
+	if (no_sock_opt)
+		return T_EXIT_SKIP;
+
+	err = run_setsockopt_test(&ring, &sockfds);
+	if (err) {
+		if (err == T_EXIT_SKIP) {
+			fprintf(stderr, "Skipping tests.\n");
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "Failed to run test: %d\n", err);
+		return err;
+	}
+
+	io_uring_queue_exit(&ring);
+	return err;
+}
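
Condensing what submit_cmd_sqe() and receive_cqe() above implement: a getsockopt issued as a socket command writes the option into optval and, on success, reports the resulting optlen in cqe->res. A sketch of the full round trip as one hypothetical helper, using the same io_uring_prep_cmd_sock() call as the test:

#include <errno.h>
#include "liburing.h"

/* illustrative: a getsockopt(2) equivalent issued over io_uring */
static int uring_getsockopt(struct io_uring *ring, int fd, int level,
			    int optname, void *optval, int optlen)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_GETSOCKOPT, fd, level,
			       optname, optval, optlen);
	ret = io_uring_submit_and_wait(ring, 1);
	if (ret != 1)
		return ret < 0 ? ret : -EIO;
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	ret = cqe->res;		/* < 0: -errno; >= 0: length of the option */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}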
diff --git a/test/socket-io-cmd.c b/test/socket-io-cmd.c
new file mode 100644
index 0000000..1c19a02
--- /dev/null
+++ b/test/socket-io-cmd.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Check that CMD operations on sockets are consistent.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define USERDATA 0x1234
+#define MSG "foobarbaz"
+
+static int no_io_cmd;
+
+struct fds {
+	int tx;
+	int rx;
+};
+
+/* Create 2 sockets (tx, rx) given the socket type */
+static struct fds create_sockets(bool stream)
+{
+	struct fds retval;
+	int fd[2];
+
+	t_create_socket_pair(fd, stream);
+
+	retval.tx = fd[0];
+	retval.rx = fd[1];
+
+	return retval;
+}
+
+static int create_sqe_and_submit(struct io_uring *ring, int32_t fd, int op)
+{
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	assert(fd > 0);
+	sqe = io_uring_get_sqe(ring);
+	assert(sqe != NULL);
+
+	io_uring_prep_cmd_sock(sqe, op, fd, 0, 0, NULL, 0);
+	sqe->user_data = USERDATA;
+
+	/* Submitting SQE */
+	ret = io_uring_submit_and_wait(ring, 1);
+	if (ret <= 0)
+		return ret;
+
+	return 0;
+}
+
+static int receive_cqe(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int err;
+
+	err = io_uring_wait_cqe(ring, &cqe);
+	assert(err == 0);
+	assert(cqe->user_data == USERDATA);
+	err = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	/* Return the result of the operation */
+	return err;
+}
+
+static ssize_t send_data(struct fds *s, char *str)
+{
+	ssize_t written_bytes;
+
+	written_bytes = write(s->tx, str, strlen(str));
+	assert(written_bytes == strlen(str));
+
+	return written_bytes;
+}
+
+static int run_test(bool stream)
+{
+	struct fds sockfds;
+	ssize_t bytes_in, bytes_out;
+	struct io_uring ring;
+	size_t written_bytes;
+	int error;
+
+	/* Create the two sockets (tx, rx) */
+	sockfds = create_sockets(stream);
+	assert(sockfds.tx > 0);
+	assert(sockfds.rx > 0);
+	/* Send data using the tx side of the socket pair */
+	written_bytes = send_data(&sockfds, MSG);
+
+	/* Simple io_uring ring creation */
+	error = t_create_ring(1, &ring, 0);
+	if (error == T_SETUP_SKIP)
+		return error;
+	else if (error != T_SETUP_OK)
+		return T_EXIT_FAIL;
+
+	error = create_sqe_and_submit(&ring, sockfds.rx,
+				      SOCKET_URING_OP_SIOCINQ);
+	if (error)
+		return T_EXIT_FAIL;
+	bytes_in = receive_cqe(&ring);
+	if (bytes_in < 0) {
+		if (bytes_in == -EINVAL || bytes_in == -EOPNOTSUPP) {
+			no_io_cmd = 1;
+			return T_EXIT_SKIP;
+		}
+		fprintf(stderr, "Bad return value %ld\n", (long) bytes_in);
+		return T_EXIT_FAIL;
+	}
+
+	error = create_sqe_and_submit(&ring, sockfds.tx,
+				      SOCKET_URING_OP_SIOCOUTQ);
+	if (error)
+		return T_EXIT_FAIL;
+
+	bytes_out = receive_cqe(&ring);
+	if (bytes_in == -ENOTSUP || bytes_out == -ENOTSUP) {
+		fprintf(stderr, "Skipping tests. -ENOTSUP returned\n");
+		return T_EXIT_SKIP;
+	}
+
+	/*
+	 * Assert that all written bytes are accounted for: either still in
+	 * the send buffer or already on the receive side
+	 */
+	if (bytes_in + bytes_out != written_bytes) {
+		fprintf(stderr, "values do not match: %zd+%zd != %zu\n",
+			bytes_in, bytes_out, written_bytes);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+
+	return T_EXIT_PASS;
+}
+
+/*
+ * Make sure that SIOCOUTQ and SIOCINQ return the same value
+ * using ioctl(2) and uring commands for raw sockets
+ */
+static int run_test_raw(void)
+{
+	int ioctl_siocoutq, ioctl_siocinq;
+	int uring_siocoutq, uring_siocinq;
+	struct io_uring ring;
+	int retry = 0, sock, error;
+
+	sock = socket(PF_INET, SOCK_RAW, IPPROTO_TCP);
+	if (sock == -1)  {
+		/* You need root to create a raw socket */
+		perror("Not able to create a raw socket");
+		return T_EXIT_SKIP;
+	}
+
+	/* Create a ring to issue the same queries via uring cmd */
+	error = t_create_ring(1, &ring, 0);
+	if (error == T_SETUP_SKIP)
+		return error;
+	else if (error != T_SETUP_OK)
+		return T_EXIT_FAIL;
+
+again:
+	/* Simple SIOCOUTQ using ioctl */
+	error = ioctl(sock, SIOCOUTQ, &ioctl_siocoutq);
+	if (error < 0) {
+		fprintf(stderr, "Failed to run ioctl(SIOCOUTQ): %d\n", error);
+		return T_EXIT_FAIL;
+	}
+
+	error = ioctl(sock, SIOCINQ, &ioctl_siocinq);
+	if (error < 0) {
+		fprintf(stderr, "Failed to run ioctl(SIOCINQ): %d\n", error);
+		return T_EXIT_FAIL;
+	}
+
+	create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCOUTQ);
+	uring_siocoutq = receive_cqe(&ring);
+
+	create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCINQ);
+	uring_siocinq = receive_cqe(&ring);
+
+	/* Check that both values (ioctl and uring CMD) match */
+	if (uring_siocoutq != ioctl_siocoutq) {
+		if (!retry) {
+			retry = 1;
+			goto again;
+		}
+		fprintf(stderr, "values do not match: %d != %d\n",
+			uring_siocoutq, ioctl_siocoutq);
+		return T_EXIT_FAIL;
+	}
+	if (uring_siocinq != ioctl_siocinq) {
+		if (!retry) {
+			retry = 1;
+			goto again;
+		}
+		fprintf(stderr, "values do not match: %d != %d\n",
+			uring_siocinq, ioctl_siocinq);
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	if (argc > 1)
+		return 0;
+
+	/* Test SOCK_STREAM */
+	err = run_test(true);
+	if (err)
+		return err;
+	if (no_io_cmd)
+		return T_EXIT_SKIP;
+
+	/* Test SOCK_DGRAM */
+	err = run_test(false);
+	if (err)
+		return err;
+
+	/* Test raw sockets */
+	return run_test_raw();
+}
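
For reference, the baseline the raw-socket case above compares against is plain ioctl(2): SIOCINQ reports unread bytes queued on the receive side and SIOCOUTQ unsent bytes on the send side, which is why run_test() asserts bytes_in + bytes_out == written_bytes for a freshly written message. A syscall-only sketch (helper name illustrative):

#include <sys/ioctl.h>
#include <linux/sockios.h>

/* illustrative: read both queue depths with plain ioctls */
static int queue_depths(int sock, int *inq, int *outq)
{
	if (ioctl(sock, SIOCINQ, inq) < 0)	/* unread rx bytes */
		return -1;
	if (ioctl(sock, SIOCOUTQ, outq) < 0)	/* unsent tx bytes */
		return -1;
	return 0;
}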
diff --git a/test/socket-rw-eagain.c b/test/socket-rw-eagain.c
index 2d6a817..896d455 100644
--- a/test/socket-rw-eagain.c
+++ b/test/socket-rw-eagain.c
@@ -18,6 +18,7 @@
 #include <arpa/inet.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -41,18 +42,8 @@
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
-	do {
-		addr.sin_port = htons((rand() % 61440) + 4096);
-		ret = bind(recv_s0, (struct sockaddr*)&addr, sizeof(addr));
-		if (!ret)
-			break;
-		if (errno != EADDRINUSE) {
-			perror("bind");
-			exit(1);
-		}
-	} while (1);
-
+	ret = t_bind_ephemeral_port(recv_s0, &addr);
+	assert(!ret);
 	ret = listen(recv_s0, 128);
 	assert(ret != -1);
 
diff --git a/test/socket-rw-offset.c b/test/socket-rw-offset.c
index 987b6c9..2c7e239 100644
--- a/test/socket-rw-offset.c
+++ b/test/socket-rw-offset.c
@@ -20,6 +20,7 @@
 #include <arpa/inet.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -43,17 +44,8 @@
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
-	do {
-		addr.sin_port = htons((rand() % 61440) + 4096);
-		ret = bind(recv_s0, (struct sockaddr*)&addr, sizeof(addr));
-		if (!ret)
-			break;
-		if (errno != EADDRINUSE) {
-			perror("bind");
-			exit(1);
-		}
-	} while (1);
+	ret = t_bind_ephemeral_port(recv_s0, &addr);
+	assert(!ret);
 	ret = listen(recv_s0, 128);
 	assert(ret != -1);
 
diff --git a/test/socket-rw.c b/test/socket-rw.c
index 4fbf032..833afd8 100644
--- a/test/socket-rw.c
+++ b/test/socket-rw.c
@@ -20,6 +20,7 @@
 #include <arpa/inet.h>
 
 #include "liburing.h"
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
@@ -43,17 +44,8 @@
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
-	do {
-		addr.sin_port = htons((rand() % 61440) + 4096);
-		ret = bind(recv_s0, (struct sockaddr*)&addr, sizeof(addr));
-		if (!ret)
-			break;
-		if (errno != EADDRINUSE) {
-			perror("bind");
-			exit(1);
-		}
-	} while (1);
+	ret = t_bind_ephemeral_port(recv_s0, &addr);
+	assert(!ret);
 	ret = listen(recv_s0, 128);
 	assert(ret != -1);
 
diff --git a/test/socket.c b/test/socket.c
index 6a3ea09..a461a5b 100644
--- a/test/socket.c
+++ b/test/socket.c
@@ -11,6 +11,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <pthread.h>
+#include <assert.h>
 
 #include "liburing.h"
 #include "helpers.h"
@@ -19,10 +20,10 @@
 
 #define MAX_MSG	128
 
-#define PORT	10202
 #define HOST	"127.0.0.1"
 
 static int no_socket;
+static __be16 g_port;
 
 static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock,
 		     int registerfiles)
@@ -34,7 +35,6 @@
 	memset(&saddr, 0, sizeof(saddr));
 	saddr.sin_family = AF_INET;
 	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
-	saddr.sin_port = htons(PORT);
 
 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
 	if (sockfd < 0) {
@@ -45,11 +45,11 @@
 	val = 1;
 	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
 
-	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
-	if (ret < 0) {
+	if (t_bind_ephemeral_port(sockfd, &saddr)) {
 		perror("bind");
 		goto err;
 	}
+	g_port = saddr.sin_port;
 
 	if (registerfiles) {
 		ret = io_uring_register_files(ring, &sockfd, 1);
@@ -244,9 +244,10 @@
 		}
 	}
 
+	assert(g_port != 0);
 	memset(&saddr, 0, sizeof(saddr));
 	saddr.sin_family = AF_INET;
-	saddr.sin_port = htons(PORT);
+	saddr.sin_port = g_port;
 	inet_pton(AF_INET, HOST, &saddr.sin_addr);
 
 	sqe = io_uring_get_sqe(&ring);
@@ -271,7 +272,6 @@
 	}
 	if (cqe->res < 0) {
 		if (cqe->res == -EINVAL) {
-			fprintf(stdout, "No socket support, skipping\n");
 			no_socket = 1;
 			io_uring_cqe_seen(&ring, cqe);
 			return fallback_send(&ring, &saddr);
@@ -365,6 +365,43 @@
 	return (intptr_t)retval;
 }
 
+static int test_bad_socket(void)
+{
+	struct io_uring ring;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue init failed: %d\n", ret);
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_socket(sqe, -1, SOCK_DGRAM, 0, 0);
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "socket submit: %d\n", ret);
+		goto err;
+	}
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait_cqe: %d\n", ret);
+		goto err;
+	}
+	if (cqe->res != -EAFNOSUPPORT) {
+		fprintf(stderr, "socket res: %d\n", cqe->res);
+		goto err;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+	io_uring_queue_exit(&ring);
+	return 0;
+err:
+	io_uring_queue_exit(&ring);
+	return 1;
+}
+
 int main(int argc, char *argv[])
 {
 	int ret;
@@ -404,5 +441,11 @@
 		return ret;
 	}
 
+	ret = test_bad_socket();
+	if (ret) {
+		fprintf(stderr, "test bad socket failed\n");
+		return 1;
+	}
+
 	return 0;
 }
diff --git a/test/splice.c b/test/splice.c
index 5e9b789..a883587 100644
--- a/test/splice.c
+++ b/test/splice.c
@@ -191,7 +191,7 @@
 			    IORING_OP_SPLICE);
 }
 
-static int do_tee(struct io_uring *ring, int fd_in, int fd_out, 
+static int do_tee(struct io_uring *ring, int fd_in, int fd_out,
 		  unsigned int len)
 {
 	return do_splice_op(ring, fd_in, 0, fd_out, 0, len, IORING_OP_TEE);
@@ -504,6 +504,8 @@
 	splice_flags = SPLICE_F_FD_IN_FIXED;
 	sqe_flags = IOSQE_FIXED_FILE;
 	ret = test_splice(&ring, &ctx);
+	free(ctx.buf_in);
+	free(ctx.buf_out);
 	if (ret) {
 		fprintf(stderr, "registered fds splice tests failed\n");
 		return ret;
diff --git a/test/sq-poll-dup.c b/test/sq-poll-dup.c
index 6a72b82..3ba5b87 100644
--- a/test/sq-poll-dup.c
+++ b/test/sq-poll-dup.c
@@ -171,14 +171,21 @@
 	vecs = t_create_buffers(BUFFERS, BS);
 
 	fd = open(fname, O_RDONLY | O_DIRECT);
-	if (fname != argv[1])
-		unlink(fname);
-
 	if (fd < 0) {
+		int __e = errno;
+
+		if (fname != argv[1])
+			unlink(fname);
+
+		if (__e == EINVAL || __e == EPERM || __e == EACCES)
+			return T_EXIT_SKIP;
 		perror("open");
 		return -1;
 	}
 
+	if (fname != argv[1])
+		unlink(fname);
+
 	ret = test(fd, 0, 0);
 	if (ret) {
 		fprintf(stderr, "test 0 0 failed\n");
diff --git a/test/sq-poll-kthread.c b/test/sq-poll-kthread.c
index 3f4a07b..bf5122a 100644
--- a/test/sq-poll-kthread.c
+++ b/test/sq-poll-kthread.c
@@ -117,7 +117,7 @@
 	return ret;
 }
 
-int test_sq_poll_kthread_stopped(bool do_exit)
+static int test_sq_poll_kthread_stopped(bool do_exit)
 {
 	pid_t pid;
 	int status = 0;
diff --git a/test/sq-poll-share.c b/test/sq-poll-share.c
index 7bb7626..08b087e 100644
--- a/test/sq-poll-share.c
+++ b/test/sq-poll-share.c
@@ -89,13 +89,16 @@
 	vecs = t_create_buffers(BUFFERS, BS);
 
 	fd = open(fname, O_RDONLY | O_DIRECT);
-	if (fname != argv[1])
-		unlink(fname);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES || errno == EINVAL)
+			return T_EXIT_SKIP;
 		perror("open");
 		return -1;
 	}
 
+	if (fname != argv[1])
+		unlink(fname);
+
 	for (i = 0; i < NR_RINGS; i++) {
 		struct io_uring_params p = { };
 
diff --git a/test/sqpoll-cancel-hang.c b/test/sqpoll-cancel-hang.c
deleted file mode 100644
index ef62272..0000000
--- a/test/sqpoll-cancel-hang.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#include <fcntl.h>
-#include <signal.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/wait.h>
-#include <time.h>
-#include <unistd.h>
-#include "liburing.h"
-#include "../src/syscall.h"
-
-static uint64_t current_time_ms(void)
-{
-    struct timespec ts;
-    if (clock_gettime(CLOCK_MONOTONIC, &ts))
-        exit(1);
-    return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
-}
-
-#define SIZEOF_IO_URING_SQE 64
-#define SIZEOF_IO_URING_CQE 16
-#define SQ_TAIL_OFFSET 64
-#define SQ_RING_MASK_OFFSET 256
-#define SQ_RING_ENTRIES_OFFSET 264
-#define CQ_RING_ENTRIES_OFFSET 268
-#define CQ_CQES_OFFSET 320
-
-#define IORING_OFF_SQES 0x10000000ULL
-
-static void kill_and_wait(int pid, int* status)
-{
-    kill(-pid, SIGKILL);
-    kill(pid, SIGKILL);
-    while (waitpid(-1, status, __WALL) != pid) {
-    }
-}
-
-#define WAIT_FLAGS __WALL
-
-uint64_t r[3] = {0xffffffffffffffff, 0x0, 0x0};
-
-static long syz_io_uring_setup(volatile long a0, volatile long a1,
-volatile long a2, volatile long a3, volatile long a4, volatile long
-a5)
-{
-    uint32_t entries = (uint32_t)a0;
-    struct io_uring_params* setup_params = (struct io_uring_params*)a1;
-    void* vma1 = (void*)a2;
-    void* vma2 = (void*)a3;
-    void** ring_ptr_out = (void**)a4;
-    void** sqes_ptr_out = (void**)a5;
-    uint32_t fd_io_uring = __sys_io_uring_setup(entries, setup_params);
-    uint32_t sq_ring_sz = setup_params->sq_off.array +
-setup_params->sq_entries * sizeof(uint32_t);
-    uint32_t cq_ring_sz = setup_params->cq_off.cqes +
-setup_params->cq_entries * SIZEOF_IO_URING_CQE;
-    uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
-    *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE,
-MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring,
-IORING_OFF_SQ_RING);
-    uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
-    *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE,
-MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
-    return fd_io_uring;
-}
-
-static long syz_io_uring_submit(volatile long a0, volatile long a1,
-volatile long a2, volatile long a3)
-{
-    char* ring_ptr = (char*)a0;
-    char* sqes_ptr = (char*)a1;
-    char* sqe = (char*)a2;
-    uint32_t sqes_index = (uint32_t)a3;
-    uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
-    uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
-    uint32_t sq_array_off = (CQ_CQES_OFFSET + cq_ring_entries *
-SIZEOF_IO_URING_CQE + 63) & ~63;
-    if (sq_ring_entries)
-        sqes_index %= sq_ring_entries;
-    char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
-    memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
-    uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET);
-    uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET);
-    uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask;
-    uint32_t sq_tail_next = *sq_tail_ptr + 1;
-    uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off);
-    *(sq_array + sq_tail) = sqes_index;
-    __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
-    return 0;
-}
-
-
-void trigger_bug(void)
-{
-    intptr_t res = 0;
-    *(uint32_t*)0x20000204 = 0;
-    *(uint32_t*)0x20000208 = 2;
-    *(uint32_t*)0x2000020c = 0;
-    *(uint32_t*)0x20000210 = 0;
-    *(uint32_t*)0x20000218 = -1;
-    memset((void*)0x2000021c, 0, 12);
-    res = -1;
-    res = syz_io_uring_setup(0x7987, 0x20000200, 0x20400000, 0x20ffd000, 0x200000c0, 0x200001c0);
-    if (res != -1) {
-        r[0] = res;
-        r[1] = *(uint64_t*)0x200000c0;
-        r[2] = *(uint64_t*)0x200001c0;
-    }
-    *(uint8_t*)0x20000180 = 0xb;
-    *(uint8_t*)0x20000181 = 1;
-    *(uint16_t*)0x20000182 = 0;
-    *(uint32_t*)0x20000184 = 0;
-    *(uint64_t*)0x20000188 = 4;
-    *(uint64_t*)0x20000190 = 0x20000140;
-    *(uint64_t*)0x20000140 = 0x77359400;
-    *(uint64_t*)0x20000148 = 0;
-    *(uint32_t*)0x20000198 = 1;
-    *(uint32_t*)0x2000019c = 0;
-    *(uint64_t*)0x200001a0 = 0;
-    *(uint16_t*)0x200001a8 = 0;
-    *(uint16_t*)0x200001aa = 0;
-    memset((void*)0x200001ac, 0, 20);
-    syz_io_uring_submit(r[1], r[2], 0x20000180, 1);
-    *(uint32_t*)0x20000544 = 0;
-    *(uint32_t*)0x20000548 = 0x36;
-    *(uint32_t*)0x2000054c = 0;
-    *(uint32_t*)0x20000550 = 0;
-    *(uint32_t*)0x20000558 = r[0];
-    memset((void*)0x2000055c, 0, 12);
-
-}
-int main(void)
-{
-    mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
-    int pid = fork();
-    if (pid < 0)
-        exit(1);
-    if (pid == 0) {
-        trigger_bug();
-        exit(0);
-    }
-    int status = 0;
-    uint64_t start = current_time_ms();
-    for (;;) {
-        if (current_time_ms() - start < 1000) {
-            continue;
-        }
-        kill_and_wait(pid, &status);
-        break;
-    }
-    return 0;
-}
-
-
-
diff --git a/test/sqpoll-disable-exit.c b/test/sqpoll-disable-exit.c
index 76b6cf5..bc4c2fb 100644
--- a/test/sqpoll-disable-exit.c
+++ b/test/sqpoll-disable-exit.c
@@ -22,8 +22,10 @@
 #include <unistd.h>
 
 #include "liburing.h"
+#include "helpers.h"
 #include "../src/syscall.h"
 
+#ifndef CONFIG_USE_SANITIZER
 static void sleep_ms(uint64_t ms)
 {
   usleep(ms * 1000);
@@ -136,7 +138,7 @@
   }
 }
 
-static void setup_test()
+static void setup_test(void)
 {
   prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
   setpgrp();
@@ -188,9 +190,15 @@
 }
 int main(void)
 {
-  mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
-  mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
-  mmap((void *)0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
+  mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul);
+  mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul);
+  mmap((void *)0x21000000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul);
   loop();
   return 0;
 }
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
diff --git a/test/sqpoll-exec.c b/test/sqpoll-exec.c
new file mode 100644
index 0000000..174ac07
--- /dev/null
+++ b/test/sqpoll-exec.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Check that closing a file with SQPOLL has it immediately closed
+ *		upon receiving the CQE for the close. The 6.9 kernel had a bug
+ *		where SQPOLL would not run kernel wide task_work when running the
+ *		private task_work, which would defer the close if this was the
+ *		final close of the file.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "helpers.h"
+#include "liburing.h"
+
+static int fill_exec_target(char *dst, char *path)
+{
+	struct stat sb;
+
+	/*
+	 * Should either be ./exec-target.t or test/exec-target.t
+	 */
+	sprintf(dst, "%s", path);
+	return stat(dst, &sb);
+}
+
+static int test_exec(struct io_uring *ring, char * const argv[])
+{
+	char prog_path[PATH_MAX];
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, wstatus, fd;
+	pid_t p;
+
+	if (fill_exec_target(prog_path, "./exec-target.t") &&
+	    fill_exec_target(prog_path, "test/exec-target.t")) {
+		fprintf(stdout, "Can't find exec-target, skipping\n");
+		return 0;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_openat(sqe, AT_FDCWD, prog_path, O_WRONLY, 0);
+	sqe->user_data = 0;
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return 1;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "open: %d\n", cqe->res);
+		return 1;
+	}
+	fd = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_close(sqe, fd);
+	sqe->user_data = 1;
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "wait cqe %d\n", ret);
+		return 1;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "close: %d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	p = fork();
+	if (p == -1) {
+		fprintf(stderr, "fork() failed\n");
+		return 1;
+	}
+
+	if (p == 0) {
+		/* file should be closed, try exec'ing it */
+		ret = execve(prog_path, argv, NULL);
+		if (ret) {
+			fprintf(stderr, "exec failed: %s\n", strerror(errno));
+			exit(1);
+		}
+	}
+
+	if (waitpid(p, &wstatus, 0) == (pid_t)-1) {
+		perror("waitpid()");
+		return 1;
+	}
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
+		return 1;
+
+	return 0;
+}
+
+int main(int argc, char * const argv[])
+{
+	struct io_uring_params p = { .flags = IORING_SETUP_SQPOLL, };
+	struct io_uring ring;
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = t_create_ring_params(8, &ring, &p);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret != T_SETUP_OK)
+		return T_EXIT_FAIL;
+
+	for (i = 0; i < 20; i++) {
+		ret = test_exec(&ring, argv);
+		if (ret) {
+			fprintf(stderr, "test_exec failed\n");
+			return ret;
+		}
+	}
+
+	return T_EXIT_PASS;
+}
diff --git a/test/sqpoll-exit-hang.c b/test/sqpoll-exit-hang.c
index cde2115..ff1924a 100644
--- a/test/sqpoll-exit-hang.c
+++ b/test/sqpoll-exit-hang.c
@@ -10,31 +10,7 @@
 #include <sys/time.h>
 #include <poll.h>
 #include "liburing.h"
-
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
diff --git a/test/sqpoll-sleep.c b/test/sqpoll-sleep.c
index 9d1cff6..5cf4210 100644
--- a/test/sqpoll-sleep.c
+++ b/test/sqpoll-sleep.c
@@ -9,31 +9,7 @@
 #include <unistd.h>
 #include <sys/time.h>
 #include "liburing.h"
-
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
+#include "helpers.h"
 
 int main(int argc, char *argv[])
 {
diff --git a/test/sqwait.c b/test/sqwait.c
new file mode 100644
index 0000000..ba77bf3
--- /dev/null
+++ b/test/sqwait.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that the app can always get a new sqe after having
+ *		called io_uring_sqring_wait().
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define NR_IOS	10000
+#define INFLIGHT	256
+#define FILE_SIZE	(256 * 1024 * 1024)
+
+static int inflight;
+
+static int reap(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int ret;
+
+	while (inflight >= INFLIGHT / 2) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "wait=%d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			printf("cqe res %d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+		inflight--;
+	}
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int fd = -1, i, iov_off, ret, fret;
+	struct iovec iovs[INFLIGHT];
+	const char *fname;
+	char buf[256];
+	loff_t off;
+
+	if (argc > 1) {
+		fname = argv[1];
+	} else {
+		srand((unsigned)time(NULL));
+		snprintf(buf, sizeof(buf), ".sqwait-%u-%u", (unsigned)rand(),
+			 (unsigned)getpid());
+		fname = buf;
+		t_create_file(fname, FILE_SIZE);
+	}
+
+	fret = T_EXIT_SKIP;
+	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQPOLL);
+	if (ret < 0) {
+		if (errno == EINVAL || errno == EPERM)
+			goto err;
+		fprintf(stderr, "queue init %d\n", ret);
+		fret = T_EXIT_FAIL;
+		goto err;
+	}
+
+	fd = open(fname, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		if (errno == EACCES || errno == EPERM || errno == EINVAL)
+			return T_EXIT_SKIP;
+		perror("open");
+		fret = T_EXIT_FAIL;
+		goto err;
+	}
+
+	for (i = 0; i < INFLIGHT; i++) {
+		if (posix_memalign(&iovs[i].iov_base, 4096, 4096))
+			goto err;
+		iovs[i].iov_len = 4096;
+	}
+
+	iov_off = off = 0;
+	for (i = 0; i < NR_IOS; i++) {
+		struct iovec *iov = &iovs[iov_off];
+
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe) {
+			ret = io_uring_sqring_wait(&ring);
+			if (ret < 0) {
+				if (ret == -EINVAL)
+					return T_EXIT_SKIP;
+				fprintf(stderr, "sqwait=%d\n", ret);
+				fret = T_EXIT_FAIL;
+				goto err;
+			}
+			sqe = io_uring_get_sqe(&ring);
+			if (!sqe) {
+				fprintf(stderr, "No sqe post wait\n");
+				fret = T_EXIT_FAIL;
+				goto err;
+			}
+		}
+		io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, off);
+		io_uring_submit(&ring);
+		inflight++;
+
+		iov_off++;
+		if (iov_off == INFLIGHT)
+			iov_off = 0;
+		off += 8192;
+		if (off > FILE_SIZE - 8192)
+			off = 0;
+		if (reap(&ring)) {
+			fret = T_EXIT_FAIL;
+			goto err;
+		}
+	}
+
+	if (fd != -1)
+		close(fd);
+	if (fname != argv[1])
+		unlink(fname);
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+err:
+	if (fd != -1)
+		close(fd);
+	if (fname != argv[1])
+		unlink(fname);
+	return fret;
+}
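
The pattern sqwait.c exercises is worth stating on its own: with IORING_SETUP_SQPOLL, io_uring_get_sqe() returns NULL while the SQ ring is full, and it stays full until the kernel-side thread consumes entries; io_uring_sqring_wait() is the blocking primitive that bridges the gap. A sketch of the canonical retry (helper name illustrative):

/* illustrative: get an SQE, blocking if the SQPOLL SQ ring is full */
static struct io_uring_sqe *get_sqe_blocking(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		/* wait for the sqpoll thread to free SQ ring space */
		if (io_uring_sqring_wait(ring) < 0)
			return NULL;
		sqe = io_uring_get_sqe(ring);
	}
	return sqe;	/* the test treats NULL here as a failure */
}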
diff --git a/test/statx.c b/test/statx.c
index 5fa086e..b26e140 100644
--- a/test/statx.c
+++ b/test/statx.c
@@ -36,11 +36,86 @@
 	return errno == ENOSYS ? 0 : -1;
 }
 
+static int test_statx_invalid_buf(struct io_uring *ring, const char *path)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct statx *x = (struct statx *) (uintptr_t) 0x1234;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+	io_uring_prep_statx(sqe, -1, path, 0, STATX_ALL, x);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "Invalid address didn't fail\n");
+		goto err;
+	}
+
+	return 0;
+err:
+	return -1;
+}
+
+static int test_statx_invalid_path(struct io_uring *ring)
+{
+	const char *path = (const char *) (uintptr_t) 0x1234;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct statx x1 = { };
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+	io_uring_prep_statx(sqe, -1, path, 0, STATX_ALL, &x1);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "Invalid address didn't fail\n");
+		goto err;
+	}
+
+	return 0;
+err:
+	return -1;
+}
+
 static int test_statx(struct io_uring *ring, const char *path)
 {
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe;
-	struct statx x1, x2;
+	struct statx x1 = { }, x2 = { };
 	int ret;
 
 	sqe = io_uring_get_sqe(ring);
@@ -86,6 +161,8 @@
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return 0;
 		perror("open");
 		return 1;
 	}
@@ -156,6 +233,18 @@
 		goto err;
 	}
 
+	ret = test_statx_invalid_path(&ring);
+	if (ret) {
+		fprintf(stderr, "test_statx_invalid_path failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = test_statx_invalid_buf(&ring, fname);
+	if (ret) {
+		fprintf(stderr, "test_statx_invalid_buf failed: %d\n", ret);
+		goto err;
+	}
+
 	ret = test_statx_fd(&ring, fname);
 	if (ret) {
 		fprintf(stderr, "test_statx_fd failed: %d\n", ret);
diff --git a/test/stdout.c b/test/stdout.c
index ade100a..a699447 100644
--- a/test/stdout.c
+++ b/test/stdout.c
@@ -89,6 +89,7 @@
 		io_uring_cqe_seen(ring, cqe);
 	}
 	io_uring_unregister_buffers(ring);
+	free(vecs[0].iov_base);
 	return 0;
 err:
 	return 1;
@@ -143,6 +144,7 @@
 	}
 	io_uring_cqe_seen(ring, cqe);
 	io_uring_unregister_buffers(ring);
+	free(vecs.iov_base);
 	return 0;
 err:
 	return 1;
diff --git a/test/submit-and-wait.c b/test/submit-and-wait.c
new file mode 100644
index 0000000..249a372
--- /dev/null
+++ b/test/submit-and-wait.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Test that io_uring_submit_and_wait_timeout() returns the
+ * right value (submit count) and that it doesn't end up waiting twice.
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+#include "liburing.h"
+#include "helpers.h"
+#include "test.h"
+
+static int test(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	struct timeval tv;
+	int ret, i;
+
+	for (i = 0; i < 1; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "get sqe failed at %d\n", i);
+			goto err;
+		}
+		io_uring_prep_nop(sqe);
+	}
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+	gettimeofday(&tv, NULL);
+	ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "submit_and_wait_timeout: %d\n", ret);
+		goto err;
+	}
+	ret = mtime_since_now(&tv);
+	/* allow some slack, should be around 1s */
+	if (ret > 1200) {
+		fprintf(stderr, "wait took too long: %d\n", ret);
+		goto err;
+	}
+	return 0;
+err:
+	return 1;
+}
+
+static int test_ring(void)
+{
+	struct io_uring ring;
+	struct io_uring_params p = { };
+	int ret;
+
+	p.flags = 0;
+	ret = io_uring_queue_init_params(8, &ring, &p);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = test(&ring);
+	if (ret) {
+		fprintf(stderr, "test failed\n");
+		goto err;
+	}
+err:
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc > 1)
+		return 0;
+
+	return test_ring();
+}
diff --git a/test/submit-link-fail.c b/test/submit-link-fail.c
index 45f6976..e62f793 100644
--- a/test/submit-link-fail.c
+++ b/test/submit-link-fail.c
@@ -23,7 +23,7 @@
 	struct io_uring ring;
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
-	char buffer[1];
+	char buffer[1] = { };
 	int i, ret, fds[2];
 
 	if (drain)
diff --git a/test/submit-reuse.c b/test/submit-reuse.c
index d5ccdd4..44a02ab 100644
--- a/test/submit-reuse.c
+++ b/test/submit-reuse.c
@@ -102,31 +102,6 @@
 	return 0;
 }
 
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
-
 static int test_reuse(int argc, char *argv[], int split, int async)
 {
 	struct thread_data data;
@@ -162,6 +137,8 @@
 	if (do_unlink)
 		unlink(fname1);
 	if (fd1 < 0) {
+		if (errno == EPERM || errno == EACCES)
+			return T_EXIT_SKIP;
 		perror("open fname1");
 		goto err;
 	}
@@ -211,7 +188,6 @@
 err:
 	io_uring_queue_exit(&ring);
 	return 1;
-
 }
 
 int main(int argc, char *argv[])
@@ -225,6 +201,8 @@
 		async = (i & 2) != 0;
 
 		ret = test_reuse(argc, argv, split, async);
+		if (ret == T_EXIT_SKIP)
+			continue;
 		if (ret) {
 			fprintf(stderr, "test_reuse %d %d failed\n", split, async);
 			return ret;
diff --git a/test/symlink.c b/test/symlink.c
index cf4aa96..a9d380b 100644
--- a/test/symlink.c
+++ b/test/symlink.c
@@ -11,7 +11,6 @@
 
 #include "liburing.h"
 
-
 static int do_symlinkat(struct io_uring *ring, const char *oldname, const char *newname)
 {
 	int ret;
@@ -43,7 +42,8 @@
 	return 1;
 }
 
-int test_link_contents(const char* linkname, const char *expected_contents)
+static int test_link_contents(const char* linkname,
+			      const char *expected_contents)
 {
 	char buf[128];
 	int ret = readlink(linkname, buf, 127);
@@ -104,6 +104,18 @@
 		goto err1;
 	}
 
+	ret = do_symlinkat(&ring, target, (const char *) (uintptr_t) 0x1234);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_symlinkat bad target failed: %d\n", ret);
+		goto err1;
+	}
+
+	ret = do_symlinkat(&ring, (const char *) (uintptr_t) 0x1234, target);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "test_symlinkat bad source failed: %d\n", ret);
+		goto err1;
+	}
+
 out:
 	unlinkat(AT_FDCWD, linkname, 0);
 	io_uring_queue_exit(&ring);
diff --git a/test/sync-cancel.c b/test/sync-cancel.c
new file mode 100644
index 0000000..eb67054
--- /dev/null
+++ b/test/sync-cancel.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test io_uring_register_sync_cancel()
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static int no_sync_cancel, no_sync_cancel_op;
+
+static int test_sync_cancel_timeout(struct io_uring *ring, int async, int by_op)
+{
+	struct io_uring_sync_cancel_reg reg = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2], to_prep;
+	char buf[32];
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	to_prep = 1;
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
+	sqe->user_data = 0x89;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit(ring);
+	if (ret != to_prep) {
+		fprintf(stderr, "submit=%d\n", ret);
+		return 1;
+	}
+
+	usleep(10000);
+
+	reg.flags = IORING_ASYNC_CANCEL_OP;
+	reg.opcode = IORING_OP_READ;
+	reg.timeout.tv_nsec = 1;
+	ret = io_uring_register_sync_cancel(ring, &reg);
+	/* earlier kernels had sync cancel, but not per-op */
+	if (ret == -EINVAL) {
+		no_sync_cancel_op = 1;
+		return 0;
+	}
+	if (async) {
+		/* we expect -ETIME here, but can race and get 0 */
+		if (ret != -ETIME && ret != 0) {
+			fprintf(stderr, "sync_cancel=%d\n", ret);
+			return 1;
+		}
+	} else {
+		if (ret < 0) {
+			fprintf(stderr, "sync_cancel=%d\n", ret);
+			return 1;
+		}
+	}
+
+	/*
+	 * we could _almost_ use peek_cqe() here, but there is still
+	 * a small gap where io-wq is done with the request and on
+	 * its way to posting a completion, but hasn't done it just
+	 * yet. the request is canceled and won't be doing any IO
+	 * to buffers etc, but the cqe may not have quite arrived yet.
+	 */
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "peek=%d\n", ret);
+		return 1;
+	}
+	if (cqe->res >= 0) {
+		fprintf(stderr, "cqe->res=%d\n", cqe->res);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+}
+
+static int test_sync_cancel(struct io_uring *ring, int async, int nr_all,
+			    int use_fd, int by_op)
+{
+	struct io_uring_sync_cancel_reg reg = { };
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, fds[2], to_prep, i;
+	char buf[32];
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return 1;
+	}
+
+	to_prep = 1;
+	if (nr_all)
+		to_prep = 4;
+	for (i = 0; i < to_prep; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
+		sqe->user_data = 0x89;
+		if (async)
+			sqe->flags |= IOSQE_ASYNC;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != to_prep) {
+		fprintf(stderr, "submit=%d\n", ret);
+		return 1;
+	}
+
+	usleep(10000);
+
+	if (!use_fd)
+		reg.addr = 0x89;
+	else
+		reg.fd = fds[0];
+	reg.timeout.tv_sec = 200;
+	if (nr_all)
+		reg.flags |= IORING_ASYNC_CANCEL_ALL;
+	if (use_fd)
+		reg.flags |= IORING_ASYNC_CANCEL_FD;
+	ret = io_uring_register_sync_cancel(ring, &reg);
+	if (ret < 0) {
+		if (ret == -EINVAL && !no_sync_cancel) {
+			no_sync_cancel = 1;
+			return 0;
+		}
+		fprintf(stderr, "sync_cancel=%d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < to_prep; i++) {
+		/*
+		 * we could _almost_ use peek_cqe() here, but there is still
+		 * a small gap where io-wq is done with the request and on
+		 * its way to posting a completion, but hasn't done it just
+		 * yet. the request is canceled and won't be doing any IO
+		 * to buffers etc, but the cqe may not have quite arrived yet.
+		 */
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "peek=%d\n", ret);
+			return 1;
+		}
+		if (cqe->res >= 0) {
+			fprintf(stderr, "cqe->res=%d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = t_create_ring(7, &ring, 0);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret != T_SETUP_OK)
+		return ret;
+
+	ret = test_sync_cancel(&ring, 0, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 0 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_sync_cancel)
+		return T_EXIT_SKIP;
+
+	ret = test_sync_cancel(&ring, 0, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 1, 0, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 1 0 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 1, 0, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 1 0 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+
+	ret = test_sync_cancel(&ring, 0, 1, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 1 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 0, 1, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 1 0 1 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+
+	ret = test_sync_cancel(&ring, 1, 1, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 1 1 0 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 0, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 0 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 1, 0, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 1 0 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 0, 1, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 0 1 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel(&ring, 1, 1, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel 1 1 1 0 failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_sync_cancel_timeout(&ring, 0, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel_timeout 0 0\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_sync_cancel_op)
+		return T_EXIT_PASS;
+
+	ret = test_sync_cancel_timeout(&ring, 0, 1);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel_timeout 0 1\n");
+		return T_EXIT_FAIL;
+	}
+
+	/* must be last, leaves request */
+	ret = test_sync_cancel_timeout(&ring, 1, 0);
+	if (ret) {
+		fprintf(stderr, "test_sync_cancel_timeout 1\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
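For context, the primitive the test above exercises boils down to the sketch
below: a synchronous, blocking cancelation of everything pending on an fd. It
assumes a liburing that provides io_uring_register_sync_cancel() (2.3+) and a
kernel with the sync-cancel registration opcode; the fd argument and the
one-second timeout are illustrative, and error handling is trimmed.

#include <string.h>
#include "liburing.h"

/* Synchronously cancel every pending request targeting 'fd', waiting up
 * to one second for them to drain. Returns a negative errno on failure:
 * -ETIME if the wait expired, -ENOENT if nothing matched. */
static int sync_cancel_fd_all(struct io_uring *ring, int fd)
{
	struct io_uring_sync_cancel_reg reg;

	memset(&reg, 0, sizeof(reg));
	reg.fd = fd;
	reg.flags = IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_ALL;
	reg.timeout.tv_sec = 1;

	return io_uring_register_sync_cancel(ring, &reg);
}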
diff --git a/test/test.h b/test/test.h
index 3628163..e99a8d2 100644
--- a/test/test.h
+++ b/test/test.h
@@ -14,7 +14,8 @@
 	const char *description;
 } io_uring_test_config;
 
-io_uring_test_config io_uring_test_configs[] = {
+__attribute__((__unused__))
+static io_uring_test_config io_uring_test_configs[] = {
 	{ 0, 						"default" },
 	{ IORING_SETUP_SQE128, 				"large SQE"},
 	{ IORING_SETUP_CQE32, 				"large CQE"},
diff --git a/test/thread-exit.c b/test/thread-exit.c
index 3e20431..f94d989 100644
--- a/test/thread-exit.c
+++ b/test/thread-exit.c
@@ -102,6 +102,8 @@
 	if (do_unlink)
 		unlink(fname);
 	if (fd < 0) {
+		if (errno == EPERM || errno == EACCES)
+			goto skip;
 		perror("open");
 		return 1;
 	}
@@ -140,4 +142,7 @@
 err:
 	free_g_buf();
 	return 1;
+skip:
+	free_g_buf();
+	return T_EXIT_SKIP;
 }
diff --git a/test/timeout-new.c b/test/timeout-new.c
index 6efcfb4..21d140d 100644
--- a/test/timeout-new.c
+++ b/test/timeout-new.c
@@ -8,13 +8,14 @@
 #include <unistd.h>
 #include <pthread.h>
 #include "liburing.h"
+#include "helpers.h"
 
 #define TIMEOUT_MSEC	200
 #define TIMEOUT_SEC	10
 
-int thread_ret0, thread_ret1;
-int cnt = 0;
-pthread_mutex_t mutex;
+static int thread_ret0, thread_ret1;
+static int cnt = 0;
+static pthread_mutex_t mutex;
 
 static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec)
 {
@@ -22,32 +23,6 @@
 	ts->tv_nsec = (msec % 1000) * 1000000;
 }
 
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
-
-
 static int test_return_before_timeout(struct io_uring *ring)
 {
 	struct io_uring_cqe *cqe;
@@ -111,7 +86,8 @@
 	return 0;
 }
 
-int __reap_thread_fn(void *data) {
+static int __reap_thread_fn(void *data)
+{
 	struct io_uring *ring = (struct io_uring *)data;
 	struct io_uring_cqe *cqe;
 	struct __kernel_timespec ts;
@@ -123,12 +99,14 @@
 	return io_uring_wait_cqe_timeout(ring, &cqe, &ts);
 }
 
-void *reap_thread_fn0(void *data) {
+static void *reap_thread_fn0(void *data)
+{
 	thread_ret0 = __reap_thread_fn(data);
 	return NULL;
 }
 
-void *reap_thread_fn1(void *data) {
+static void *reap_thread_fn1(void *data)
+{
 	thread_ret1 = __reap_thread_fn(data);
 	return NULL;
 }
@@ -137,7 +115,8 @@
  * This is to test issuing a sqe in main thread and reaping it in two child-thread
  * at the same time. To see if timeout feature works or not.
  */
-int test_multi_threads_timeout() {
+static int test_multi_threads_timeout(void)
+{
 	struct io_uring ring;
 	int ret;
 	bool both_wait = false;
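The timeout-new.c hunk above is mostly hygiene: the duplicated mtime_since()
helpers move to helpers.h and file-local symbols become static. The primitive
its reap threads build on is io_uring_wait_cqe_timeout(); a minimal sketch of
a bounded reap follows, with an illustrative timeout value.

#include "liburing.h"

/* Wait up to 'msec' milliseconds for one completion. Returns 0 after
 * consuming a CQE, -ETIME if nothing completed in time, or another
 * negative errno on failure. */
static int reap_one_bounded(struct io_uring *ring, unsigned msec)
{
	struct __kernel_timespec ts = {
		.tv_sec = msec / 1000,
		.tv_nsec = (msec % 1000) * 1000000,
	};
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts);
	if (!ret)
		io_uring_cqe_seen(ring, cqe);
	return ret;
}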
diff --git a/test/timeout-overflow.c b/test/timeout-overflow.c
deleted file mode 100644
index 671f171..0000000
--- a/test/timeout-overflow.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Description: run timeout overflow test
- *
- */
-#include <errno.h>
-#include <stdio.h>
-#include <limits.h>
-#include <string.h>
-#include <sys/time.h>
-
-#include "liburing.h"
-
-#define TIMEOUT_MSEC	200
-static int not_supported;
-
-static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec)
-{
-	ts->tv_sec = msec / 1000;
-	ts->tv_nsec = (msec % 1000) * 1000000;
-}
-
-static int check_timeout_support(void)
-{
-	struct io_uring_sqe *sqe;
-	struct io_uring_cqe *cqe;
-	struct __kernel_timespec ts;
-	struct io_uring_params p;
-	struct io_uring ring;
-	int ret;
-
-	memset(&p, 0, sizeof(p));
-	ret = io_uring_queue_init_params(1, &ring, &p);
-	if (ret) {
-		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
-	}
-
-	/* not really a match, but same kernel added batched completions */
-	if (p.features & IORING_FEAT_POLL_32BITS) {
-		fprintf(stdout, "Skipping\n");
-		not_supported = 1;
-		return 0;
-	}
-
-	sqe = io_uring_get_sqe(&ring);
-	msec_to_ts(&ts, TIMEOUT_MSEC);
-	io_uring_prep_timeout(sqe, &ts, 1, 0);
-
-	ret = io_uring_submit(&ring);
-	if (ret < 0) {
-		fprintf(stderr, "sqe submit failed: %d\n", ret);
-		goto err;
-	}
-
-	ret = io_uring_wait_cqe(&ring, &cqe);
-	if (ret < 0) {
-		fprintf(stderr, "wait completion %d\n", ret);
-		goto err;
-	}
-
-	if (cqe->res == -EINVAL) {
-		not_supported = 1;
-		fprintf(stdout, "Timeout not supported, ignored\n");
-		return 0;
-	}
-
-	io_uring_cqe_seen(&ring, cqe);
-	io_uring_queue_exit(&ring);
-	return 0;
-err:
-	io_uring_queue_exit(&ring);
-	return 1;
-}
-
-/*
- * We first setup 4 timeout requests, which require a count value of 1, 1, 2,
- * UINT_MAX, so the sequence is 1, 2, 4, 2. Before really timeout, this 4
- * requests will not lead the change of cq_cached_tail, so as sq_dropped.
- *
- * And before this patch. The order of this four requests will be req1->req2->
- * req4->req3. Actually, it should be req1->req2->req3->req4.
- *
- * Then, if there is 2 nop req. All timeout requests expect req4 will completed
- * successful after the patch. And req1/req2 will completed successful with
- * req3/req4 return -ETIME without this patch!
- */
-static int test_timeout_overflow(void)
-{
-	struct io_uring_sqe *sqe;
-	struct io_uring_cqe *cqe;
-	struct __kernel_timespec ts;
-	struct io_uring ring;
-	int i, ret;
-
-	ret = io_uring_queue_init(16, &ring, 0);
-	if (ret) {
-		fprintf(stderr, "ring setup failed: %d\n", ret);
-		return 1;
-	}
-
-	msec_to_ts(&ts, TIMEOUT_MSEC);
-	for (i = 0; i < 4; i++) {
-		unsigned num = 0;
-		sqe = io_uring_get_sqe(&ring);
-		switch (i) {
-		case 0:
-		case 1:
-			num = 1;
-			break;
-		case 2:
-			num = 2;
-			break;
-		case 3:
-			num = UINT_MAX;
-			break;
-		}
-		io_uring_prep_timeout(sqe, &ts, num, 0);
-	}
-
-	for (i = 0; i < 2; i++) {
-		sqe = io_uring_get_sqe(&ring);
-		io_uring_prep_nop(sqe);
-		io_uring_sqe_set_data(sqe, (void *) 1);
-	}
-	ret = io_uring_submit(&ring);
-	if (ret < 0) {
-		fprintf(stderr, "sqe submit failed: %d\n", ret);
-		goto err;
-	}
-
-	i = 0;
-	while (i < 6) {
-		ret = io_uring_wait_cqe(&ring, &cqe);
-		if (ret < 0) {
-			fprintf(stderr, "wait completion %d\n", ret);
-			goto err;
-		}
-
-		/*
-		 * cqe1: first nop req
-		 * cqe2: first timeout req, because of cqe1
-		 * cqe3: second timeout req because of cqe1 + cqe2
-		 * cqe4: second nop req
-		 * cqe5~cqe6: the left three timeout req
-		 */
-		switch (i) {
-		case 0:
-		case 3:
-			if (io_uring_cqe_get_data(cqe) != (void *) 1) {
-				fprintf(stderr, "nop not seen as 1 or 2\n");
-				goto err;
-			}
-			break;
-		case 1:
-		case 2:
-		case 4:
-			if (cqe->res == -ETIME) {
-				fprintf(stderr, "expected not return -ETIME "
-					"for the #%d timeout req\n", i - 1);
-				goto err;
-			}
-			break;
-		case 5:
-			if (cqe->res != -ETIME) {
-				fprintf(stderr, "expected return -ETIME for "
-					"the #%d timeout req\n", i - 1);
-				goto err;
-			}
-			break;
-		}
-		io_uring_cqe_seen(&ring, cqe);
-		i++;
-	}
-
-	return 0;
-err:
-	return 1;
-}
-
-int main(int argc, char *argv[])
-{
-	int ret;
-
-	if (argc > 1)
-		return 0;
-
-	ret = check_timeout_support();
-	if (ret) {
-		fprintf(stderr, "check_timeout_support failed: %d\n", ret);
-		return 1;
-	}
-
-	if (not_supported)
-		return 0;
-
-	ret = test_timeout_overflow();
-	if (ret) {
-		fprintf(stderr, "test_timeout_overflow failed\n");
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/test/timeout.c b/test/timeout.c
index 2fd4736..06474ca 100644
--- a/test/timeout.c
+++ b/test/timeout.c
@@ -14,12 +14,14 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include "helpers.h"
 #include "liburing.h"
 #include "../src/syscall.h"
 
 #define TIMEOUT_MSEC	200
 static int not_supported;
 static int no_modify;
+static int no_multishot;
 
 static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec)
 {
@@ -27,31 +29,6 @@
 	ts->tv_nsec = (msec % 1000) * 1000000;
 }
 
-static unsigned long long mtime_since(const struct timeval *s,
-				      const struct timeval *e)
-{
-	long long sec, usec;
-
-	sec = e->tv_sec - s->tv_sec;
-	usec = (e->tv_usec - s->tv_usec);
-	if (sec > 0 && usec < 0) {
-		sec--;
-		usec += 1000000;
-	}
-
-	sec *= 1000;
-	usec /= 1000;
-	return sec + usec;
-}
-
-static unsigned long long mtime_since_now(struct timeval *tv)
-{
-	struct timeval end;
-
-	gettimeofday(&end, NULL);
-	return mtime_since(tv, &end);
-}
-
 /*
  * Test that we return to userspace if a timeout triggers, even if we
  * don't satisfy the number of events asked for.
@@ -156,7 +133,7 @@
 
 		/*
 		 * NOP commands have user_data as 1. Check that we get the
-		 * at least 'nr' NOPs first, then the successfully removed timout.
+		 * at least 'nr' NOPs first, then the successfully removed timeout.
 		 */
 		if (io_uring_cqe_get_data(cqe) == NULL) {
 			if (i < nr) {
@@ -176,7 +153,7 @@
 			goto err;
 		}
 		i++;
-	};
+	}
 
 	return 0;
 err:
@@ -588,7 +565,7 @@
 		}
 
 		if (cqe->user_data != user_data) {
-			fprintf(stderr, "%s: unexpected timeout req %d sequece\n",
+			fprintf(stderr, "%s: unexpected timeout req %d sequence\n",
 				__FUNCTION__, i+1);
 			goto err;
 		}
@@ -678,7 +655,7 @@
 		case 1:
 			/* Should be timeout req_2 */
 			if (cqe->user_data != 2) {
-				fprintf(stderr, "%s: unexpected timeout req %d sequece\n",
+				fprintf(stderr, "%s: unexpected timeout req %d sequence\n",
 					__FUNCTION__, i+1);
 				goto err;
 			}
@@ -691,7 +668,7 @@
 		case 2:
 			/* Should be timeout req_1 */
 			if (cqe->user_data != 1) {
-				fprintf(stderr, "%s: unexpected timeout req %d sequece\n",
+				fprintf(stderr, "%s: unexpected timeout req %d sequence\n",
 					__FUNCTION__, i+1);
 				goto err;
 			}
@@ -978,6 +955,7 @@
 	struct io_uring_cqe *cqe;
 	struct __kernel_timespec ts, ts_upd;
 	unsigned long long exp_ms, base_ms = 10000;
+	bool update_ealready = false;
 	struct timeval tv;
 	int ret, i, nr = 2;
 	__u32 mode = abs ? IORING_TIMEOUT_ABS : 0;
@@ -1040,6 +1018,16 @@
 			}
 			break;
 		case 2:
+			/*
+			 * We should not be hitting this case, but for
+			 * a kernel with PREEMPT_RT, even an instant attempt
+			 * to remove a timer will return that the timer is
+			 * already running... Deal with it.
+			 */
+			if (cqe->res == -EALREADY) {
+				update_ealready = true;
+				break;
+			}
 			if (cqe->res != 0) {
 				fprintf(stderr, "%s: got %d, wanted %d\n",
 						__FUNCTION__, cqe->res,
@@ -1060,7 +1048,7 @@
 	}
 
 	exp_ms = mtime_since_now(&tv);
-	if (exp_ms >= base_ms / 2) {
+	if (!update_ealready && exp_ms >= base_ms / 2) {
 		fprintf(stderr, "too long, timeout wasn't updated\n");
 		goto err;
 	}
@@ -1231,7 +1219,7 @@
 			exit(1);
 		}
 
-		/* trigger full cancellation */
+		/* trigger full cancelation */
 		ret = execl(prog_path, prog_path, NULL);
 		if (ret) {
 			fprintf(stderr, "exec failed %i\n", errno);
@@ -1303,7 +1291,6 @@
 		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
 		return 1;
 	} else if (cqe->user_data == 1 && cqe->res == -EINVAL) {
-		fprintf(stderr, "ETIME_SUCCESS is not supported, skip\n");
 		goto done;
 	} else if (cqe->res != -ETIME || cqe->user_data != 1) {
 		fprintf(stderr, "timeout failed %i %i\n", cqe->res,
@@ -1328,6 +1315,246 @@
 }
 
 
+static int test_timeout_multishot(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+
+	msec_to_ts(&ts, TIMEOUT_MSEC);
+	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT);
+	io_uring_sqe_set_data(sqe, (void *) 1);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	for (int i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+			goto err;
+		}
+
+		ret = cqe->res;
+		if (ret == -EINVAL) {
+			no_multishot = 1;
+			return T_EXIT_SKIP;
+		}
+
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "%s: flag not set in cqe\n", __FUNCTION__);
+			goto err;
+		}
+
+		if (ret != -ETIME) {
+			fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret));
+			goto err;
+		}
+
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+
+	io_uring_prep_timeout_remove(sqe, 1, 0);
+	io_uring_sqe_set_data(sqe, (void *) 2);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = cqe->res;
+	if (ret < 0) {
+		fprintf(stderr, "%s: remove failed: %s\n", __FUNCTION__, strerror(-ret));
+		goto err;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = cqe->res;
+	if (ret != -ECANCELED) {
+		fprintf(stderr, "%s: timeout canceled: %s %llu\n", __FUNCTION__, strerror(-ret), cqe->user_data);
+		goto err;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+err:
+	return 1;
+}
+
+
+static int test_timeout_multishot_nr(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	int ret;
+
+	if (no_multishot)
+		return T_EXIT_SKIP;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+
+	msec_to_ts(&ts, TIMEOUT_MSEC);
+	io_uring_prep_timeout(sqe, &ts, 3, IORING_TIMEOUT_MULTISHOT);
+	io_uring_sqe_set_data(sqe, (void *) 1);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	for (int i = 0; i < 3; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+			goto err;
+		}
+
+		if (i < 2 && !(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "%s: flag not set in cqe\n", __FUNCTION__);
+			goto err;
+		}
+		if (i == 3 && (cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "%s: flag set in cqe\n", __FUNCTION__);
+			goto err;
+		}
+
+		ret = cqe->res;
+		if (ret != -ETIME) {
+			fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret));
+			goto err;
+		}
+
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	msec_to_ts(&ts, 2 * TIMEOUT_MSEC);
+	ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts);
+	if (ret != -ETIME) {
+		fprintf(stderr, "%s: wait completion timeout %s\n", __FUNCTION__, strerror(-ret));
+		goto err;
+	}
+
+	return 0;
+err:
+	return 1;
+}
+
+
+static int test_timeout_multishot_overflow(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct __kernel_timespec ts;
+	int ret;
+
+	if (no_multishot)
+		return T_EXIT_SKIP;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+
+	msec_to_ts(&ts, 10);
+	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT);
+	io_uring_sqe_set_data(sqe, (void *) 1);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = cqe->res;
+	if (ret != -ETIME) {
+		fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret));
+		goto err;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	sleep(1);
+
+	if (!((*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW)) {
+		goto err;
+	}
+
+	/* multishot timer should be gone */
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+
+	io_uring_prep_timeout_remove(sqe, 1, 0);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	if (ret != -ETIME) {
+		fprintf(stderr, "%s: remove failed: %d %s\n", __FUNCTION__, ret, strerror(-ret));
+		goto err;
+	}
+
+	return 0;
+err:
+	return 1;
+}
+
+
 int main(int argc, char *argv[])
 {
 	struct io_uring ring, sqpoll_ring;
@@ -1420,6 +1647,40 @@
 		return ret;
 	}
 
+	ret = test_timeout_multishot(&ring);
+	if (ret && ret != T_EXIT_SKIP) {
+		fprintf(stderr, "test_timeout_multishot failed\n");
+		return ret;
+	}
+
+	ret = test_timeout_multishot_nr(&ring);
+	if (ret && ret != T_EXIT_SKIP) {
+		fprintf(stderr, "test_timeout_multishot_nr failed\n");
+		return ret;
+	}
+
+	/* io_uring_wait_cqe_timeout() may have left a timeout, reinit ring */
+	io_uring_queue_exit(&ring);
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return 1;
+	}
+
+	ret = test_timeout_multishot_overflow(&ring);
+	if (ret && ret != T_EXIT_SKIP) {
+		fprintf(stderr, "test_timeout_multishot_overflow failed\n");
+		return ret;
+	}
+
+	/* io_uring_wait_cqe_timeout() may have left a timeout, reinit ring */
+	io_uring_queue_exit(&ring);
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return 1;
+	}
+
 	ret = test_single_timeout_wait(&ring, &p);
 	if (ret) {
 		fprintf(stderr, "test_single_timeout_wait failed\n");
diff --git a/test/truncate.c b/test/truncate.c
new file mode 100644
index 0000000..6c47b04
--- /dev/null
+++ b/test/truncate.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various truncate tests
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define TWO_GIG_SIZE ((loff_t)2 * 1024 * 1024 * 1024)
+#define ONE_GIG_SIZE ((loff_t)1024 * 1024 * 1024)
+#define HALF_GIG_SIZE ((loff_t)512 * 1024 * 1024)
+
+static int test_truncate(struct io_uring *ring, int fd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret = -1;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	memset(sqe, 0, sizeof(*sqe));
+
+	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, "fail", 0, 4);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	if (ret == -EINVAL)
+		return T_EXIT_PASS;
+
+	fprintf(stderr, "unexpected truncate res %d\n", ret);
+	return T_EXIT_FAIL;
+}
+
+static int test_ftruncate(struct io_uring *ring, int fd, loff_t len)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+
+	memset(sqe, 0, sizeof(*sqe));
+
+	io_uring_prep_ftruncate(sqe, fd, len);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+err:
+	return 1;
+}
+
+static int get_file_size(int fd, loff_t *size)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		perror("fstat");
+		return -1;
+	}
+	if (S_ISREG(st.st_mode)) {
+		*size = st.st_size;
+		return 0;
+	} else if (S_ISBLK(st.st_mode)) {
+		unsigned long long bytes;
+
+		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) {
+			perror("ioctl");
+			return -1;
+		}
+
+		*size = bytes;
+		return 0;
+	}
+
+	return -1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	char path[32] = ".truncate.XXXXXX";
+	int ret;
+	int fd;
+	int i;
+	loff_t size;
+	loff_t test_sizes[3];
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	fd = mkostemp(path, O_WRONLY | O_CREAT | O_TRUNC);
+	if (fd < 0) {
+		perror("mkostemp");
+		return T_EXIT_FAIL;
+	}
+
+	test_sizes[0] = TWO_GIG_SIZE;
+	test_sizes[1] = ONE_GIG_SIZE;
+	test_sizes[2] = HALF_GIG_SIZE;
+
+	for (i = 0; i < 3; i++) {
+		ret = test_ftruncate(&ring, fd, test_sizes[i]);
+		if (ret < 0) {
+			if (ret == -EBADF || ret == -EINVAL) {
+				if (i == 0) {
+					fprintf(stdout, "Ftruncate not supported, skipping\n");
+					ret = T_EXIT_SKIP;
+					goto out;
+				}
+				goto err;
+			}
+			fprintf(stderr, "ftruncate: %s\n", strerror(-ret));
+			goto err;
+		} else if (ret) {
+			fprintf(stderr, "unexpected cqe->res %d\n", ret);
+			goto err;
+		}
+		if (get_file_size(fd, &size))
+			goto err;
+		if (size != test_sizes[i]) {
+			fprintf(stderr, "fail %d size=%llu, %llu\n", i,
+				(unsigned long long) size,
+				(unsigned long long) test_sizes[i]);
+			goto err;
+		}
+	}
+
+	ret = test_truncate(&ring, fd);
+	if (ret != T_EXIT_PASS)
+		goto err;
+
+out:
+	unlink(path);
+	close(fd);
+	return ret;
+err:
+	unlink(path);
+	close(fd);
+	return T_EXIT_FAIL;
+}
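Distilled, the async ftruncate primitive that test/truncate.c verifies looks
like the sketch below. io_uring_prep_ftruncate() is the liburing 2.6+ helper
for IORING_OP_FTRUNCATE; on kernels without the opcode the request completes
with -EINVAL (or -EBADF), which the test maps to a skip. Error handling is
trimmed here.

#include "liburing.h"

/* Truncate 'fd' to 'len' bytes via the ring. Returns the CQE result:
 * 0 on success, a negative errno on failure. */
static int ftruncate_async(struct io_uring *ring, int fd, loff_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	io_uring_prep_ftruncate(sqe, fd, len);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}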
diff --git a/test/unlink.c b/test/unlink.c
index 8e7d2f4..f573608 100644
--- a/test/unlink.c
+++ b/test/unlink.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Description: run various nop tests
+ * Description: run various unlink tests
  *
  */
 #include <errno.h>
@@ -13,6 +13,87 @@
 
 #include "liburing.h"
 
+static int test_rmdir(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	char buf[32];
+	int ret;
+
+	sprintf(buf, ".tmp.dir.%d", getpid());
+	if (mkdir(buf, 0755) < 0) {
+		perror("mkdir");
+		return 1;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+	io_uring_prep_unlink(sqe, buf, AT_REMOVEDIR);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	if (!ret) {
+		struct stat sb;
+
+		if (!stat(buf, &sb)) {
+			fprintf(stderr, "dir unlinked but still there\n");
+			goto err;
+		}
+	}
+	unlink(buf);
+	return ret;
+err:
+	unlink(buf);
+	return 1;
+}
+
+static int test_unlink_badaddr(struct io_uring *ring)
+{
+	const char *old = (const char *) (uintptr_t) 0x1234;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+	io_uring_prep_unlink(sqe, old, 0);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+err:
+	return 1;
+}
+
 static int test_unlink(struct io_uring *ring, const char *old)
 {
 	struct io_uring_cqe *cqe;
@@ -25,7 +106,7 @@
 		goto err;
 	}
 	io_uring_prep_unlink(sqe, old, 0);
-	
+
 	ret = io_uring_submit(ring);
 	if (ret <= 0) {
 		fprintf(stderr, "sqe submit failed: %d\n", ret);
@@ -105,6 +186,18 @@
 		return 1;
 	}
 
+	ret = test_unlink_badaddr(&ring);
+	if (ret != -EFAULT) {
+		fprintf(stderr, "badaddr unlink got %s\n", strerror(-ret));
+		return 1;
+	}
+
+	ret = test_rmdir(&ring);
+	if (ret) {
+		fprintf(stderr, "rmdir failed: %s\n", strerror(-ret));
+		return 1;
+	}
+
 	return 0;
 err:
 	unlink(buf);
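The new test_rmdir() above relies on IORING_OP_UNLINKAT honoring AT_REMOVEDIR,
exactly as unlinkat(2) does. Reduced to its core, with an illustrative path
argument and error handling trimmed:

#include <fcntl.h>
#include "liburing.h"

/* Remove the directory 'path' via the ring; returns the CQE result. */
static int rmdir_async(struct io_uring *ring, const char *path)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	/* AT_REMOVEDIR turns the unlink into an rmdir */
	io_uring_prep_unlink(sqe, path, AT_REMOVEDIR);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}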
diff --git a/test/uring_cmd_ublk.c b/test/uring_cmd_ublk.c
new file mode 100644
index 0000000..2817402
--- /dev/null
+++ b/test/uring_cmd_ublk.c
@@ -0,0 +1,1252 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: uring_cmd based ublk
+ *
+ * Covers cancellable uring_cmd feature.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <pthread.h>
+#include <limits.h>
+#include <poll.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/inotify.h>
+#include <sys/wait.h>
+
+#include "liburing.h"
+#include "helpers.h"
+#ifdef CONFIG_HAVE_UBLK_HEADER
+#include <linux/ublk_cmd.h>
+
+/****************** part 1: libublk ********************/
+
+#define CTRL_DEV		"/dev/ublk-control"
+#define UBLKC_DEV		"/dev/ublkc"
+#define UBLKB_DEV		"/dev/ublkb"
+#define UBLK_CTRL_RING_DEPTH            32
+
+/* queue idle timeout */
+#define UBLKSRV_IO_IDLE_SECS		20
+
+#define UBLK_IO_MAX_BYTES               65536
+#define UBLK_MAX_QUEUES                 4
+#define UBLK_QUEUE_DEPTH                128
+
+#define UBLK_DBG_DEV            (1U << 0)
+#define UBLK_DBG_QUEUE          (1U << 1)
+#define UBLK_DBG_IO_CMD         (1U << 2)
+#define UBLK_DBG_IO             (1U << 3)
+#define UBLK_DBG_CTRL_CMD       (1U << 4)
+#define UBLK_LOG                (1U << 5)
+
+struct ublk_dev;
+struct ublk_queue;
+
+struct ublk_ctrl_cmd_data {
+	__u32 cmd_op;
+#define CTRL_CMD_HAS_DATA	1
+#define CTRL_CMD_HAS_BUF	2
+	__u32 flags;
+
+	__u64 data[2];
+	__u64 addr;
+	__u32 len;
+};
+
+struct ublk_io {
+	char *buf_addr;
+
+#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
+#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
+#define UBLKSRV_IO_FREE			(1UL << 2)
+	unsigned int flags;
+
+	unsigned int result;
+};
+
+struct ublk_tgt_ops {
+	const char *name;
+	int (*init_tgt)(struct ublk_dev *);
+	void (*deinit_tgt)(struct ublk_dev *);
+
+	int (*queue_io)(struct ublk_queue *, int tag);
+	void (*tgt_io_done)(struct ublk_queue *,
+			int tag, const struct io_uring_cqe *);
+};
+
+struct ublk_tgt {
+	unsigned long dev_size;
+	const struct ublk_tgt_ops *ops;
+	struct ublk_params params;
+};
+
+struct ublk_queue {
+	int q_id;
+	int q_depth;
+	unsigned int cmd_inflight;
+	unsigned int io_inflight;
+	struct ublk_dev *dev;
+	const struct ublk_tgt_ops *tgt_ops;
+	char *io_cmd_buf;
+	struct io_uring ring;
+	struct ublk_io ios[UBLK_QUEUE_DEPTH];
+#define UBLKSRV_QUEUE_STOPPING	(1U << 0)
+#define UBLKSRV_QUEUE_IDLE	(1U << 1)
+	unsigned state;
+	pid_t tid;
+	pthread_t thread;
+};
+
+struct ublk_dev {
+	struct ublk_tgt tgt;
+	struct ublksrv_ctrl_dev_info  dev_info;
+	struct ublk_queue q[UBLK_MAX_QUEUES];
+
+	int fds[2];	/* fds[0] points to /dev/ublkcN */
+	int nr_fds;
+	int ctrl_fd;
+	struct io_uring ring;
+};
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({                              \
+	unsigned long __mptr = (unsigned long)(ptr);                    \
+	((type *)(__mptr - offsetof(type, member))); })
+#endif
+
+#define round_up(val, rnd) \
+	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+static unsigned int ublk_dbg_mask = 0;
+
+static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
+
+static inline int is_target_io(__u64 user_data)
+{
+	return (user_data & (1ULL << 63)) != 0;
+}
+
+static inline __u64 build_user_data(unsigned tag, unsigned op,
+		unsigned tgt_data, unsigned is_target_io)
+{
+	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+
+	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+}
+
+static inline unsigned int user_data_to_tag(__u64 user_data)
+{
+	return user_data & 0xffff;
+}
+
+static inline unsigned int user_data_to_op(__u64 user_data)
+{
+	return (user_data >> 16) & 0xff;
+}
+
+static void ublk_err(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+}
+
+static void ublk_dbg(int level, const char *fmt, ...)
+{
+	if (level & ublk_dbg_mask) {
+		va_list ap;
+		va_start(ap, fmt);
+		vfprintf(stdout, fmt, ap);
+	}
+}
+
+static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+	return (void *)&sqe->cmd;
+}
+
+static inline void ublk_mark_io_done(struct ublk_io *io, int res)
+{
+	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+	io->result = res;
+}
+
+static inline const struct ublksrv_io_desc *ublk_get_iod(
+		const struct ublk_queue *q, int tag)
+{
+	return (struct ublksrv_io_desc *)
+		&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+}
+
+static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
+		__u32 cmd_op)
+{
+	__u32 *addr = (__u32 *)&sqe->off;
+
+	addr[0] = cmd_op;
+	addr[1] = 0;
+}
+
+static inline int ublk_setup_ring(struct io_uring *r, int depth,
+		int cq_depth, unsigned flags)
+{
+	struct io_uring_params p;
+
+	memset(&p, 0, sizeof(p));
+	p.flags = flags | IORING_SETUP_CQSIZE;
+	p.cq_entries = cq_depth;
+
+	return io_uring_queue_init_params(depth, r, &p);
+}
+
+static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
+		struct io_uring_sqe *sqe,
+		struct ublk_ctrl_cmd_data *data)
+{
+	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
+
+	sqe->fd = dev->ctrl_fd;
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->ioprio = 0;
+
+	if (data->flags & CTRL_CMD_HAS_BUF) {
+		cmd->addr = data->addr;
+		cmd->len = data->len;
+	}
+
+	if (data->flags & CTRL_CMD_HAS_DATA)
+		cmd->data[0] = data->data[0];
+
+	cmd->dev_id = info->dev_id;
+	cmd->queue_id = -1;
+
+	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
+
+	io_uring_sqe_set_data(sqe, cmd);
+}
+
+static int __ublk_ctrl_cmd(struct ublk_dev *dev,
+		struct ublk_ctrl_cmd_data *data)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret = -EINVAL;
+
+	sqe = io_uring_get_sqe(&dev->ring);
+	if (!sqe) {
+		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
+		return ret;
+	}
+
+	ublk_ctrl_init_cmd(dev, sqe, data);
+
+	ret = io_uring_submit(&dev->ring);
+	if (ret < 0) {
+		ublk_err("uring submit ret %d\n", ret);
+		return ret;
+	}
+
+	ret = io_uring_wait_cqe(&dev->ring, &cqe);
+	if (ret < 0) {
+		ublk_err("wait cqe: %s\n", strerror(-ret));
+		return ret;
+	}
+	io_uring_cqe_seen(&dev->ring, cqe);
+
+	return cqe->res;
+}
+
+static int ublk_ctrl_start_dev(struct ublk_dev *dev,
+		int daemon_pid)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_START_DEV,
+		.flags	= CTRL_CMD_HAS_DATA,
+	};
+
+	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_add_dev(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_ADD_DEV,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) &dev->dev_info,
+		.len = sizeof(struct ublksrv_ctrl_dev_info),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_del_dev(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op = UBLK_U_CMD_DEL_DEV,
+		.flags = 0,
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_info(struct ublk_dev *dev)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) &dev->dev_info,
+		.len = sizeof(struct ublksrv_ctrl_dev_info),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_set_params(struct ublk_dev *dev,
+		struct ublk_params *params)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) params,
+		.len = sizeof(*params),
+	};
+	params->len = sizeof(*params);
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_features(struct ublk_dev *dev,
+		__u64 *features)
+{
+	struct ublk_ctrl_cmd_data data = {
+		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
+		.flags	= CTRL_CMD_HAS_BUF,
+		.addr = (__u64) (uintptr_t) features,
+		.len = sizeof(*features),
+	};
+
+	return __ublk_ctrl_cmd(dev, &data);
+}
+
+static void ublk_ctrl_deinit(struct ublk_dev *dev)
+{
+	close(dev->ctrl_fd);
+	free(dev);
+}
+
+static struct ublk_dev *ublk_ctrl_init(void)
+{
+	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
+	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	int ret;
+
+	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
+	if (dev->ctrl_fd < 0) {
+		free(dev);
+		return NULL;
+	}
+
+	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
+
+	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
+			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
+	if (ret < 0) {
+		ublk_err("queue_init: %s\n", strerror(-ret));
+		free(dev);
+		return NULL;
+	}
+	dev->nr_fds = 1;
+
+	return dev;
+}
+
+static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
+{
+	int size =  q->q_depth * sizeof(struct ublksrv_io_desc);
+	unsigned int page_sz = getpagesize();
+
+	return round_up(size, page_sz);
+}
+
+static void ublk_queue_deinit(struct ublk_queue *q)
+{
+	int i;
+	int nr_ios = q->q_depth;
+
+	io_uring_unregister_ring_fd(&q->ring);
+
+	if (q->ring.ring_fd > 0) {
+		io_uring_unregister_files(&q->ring);
+		close(q->ring.ring_fd);
+		q->ring.ring_fd = -1;
+	}
+
+	if (q->io_cmd_buf)
+		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
+
+	for (i = 0; i < nr_ios; i++)
+		free(q->ios[i].buf_addr);
+}
+
+static int ublk_queue_init(struct ublk_queue *q)
+{
+	struct ublk_dev *dev = q->dev;
+	int depth = dev->dev_info.queue_depth;
+	int i, ret = -1;
+	int cmd_buf_size, io_buf_size;
+	unsigned long off;
+	int ring_depth = depth, cq_depth = depth;
+
+	q->tgt_ops = dev->tgt.ops;
+	q->state = 0;
+	q->q_depth = depth;
+	q->cmd_inflight = 0;
+	q->tid = gettid();
+
+	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
+	off = UBLKSRV_CMD_BUF_OFFSET +
+		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
+	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
+	if (q->io_cmd_buf == MAP_FAILED) {
+		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
+				q->dev->dev_info.dev_id, q->q_id);
+		goto fail;
+	}
+
+	io_buf_size = dev->dev_info.max_io_buf_bytes;
+	for (i = 0; i < q->q_depth; i++) {
+		q->ios[i].buf_addr = NULL;
+
+		if (posix_memalign((void **)&q->ios[i].buf_addr,
+					getpagesize(), io_buf_size)) {
+			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
+					dev->dev_info.dev_id, q->q_id, i);
+			goto fail;
+		}
+		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+	}
+
+	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+			IORING_SETUP_COOP_TASKRUN);
+	if (ret < 0) {
+		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
+				q->dev->dev_info.dev_id, q->q_id, ret);
+		goto fail;
+	}
+
+	io_uring_register_ring_fd(&q->ring);
+
+	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+	if (ret) {
+		ublk_err("ublk dev %d queue %d register files failed %d\n",
+				q->dev->dev_info.dev_id, q->q_id, ret);
+		goto fail;
+	}
+
+	return 0;
+ fail:
+	ublk_queue_deinit(q);
+	ublk_err("ublk dev %d queue %d failed\n",
+			dev->dev_info.dev_id, q->q_id);
+	return -ENOMEM;
+}
+
+static int ublk_dev_prep(struct ublk_dev *dev)
+{
+	int dev_id = dev->dev_info.dev_id;
+	char buf[64];
+	int ret = 0;
+
+	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
+	dev->fds[0] = open(buf, O_RDWR);
+	if (dev->fds[0] < 0) {
+		ret = -EBADF;
+		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
+		goto fail;
+	}
+
+	if (dev->tgt.ops->init_tgt)
+		ret = dev->tgt.ops->init_tgt(dev);
+
+	return ret;
+fail:
+	close(dev->fds[0]);
+	return ret;
+}
+
+static void ublk_dev_unprep(struct ublk_dev *dev)
+{
+	if (dev->tgt.ops->deinit_tgt)
+		dev->tgt.ops->deinit_tgt(dev);
+	close(dev->fds[0]);
+}
+
+static int ublk_queue_io_cmd(struct ublk_queue *q,
+		struct ublk_io *io, unsigned tag)
+{
+	struct ublksrv_io_cmd *cmd;
+	struct io_uring_sqe *sqe;
+	unsigned int cmd_op = 0;
+	__u64 user_data;
+
+	/* only freed io can be issued */
+	if (!(io->flags & UBLKSRV_IO_FREE))
+		return 0;
+
+	/* we issue because we need either fetching or committing */
+	if (!(io->flags &
+		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+		return 0;
+
+	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
+	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+		cmd_op = UBLK_U_IO_FETCH_REQ;
+
+	sqe = io_uring_get_sqe(&q->ring);
+	if (!sqe) {
+		ublk_err("%s: run out of sqe %d, tag %d\n",
+				__func__, q->q_id, tag);
+		return -1;
+	}
+
+	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
+
+	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
+		cmd->result = io->result;
+
+	/* These fields should be written once, never change */
+	ublk_set_sqe_cmd_op(sqe, cmd_op);
+	sqe->fd		= 0;	/* dev->fds[0] */
+	sqe->opcode	= IORING_OP_URING_CMD;
+	sqe->flags	= IOSQE_FIXED_FILE;
+	sqe->rw_flags	= 0;
+	cmd->tag	= tag;
+	cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
+	cmd->q_id	= q->q_id;
+
+	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+	io_uring_sqe_set_data64(sqe, user_data);
+
+	io->flags = 0;
+
+	q->cmd_inflight += 1;
+
+	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+			__func__, q->q_id, tag, cmd_op,
+			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+	return 1;
+}
+
+static int ublk_complete_io(struct ublk_queue *q,
+		unsigned tag, int res)
+{
+	struct ublk_io *io = &q->ios[tag];
+
+	ublk_mark_io_done(io, res);
+
+	return ublk_queue_io_cmd(q, io, tag);
+}
+
+static void ublk_submit_fetch_commands(struct ublk_queue *q)
+{
+	int i = 0;
+
+	for (i = 0; i < q->q_depth; i++)
+		ublk_queue_io_cmd(q, &q->ios[i], i);
+}
+
+static int ublk_queue_is_idle(struct ublk_queue *q)
+{
+	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+}
+
+static int ublk_queue_is_done(struct ublk_queue *q)
+{
+	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+}
+
+static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
+		struct io_uring_cqe *cqe)
+{
+	unsigned tag = user_data_to_tag(cqe->user_data);
+
+	if (cqe->res < 0 && cqe->res != -EAGAIN)
+		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
+			__func__, cqe->res, q->q_id,
+			user_data_to_tag(cqe->user_data),
+			user_data_to_op(cqe->user_data));
+
+	if (q->tgt_ops->tgt_io_done)
+		q->tgt_ops->tgt_io_done(q, tag, cqe);
+}
+
+static void ublk_handle_cqe(struct io_uring *r,
+		struct io_uring_cqe *cqe, void *data)
+{
+	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+	unsigned tag = user_data_to_tag(cqe->user_data);
+	unsigned cmd_op = user_data_to_op(cqe->user_data);
+	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+		!(q->state & UBLKSRV_QUEUE_STOPPING);
+	struct ublk_io *io;
+
+	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
+			__func__, cqe->res, q->q_id, tag, cmd_op,
+			is_target_io(cqe->user_data),
+			(q->state & UBLKSRV_QUEUE_STOPPING));
+
+	/* Don't retrieve io in case of target io */
+	if (is_target_io(cqe->user_data)) {
+		ublksrv_handle_tgt_cqe(q, cqe);
+		return;
+	}
+
+	io = &q->ios[tag];
+	q->cmd_inflight--;
+
+	if (!fetch) {
+		q->state |= UBLKSRV_QUEUE_STOPPING;
+		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
+	}
+
+	if (cqe->res == UBLK_IO_RES_OK) {
+		assert(tag < q->q_depth);
+		q->tgt_ops->queue_io(q, tag);
+	} else {
+		/*
+		 * COMMIT_REQ will be completed immediately since no fetching
+		 * piggyback is required.
+		 *
+		 * Marking IO_FREE only, then this io won't be issued since
+		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
+		 *
+		 */
+		io->flags = UBLKSRV_IO_FREE;
+	}
+}
+
+static int ublk_reap_events_uring(struct io_uring *r)
+{
+	struct io_uring_cqe *cqe;
+	unsigned head;
+	int count = 0;
+
+	io_uring_for_each_cqe(r, head, cqe) {
+		ublk_handle_cqe(r, cqe, NULL);
+		count += 1;
+	}
+	io_uring_cq_advance(r, count);
+
+	return count;
+}
+
+static int ublk_process_io(struct ublk_queue *q)
+{
+	int ret, reapped;
+
+	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
+				q->dev->dev_info.dev_id,
+				q->q_id, io_uring_sq_ready(&q->ring),
+				q->cmd_inflight,
+				(q->state & UBLKSRV_QUEUE_STOPPING));
+
+	if (ublk_queue_is_done(q))
+		return -ENODEV;
+
+	ret = io_uring_submit_and_wait(&q->ring, 1);
+	reapped = ublk_reap_events_uring(&q->ring);
+
+	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
+			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
+			(q->state & UBLKSRV_QUEUE_IDLE));
+
+	return reapped;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+	struct ublk_queue *q = data;
+	int dev_id = q->dev->dev_info.dev_id;
+	int ret;
+
+	ret = ublk_queue_init(q);
+	if (ret) {
+		ublk_err("ublk dev %d queue %d init queue failed\n",
+				dev_id, q->q_id);
+		return NULL;
+	}
+	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
+			q->tid, dev_id, q->q_id);
+
+	/* submit all io commands to ublk driver */
+	ublk_submit_fetch_commands(q);
+	do {
+		if (ublk_process_io(q) < 0)
+			break;
+	} while (1);
+
+	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
+	ublk_queue_deinit(q);
+	return NULL;
+}
+
+static void ublk_set_parameters(struct ublk_dev *dev)
+{
+	int ret;
+
+	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
+	if (ret)
+		ublk_err("dev %d set basic parameter failed %d\n",
+				dev->dev_info.dev_id, ret);
+}
+
+static int ublk_start_daemon(struct ublk_dev *dev)
+{
+	int ret, i;
+	void *thread_ret;
+	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+
+	if (daemon(1, 1) < 0)
+		return -errno;
+
+	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+
+	ret = ublk_dev_prep(dev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < dinfo->nr_hw_queues; i++) {
+		dev->q[i].dev = dev;
+		dev->q[i].q_id = i;
+		pthread_create(&dev->q[i].thread, NULL,
+				ublk_io_handler_fn,
+				&dev->q[i]);
+	}
+
+	/* everything is fine now, start us */
+	ublk_set_parameters(dev);
+	ret = ublk_ctrl_start_dev(dev, getpid());
+	if (ret < 0) {
+		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
+		goto fail;
+	}
+
+	/* wait until we are terminated */
+	for (i = 0; i < dinfo->nr_hw_queues; i++)
+		pthread_join(dev->q[i].thread, &thread_ret);
+ fail:
+	ublk_dev_unprep(dev);
+	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
+
+	return ret;
+}
+
+static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
+{
+#define EV_SIZE (sizeof(struct inotify_event))
+#define EV_BUF_LEN (128 * (EV_SIZE + 16))
+	struct pollfd pfd;
+	int fd, wd;
+	int ret = -EINVAL;
+
+	fd = inotify_init();
+	if (fd < 0) {
+		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
+		return fd;
+	}
+
+	wd = inotify_add_watch(fd, "/dev", evt_mask);
+	if (wd == -1) {
+		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
+		goto fail;
+	}
+
+	pfd.fd = fd;
+	pfd.events = POLLIN;
+	while (1) {
+		int i = 0;
+		char buffer[EV_BUF_LEN];
+		ret = poll(&pfd, 1, 1000 * timeout);
+
+		if (ret == -1) {
+			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
+			goto rm_watch;
+		} else if (ret == 0) {
+			ublk_err("%s: poll inotify timeout\n", __func__);
+			ret = -ENOENT;
+			goto rm_watch;
+		}
+
+		ret = read(fd, buffer, EV_BUF_LEN);
+		if (ret < 0) {
+			ublk_err("%s: read inotify fd failed\n", __func__);
+			goto rm_watch;
+		}
+
+		while (i < ret) {
+			struct inotify_event *event = (struct inotify_event *)&buffer[i];
+
+			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
+					__func__, event->mask, event->name);
+			if (event->mask & evt_mask) {
+				if (!strcmp(event->name, dev_name)) {
+					ret = 0;
+					goto rm_watch;
+				}
+			}
+			i += EV_SIZE + event->len;
+		}
+	}
+rm_watch:
+	inotify_rm_watch(fd, wd);
+fail:
+	close(fd);
+	return ret;
+}
+
+static int ublk_stop_io_daemon(const struct ublk_dev *dev)
+{
+	int daemon_pid = dev->dev_info.ublksrv_pid;
+	int dev_id = dev->dev_info.dev_id;
+	char ublkc[64];
+	int ret;
+
+	/*
+	 * Wait until the ublk char device is closed, i.e. our daemon has shut down
+	 */
+	snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
+	ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
+	waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
+	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
+			__func__, daemon_pid, dev_id, ret);
+
+	return ret;
+}
+
+static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
+		       unsigned depth)
+{
+	const struct ublk_tgt_ops *ops;
+	struct ublksrv_ctrl_dev_info *info;
+	struct ublk_dev *dev;
+	int dev_id = *exp_id;
+	char ublkb[64];
+	int ret;
+
+	ops = ublk_find_tgt(tgt_type);
+	if (!ops) {
+		ublk_err("%s: no such tgt type, type %s\n",
+				__func__, tgt_type);
+		return -ENODEV;
+	}
+
+	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
+		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
+				__func__, nr_queues, depth);
+		return -EINVAL;
+	}
+
+	dev = ublk_ctrl_init();
+	if (!dev) {
+		ublk_err("%s: can't alloc dev id %d, type %s\n",
+				__func__, dev_id, tgt_type);
+		return -ENOMEM;
+	}
+
+	info = &dev->dev_info;
+	info->dev_id = dev_id;
+        info->nr_hw_queues = nr_queues;
+        info->queue_depth = depth;
+	dev->tgt.ops = ops;
+
+	ret = ublk_ctrl_add_dev(dev);
+	if (ret < 0) {
+		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
+				__func__, dev_id, tgt_type, ret);
+		goto fail;
+	}
+
+	switch (fork()) {
+	case -1:
+		goto fail;
+	case 0:
+		ublk_start_daemon(dev);
+		return 0;
+	}
+
+	/*
+	 * Wait until the ublk disk is added, which means our daemon has
+	 * started successfully
+	 */
+	snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
+	ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
+	if (ret < 0) {
+		ublk_err("%s: can't start daemon id %d, type %s\n",
+				__func__, dev_id, tgt_type);
+		ublk_ctrl_del_dev(dev);
+	} else {
+		*exp_id = dev->dev_info.dev_id;
+	}
+fail:
+	ublk_ctrl_deinit(dev);
+	return ret;
+}
+
+static int cmd_dev_del_by_kill(int number)
+{
+	struct ublk_dev *dev;
+	int ret;
+
+	dev = ublk_ctrl_init();
+	dev->dev_info.dev_id = number;
+
+	ret = ublk_ctrl_get_info(dev);
+	if (ret < 0)
+		goto fail;
+
+	/* simulate one ublk daemon panic */
+	kill(dev->dev_info.ublksrv_pid, 9);
+
+	ret = ublk_stop_io_daemon(dev);
+	if (ret < 0)
+		ublk_err("%s: can't stop daemon id %d\n", __func__, number);
+	ublk_ctrl_del_dev(dev);
+fail:
+	if (ret >= 0)
+		ret = ublk_ctrl_get_info(dev);
+	ublk_ctrl_deinit(dev);
+
+	return (ret != 0) ? 0 : -EIO;
+}
+
+/****************** part 2: target implementation ********************/
+
+static int ublk_null_tgt_init(struct ublk_dev *dev)
+{
+	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+	unsigned long dev_size = 250UL << 30;
+
+	dev->tgt.dev_size = dev_size;
+	dev->tgt.params = (struct ublk_params) {
+		.types = UBLK_PARAM_TYPE_BASIC,
+		.basic = {
+			.logical_bs_shift	= 9,
+			.physical_bs_shift	= 12,
+			.io_opt_shift		= 12,
+			.io_min_shift		= 9,
+			.max_sectors		= info->max_io_buf_bytes >> 9,
+			.dev_sectors		= dev_size >> 9,
+		},
+	};
+
+	return 0;
+}
+
+static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+{
+	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+	ublk_complete_io(q, tag, iod->nr_sectors << 9);
+
+	return 0;
+}
+
+static const struct ublk_tgt_ops tgt_ops_list[] = {
+	{
+		.name = "null",
+		.init_tgt = ublk_null_tgt_init,
+		.queue_io = ublk_null_queue_io,
+	},
+};
+
+static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
+{
+	const struct ublk_tgt_ops *ops;
+	int i;
+
+	if (name == NULL)
+		return NULL;
+
+	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(*ops); i++)
+		if (strcmp(tgt_ops_list[i].name, name) == 0)
+			return &tgt_ops_list[i];
+	return NULL;
+}
+
+
+/****************** part 3: IO test over ublk disk ********************/
+
+#include "helpers.h"
+#include "liburing.h"
+#define BS		4096
+#define BUFFERS		128
+
+struct io_ctx {
+	int dev_id;
+	int write;
+	int seq;
+
+	/* output */
+	int res;
+	pthread_t handle;
+};
+
+static int __test_io(struct io_uring *ring, int fd, int write,
+		int seq, struct iovec *vecs, int exp_len, off_t start)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int i, ret;
+	off_t offset;
+
+	offset = start;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (!seq)
+			offset = start + BS * (rand() % BUFFERS);
+		if (write) {
+			io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
+						  vecs[i].iov_len,
+						  offset, i);
+		} else {
+			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
+						 vecs[i].iov_len,
+						 offset, i);
+		}
+		sqe->user_data = i;
+		if (seq)
+			offset += BS;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (exp_len == -1) {
+			int iov_len = vecs[cqe->user_data].iov_len;
+
+			if (cqe->res != iov_len) {
+				fprintf(stderr, "cqe res %d, wanted %d\n",
+					cqe->res, iov_len);
+				goto err;
+			}
+		} else if (cqe->res != exp_len) {
+			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+err:
+	return 1;
+}
+
+/* Run IO over ublk block device  */
+static int test_io(struct io_ctx *ctx)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+	char buf[256];
+	int fd = -1;
+	off_t offset = 0;
+	unsigned long long bytes;
+	int open_flags = O_DIRECT;
+	struct iovec *vecs = t_create_buffers(BUFFERS, BS);
+
+	ret = t_create_ring(BUFFERS, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
+
+	if (ctx->write)
+		open_flags |= O_WRONLY;
+	else
+		open_flags |= O_RDONLY;
+	fd = open(buf, open_flags);
+	if (fd < 0) {
+		if (errno == EINVAL)
+			return 0;
+		return 1;
+	}
+
+	if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+		return 1;
+
+	ret = t_register_buffers(&ring, vecs, BUFFERS);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "buffer reg failed: %d\n", ret);
+		return 1;
+	}
+
+	for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
+		ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
+				offset);
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
+					ctx->dev_id, (unsigned long) offset, ret);
+			break;
+		}
+	}
+
+	close(fd);
+	io_uring_unregister_buffers(&ring);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+static void *test_io_fn(void *data)
+{
+	struct io_ctx *ctx = data;
+
+	ctx->res = test_io(ctx);
+
+	return data;
+}
+
+static void ignore_stderr(void)
+{
+	int devnull = open("/dev/null", O_WRONLY);
+
+	if (devnull >= 0) {
+		dup2(devnull, fileno(stderr));
+		close(devnull);
+	}
+}
+
+static int test_io_worker(int dev_id)
+{
+	const int nr_jobs = 4;
+	struct io_ctx ctx[nr_jobs];
+	int i, ret = 0;
+
+	for (i = 0; i < nr_jobs; i++) {
+		ctx[i].dev_id = dev_id;
+		ctx[i].write = (i & 0x1) ? 0 : 1;
+		ctx[i].seq = 1;
+
+		pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
+	}
+
+	for (i = 0; i < nr_jobs; i++) {
+		pthread_join(ctx[i].handle, NULL);
+
+		if (!ret && ctx[i].res)
+			ret = ctx[i].res;
+	}
+
+	return ret;
+}
+
+/*
+ * Run IO over the created ublk device, deleting the device while IO is in flight.
+ *
+ * Covers the cancellable uring_cmd code path.
+ */
+static int test_del_ublk_with_io(void)
+{
+	const unsigned wait_ms = 200;
+	char *tgt_type = "null";
+	int dev_id = -1;
+	int ret, pid;
+
+	ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "buffer reg failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	switch ((pid = fork())) {
+	case -1:
+		fprintf(stderr, "fork failed\n");
+		return T_EXIT_FAIL;
+	case 0:
+		/* io error is expected since the parent is killing ublk */
+		ignore_stderr();
+		test_io_worker(dev_id);
+		return 0;
+	default:
+		/*
+		 * Wait a little while until ublk IO pipeline is warm up,
+		 * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
+		 *
+		 * cancellable uring_cmd code path can be covered in this way.
+		 */
+		usleep(wait_ms * 1000);
+		ret = cmd_dev_del_by_kill(dev_id);
+		waitpid(pid, NULL, 0);
+		return ret;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	const int nr_loop = 4;
+	struct ublk_dev *dev;
+	__u64 features;
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	dev = ublk_ctrl_init();
+	/* ublk isn't supported or the module isn't loaded */
+	if (!dev)
+		return T_EXIT_SKIP;
+
+	/* kernel doesn't support get_features */
+	ret = ublk_ctrl_get_features(dev, &features);
+	if (ret < 0)
+		return T_EXIT_SKIP;
+
+	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
+		return T_EXIT_SKIP;
+
+	for (i = 0; i < nr_loop; i++) {
+		if (test_del_ublk_with_io())
+			return T_EXIT_FAIL;
+	}
+	ublk_ctrl_deinit(dev);
+	return T_EXIT_PASS;
+}
+#else
+int main(int argc, char *argv[])
+{
+	return T_EXIT_SKIP;
+}
+#endif
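One detail of the ublk test worth isolating: it multiplexes request identity
into the 64-bit user_data (16-bit tag, 8-bit opcode, 16 bits of target data,
and a target-io marker in bit 63), so a single CQE handler can demultiplex
control commands from target IO. The packing is plain bit arithmetic,
reproduced standalone below.

#include <assert.h>
#include <stdint.h>

static uint64_t pack_user_data(unsigned tag, unsigned op, int is_tgt)
{
	/* tag must fit in 16 bits, op in 8 */
	assert(!(tag >> 16) && !(op >> 8));
	return tag | (op << 16) | ((uint64_t)!!is_tgt << 63);
}

static unsigned unpack_tag(uint64_t user_data)
{
	return user_data & 0xffff;
}

static unsigned unpack_op(uint64_t user_data)
{
	return (user_data >> 16) & 0xff;
}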
diff --git a/test/version.c b/test/version.c
new file mode 100644
index 0000000..57991ce
--- /dev/null
+++ b/test/version.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: check version macros and runtime checks work
+ *
+ */
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	if (IO_URING_CHECK_VERSION(io_uring_major_version(), io_uring_minor_version()))
+		return T_EXIT_FAIL;
+
+	if (io_uring_major_version() != IO_URING_VERSION_MAJOR)
+		return T_EXIT_FAIL;
+
+	if (io_uring_minor_version() != IO_URING_VERSION_MINOR)
+		return T_EXIT_FAIL;
+
+#if IO_URING_CHECK_VERSION(IO_URING_VERSION_MAJOR, IO_URING_VERSION_MINOR)
+	return T_EXIT_FAIL;
+#endif
+
+	return T_EXIT_PASS;
+}
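The version test pins down the macro's direction: IO_URING_CHECK_VERSION(M, m)
evaluates true when the library is older than M.m, which is why checking the
library against its own version must come out false. That makes feature gating
a one-liner; the 2.6 threshold below is purely illustrative.

#include "liburing.h"

/* compile-time: true when the headers we build against are at least 2.6 */
#define HAVE_LIBURING_2_6	(!IO_URING_CHECK_VERSION(2, 6))

/* runtime: the same check against the library actually linked */
static int have_liburing_2_6(void)
{
	return !io_uring_check_version(2, 6);
}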
diff --git a/test/wait-timeout.c b/test/wait-timeout.c
new file mode 100644
index 0000000..f35fbb5
--- /dev/null
+++ b/test/wait-timeout.c
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various timeout tests
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+
+#define IO_NSEC_PER_SEC			1000000000ULL
+
+static bool support_abs = false;
+static bool support_clock = false;
+
+static unsigned long long timespec_to_ns(struct timespec *ts)
+{
+	return ts->tv_nsec + ts->tv_sec * IO_NSEC_PER_SEC;
+}
+
+static struct timespec ns_to_timespec(unsigned long long t)
+{
+	struct timespec ts;
+
+	ts.tv_sec = t / IO_NSEC_PER_SEC;
+	ts.tv_nsec = t - ts.tv_sec * IO_NSEC_PER_SEC;
+	return ts;
+}
+
+static long long ns_since(struct timespec *ts)
+{
+	struct timespec now;
+	int ret;
+
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		fprintf(stderr, "clock_gettime failed\n");
+		exit(T_EXIT_FAIL);
+	}
+
+	return timespec_to_ns(&now) - timespec_to_ns(ts);
+}
+
+static int t_io_uring_wait(struct io_uring *ring, int nr, unsigned enter_flags,
+			   struct timespec *ts)
+{
+	struct __kernel_timespec kts = {
+		.tv_sec = ts->tv_sec,
+		.tv_nsec = ts->tv_nsec
+	};
+	struct io_uring_getevents_arg arg = {
+		.sigmask	= 0,
+		.sigmask_sz	= _NSIG / 8,
+		.ts		= (unsigned long) &kts
+	};
+	int ret;
+
+	enter_flags |= IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG;
+	ret = io_uring_enter2(ring->ring_fd, 0, nr, enter_flags,
+			      (void *)&arg, sizeof(arg));
+	return ret;
+}
+
+static int probe_timers(void)
+{
+	struct io_uring_clock_register cr = { .clockid = CLOCK_MONOTONIC, };
+	struct io_uring ring;
+	struct timespec ts;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "probe ring setup failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+	if (ret) {
+		fprintf(stderr, "clock_gettime failed\n");
+		return ret;
+	}
+
+	ret = t_io_uring_wait(&ring, 0, IORING_ENTER_ABS_TIMER, &ts);
+	if (!ret) {
+		support_abs = true;
+	} else if (ret != -EINVAL) {
+		fprintf(stderr, "wait failed %i\n", ret);
+		return ret;
+	}
+
+	ret = io_uring_register_clock(&ring, &cr);
+	if (!ret) {
+		support_clock = true;
+	} else if (ret != -EINVAL) {
+		fprintf(stderr, "io_uring_register_clock %i\n", ret);
+		return ret;
+	}
+
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+static int test_timeout(bool abs, bool set_clock)
+{
+	unsigned enter_flags = abs ? IORING_ENTER_ABS_TIMER : 0;
+	struct io_uring ring;
+	struct timespec start, ts;
+	long long dt;
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return 1;
+	}
+
+	if (set_clock) {
+		struct io_uring_clock_register cr = {};
+
+		cr.clockid = CLOCK_BOOTTIME;
+		ret = io_uring_register_clock(&ring, &cr);
+		if (ret) {
+			fprintf(stderr, "io_uring_register_clock failed\n");
+			return 1;
+		}
+	}
+
+	/* pass current time */
+	ret = clock_gettime(CLOCK_MONOTONIC, &start);
+	assert(ret == 0);
+
+	ts = abs ? start : ns_to_timespec(0);
+	ret = t_io_uring_wait(&ring, 1, enter_flags, &ts);
+	if (ret != -ETIME) {
+		fprintf(stderr, "wait current time failed, %i\n", ret);
+		return 1;
+	}
+
+	if (ns_since(&start) >= IO_NSEC_PER_SEC) {
+		fprintf(stderr, "current time test failed\n");
+		return 1;
+	}
+
+	if (abs) {
+		/* expired time */
+		ret = clock_gettime(CLOCK_MONOTONIC, &start);
+		assert(ret == 0);
+		ts = ns_to_timespec(timespec_to_ns(&start) - IO_NSEC_PER_SEC);
+
+		ret = t_io_uring_wait(&ring, 1, enter_flags, &ts);
+		if (ret != -ETIME) {
+			fprintf(stderr, "expired timeout wait failed, %i\n", ret);
+			return 1;
+		}
+
+		if (ns_since(&start) >= IO_NSEC_PER_SEC) {
+			fprintf(stderr, "expired timer test failed\n");
+			return 1;
+		}
+	}
+
+	/* 2s wait */
+	ret = clock_gettime(CLOCK_MONOTONIC, &start);
+	assert(ret == 0);
+
+	dt = 2 * IO_NSEC_PER_SEC + (abs ? timespec_to_ns(&start) : 0);
+	ts = ns_to_timespec(dt);
+	ret = t_io_uring_wait(&ring, 1, enter_flags, &ts);
+	if (ret != -ETIME) {
+		fprintf(stderr, "wait timeout failed, %i\n", ret);
+		return 1;
+	}
+
+	dt = ns_since(&start);
+	if (dt < IO_NSEC_PER_SEC || dt > 3 * IO_NSEC_PER_SEC) {
+		fprintf(stderr, "early wake up, %lld\n", dt);
+		return 1;
+	}
+	return 0;
+}
+
+static int test_clock_setup(void)
+{
+	struct io_uring ring;
+	struct io_uring_clock_register cr = {};
+	int ret;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	ret = __sys_io_uring_register(ring.ring_fd, IORING_REGISTER_CLOCK, NULL, 0);
+	if (!ret) {
+		fprintf(stderr, "invalid null clock registration %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	cr.clockid = -1;
+	ret = __sys_io_uring_register(ring.ring_fd, IORING_REGISTER_CLOCK, &cr, 0);
+	if (ret != -EINVAL) {
+		fprintf(stderr, "invalid clockid registration %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	cr.clockid = CLOCK_MONOTONIC;
+	ret = __sys_io_uring_register(ring.ring_fd, IORING_REGISTER_CLOCK, &cr, 0);
+	if (ret) {
+		fprintf(stderr, "clock monotonic registration failed %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	cr.clockid = CLOCK_BOOTTIME;
+	ret = __sys_io_uring_register(ring.ring_fd, IORING_REGISTER_CLOCK, &cr, 0);
+	if (ret) {
+		fprintf(stderr, "clock boottime registration failed %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	cr.clockid = CLOCK_MONOTONIC;
+	ret = __sys_io_uring_register(ring.ring_fd, IORING_REGISTER_CLOCK, &cr, 0);
+	if (ret) {
+		fprintf(stderr, "2nd clock monotonic registration failed %i\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = probe_timers();
+	if (ret) {
+		fprintf(stderr, "probe failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (!support_abs && !support_clock)
+		return T_EXIT_SKIP;
+
+	if (support_clock) {
+		ret = test_clock_setup();
+		if (ret) {
+			fprintf(stderr, "test_clock_setup failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	for (i = 0; i < 4; i++) {
+		bool abs = i & 1;
+		bool clock = i & 2;
+
+		if (abs && !support_abs)
+			continue;
+		if (clock && !support_clock)
+			continue;
+
+		ret = test_timeout(abs, clock);
+		if (ret) {
+			fprintf(stderr, "test_timeout failed %i %i\n",
+					abs, clock);
+			return ret;
+		}
+	}
+
+	return T_EXIT_PASS;
+}
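The raw io_uring_enter2() plumbing above is only needed for probing; applications reach the same behavior through the public API. A sketch, assuming a kernel new enough to accept IORING_REGISTER_CLOCK:

#include <stdio.h>
#include <time.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	struct io_uring_clock_register cr = { .clockid = CLOCK_BOOTTIME };
	struct __kernel_timespec ts = { .tv_sec = 1 };
	struct io_uring_cqe *cqe;
	int ret;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	/* waits now tick on CLOCK_BOOTTIME, so suspended time counts;
	 * -EINVAL means the kernel predates clock registration */
	ret = io_uring_register_clock(&ring, &cr);
	if (ret && ret != -EINVAL)
		return 1;

	/* nothing is queued, so expect -ETIME after roughly one second */
	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
	printf("wait returned %d\n", ret);

	io_uring_queue_exit(&ring);
	return 0;
}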
diff --git a/test/waitid.c b/test/waitid.c
new file mode 100644
index 0000000..54e187f
--- /dev/null
+++ b/test/waitid.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test waitid functionality
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+static bool no_waitid;
+
+static void child(long usleep_time)
+{
+	if (usleep_time)
+		usleep(usleep_time);
+	exit(0);
+}
+
+static int test_invalid_infop(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	siginfo_t *si = (siginfo_t *) (uintptr_t) 0x1234;
+	int ret, w;
+	pid_t pid;
+
+	pid = fork();
+	if (!pid) {
+		child(200000);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, pid, si, WEXITED, 0);
+	sqe->user_data = 1;
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "cqe wait: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+	if (cqe->res != -EFAULT) {
+		fprintf(stderr, "Bad return on invalid infop: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	wait(&w);
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test linked timeout with child not exiting in time
+ */
+static int test_noexit(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct __kernel_timespec ts;
+	int ret, i, w;
+	siginfo_t si;
+	pid_t pid;
+
+	pid = fork();
+	if (!pid) {
+		child(200000);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0);
+	sqe->flags |= IOSQE_IO_LINK;
+	sqe->user_data = 1;
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = 100 * 1000 * 1000ULL;
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_link_timeout(sqe, &ts, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(ring);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 2 && cqe->res != 1) {
+			fprintf(stderr, "timeout res: %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 1 && cqe->res != -ECANCELED) {
+			fprintf(stderr, "waitid res: %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	wait(&w);
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test one child exiting, but not the one we were looking for
+ */
+static int test_double(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	siginfo_t si;
+	pid_t p1, p2;
+	int ret, w;
+
+	/* p1 will exit shortly */
+	p1 = fork();
+	if (!p1) {
+		child(100000);
+		exit(0);
+	}
+
+	/* p2 will linger */
+	p2 = fork();
+	if (!p2) {
+		child(200000);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, p2, &si, WEXITED, 0);
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "cqe wait: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe res: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (si.si_pid != p2) {
+		fprintf(stderr, "expected pid %d, got %d\n", p2, si.si_pid);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	wait(&w);
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test reaping of an already exited task
+ */
+static int test_ready(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	siginfo_t si;
+	pid_t pid;
+	int ret;
+
+	pid = fork();
+	if (!pid) {
+		child(0);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0);
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "cqe wait: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe res: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (si.si_pid != pid) {
+		fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test cancelation of pending waitid
+ */
+static int test_cancel(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, i, w;
+	pid_t pid;
+
+	pid = fork();
+	if (!pid) {
+		child(20000);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, pid, NULL, WEXITED, 0);
+	sqe->user_data = 1;
+
+	io_uring_submit(ring);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_cancel64(sqe, 1, 0);
+	sqe->user_data = 2;
+
+	io_uring_submit(ring);
+
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 1 && cqe->res != -ECANCELED) {
+			fprintf(stderr, "cqe res: %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 2 && cqe->res != 1) {
+			fprintf(stderr, "cqe res: %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	wait(&w);
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test cancelation of pending waitid, with expected races that either
+ * waitid trigger or cancelation will win.
+ */
+static int test_cancel_race(struct io_uring *ring, int async)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, i, to_wait, total_forks;
+	pid_t pid;
+
+	total_forks = 0;
+	for (i = 0; i < 10; i++) {
+		total_forks++;
+		pid = fork();
+		if (!pid) {
+			child(getpid() & 1);
+			exit(0);
+		}
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_ALL, -1, NULL, WEXITED, 0);
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+	sqe->user_data = 1;
+
+	io_uring_submit(ring);
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_cancel64(sqe, 1, 0);
+	sqe->user_data = 2;
+
+	usleep(1);
+
+	io_uring_submit(ring);
+
+	to_wait = total_forks;
+	for (i = 0; i < 2; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "cqe wait: %d\n", ret);
+			return T_EXIT_FAIL;
+		}
+		if (cqe->user_data == 1) {
+			if (!cqe->res)
+				to_wait--;
+			if (!(cqe->res == -ECANCELED || cqe->res == 0)) {
+				fprintf(stderr, "cqe1 res: %d\n", cqe->res);
+				return T_EXIT_FAIL;
+			}
+		}
+		if (cqe->user_data == 2 &&
+		    !(cqe->res == 1 || cqe->res == 0 || cqe->res == -ENOENT ||
+		      cqe->res == -EALREADY)) {
+			fprintf(stderr, "cqe2 res: %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	for (i = 0; i < to_wait; i++) {
+		int w;
+
+		wait(&w);
+	}
+
+	return T_EXIT_PASS;
+}
+
+/*
+ * Test basic reap of child exit
+ */
+static int test(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	siginfo_t si;
+	pid_t pid;
+	int ret;
+
+	pid = fork();
+	if (!pid) {
+		child(100);
+		exit(0);
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0);
+
+	io_uring_submit(ring);
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret) {
+		fprintf(stderr, "cqe wait: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/* no waitid support */
+	if (cqe->res == -EINVAL) {
+		no_waitid = true;
+		return T_EXIT_SKIP;
+	}
+	if (cqe->res < 0) {
+		fprintf(stderr, "cqe res: %d\n", cqe->res);
+		return T_EXIT_FAIL;
+	}
+	if (si.si_pid != pid) {
+		fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	int ret, i;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	io_uring_queue_init(8, &ring, 0);
+
+	ret = test(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test failed\n");
+		return T_EXIT_FAIL;
+	}
+	if (no_waitid)
+		return T_EXIT_SKIP;
+
+	ret = test_noexit(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_noexit failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_noexit(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_noexit failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_double(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_double failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_ready(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_ready failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_cancel(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_cancel failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test_invalid_infop(&ring);
+	if (ret == T_EXIT_FAIL) {
+		fprintf(stderr, "test_invalid_infop failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	for (i = 0; i < 1000; i++) {
+		ret = test_cancel_race(&ring, i & 1);
+		if (ret == T_EXIT_FAIL) {
+			fprintf(stderr, "test_cancel_race failed\n");
+			return T_EXIT_FAIL;
+		}
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
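Distilled from the cases above, the basic asynchronous waitid flow is: fork, queue the waitid SQE, reap the CQE, then read the siginfo the kernel filled in. A self-contained sketch (the exit status 42 is arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	siginfo_t si;
	pid_t pid;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	pid = fork();
	if (!pid)
		exit(42);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0);
	io_uring_submit(&ring);

	/* cqe->res of -EINVAL would mean no kernel waitid support */
	if (io_uring_wait_cqe(&ring, &cqe) || cqe->res < 0)
		return 1;

	/* the child is already reaped here; no extra wait(2) is needed */
	printf("pid %d exited with status %d\n", si.si_pid, si.si_status);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}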
diff --git a/test/wakeup-hang.c b/test/wakeup-hang.c
index e43cb34..39d9164 100644
--- a/test/wakeup-hang.c
+++ b/test/wakeup-hang.c
@@ -5,10 +5,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
-#include <liburing.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <sys/time.h>
+#include "liburing.h"
 
 struct thread_data {
 	struct io_uring *ring;
diff --git a/test/wq-aff.c b/test/wq-aff.c
new file mode 100644
index 0000000..91b94ad
--- /dev/null
+++ b/test/wq-aff.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that io-wq affinity is correctly set for SQPOLL
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define IOWQ_CPU	0
+#define SQPOLL_CPU	1
+
+static int verify_comm(pid_t pid, const char *name, int cpu)
+{
+	char comm[64], buf[64];
+	cpu_set_t set;
+	int fd, ret;
+
+	sprintf(comm, "/proc/%d/comm", pid);
+	fd = open(comm, O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return T_EXIT_SKIP;
+	}
+
+	ret = read(fd, buf, sizeof(buf));
+	if (ret < 0) {
+		close(fd);
+		return T_EXIT_SKIP;
+	}
+
+	if (strncmp(buf, name, strlen(name) - 1)) {
+		close(fd);
+		return T_EXIT_SKIP;
+	}
+
+	close(fd);
+
+	ret = sched_getaffinity(pid, sizeof(set), &set);
+	if (ret < 0) {
+		perror("sched_getaffinity");
+		return T_EXIT_SKIP;
+	}
+
+	if (CPU_COUNT(&set) != 1) {
+		fprintf(stderr, "More than one CPU set in mask\n");
+		return T_EXIT_FAIL;
+	}
+	if (!CPU_ISSET(cpu, &set)) {
+		fprintf(stderr, "Wrong CPU set in mask\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int verify_affinity(pid_t pid, int sqpoll)
+{
+	pid_t wq_pid, sqpoll_pid = -1;
+	char name[64];
+	int ret;
+
+	wq_pid = pid + 2;
+	if (sqpoll)
+		sqpoll_pid = pid + 1;
+
+	/* verify we had the pids right */
+	sprintf(name, "iou-wrk-%d", pid);
+	ret = verify_comm(wq_pid, name, IOWQ_CPU);
+	if (ret != T_EXIT_PASS)
+		return ret;
+
+	if (sqpoll_pid != -1) {
+		sprintf(name, "iou-sqp-%d", pid);
+		ret = verify_comm(sqpoll_pid, name, SQPOLL_CPU);
+		if (ret != T_EXIT_PASS)
+			return ret;
+	}
+
+	return T_EXIT_PASS;
+}
+
+static int test(int sqpoll)
+{
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	char buf[64];
+	int fds[2], ret;
+	cpu_set_t set;
+
+	if (sqpoll) {
+		p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
+		p.sq_thread_cpu = SQPOLL_CPU;
+	}
+
+	io_uring_queue_init_params(8, &ring, &p);
+
+	CPU_ZERO(&set);
+	CPU_SET(IOWQ_CPU, &set);
+
+	ret = io_uring_register_iowq_aff(&ring, sizeof(set), &set);
+	if (ret) {
+		fprintf(stderr, "register aff: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
+	sqe->flags |= IOSQE_ASYNC;
+
+	io_uring_submit(&ring);
+
+	usleep(10000);
+
+	ret = verify_affinity(getpid(), sqpoll);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+static int test_invalid_cpu(void)
+{
+	struct io_uring_params p = { };
+	struct io_uring ring;
+	int ret, nr_cpus;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (nr_cpus < 0) {
+		perror("sysconf(_SC_NPROCESSORS_ONLN");
+		return T_EXIT_SKIP;
+	}
+
+	p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
+	p.sq_thread_cpu = 16 * nr_cpus;
+
+	ret = io_uring_queue_init_params(8, &ring, &p);
+	if (ret == -EPERM) {
+		return T_EXIT_SKIP;
+	} else if (ret != -EINVAL) {
+		fprintf(stderr, "Queue init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = test_invalid_cpu();
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test sqpoll cpu failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	ret = test(1);
+	if (ret == T_EXIT_SKIP) {
+		return T_EXIT_SKIP;
+	} else if (ret != T_EXIT_PASS) {
+		fprintf(stderr, "test sqpoll failed\n");
+		return T_EXIT_FAIL;
+	}
+
+	return T_EXIT_PASS;
+}
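For consumers, pinning a ring's io-wq workers is a single registration call. A minimal sketch, with the CPU number left to the caller (the helper name is illustrative):

#define _GNU_SOURCE
#include <sched.h>
#include "liburing.h"

/* Restrict all io-wq workers spawned for this ring to `cpu`. */
static int pin_iowq_workers(struct io_uring *ring, int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	/* workers created from now on inherit this affinity mask */
	return io_uring_register_iowq_aff(ring, sizeof(set), &set);
}

/* later: io_uring_unregister_iowq_aff(ring) lifts the restriction */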
diff --git a/test/xattr.c b/test/xattr.c
index d88059c..6448e06 100644
--- a/test/xattr.c
+++ b/test/xattr.c
@@ -51,10 +51,11 @@
 	}
 
 	ret = cqe->res;
-	if (ret == -EINVAL)
-		no_xattr = 1;
+	if (ret < 0) {
+		if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP)
+			no_xattr = 1;
+	}
 	io_uring_cqe_seen(ring, cqe);
-
 	return ret;
 }
 
@@ -126,8 +127,11 @@
 	}
 
 	ret = cqe->res;
+	if (ret < 0) {
+		if (ret == -EINVAL || ret == -EOPNOTSUPP)
+			no_xattr = 1;
+	}
 	io_uring_cqe_seen(ring, cqe);
-
 	return ret;
 }
 
@@ -210,14 +214,14 @@
 
 	/* Test reading attributes. */
 	value_len = io_uring_fgetxattr(&ring, fd, KEY1, value, XATTR_SIZE);
-	if (value_len != strlen(value) || strncmp(value, VALUE1, value_len)) {
+	if (value_len != strlen(VALUE1) || strncmp(value, VALUE1, value_len)) {
 		fprintf(stderr, "Error: fgetxattr expected value: %s, returned value: %s\n", VALUE1, value);
 		rc = -1;
 		goto Exit;
 	}
 
 	value_len = io_uring_fgetxattr(&ring, fd, KEY2, value, XATTR_SIZE);
-	if (value_len != strlen(value)|| strncmp(value, VALUE2, value_len)) {
+	if (value_len != strlen(VALUE2) || strncmp(value, VALUE2, value_len)) {
 		fprintf(stderr, "Error: fgetxattr expected value: %s, returned value: %s\n", VALUE2, value);
 		rc = -1;
 		goto Exit;
@@ -290,7 +294,6 @@
 /* Test driver for failure cases of fsetxattr and fgetxattr. */
 static int test_failure_fxattr(void)
 {
-	int rc = 0;
 	struct io_uring ring;
 	char value[XATTR_SIZE];
 
@@ -309,31 +312,36 @@
 	}
 
 	/* Test writing attributes. */
-	assert(io_uring_fsetxattr(&ring, -1, KEY1, VALUE1, strlen(VALUE1), 0) < 0);
-	assert(io_uring_fsetxattr(&ring, fd, NULL, VALUE1, strlen(VALUE1), 0) < 0);
-	assert(io_uring_fsetxattr(&ring, fd, KEY1, NULL,   strlen(VALUE1), 0) < 0);
-	assert(io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, 0,              0) == 0);
-	assert(io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, -1,             0) < 0);
+	if (io_uring_fsetxattr(&ring, -1, KEY1, VALUE1, strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_fsetxattr(&ring, fd, NULL, VALUE1, strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_fsetxattr(&ring, fd, KEY1, NULL,   strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, 0, 0) != 0)
+		return 1;
+	if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, -1, 0) >= 0)
+		return 1;
 
 	/* Test reading attributes. */
-	assert(io_uring_fgetxattr(&ring, -1, KEY1, value, XATTR_SIZE) < 0);
-	assert(io_uring_fgetxattr(&ring, fd, NULL, value, XATTR_SIZE) < 0);
-	assert(io_uring_fgetxattr(&ring, fd, KEY1, value, 0)          == 0);
+	if (io_uring_fgetxattr(&ring, -1, KEY1, value, XATTR_SIZE) >= 0)
+		return 1;
+	if (io_uring_fgetxattr(&ring, fd, NULL, value, XATTR_SIZE) >= 0)
+		return 1;
+	if (io_uring_fgetxattr(&ring, fd, KEY1, value, 0) != 0)
+		return 1;
 
 	/* Cleanup. */
 	close(fd);
 	unlink(FILENAME);
-
 	io_uring_queue_exit(&ring);
-
-	return rc;
+	return 0;
 }
 
 
 /* Test driver for failure cases for setxattr and getxattr. */
 static int test_failure_xattr(void)
 {
-	int rc = 0;
 	struct io_uring ring;
 	char value[XATTR_SIZE];
 
@@ -348,24 +356,33 @@
 	t_create_file(FILENAME, 0);
 
 	/* Test writing attributes. */
-	assert(io_uring_setxattr(&ring, "complete garbage", KEY1, VALUE1, strlen(VALUE1), 0) < 0);
-	assert(io_uring_setxattr(&ring, NULL,     KEY1, VALUE1, strlen(VALUE1), 0) < 0);
-	assert(io_uring_setxattr(&ring, FILENAME, NULL, VALUE1, strlen(VALUE1), 0) < 0);
-	assert(io_uring_setxattr(&ring, FILENAME, KEY1, NULL,   strlen(VALUE1), 0) < 0);
-	assert(io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, 0,              0) == 0);
+	if (io_uring_setxattr(&ring, "complete garbage", KEY1, VALUE1, strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_setxattr(&ring, NULL,     KEY1, VALUE1, strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_setxattr(&ring, FILENAME, NULL, VALUE1, strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_setxattr(&ring, FILENAME, KEY1, NULL,   strlen(VALUE1), 0) >= 0)
+		return 1;
+	if (io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, 0, 0) != 0)
+		return 1;
 
 	/* Test reading attributes. */
-	assert(io_uring_getxattr(&ring, "complete garbage", KEY1, value, XATTR_SIZE) < 0);
-	assert(io_uring_getxattr(&ring, NULL,     KEY1, value, XATTR_SIZE) < 0);
-	assert(io_uring_getxattr(&ring, FILENAME, NULL, value, XATTR_SIZE) < 0);
-	assert(io_uring_getxattr(&ring, FILENAME, KEY1, NULL,  XATTR_SIZE) == 0);
-	assert(io_uring_getxattr(&ring, FILENAME, KEY1, value, 0)          == 0);
+	if (io_uring_getxattr(&ring, "complete garbage", KEY1, value, XATTR_SIZE) >= 0)
+		return 1;
+	if (io_uring_getxattr(&ring, NULL,     KEY1, value, XATTR_SIZE) >= 0)
+		return 1;
+	if (io_uring_getxattr(&ring, FILENAME, NULL, value, XATTR_SIZE) >= 0)
+		return 1;
+	if (io_uring_getxattr(&ring, FILENAME, KEY1, NULL,  XATTR_SIZE) != 0)
+		return 1;
+	if (io_uring_getxattr(&ring, FILENAME, KEY1, value, 0) != 0)
+		return 1;
 
 	/* Cleanup. */
 	io_uring_queue_exit(&ring);
 	unlink(FILENAME);
-
-	return rc;
+	return 0;
 }
 
 /* Test for invalid SQE, this will cause a segmentation fault if enabled. */
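The io_uring_fsetxattr()/io_uring_fgetxattr() calls above are helpers local to this test; the liburing primitives underneath are io_uring_prep_fsetxattr() and io_uring_prep_fgetxattr(). A sketch of a set/get round trip on an open fd, with an arbitrary key and value:

/* Returns the value length on success or a negative errno;
 * -EOPNOTSUPP means the filesystem lacks xattr support. */
static int xattr_roundtrip(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[16];
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_fsetxattr(sqe, fd, "user.k", "val", 0, 3);
	io_uring_submit(ring);
	if (io_uring_wait_cqe(ring, &cqe))
		return -EIO;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (ret < 0)
		return ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_fgetxattr(sqe, fd, "user.k", buf, sizeof(buf));
	io_uring_submit(ring);
	if (io_uring_wait_cqe(ring, &cqe))
		return -EIO;
	ret = cqe->res;		/* value length on success */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}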
diff --git a/test/xfail_prep_link_timeout_out_of_scope.c b/test/xfail_prep_link_timeout_out_of_scope.c
new file mode 100644
index 0000000..0de7948
--- /dev/null
+++ b/test/xfail_prep_link_timeout_out_of_scope.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Check to see if the asan checks catch a stack-use-after-free for prep_link_timeout
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <assert.h>
+#include "liburing.h"
+#include "helpers.h"
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret < 0) {
+		printf("io_uring_queue_init ret %i\n", ret);
+		return T_EXIT_PASS; // this test expects an inverted exit code
+	}
+
+	// force the timespec out of scope; the test "passes" if asan catches this bug.
+	{
+		struct __kernel_timespec timespec;
+		timespec.tv_sec = 0;
+		timespec.tv_nsec = 5000;
+
+		sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_timeout(sqe, &timespec, 0, 0);
+		io_uring_sqe_set_data(sqe, (void *) 1);
+	}
+
+	ret = io_uring_submit_and_wait(&ring, 1);
+	printf("submit_and_wait %i\n", ret);
+
+	return T_EXIT_PASS; // this test expects an inverted exit code
+}
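For contrast, the correct lifetime: the kernel copies the timespec while the SQE is being submitted, so it must outlive io_uring_submit() but not the wait itself. A sketch of the fixed pattern, reusing the ring from a setup like the one above:

	{
		struct __kernel_timespec timespec = { .tv_nsec = 5000 };
		struct io_uring_sqe *sqe;

		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_timeout(sqe, &timespec, 0, 0);
		io_uring_sqe_set_data(sqe, (void *) 1);

		/* prep only records the pointer; it is io_uring_submit()
		 * that copies *timespec into the kernel */
		io_uring_submit(&ring);
	}
	/* timespec may go out of scope now; waiting does not touch it */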
diff --git a/test/xfail_register_buffers_out_of_scope.c b/test/xfail_register_buffers_out_of_scope.c
new file mode 100644
index 0000000..cbb4ca3
--- /dev/null
+++ b/test/xfail_register_buffers_out_of_scope.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Check to see if the asan checks catch a heap-use-after-free for io_uring_register_buffers
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <assert.h>
+#include <stdlib.h>
+#include "liburing.h"
+#include "helpers.h"
+
+#define BUFFERS     8
+#define BUFFER_SIZE 128
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct iovec *iovs;
+	int i;
+	int ret;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret < 0) {
+		printf("io_uring_queue_init ret %i\n", ret);
+		return T_EXIT_PASS; // this test expects an inverted exit code
+	}
+
+	iovs = calloc(BUFFERS, sizeof(struct iovec));
+	for (i = 0; i < BUFFERS; i++) {
+		iovs[i].iov_base = malloc(BUFFER_SIZE);
+		iovs[i].iov_len = BUFFER_SIZE;
+	}
+	// free one iov_base early; the test "passes" if asan catches this bug.
+	free(iovs[4].iov_base);
+
+	ret = io_uring_register_buffers(&ring, iovs, BUFFERS);
+	printf("io_uring_register_buffers %i\n", ret);
+
+	ret = io_uring_submit_and_wait(&ring, 1);
+	printf("submit_and_wait %i\n", ret);
+
+	return T_EXIT_PASS; // this test expects an inverted exit code
+}
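Again for contrast, the valid lifetime rules: io_uring_register_buffers() copies the iovec array during the call, so the array itself may be discarded afterwards, but every iov_base must stay allocated until the buffers are unregistered or the ring exits. A sketch under those assumptions, reusing the BUFFERS/BUFFER_SIZE values from this test:

	struct iovec iovs[BUFFERS];
	int i, ret;

	for (i = 0; i < BUFFERS; i++) {
		iovs[i].iov_base = malloc(BUFFER_SIZE);
		iovs[i].iov_len = BUFFER_SIZE;
	}

	ret = io_uring_register_buffers(&ring, iovs, BUFFERS);
	if (!ret) {
		/* ... issue IORING_OP_{READ,WRITE}_FIXED with buf_index i ... */
		io_uring_unregister_buffers(&ring);
	}

	/* only now is it safe to release the buffer memory */
	for (i = 0; i < BUFFERS; i++)
		free(iovs[i].iov_base);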