Merge changes If8668073,I92038175 into main

* changes:
  Merge tag '8.0.0' into AOSP
  Update version to 8.0.0.
diff --git a/README.md b/README.md
index c1a0a7d..f114ac6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-VIXL: ARMv8 Runtime Code Generation Library
-===========================================
+VIXL: ARMv8 Runtime Code Generation Library 8.0.0
+=================================================
 
 Contents:
 
@@ -48,7 +48,7 @@
 
 To build VIXL the following software is required:
 
- 1. Python 2.7
+ 1. Python 3.5+
  2. SCons 2.0
  3. GCC 4.8+ or Clang 4.0+
 
@@ -61,8 +61,8 @@
 
  1. Git
  2. [Google's `cpplint.py`][cpplint]
- 3. clang-format-4.0
- 4. clang-tidy-4.0
+ 3. clang-format 11+
+ 4. clang-tidy 11+
 
 Refer to the 'Usage' section for details.
 
diff --git a/SConstruct b/SConstruct
index bb8638c..b855d64 100644
--- a/SConstruct
+++ b/SConstruct
@@ -98,7 +98,9 @@
       'CCFLAGS' : ['-O3'],
       },
     'simulator:aarch64' : {
-      'CCFLAGS' : ['-DVIXL_INCLUDE_SIMULATOR_AARCH64'],
+      'CCFLAGS' : ['-DVIXL_INCLUDE_SIMULATOR_AARCH64',
+                   '-pthread'],
+      'LINKFLAGS' : ['-pthread']
       },
     'symbols:on' : {
       'CCFLAGS' : ['-g'],
@@ -120,6 +122,9 @@
     'coverage:on' : {
       'CCFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping'],
       'LINKFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping']
+      },
+    'implicit_checks:on' : {
+      'CCFLAGS' : ['-DVIXL_ENABLE_IMPLICIT_CHECKS'],
       }
     }
 
@@ -187,7 +192,7 @@
                     'AArch64. Set `target` to include `aarch64` or `a64`.')
 
 
-# Default variables may depend on each other, therefore we need this dictionnary
+# Default variables may depend on each other, therefore we need this dictionary
 # to be ordered.
 vars_default_handlers = OrderedDict({
     # variable_name    : [ 'default val', 'handler', 'validator']
@@ -265,6 +270,10 @@
     EnumVariable('negative_testing',
                   'Enable negative testing (needs exceptions)',
                  'off', allowed_values=['on', 'off']),
+    EnumVariable('implicit_checks',
+                 'Allow signals raised from simulated invalid (e.g: out of'
+                 + ' bounds) memory reads to be handled by the host.',
+                 'off', allowed_values=['on', 'off']),
     DefaultVariable('symbols', 'Include debugging symbols in the binaries',
                     ['on', 'off']),
     DefaultVariable('simulator', 'Simulators to include', ['aarch64', 'none']),
diff --git a/benchmarks/aarch64/bench-branch-link-masm.cc b/benchmarks/aarch64/bench-branch-link-masm.cc
index 59f141e..53a2b58 100644
--- a/benchmarks/aarch64/bench-branch-link-masm.cc
+++ b/benchmarks/aarch64/bench-branch-link-masm.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-branch-link.cc b/benchmarks/aarch64/bench-branch-link.cc
index 43d399e..b05e68d 100644
--- a/benchmarks/aarch64/bench-branch-link.cc
+++ b/benchmarks/aarch64/bench-branch-link.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-branch-masm.cc b/benchmarks/aarch64/bench-branch-masm.cc
index d395c18..8b98995 100644
--- a/benchmarks/aarch64/bench-branch-masm.cc
+++ b/benchmarks/aarch64/bench-branch-masm.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-branch.cc b/benchmarks/aarch64/bench-branch.cc
index b695d93..8d00941 100644
--- a/benchmarks/aarch64/bench-branch.cc
+++ b/benchmarks/aarch64/bench-branch.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-dataop.cc b/benchmarks/aarch64/bench-dataop.cc
index b7b5455..a6333d2 100644
--- a/benchmarks/aarch64/bench-dataop.cc
+++ b/benchmarks/aarch64/bench-dataop.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-mixed-disasm.cc b/benchmarks/aarch64/bench-mixed-disasm.cc
index 17de32e..785fb54 100644
--- a/benchmarks/aarch64/bench-mixed-disasm.cc
+++ b/benchmarks/aarch64/bench-mixed-disasm.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-mixed-masm.cc b/benchmarks/aarch64/bench-mixed-masm.cc
index a3bed0c..689fb16 100644
--- a/benchmarks/aarch64/bench-mixed-masm.cc
+++ b/benchmarks/aarch64/bench-mixed-masm.cc
@@ -24,13 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
diff --git a/benchmarks/aarch64/bench-mixed-sim.cc b/benchmarks/aarch64/bench-mixed-sim.cc
index 9739460..89e28ab 100644
--- a/benchmarks/aarch64/bench-mixed-sim.cc
+++ b/benchmarks/aarch64/bench-mixed-sim.cc
@@ -24,14 +24,13 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
 #include "globals-vixl.h"
 
 #include "aarch64/instructions-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
 
-#include "bench-utils.h"
-
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
 using namespace vixl;
diff --git a/benchmarks/aarch64/bench-utils.cc b/benchmarks/aarch64/bench-utils.cc
index d3ad507..7b5bafd 100644
--- a/benchmarks/aarch64/bench-utils.cc
+++ b/benchmarks/aarch64/bench-utils.cc
@@ -24,12 +24,13 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "bench-utils.h"
+
 #include <vector>
 
 #include "globals-vixl.h"
-#include "aarch64/macro-assembler-aarch64.h"
 
-#include "bench-utils.h"
+#include "aarch64/macro-assembler-aarch64.h"
 
 using namespace vixl;
 using namespace vixl::aarch64;
@@ -306,7 +307,7 @@
 
 void BenchCodeGenerator::BindAllPendingLabels() {
   while (!labels_.empty()) {
-    // BindPendingLables generates a branch over each block of bound labels.
+    // BindPendingLabels generates a branch over each block of bound labels.
     // This will be repeated for each call here, but the effect is minimal and
     // (empirically) we rarely accumulate more than 64 pending labels anyway.
     BindPendingLabels(UINT64_MAX);
diff --git a/benchmarks/aarch64/bench-utils.h b/benchmarks/aarch64/bench-utils.h
index a16bf62..feb9f0e 100644
--- a/benchmarks/aarch64/bench-utils.h
+++ b/benchmarks/aarch64/bench-utils.h
@@ -27,14 +27,14 @@
 #ifndef VIXL_AARCH64_BENCH_UTILS_H_
 #define VIXL_AARCH64_BENCH_UTILS_H_
 
+#include <list>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/time.h>
-
-#include <list>
 #include <vector>
 
 #include "globals-vixl.h"
+
 #include "aarch64/macro-assembler-aarch64.h"
 
 class BenchTimer {
@@ -90,7 +90,8 @@
     }
 
     char* end;
-    unsigned long run_time = strtoul(argv[1], &end, 0);  // NOLINT(runtime/int)
+    unsigned long run_time =  // NOLINT(google-runtime-int)
+        strtoul(argv[1], &end, 0);
     if ((end == argv[1]) || (run_time > UINT32_MAX)) {
       PrintUsage(argv[0]);
       status_ = kExitFailure;
@@ -242,7 +243,7 @@
   vixl::aarch64::MacroAssembler* masm_;
 
   // State for *rand48(), used to randomise code generation.
-  unsigned short rand_state_[3];  // NOLINT(runtime/int)
+  unsigned short rand_state_[3];  // NOLINT(google-runtime-int)
 
   uint32_t rnd_;
   int rnd_bits_;
diff --git a/doc/aarch64/topics/index.md b/doc/aarch64/topics/index.md
index 0e11450..90c1791 100644
--- a/doc/aarch64/topics/index.md
+++ b/doc/aarch64/topics/index.md
@@ -6,3 +6,4 @@
 
 * [Extending and customizing the disassembler](extending-the-disassembler.md)
 * [Using VIM YouCompleteMe with VIXL](ycm.md)
+* [Debugging with the VIXL Simulator](simulator-debugger.md)
diff --git a/doc/aarch64/topics/simulator-debugger.md b/doc/aarch64/topics/simulator-debugger.md
new file mode 100644
index 0000000..7d92b00
--- /dev/null
+++ b/doc/aarch64/topics/simulator-debugger.md
@@ -0,0 +1,114 @@
+Debugging with the VIXL Simulator
+=================================
+
+The VIXL AArch64 simulator contains a basic debugger which can be used to debug
+simulated applications. The debugger supports basic debugging features such as
+setting breakpoints, stepping through simulated instructions and printing
+simulator specific information, for example: printing the values of a register
+or printing instructions at specified addresses.
+
+Using the Debugger
+------------------
+
+In order to use the debugger it first needs to be enabled in the simulator.
+
+```C++
+    Decoder decoder;
+    Simulator simulator(&decoder);
+    simulator.SetDebuggerEnabled(true);
+```
+
+Once enabled, the debugger will be activated whenever a breakpoint (brk) is
+encountered by the simulator. For example:
+
+```asm
+    add x1, x0, #5
+    mov x2, #2
+
+    brk 0   // Debugger activated here.
+
+    sub x3, x1, x2
+```
+
+Further breakpoints can be set either programmatically or interactively in the
+debugger itself. For example, to set breakpoints programmatically:
+
+```C++
+    // 'func' is an AArch64 assembly function.
+    extern "C" void func();
+
+    Debugger* debugger = simulator.GetDebugger();
+
+    // Register a breakpoint at a fixed (absolute) address.
+    debugger->RegisterBreakpoint(0x00007ffbc6d38000);
+
+    // Register a breakpoint to an already existing assembly function.
+    debugger->RegisterBreakpoint(reinterpret_cast<uint64_t>(&func));
+```
+
+Or to set breakpoints interactively once the debugger has been activated:
+
+```sh
+    sim> break 0x00007ffbc6d38000
+```
+
+The debugger has a variety of useful commands to control program flow (e.g.
+step, next, continue) and inspect features of the running simulator (e.g.
+print, trace). To view a list of all supported commands
+use "help" at the debugger prompt.
+
+```sh
+    sim> help
+```
+
+Extending the Debugger
+----------------------
+
+The debugger can be extended with custom commands to allow for greater
+flexibility in debugging individual applications. This could be used for a
+variety of applications, for example printing out object specific information
+from an address.
+
+To create a custom debugger command, extend the DebuggerCmd class located in
+debugger-aarch64.h and implement its methods.
+
+```C++
+    class PrintObjectCmd : public DebuggerCmd {
+     public:
+      PrintObjectCmd(Simulator* sim)
+            : DebuggerCmd(sim,
+                          "printobject",
+                          "po",
+                          "<address>",
+                          "Print a custom object located at the given address.")
+      {}
+
+      // Called when the command word is given to the interactive debugger.
+      DebugReturn Action(const std::vector<std::string>& args) override {
+        // We want exactly 1 argument (an address) given to the printobject
+        // command.
+        if (args.size() != 1) {
+            fprintf(ostream_, "Error: incorrect command format.");
+            return DebugContinue;
+        }
+
+        auto addr = Debugger::ParseUint64String(args.front());
+        if (!addr) {
+            fprintf(ostream_, "Error: could not get address from string.");
+            return DebugContinue;
+        }
+        // Convert the address given to a custom object and then print it.
+        CustomObject* object = reinterpret_cast<CustomObject*>(*addr);
+        object->print();
+        return DebugContinue;
+      }
+    };
+```
+
+Then simply register the new command with the debugger.
+
+```C++
+    Debugger* debugger = simulator.GetDebugger();
+
+    debugger->RegisterCmd<PrintObjectCmd>();
+```
diff --git a/doc/range-limits.md b/doc/range-limits.md
new file mode 100644
index 0000000..cd7cf8b
--- /dev/null
+++ b/doc/range-limits.md
@@ -0,0 +1,148 @@
+Immediate Range Limits in VIXL
+==============================
+
+VIXL's macro assembler tries to increase the range of branches and literal loads
+automatically for you, but applications must still be aware of these extended
+limits, and stay within them, in order to ensure valid code is generated.
+
+In debug builds, assertions prevent exceeding these limits at run time. In
+release builds, for performance reasons, the application is responsible for
+staying within the limits.
+
+You should decide what corrections should be applied in your application if it
+exceeds these limits.
+
+Terms
+-----
+
+**Bind** assigning an address to a label such that the instructions that refer
+to the label can be assigned PC-relative offsets.
+
+**Forward** a forward branch or load literal will refer to a location that will
+be bound later in code generation, ie. at a higher address.
+
+**Backward** a backward branch or load literal refers to a location that has
+already been bound earlier in code generation, ie. at a lower address.
+
+**Instruction range** the range of values that can be encoded in the instruction
+to be generated. Outside the instruction range, additional instructions may be
+generated to increase the range, branching further than would be possible in
+one instruction, for example.
+
+**Veneer** a sequence of additional instructions produced to increase the
+instruction range.
+
+**Adjusted PC** the PC including its architecturally-defined offset. In AArch32
+T32, this is the current PC plus four bytes. In AArch64, there is no adjustment;
+Adjusted PC is equal to PC.
+
+AArch64
+-------
+
+### Branches
+
+All instructions and targets must be aligned to the instruction size, four
+bytes.
+
+#### Unconditional immediate branches (`B`)
+
+* Unconditional immediate branches have an instruction range of -134,217,728 to
++134,217,724 bytes from the current PC.
+* No veneers are applied to unconditional immediate branches to extend their
+instruction range.
+* Callers can use the function `IsValidImmPCOffset(UncondBranchType, offset)` to
+check `offset` (in units of instruction) is within the instruction range.
+
+#### Conditional branches (`B.cond`) and compare-and-branch (`CBZ`, `CBNZ`)
+
+* Conditional branch and compare-and-branch instructions have the same
+instruction range.
+* The instruction range is -1,048,576 to +1,048,572 bytes from the current PC.
+* Veneers are applied to extend the range to -134,217,724 to +135,266,296 bytes
+from the current PC.
+  * Unconditional branch range minus one instruction backwards.
+  * Unconditional branch range plus conditional branch range forwards.
+* Callers can use the functions `IsValidImmPCOffset(CondBranchType, offset)` and
+`IsValidImmPCOffset(CompareBranchType, offset)` to check `offset` (in units of
+instruction) is within the instruction range.
+
+#### Test-and-branch (`TBZ`, `TBNZ`)
+
+* Test-and-branch instructions have an instruction range of -32,768 to +32,764
+bytes from the current PC.
+* Veneers are applied to extend the range to -134,217,724 to +134,250,488 bytes
+from the current PC.
+  * Unconditional branch range minus one instruction backwards.
+  * Unconditional branch range plus test-and-branch range forwards.
+* Callers can use the function `IsValidImmPCOffset(TestBranchType, offset)` to
+check `offset` (in units of instruction) is within the instruction range.
+
+### Literals
+
+#### Compute PC-relative address (`ADR`)
+
+* Compute PC-relative address instructions have an instruction range of
+-1,048,576 to +1,048,575 bytes from the current PC.
+* No veneers are applied to extend the instruction range.
+* Callers can use `IsInt21(offset)` to check `offset` (in bytes) is within the
+instruction range.
+
+#### Load from PC-relative address (`LDR`)
+
+* Load from PC-relative address instructions have an instruction range of
+-1,048,576 to +1,048,572 bytes from the current PC. The offset must be four-byte
+aligned.
+* Automatically-placed literals (eg. those created by `Ldr(reg, literal_value)`)
+will be emitted into code such that they are in range of the instructions that
+refer to them.
+* Veneers are not applied to manually-placed literals, ie. those created by
+`Literal<T> x(value)` and emitted by `place()`.
+* Callers can use `IsInt19(offset)` to check `offset` (in units of instruction)
+is within the instruction range.
+
+AArch32
+-------
+
+Limits stated in this section relate to the T32 instruction encodings only.
+
+### Branches
+
+#### Unconditional immediate branches (`B`)
+
+* Unconditional immediate branches have an instruction range of -16,777,216 to
++16,777,214 bytes from the current adjusted PC.
+* Veneers are applied to forward branches to extend them to an unlimited range.
+* No veneers are applied to backward branches.
+
+#### Conditional immediate branches (`B`)
+
+* Conditional immediate branches have an instruction range of -1,048,576 to
++1,048,574 bytes from the current adjusted PC.
+* Veneers are applied to forward branches to extend them to an unlimited range.
+* Veneers are applied to backward branches to extend the range to that of
+unconditional immediate branches, -16,777,216 bytes from the current adjusted
+PC.
+
+#### Compare and branch (`CBZ`, `CBNZ`)
+
+* Compare and branch has an instruction range of 0 to +126 bytes from the
+current adjusted PC.
+* Veneers are applied to forward branches to extend them to an unlimited range.
+* Veneers are applied to backward branches to extend the range to that of
+unconditional immediate branches, -16,777,216 bytes from the current adjusted
+PC.
+
+### Literals
+
+#### Compute/load PC-relative address (`ADR`, `LDR`)
+
+* Compute and load PC-relative address instructions have the same instruction
+range.
+* The instruction range is -4,095 to +4,095 bytes from the current adjusted PC.
+The PC is aligned down to a four-byte boundary before the offset is added.
+* Automatically-placed literals (ie. those created by `Literal<T> x(value)`)
+will be emitted into code such that they are in range of the instructions that
+refer to them.
+* Veneers are not applied to manually-placed literals, ie. those created by
+`Literal<T> x(value, RawLiteral::kManuallyPlaced)` and emitted by `Place()`.
+
diff --git a/examples/aarch64/add2-vectors.cc b/examples/aarch64/add2-vectors.cc
index 85fc5c7..6104604 100644
--- a/examples/aarch64/add2-vectors.cc
+++ b/examples/aarch64/add2-vectors.cc
@@ -84,6 +84,8 @@
 }
 
 
+#ifndef TEST_EXAMPLES
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 void PrintVector(const uint8_t* vec, unsigned num) {
   unsigned i;
   printf("( ");
@@ -95,9 +97,9 @@
   }
   printf(" )\n");
 }
+#endif
 
 
-#ifndef TEST_EXAMPLES
 int main(void) {
   MacroAssembler masm;
 
diff --git a/examples/aarch64/custom-disassembler.cc b/examples/aarch64/custom-disassembler.cc
index 9ea6aac..49c9565 100644
--- a/examples/aarch64/custom-disassembler.cc
+++ b/examples/aarch64/custom-disassembler.cc
@@ -24,9 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "custom-disassembler.h"
+
 #include <regex>
 
-#include "custom-disassembler.h"
 #include "examples.h"
 
 using namespace vixl;
@@ -106,7 +107,7 @@
 
 // We override this method to add a comment to some instructions. Helpers from
 // the vixl::Instruction class can be used to analyse the instruction being
-// disasssembled.
+// disassembled.
 void CustomDisassembler::Visit(Metadata* metadata, const Instruction* instr) {
   vixl::aarch64::Disassembler::Visit(metadata, instr);
   const std::string& form = (*metadata)["form"];
diff --git a/examples/aarch64/debugging.cc b/examples/aarch64/debugging.cc
new file mode 100644
index 0000000..de167f3
--- /dev/null
+++ b/examples/aarch64/debugging.cc
@@ -0,0 +1,87 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "examples.h"
+
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+
+using namespace vixl;
+using namespace vixl::aarch64;
+
+#define __ masm->
+
+void GenerateDebugExample(MacroAssembler* masm) {
+  // Create a breakpoint here to break into the debugger.
+  __ Brk(0);
+
+  // Do some arithmetic.
+  __ Add(x1, x0, 5);
+  __ Mov(x2, 2);
+  __ Sub(x3, x1, x2);
+
+  __ Ret();
+}
+
+#ifndef TEST_EXAMPLES
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+int main(void) {
+  MacroAssembler masm;
+
+  // Generate the code for the example function.
+  Label debug_example;
+  masm.Bind(&debug_example);
+  GenerateDebugExample(&masm);
+  masm.FinalizeCode();
+
+  Instruction* start = masm.GetLabelAddress<Instruction*>(&debug_example);
+
+  // Disassemble the generated code.
+  PrintDisassembler disassembler(stdout);
+  disassembler.DisassembleBuffer(start, masm.GetSizeOfCodeGenerated());
+
+  Decoder decoder;
+  Simulator simulator(&decoder);
+
+  simulator.SetColouredTrace(true);
+  simulator.SetDebuggerEnabled(true);
+
+  int32_t input_a = 1;
+  int32_t input_b = 2;
+  simulator.WriteWRegister(0, input_a);
+  simulator.WriteWRegister(1, input_b);
+  simulator.RunFrom(start);
+  printf("The final result is %ld\n", simulator.ReadXRegister(3));
+
+  return 0;
+}
+
+#else
+int main(void) { return 0; }
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+#endif  // TEST_EXAMPLES
diff --git a/examples/aarch64/disasm.cc b/examples/aarch64/disasm.cc
index 1c58027..f546dcf 100644
--- a/examples/aarch64/disasm.cc
+++ b/examples/aarch64/disasm.cc
@@ -30,6 +30,7 @@
 #include <string.h>
 
 #include "code-buffer-vixl.h"
+
 #include "aarch64/decoder-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 
diff --git a/examples/aarch64/getting-started.cc b/examples/aarch64/getting-started.cc
index a083498..dc9705e 100644
--- a/examples/aarch64/getting-started.cc
+++ b/examples/aarch64/getting-started.cc
@@ -24,11 +24,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "executable-memory.h"
+
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
 
-#include "executable-memory.h"
-
 using namespace vixl;
 using namespace vixl::aarch64;
 
@@ -59,7 +59,7 @@
 
   simulator.WriteXRegister(0, 0x8899aabbccddeeff);
   simulator.RunFrom(masm.GetLabelAddress<Instruction *>(&demo));
-  printf("x0 = %" PRIx64 "\n", simulator.ReadXRegister(0));
+  printf("x0 = 0x%" PRIx64 "\n", simulator.ReadXRegister(0));
 
 #else
   byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
@@ -70,7 +70,9 @@
       memory.GetEntryPoint<uint64_t (*)(uint64_t)>(demo);
   uint64_t input_value = 0x8899aabbccddeeff;
   uint64_t output_value = (*demo_function)(input_value);
-  printf("native: demo(0x%016lx) = 0x%016lx\n", input_value, output_value);
+  printf("native: demo(0x%" PRIx64 ") = 0x%" PRIx64 "\n",
+         input_value,
+         output_value);
 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
 
   return 0;
diff --git a/examples/aarch64/literal.cc b/examples/aarch64/literal.cc
index 43effe5..5ddafac 100644
--- a/examples/aarch64/literal.cc
+++ b/examples/aarch64/literal.cc
@@ -84,6 +84,8 @@
          b,
          simulator.ReadXRegister(0));
 
+  free(code);
+
   return simulator.ReadXRegister(0);
 }
 #endif
diff --git a/examples/aarch64/neon-matrix-multiply.cc b/examples/aarch64/neon-matrix-multiply.cc
index f56c410..17fe70a 100644
--- a/examples/aarch64/neon-matrix-multiply.cc
+++ b/examples/aarch64/neon-matrix-multiply.cc
@@ -75,7 +75,7 @@
   __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x2));
 
   // Initialise vectors of the output matrix with zeros.
-  // This is only for the purposes of showing how this can be achived
+  // This is only for the purposes of showing how this can be achieved
   // but technically this is not required because we overwrite all lanes
   // of the output vectors.
   __ Movi(v0.V16B(), 0);
diff --git a/examples/aarch64/non-const-visitor.cc b/examples/aarch64/non-const-visitor.cc
index 307b618..ca16b6c 100644
--- a/examples/aarch64/non-const-visitor.cc
+++ b/examples/aarch64/non-const-visitor.cc
@@ -24,10 +24,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "non-const-visitor.h"
+
 #include <regex>
 
 #include "examples.h"
-#include "non-const-visitor.h"
 
 using namespace vixl;
 using namespace vixl::aarch64;
diff --git a/examples/aarch64/simulator_interception.cc b/examples/aarch64/simulator_interception.cc
new file mode 100644
index 0000000..1f5d266
--- /dev/null
+++ b/examples/aarch64/simulator_interception.cc
@@ -0,0 +1,159 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "examples.h"
+
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+
+using namespace vixl;
+using namespace vixl::aarch64;
+
+#define __ masm->
+
+enum Result { FAILURE, SUCCESS };
+
+// This will be called via a runtime call.
+extern "C" int example_1() { return SUCCESS; }
+
+// This will never be called, instead it will be intercepted and 'callback'
+// will be called.
+uint32_t example_2() { return FAILURE; }
+
+uint32_t example_3(uint32_t num, float f) {
+  USE(f);
+  return num;
+}
+
+// This will be called instead of example_2.
+uint32_t callback(uint64_t original_target) {
+  USE(original_target);
+  return SUCCESS;
+}
+
+void GenerateInterceptionExamples(MacroAssembler* masm) {
+  // Preserve lr, since the calls will overwrite it.
+  __ Push(xzr, lr);
+
+  // example_1 will be intercepted and called through a runtime call.
+  __ Mov(x16, reinterpret_cast<uint64_t>(example_1));
+  __ Blr(x16);
+  __ Mov(w1, w0);
+
+  // example_2 will be intercepted and callback will be called instead.
+  __ Mov(x16, reinterpret_cast<uint64_t>(example_2));
+  __ Blr(x16);
+  __ Mov(w2, w0);
+
+  // Pass FAILURE as a parameter.
+  __ Mov(x0, FAILURE);
+  __ Fmov(s0, 3.5);
+  // example_3 will be intercepted and lambda callback will be called instead.
+  __ Mov(x16, reinterpret_cast<uint64_t>(example_3));
+  __ Blr(x16);
+  __ Mov(w3, w0);
+
+  // Restore lr and return.
+  __ Pop(lr, xzr);
+  __ Ret();
+}
+
+#ifndef TEST_EXAMPLES
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+int main(void) {
+  MacroAssembler masm;
+
+  // Generate the code for the example function.
+  Label call_simulator_interception;
+  masm.Bind(&call_simulator_interception);
+  GenerateInterceptionExamples(&masm);
+  masm.FinalizeCode();
+
+  Instruction* start =
+      masm.GetLabelAddress<Instruction*>(&call_simulator_interception);
+
+  // Disassemble the generated code.
+  PrintDisassembler disassembler(stdout);
+  disassembler.DisassembleBuffer(start, masm.GetSizeOfCodeGenerated());
+
+  Decoder decoder;
+  Simulator simulator(&decoder);
+
+  // Register interceptions to the branches, example_1 will be called via a
+  // runtime call and callback will be called instead of example_2.
+  simulator.RegisterBranchInterception(example_1);
+  simulator.RegisterBranchInterception(example_2, callback);
+
+  // Lambda callbacks can be used to arbitrarily modify the simulator.
+  simulator.RegisterBranchInterception(
+      example_3, [&simulator](uint64_t original_target) {
+        USE(original_target);
+        ABI abi;
+
+        uint32_t param1 = simulator.ReadGenericOperand<uint32_t>(
+            abi.GetNextParameterGenericOperand<uint32_t>());
+        float param2 = simulator.ReadGenericOperand<float>(
+            abi.GetNextParameterGenericOperand<float>());
+
+        if (param1 == FAILURE && param2 == 3.5) {
+          simulator.WriteWRegister(0, SUCCESS);
+        } else {
+          simulator.WriteWRegister(0, FAILURE);
+        }
+      });
+
+  simulator.RunFrom(start);
+
+  uint32_t result_1 = simulator.ReadWRegister(1);
+  if (result_1 == SUCCESS) {
+    printf("SUCCESS: example_1 was called via a runtime call.\n");
+  } else {
+    printf("ERROR: example_1 was not called.\n");
+  }
+
+  uint32_t result_2 = simulator.ReadWRegister(2);
+  if (result_2 == SUCCESS) {
+    printf("SUCCESS: callback was called instead of example_2.\n");
+  } else {
+    printf("ERROR: example_2 was called incorrectly.\n");
+  }
+
+  uint32_t result_3 = simulator.ReadWRegister(0);
+  if (result_3 == SUCCESS) {
+    printf("SUCCESS: Lambda callback called instead of example_3.\n");
+  } else {
+    printf("ERROR: example_3 was called instead of the lambda.\n");
+  }
+
+  return 0;
+}
+#else
+// TODO: Support running natively.
+int main(void) { return 0; }
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+#endif  // TEST_EXAMPLES
diff --git a/src/aarch32/disasm-aarch32.cc b/src/aarch32/disasm-aarch32.cc
index 535f60c..54dafe1 100644
--- a/src/aarch32/disasm-aarch32.cc
+++ b/src/aarch32/disasm-aarch32.cc
@@ -348,7 +348,7 @@
     *lane = (value >> 2) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }
 
@@ -365,7 +365,7 @@
     *lane = (value >> 2) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }
 
@@ -382,7 +382,7 @@
     *lane = (value >> 3) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }
 
@@ -60977,7 +60977,7 @@
                         Condition condition((instr >> 28) & 0xf);
                         unsigned rd = (instr >> 12) & 0xf;
                         uint32_t imm = ImmediateA32::Decode(instr & 0xfff);
-                        Location location(-imm, kA32PcDelta);
+                        Location location(UnsignedNegate(imm), kA32PcDelta);
                         // ADR{<c>}{<q>} <Rd>, <label> ; A2
                         adr(condition, Best, Register(rd), &location);
                         break;
diff --git a/src/aarch32/disasm-aarch32.h b/src/aarch32/disasm-aarch32.h
index 679f47b..4696408 100644
--- a/src/aarch32/disasm-aarch32.h
+++ b/src/aarch32/disasm-aarch32.h
@@ -36,6 +36,12 @@
 #include "aarch32/constants-aarch32.h"
 #include "aarch32/operands-aarch32.h"
 
+// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
+// definition.
+#if defined(_MSC_VER) && defined(mvn)
+#undef mvn
+#endif
+
 namespace vixl {
 namespace aarch32 {
 
diff --git a/src/aarch32/instructions-aarch32.cc b/src/aarch32/instructions-aarch32.cc
index 92450d4..f3ed0e0 100644
--- a/src/aarch32/instructions-aarch32.cc
+++ b/src/aarch32/instructions-aarch32.cc
@@ -636,22 +636,17 @@
 }
 
 
-static inline uint32_t ror(uint32_t x, int i) {
-  VIXL_ASSERT((0 < i) && (i < 32));
-  return (x >> i) | (x << (32 - i));
-}
-
-
 bool ImmediateT32::IsImmediateT32(uint32_t imm) {
   /* abcdefgh abcdefgh abcdefgh abcdefgh */
-  if ((imm ^ ror(imm, 8)) == 0) return true;
+  if (AllBytesMatch(imm)) return true;
   /* 00000000 abcdefgh 00000000 abcdefgh */
   /* abcdefgh 00000000 abcdefgh 00000000 */
-  if ((imm ^ ror(imm, 16)) == 0 &&
-      (((imm & 0xff00) == 0) || ((imm & 0xff) == 0)))
+  if (AllHalfwordsMatch(imm) &&
+      (((imm & 0xff00) == 0) || ((imm & 0xff) == 0))) {
     return true;
+  }
   /* isolate least-significant set bit */
-  uint32_t lsb = imm & -imm;
+  uint32_t lsb = imm & UnsignedNegate(imm);
   /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
   * avoid overflow (underflow is always a successful case) */
   return ((imm >> 8) < lsb);
@@ -697,12 +692,12 @@
   if (imm < 256) return true;
   /* avoid getting confused by wrapped-around bytes (this transform has no
    * effect on pass/fail results) */
-  if (imm & 0xff000000) imm = ror(imm, 16);
+  if (imm & 0xff000000) imm = static_cast<uint32_t>(RotateRight(imm, 16, 32));
   /* copy odd-numbered set bits into even-numbered bits immediately below, so
    * that the least-significant set bit is always an even bit */
   imm = imm | ((imm >> 1) & 0x55555555);
   /* isolate least-significant set bit (always even) */
-  uint32_t lsb = imm & -imm;
+  uint32_t lsb = imm & UnsignedNegate(imm);
   /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
    * avoid overflow (underflow is always a successful case) */
   return ((imm >> 8) < lsb);
diff --git a/src/aarch32/instructions-aarch32.h b/src/aarch32/instructions-aarch32.h
index e2c95d1..393f1ea 100644
--- a/src/aarch32/instructions-aarch32.h
+++ b/src/aarch32/instructions-aarch32.h
@@ -40,6 +40,8 @@
 
 #if defined(__arm__) && !defined(__SOFTFP__)
 #define HARDFLOAT __attribute__((noinline, pcs("aapcs-vfp")))
+#elif defined(_MSC_VER)
+#define HARDFLOAT __declspec(noinline)
 #else
 #define HARDFLOAT __attribute__((noinline))
 #endif
@@ -1040,7 +1042,9 @@
   const char* GetName() const { return (IsPlus() ? "" : "-"); }
   bool IsPlus() const { return sign_ == plus; }
   bool IsMinus() const { return sign_ == minus; }
-  int32_t ApplyTo(uint32_t value) { return IsPlus() ? value : -value; }
+  int32_t ApplyTo(uint32_t value) {
+    return IsPlus() ? value : UnsignedNegate(value);
+  }
 
  private:
   SignType sign_;
diff --git a/src/aarch32/location-aarch32.h b/src/aarch32/location-aarch32.h
index 512b9c7..2fd4b9a 100644
--- a/src/aarch32/location-aarch32.h
+++ b/src/aarch32/location-aarch32.h
@@ -71,6 +71,8 @@
 #endif
   }
 
+  Location(Location&&) = default; // movable
+
   bool IsReferenced() const { return referenced_; }
 
  private:
@@ -217,7 +219,7 @@
 
  protected:
   // Types passed to LocationBase. Must be distinct for unbound Locations (not
-  // relevant for bound locations, as they don't have a correspoding
+  // relevant for bound locations, as they don't have a corresponding
   // PoolObject).
   static const int kRawLocation = 0;  // Will not be used by the pool manager.
   static const int kVeneerType = 1;
@@ -318,6 +320,12 @@
         addr_(addr),
         manually_placed_(false),
         deletion_policy_(deletion_policy) {}
+
+  // Not copyable: an instruction must not appear to refer to multiple literals.
+  RawLiteral(const RawLiteral&) = delete;
+
+  RawLiteral(RawLiteral &&) = default; // movable
+
   const void* GetDataAddress() const { return addr_; }
   int GetSize() const { return GetPoolObjectSizeInBytes(); }
 
diff --git a/src/aarch32/macro-assembler-aarch32.cc b/src/aarch32/macro-assembler-aarch32.cc
index 56c0ffb..9ec86fb 100644
--- a/src/aarch32/macro-assembler-aarch32.cc
+++ b/src/aarch32/macro-assembler-aarch32.cc
@@ -266,8 +266,8 @@
 
   uint32_t load_store_offset = offset & extra_offset_mask;
   uint32_t add_offset = offset & ~extra_offset_mask;
-  if ((add_offset != 0) &&
-      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
+  if ((add_offset != 0) && (IsModifiedImmediate(offset) ||
+                            IsModifiedImmediate(UnsignedNegate(offset)))) {
     load_store_offset = 0;
     add_offset = offset;
   }
@@ -288,7 +288,7 @@
       // of ADR -- to get behaviour like loads and stores. This ADR can handle
       // at least as much offset as the load_store_offset so it can replace it.
 
-      uint32_t sub_pc_offset = (-offset) & 0xfff;
+      uint32_t sub_pc_offset = UnsignedNegate(offset) & 0xfff;
       load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
       add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
 
@@ -599,7 +599,7 @@
     Vmsr(FPSCR, tmp);
     Pop(tmp);
     Msr(APSR_nzcvqg, tmp);
-    // Restore the regsisters.
+    // Restore the registers.
     if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
     Vpop(Untyped64, DRegisterList(d0, 8));
     Pop(RegisterList(saved_registers_mask));
@@ -1245,6 +1245,53 @@
 }
 
 
+void MacroAssembler::Delegate(InstructionType type,
+                              InstructionCondSizeL instruction,
+                              Condition cond,
+                              EncodingSize size,
+                              Location* location) {
+  VIXL_ASSERT(type == kB);
+
+  CONTEXT_SCOPE;
+
+  // Apply veneer to increase range of backwards conditional branches.
+  // This replaces:
+  //   label:
+  //    <instructions>
+  //    bcond label   ; T3
+  // With:
+  //   label:
+  //    <instructions>
+  //    binvcond skip ; T1
+  //    b label       ; T4
+  //   skip:
+  Location::Offset offset = location->GetLocation() -
+    (GetCursorOffset() + GetArchitectureStatePCOffset());
+  if (IsUsingT32() && location->IsBound() && ((offset & 0x1) == 0) &&
+      !cond.Is(al) && cond.IsNotNever()) {
+    // Bound locations must be earlier in the code.
+    VIXL_ASSERT(offset < 0);
+
+    // The offset must be within range of a T4 branch, accounting for the
+    // conditional branch (T1) we emit first, in order to jump over it.
+    offset -= k16BitT32InstructionSizeInBytes;
+    if (offset >= -16777216) {
+      CodeBufferCheckScope scope(this, k16BitT32InstructionSizeInBytes +
+                                       k32BitT32InstructionSizeInBytes);
+      Label skip;
+      b(cond.Negate(), Narrow, &skip);
+      b(location);
+      Bind(&skip);
+      return;
+    } else {
+      VIXL_ABORT_WITH_MSG("Conditional branch too far for veneer.\n");
+    }
+  }
+
+  Assembler::Delegate(type, instruction, cond, size, location);
+}
+
+
 template <typename T>
 static inline bool IsI64BitPattern(T imm) {
   for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index 390b908..702f7cc 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -908,6 +908,12 @@
                         InstructionRL instruction,
                         Register rn,
                         Location* location) VIXL_OVERRIDE;
+  // B
+  virtual void Delegate(InstructionType type,
+                        InstructionCondSizeL instruction,
+                        Condition cond,
+                        EncodingSize size,
+                        Location* location) VIXL_OVERRIDE;
   // VMOV
   virtual void Delegate(InstructionType type,
                         InstructionCondDtSSop instruction,
diff --git a/src/aarch32/operands-aarch32.h b/src/aarch32/operands-aarch32.h
index 9a143d4..1f01d81 100644
--- a/src/aarch32/operands-aarch32.h
+++ b/src/aarch32/operands-aarch32.h
@@ -190,7 +190,7 @@
   }
 
  private:
-// Forbid implicitely creating operands around types that cannot be encoded
+// Forbid implicitly creating operands around types that cannot be encoded
 // into a uint32_t without loss.
 #if __cplusplus >= 201103L
   Operand(int64_t) = delete;   // NOLINT(runtime/explicit)
@@ -615,7 +615,7 @@
 //     - a shifted index register <Rm>, <shift> #<amount>
 //
 //   The index register may have an associated {+/-} sign,
-//   which if ommitted, defaults to + .
+//   which if omitted, defaults to + .
 //
 //   We have two constructors for the offset:
 //
diff --git a/src/aarch64/abi-aarch64.h b/src/aarch64/abi-aarch64.h
index 7e6cd9a..388cf10 100644
--- a/src/aarch64/abi-aarch64.h
+++ b/src/aarch64/abi-aarch64.h
@@ -159,8 +159,8 @@
 inline GenericOperand ABI::GetReturnGenericOperand<void>() const {
   return GenericOperand();
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_AARCH64_ABI_AARCH64_H_
 
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 895e8c5..8e7cee5 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -25,9 +25,10 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
+#include "assembler-aarch64.h"
+
 #include <cmath>
 
-#include "assembler-aarch64.h"
 #include "macro-assembler-aarch64.h"
 
 namespace vixl {
@@ -1917,6 +1918,12 @@
 }
 
 
+void Assembler::sysl(int op, const Register& xt) {
+  VIXL_ASSERT(xt.Is64Bits());
+  Emit(SYSL | SysOp(op) | Rt(xt));
+}
+
+
 void Assembler::dc(DataCacheOp op, const Register& rt) {
   if (op == CVAP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCPoP));
   if (op == CVADP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCCVADP));
@@ -1929,6 +1936,35 @@
   sys(op, rt);
 }
 
+void Assembler::gcspushm(const Register& rt) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
+  sys(GCSPUSHM, rt);
+}
+
+void Assembler::gcspopm(const Register& rt) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
+  sysl(GCSPOPM, rt);
+}
+
+
+void Assembler::gcsss1(const Register& rt) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
+  sys(GCSSS1, rt);
+}
+
+
+void Assembler::gcsss2(const Register& rt) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kGCS));
+  sysl(GCSSS2, rt);
+}
+
+
+void Assembler::chkfeat(const Register& rd) {
+  VIXL_ASSERT(rd.Is(x16));  // Only x16 is accepted as the operand.
+  USE(rd);                  // rd is read by the assertion only.
+  hint(CHKFEAT);            // Emitted as a hint; rd is not encoded here.
+}
+
 
 void Assembler::hint(SystemHint code) { hint(static_cast<int>(code)); }
 
@@ -1939,6 +1975,542 @@
 }
 
 
+// MTE.
+
+void Assembler::addg(const Register& xd,
+                     const Register& xn,
+                     int offset,
+                     int tag_offset) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0x91800000 | RdSP(xd) | RnSP(xn) |
+       ImmUnsignedField<21, 16>(offset / kMTETagGranuleInBytes) |
+       ImmUnsignedField<13, 10>(tag_offset));
+}
+
+void Assembler::gmi(const Register& xd,
+                    const Register& xn,
+                    const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac01400 | Rd(xd) | RnSP(xn) | Rm(xm));
+}
+
+void Assembler::irg(const Register& xd,
+                    const Register& xn,
+                    const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac01000 | RdSP(xd) | RnSP(xn) | Rm(xm));
+}
+
+void Assembler::ldg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(addr.IsImmediateOffset());
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0xd9600000 | Rt(xt) | RnSP(addr.GetBaseRegister()) |
+       ImmField<20, 12>(offset / static_cast<int>(kMTETagGranuleInBytes)));
+}
+
+void Assembler::StoreTagHelper(const Register& xt,
+                               const MemOperand& addr,
+                               Instr op) {  // op: opcode bits to OR in.
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));  // Granule-aligned.
+
+  Instr addr_mode;
+  if (addr.IsImmediateOffset()) {
+    addr_mode = 2;  // Selector for plain immediate-offset addressing.
+  } else if (addr.IsImmediatePreIndex()) {
+    addr_mode = 3;  // Selector for pre-indexed addressing.
+  } else {
+    VIXL_ASSERT(addr.IsImmediatePostIndex());
+    addr_mode = 1;  // Selector for post-indexed addressing.
+  }
+  // addr_mode lands at bit 10; the offset is encoded in tag-granule units.
+  Emit(op | RdSP(xt) | RnSP(addr.GetBaseRegister()) | (addr_mode << 10) |
+       ImmField<20, 12>(offset / static_cast<int>(kMTETagGranuleInBytes)));
+}
+
+void Assembler::st2g(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9a00000);
+}
+
+void Assembler::stg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9200000);
+}
+
+void Assembler::stgp(const Register& xt1,
+                     const Register& xt2,
+                     const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Instr addr_mode;
+  if (addr.IsImmediateOffset()) {
+    addr_mode = 2;
+  } else if (addr.IsImmediatePreIndex()) {
+    addr_mode = 3;
+  } else {
+    VIXL_ASSERT(addr.IsImmediatePostIndex());
+    addr_mode = 1;
+  }
+
+  Emit(0x68000000 | RnSP(addr.GetBaseRegister()) | (addr_mode << 23) |
+       ImmField<21, 15>(offset / static_cast<int>(kMTETagGranuleInBytes)) |
+       Rt2(xt2) | Rt(xt1));
+}
+
+void Assembler::stz2g(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9e00000);
+}
+
+void Assembler::stzg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9600000);
+}
+
+void Assembler::subg(const Register& xd,
+                     const Register& xn,
+                     int offset,
+                     int tag_offset) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0xd1800000 | RdSP(xd) | RnSP(xn) |
+       ImmUnsignedField<21, 16>(offset / kMTETagGranuleInBytes) |
+       ImmUnsignedField<13, 10>(tag_offset));
+}
+
+void Assembler::subp(const Register& xd,
+                     const Register& xn,
+                     const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac00000 | Rd(xd) | RnSP(xn) | RmSP(xm));
+}
+
+void Assembler::subps(const Register& xd,
+                      const Register& xn,
+                      const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0xbac00000 | Rd(xd) | RnSP(xn) | RmSP(xm));
+}
+
+void Assembler::cpye(const Register& rd,
+                     const Register& rs,
+                     const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d800400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyen(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d80c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyern(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d808400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyewn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d804400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfe(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19800400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfen(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1980c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfern(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19808400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfewn(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19804400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfm(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19400400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfmn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1940c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfmrn(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19408400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfmwn(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19404400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfp(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19000400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfpn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1900c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfprn(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19008400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyfpwn(const Register& rd,
+                        const Register& rs,
+                        const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x19004400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpym(const Register& rd,
+                     const Register& rs,
+                     const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d400400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpymn(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d40c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpymrn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d408400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpymwn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d404400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyp(const Register& rd,
+                     const Register& rs,
+                     const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d000400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpypn(const Register& rd,
+                      const Register& rs,
+                      const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d00c400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpyprn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d008400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::cpypwn(const Register& rd,
+                       const Register& rs,
+                       const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero());
+
+  Emit(0x1d004400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::sete(const Register& rd,
+                     const Register& rn,
+                     const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c08400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::seten(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c0a400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setge(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc08400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setgen(const Register& rd,
+                       const Register& rn,
+                       const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc0a400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setgm(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc04400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setgmn(const Register& rd,
+                       const Register& rn,
+                       const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc06400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setgp(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc00400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setgpn(const Register& rd,
+                       const Register& rn,
+                       const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x1dc02400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setm(const Register& rd,
+                     const Register& rn,
+                     const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c04400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setmn(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c06400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setp(const Register& rd,
+                     const Register& rn,
+                     const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c00400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::setpn(const Register& rd,
+                      const Register& rn,
+                      const Register& rs) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS));
+  VIXL_ASSERT(!AreAliased(rd, rn, rs));
+  VIXL_ASSERT(!rd.IsZero() && !rn.IsZero());
+
+  Emit(0x19c02400 | Rd(rd) | Rn(rn) | Rs(rs));
+}
+
+void Assembler::abs(const Register& rd, const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  Emit(0x5ac02000 | SF(rd) | Rd(rd) | Rn(rn));
+}
+
+void Assembler::cnt(const Register& rd, const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  Emit(0x5ac01c00 | SF(rd) | Rd(rd) | Rn(rn));
+}
+
+void Assembler::ctz(const Register& rd, const Register& rn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  Emit(0x5ac01800 | SF(rd) | Rd(rd) | Rn(rn));
+}
+
+#define MINMAX(V)                        \
+  V(smax, 0x11c00000, 0x1ac06000, true)  \
+  V(smin, 0x11c80000, 0x1ac06800, true)  \
+  V(umax, 0x11c40000, 0x1ac06400, false) \
+  V(umin, 0x11cc0000, 0x1ac06c00, false)
+// V(FN, IMMOP, REGOP, SIGNED): CSSC min/max; imm-form and reg-form opcodes.
+#define VIXL_DEFINE_ASM_FUNC(FN, IMMOP, REGOP, SIGNED)                     \
+  void Assembler::FN(const Register& rd,                                   \
+                     const Register& rn,                                   \
+                     const Operand& op) {                                  \
+    VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC) && rd.IsSameSizeAndType(rn));   \
+    Instr i = SF(rd) | Rd(rd) | Rn(rn);                                    \
+    if (op.IsImmediate()) {                                                \
+      int64_t imm = op.GetImmediate();                                     \
+      i |= SIGNED ? ImmField<17, 10>(imm) : ImmUnsignedField<17, 10>(imm); \
+      Emit(IMMOP | i);                                                     \
+    } else {                                                               \
+      VIXL_ASSERT(op.IsPlainRegister());                                   \
+      VIXL_ASSERT(op.GetRegister().IsSameSizeAndType(rd));                 \
+      Emit(REGOP | i | Rm(op.GetRegister()));                              \
+    }                                                                      \
+  }
+MINMAX(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+
 // NEON structure loads and stores.
 Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
   Instr addr_field = RnSP(addr.GetBaseRegister());
@@ -2376,6 +2948,25 @@
   LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
 }
 
+void Assembler::pmull(const VRegister& vd,
+                      const VRegister& vn,
+                      const VRegister& vm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+  VIXL_ASSERT(AreSameFormat(vn, vm));
+  VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is1D() && vd.Is1Q()));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kPmull1Q) || vd.Is8H());
+  Emit(VFormat(vn) | NEON_PMULL | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::pmull2(const VRegister& vd,
+                       const VRegister& vn,
+                       const VRegister& vm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+  VIXL_ASSERT(AreSameFormat(vn, vm));
+  VIXL_ASSERT((vn.Is16B() && vd.Is8H()) || (vn.Is2D() && vd.Is1Q()));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kPmull1Q) || vd.Is8H());
+  Emit(VFormat(vn) | NEON_PMULL2 | Rm(vm) | Rn(vn) | Rd(vd));
+}
 
 void Assembler::NEON3DifferentL(const VRegister& vd,
                                 const VRegister& vn,
@@ -2423,8 +3014,6 @@
 
 // clang-format off
 #define NEON_3DIFF_LONG_LIST(V) \
-  V(pmull,  NEON_PMULL,  vn.IsVector() && vn.Is8B())                           \
-  V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B())                          \
   V(saddl,  NEON_SADDL,  vn.IsVector() && vn.IsD())                            \
   V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ())                            \
   V(sabal,  NEON_SABAL,  vn.IsVector() && vn.IsD())                            \
@@ -2782,7 +3371,8 @@
 
 
 void Assembler::fmov(const VRegister& vd, const Register& rn) {
-  VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kFP) ||
+              (vd.Is1D() && CPUHas(CPUFeatures::kNEON)));
   VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
   VIXL_ASSERT((vd.GetSizeInBits() == rn.GetSizeInBits()) || vd.Is1H());
   FPIntegerConvertOp op;
@@ -3798,7 +4388,7 @@
                          const VRegister& vm) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM));
   VIXL_ASSERT(AreSameFormat(vd, vn, vm));
-  VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
+  VIXL_ASSERT(vd.IsLaneSizeH() || vd.IsLaneSizeS());
 
   Instr format, op = NEON_SQRDMLAH;
   if (vd.IsScalar()) {
@@ -3817,7 +4407,7 @@
                          const VRegister& vm) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM));
   VIXL_ASSERT(AreSameFormat(vd, vn, vm));
-  VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
+  VIXL_ASSERT(vd.IsLaneSizeH() || vd.IsLaneSizeS());
 
   Instr format, op = NEON_SQRDMLSH;
   if (vd.IsScalar()) {
@@ -5286,6 +5876,263 @@
   Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
 }
 
+void Assembler::bcax(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
+  // Bit Clear and Exclusive-OR: every operand, including va, must be 16B.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA3));
+  VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());
+
+  Emit(0xce200000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
+}
+
+void Assembler::eor3(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
+  // Three-way Exclusive-OR of 16B vectors; va is encoded in the Ra field.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA3));
+  VIXL_ASSERT(va.Is16B() && vm.Is16B() && vn.Is16B() && vd.Is16B());
+
+  Emit(Ra(va) | Rm(vm) | Rn(vn) | Rd(vd) | 0xce000000);
+}
+
+void Assembler::xar(const VRegister& vd, const VRegister& vn, const VRegister& vm, int rotate) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA3));
+  VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
+  VIXL_ASSERT(IsUint6(rotate));
+
+  // Place the 6-bit rotate at bit 10, shifting as unsigned like sm3tt*.
+  Emit(0xce800000 | Rd(vd) | Rn(vn) | Rm(vm) | (static_cast<uint32_t>(rotate) << 10));
+}
+
+void Assembler::rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // Rotate and Exclusive-OR on 2D vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA3));
+  VIXL_ASSERT(vm.Is2D() && vn.Is2D() && vd.Is2D());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce608c00);
+}
+
+void Assembler::sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA1 hash update (choose): Q destination, S source, 4S vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(vm.Is4S() && vn.IsS() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e000000);
+}
+
+void Assembler::sha1h(const VRegister& sd, const VRegister& sn) {
+  // SHA1 fixed rotate: both operands are S registers.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(sn.IsS() && sd.IsS());
+
+  Emit(Rn(sn) | Rd(sd) | 0x5e280800);
+}
+
+void Assembler::sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA1 hash update (majority): Q destination, S source, 4S vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(vm.Is4S() && vn.IsS() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e002000);
+}
+
+void Assembler::sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA1 hash update (parity): Q destination, S source, 4S vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(vm.Is4S() && vn.IsS() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e001000);
+}
+
+void Assembler::sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA1 schedule update 0: all operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e003000);
+}
+
+void Assembler::sha1su1(const VRegister& vd, const VRegister& vn) {
+  // SHA1 schedule update 1: both operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA1));
+  VIXL_ASSERT(vn.Is4S() && vd.Is4S());
+
+  Emit(Rn(vn) | Rd(vd) | 0x5e281800);
+}
+
+void Assembler::sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA256 hash update (part 1): Q destination and source, 4S vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA2));
+  VIXL_ASSERT(vm.Is4S() && vn.IsQ() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e004000);
+}
+
+void Assembler::sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA256 hash update (part 2): Q destination and source, 4S vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA2));
+  VIXL_ASSERT(vm.Is4S() && vn.IsQ() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e005000);
+}
+
+void Assembler::sha256su0(const VRegister& vd, const VRegister& vn) {
+  // SHA256 schedule update 0: both operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA2));
+  VIXL_ASSERT(vn.Is4S() && vd.Is4S());
+
+  Emit(Rn(vn) | Rd(vd) | 0x5e282800);
+}
+
+void Assembler::sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA256 schedule update 1: all operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA2));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0x5e006000);
+}
+
+void Assembler::sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA512 hash update part 1: Q destination and source, 2D vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA512));
+  VIXL_ASSERT(vm.Is2D() && vn.IsQ() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce608000);
+}
+
+void Assembler::sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA512 hash update part 2: Q destination and source, 2D vector operand.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA512));
+  VIXL_ASSERT(vm.Is2D() && vn.IsQ() && vd.IsQ());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce608400);
+}
+
+void Assembler::sha512su0(const VRegister& vd, const VRegister& vn) {
+  // SHA512 schedule update 0: both operands are 2D vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA512));
+  VIXL_ASSERT(vn.Is2D() && vd.Is2D());
+
+  Emit(Rn(vn) | Rd(vd) | 0xcec08000);
+}
+
+void Assembler::sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SHA512 schedule update 1: all operands are 2D vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSHA512));
+  VIXL_ASSERT(vm.Is2D() && vn.Is2D() && vd.Is2D());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce608800);
+}
+
+void Assembler::aesd(const VRegister& vd, const VRegister& vn) {
+  // AES single round decryption: both operands are 16B vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kAES));
+  VIXL_ASSERT(vn.Is16B() && vd.Is16B());
+
+  Emit(Rn(vn) | Rd(vd) | 0x4e285800);
+}
+
+void Assembler::aese(const VRegister& vd, const VRegister& vn) {
+  // AES single round encryption: both operands are 16B vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kAES));
+  VIXL_ASSERT(vn.Is16B() && vd.Is16B());
+
+  Emit(Rn(vn) | Rd(vd) | 0x4e284800);
+}
+
+void Assembler::aesimc(const VRegister& vd, const VRegister& vn) {
+  // AES inverse mix columns: both operands are 16B vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kAES));
+  VIXL_ASSERT(vn.Is16B() && vd.Is16B());
+
+  Emit(Rn(vn) | Rd(vd) | 0x4e287800);
+}
+
+void Assembler::aesmc(const VRegister& vd, const VRegister& vn) {
+  // AES mix columns: both operands are 16B vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kAES));
+  VIXL_ASSERT(vn.Is16B() && vd.Is16B());
+
+  Emit(Rn(vn) | Rd(vd) | 0x4e286800);
+}
+
+void Assembler::sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SM3PARTW1: all operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce60c000);
+}
+
+void Assembler::sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SM3PARTW2: all operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce60c400);
+}
+
+void Assembler::sm3ss1(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
+  // SM3SS1: all four operands are 4S vectors; va is encoded in the Ra field.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(va.Is4S() && vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Ra(va) | Rm(vm) | Rn(vn) | Rd(vd) | 0xce400000);
+}
+
+void Assembler::sm3tt1a(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
+  // SM3TT1A: 4S operands; the 2-bit index is encoded at bit 12.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+  VIXL_ASSERT(IsUint2(index));
+
+  const Instr imm2 = static_cast<uint32_t>(index) << 12;
+  Emit(imm2 | Rm(vm) | Rn(vn) | Rd(vd) | 0xce408000);
+}
+
+void Assembler::sm3tt1b(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
+  // SM3TT1B: 4S operands; the 2-bit index is encoded at bit 12.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+  VIXL_ASSERT(IsUint2(index));
+
+  const Instr imm2 = static_cast<uint32_t>(index) << 12;
+  Emit(imm2 | Rm(vm) | Rn(vn) | Rd(vd) | 0xce408400);
+}
+
+void Assembler::sm3tt2a(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
+  // SM3TT2A: 4S operands; the 2-bit index is encoded at bit 12.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+  VIXL_ASSERT(IsUint2(index));
+
+  const Instr imm2 = static_cast<uint32_t>(index) << 12;
+  Emit(imm2 | Rm(vm) | Rn(vn) | Rd(vd) | 0xce408800);
+}
+
+void Assembler::sm3tt2b(const VRegister& vd, const VRegister& vn, const VRegister& vm, int index) {
+  // SM3TT2B: 4S operands; the 2-bit index is encoded at bit 12.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM3));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+  VIXL_ASSERT(IsUint2(index));
+
+  const Instr imm2 = static_cast<uint32_t>(index) << 12;
+  Emit(imm2 | Rm(vm) | Rn(vn) | Rd(vd) | 0xce408c00);
+}
+
+void Assembler::sm4e(const VRegister& vd, const VRegister& vn) {
+  // SM4 Encode: both operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM4));
+  VIXL_ASSERT(vn.Is4S() && vd.Is4S());
+
+  Emit(Rn(vn) | Rd(vd) | 0xcec08400);
+}
+
+void Assembler::sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+  // SM4 Key: all operands are 4S vectors.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kSM4));
+  VIXL_ASSERT(vm.Is4S() && vn.Is4S() && vd.Is4S());
+
+  Emit(Rm(vm) | Rn(vn) | Rd(vd) | 0xce60c800);
+}
+
 // Note:
 // For all ToImm instructions below, a difference in case
 // for the same letter indicates a negated bit.
@@ -5311,9 +6158,9 @@
 
 
 uint32_t Assembler::FP32ToImm8(float imm) {
-  VIXL_ASSERT(IsImmFP32(imm));
   // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
   uint32_t bits = FloatToRawbits(imm);
+  VIXL_ASSERT(IsImmFP32(bits));
   // bit7: a000.0000
   uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
   // bit6: 0b00.0000
@@ -5329,10 +6176,10 @@
 
 
 uint32_t Assembler::FP64ToImm8(double imm) {
-  VIXL_ASSERT(IsImmFP64(imm));
   // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   //       0000.0000.0000.0000.0000.0000.0000.0000
   uint64_t bits = DoubleToRawbits(imm);
+  VIXL_ASSERT(IsImmFP64(bits));
   // bit7: a000.0000
   uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
   // bit6: 0b00.0000
@@ -5886,10 +6733,9 @@
 }
 
 
-bool Assembler::IsImmFP32(float imm) {
+bool Assembler::IsImmFP32(uint32_t bits) {
   // Valid values will have the form:
   // aBbb.bbbc.defg.h000.0000.0000.0000.0000
-  uint32_t bits = FloatToRawbits(imm);
   // bits[19..0] are cleared.
   if ((bits & 0x7ffff) != 0) {
     return false;
@@ -5910,11 +6756,10 @@
 }
 
 
-bool Assembler::IsImmFP64(double imm) {
+bool Assembler::IsImmFP64(uint64_t bits) {
   // Valid values will have the form:
   // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   // 0000.0000.0000.0000.0000.0000.0000.0000
-  uint64_t bits = DoubleToRawbits(imm);
   // bits[47..0] are cleared.
   if ((bits & 0x0000ffffffffffff) != 0) {
     return false;
@@ -5936,16 +6781,18 @@
 
 
 bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) {
   VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;  // Shift only after the bound check.
-  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
-         IsInt7(offset / (1 << access_size_in_bytes_log2));
+  return IsMultiple(offset, access_size_in_bytes) &&
+         IsInt7(offset / access_size_in_bytes);
 }
 
 
 bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) {
   VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;  // Shift only after the bound check.
-  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
-         IsUint12(offset / (1 << access_size_in_bytes_log2));
+  return IsMultiple(offset, access_size_in_bytes) &&
+         IsUint12(offset / access_size_in_bytes);
 }
 
 
@@ -6330,6 +7177,7 @@
       return CPUHas(CPUFeatures::kRNG);
     case FPCR:
     case NZCV:
+    case DCZID_EL0:
       break;
   }
   return true;
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 65c55cc..441a528 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -33,6 +33,7 @@
 #include "../globals-vixl.h"
 #include "../invalset-vixl.h"
 #include "../utils-vixl.h"
+
 #include "operands-aarch64.h"
 
 namespace vixl {
@@ -2157,6 +2158,9 @@
   // System instruction with pre-encoded op (op1:crn:crm:op2).
   void sys(int op, const Register& xt = xzr);
 
+  // System instruction with result.
+  void sysl(int op, const Register& xt = xzr);
+
   // System data cache operation.
   void dc(DataCacheOp op, const Register& rt);
 
@@ -3617,6 +3621,123 @@
   // Unsigned 8-bit integer matrix multiply-accumulate (vector).
   void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
 
+  // Bit Clear and Exclusive-OR.
+  void bcax(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            const VRegister& va);
+
+  // Three-way Exclusive-OR.
+  void eor3(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            const VRegister& va);
+
+  // Exclusive-OR and Rotate.
+  void xar(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm,
+           int rotate);
+
+  // Rotate and Exclusive-OR.
+  void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA1 hash update (choose).
+  void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA1 fixed rotate.
+  void sha1h(const VRegister& sd, const VRegister& sn);
+
+  // SHA1 hash update (majority).
+  void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA1 hash update (parity).
+  void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA1 schedule update 0.
+  void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA1 schedule update 1.
+  void sha1su1(const VRegister& vd, const VRegister& vn);
+
+  // SHA256 hash update (part 1).
+  void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA256 hash update (part 2).
+  void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA256 schedule update 0.
+  void sha256su0(const VRegister& vd, const VRegister& vn);
+
+  // SHA256 schedule update 1.
+  void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA512 hash update part 1.
+  void sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA512 hash update part 2.
+  void sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SHA512 schedule update 0.
+  void sha512su0(const VRegister& vd, const VRegister& vn);
+
+  // SHA512 schedule update 1.
+  void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // AES single round decryption.
+  void aesd(const VRegister& vd, const VRegister& vn);
+
+  // AES single round encryption.
+  void aese(const VRegister& vd, const VRegister& vn);
+
+  // AES inverse mix columns.
+  void aesimc(const VRegister& vd, const VRegister& vn);
+
+  // AES mix columns.
+  void aesmc(const VRegister& vd, const VRegister& vn);
+
+  // SM3PARTW1.
+  void sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SM3PARTW2.
+  void sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+  // SM3SS1.
+  void sm3ss1(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm,
+              const VRegister& va);
+
+  // SM3TT1A.
+  void sm3tt1a(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm,
+               int index);
+
+  // SM3TT1B.
+  void sm3tt1b(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm,
+               int index);
+
+  // SM3TT2A.
+  void sm3tt2a(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm,
+               int index);
+
+  // SM3TT2B.
+  void sm3tt2b(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm,
+               int index);
+
+  // SM4 Encode.
+  void sm4e(const VRegister& vd, const VRegister& vn);
+
+  // SM4 Key.
+  void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
   // Scalable Vector Extensions.
 
   // Absolute value (predicated).
@@ -6901,6 +7022,191 @@
              const ZRegister& zm,
              int index);
 
+  // Add with Tag.
+  void addg(const Register& xd, const Register& xn, int offset, int tag_offset);
+
+  // Tag Mask Insert.
+  void gmi(const Register& xd, const Register& xn, const Register& xm);
+
+  // Insert Random Tag.
+  void irg(const Register& xd, const Register& xn, const Register& xm = xzr);
+
+  // Load Allocation Tag.
+  void ldg(const Register& xt, const MemOperand& addr);
+
+  void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op);
+
+  // Store Allocation Tags.
+  void st2g(const Register& xt, const MemOperand& addr);
+
+  // Store Allocation Tag.
+  void stg(const Register& xt, const MemOperand& addr);
+
+  // Store Allocation Tag and Pair of registers.
+  void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr);
+
+  // Store Allocation Tags, Zeroing.
+  void stz2g(const Register& xt, const MemOperand& addr);
+
+  // Store Allocation Tag, Zeroing.
+  void stzg(const Register& xt, const MemOperand& addr);
+
+  // Subtract with Tag.
+  void subg(const Register& xd, const Register& xn, int offset, int tag_offset);
+
+  // Subtract Pointer.
+  void subp(const Register& xd, const Register& xn, const Register& xm);
+
+  // Subtract Pointer, setting Flags.
+  void subps(const Register& xd, const Register& xn, const Register& xm);
+
+  // Compare with Tag.
+  void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); }
+
+  // Memory Copy.
+  void cpye(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads and writes non-temporal.
+  void cpyen(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads non-temporal.
+  void cpyern(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, writes non-temporal.
+  void cpyewn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only.
+  void cpyfe(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads and writes non-temporal.
+  void cpyfen(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads non-temporal.
+  void cpyfern(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, writes non-temporal.
+  void cpyfewn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only.
+  void cpyfm(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads and writes non-temporal.
+  void cpyfmn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads non-temporal.
+  void cpyfmrn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, writes non-temporal.
+  void cpyfmwn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only.
+  void cpyfp(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads and writes non-temporal.
+  void cpyfpn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, reads non-temporal.
+  void cpyfprn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy Forward-only, writes non-temporal.
+  void cpyfpwn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy.
+  void cpym(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads and writes non-temporal.
+  void cpymn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads non-temporal.
+  void cpymrn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, writes non-temporal.
+  void cpymwn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy.
+  void cpyp(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads and writes non-temporal.
+  void cpypn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, reads non-temporal.
+  void cpyprn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Copy, writes non-temporal.
+  void cpypwn(const Register& rd, const Register& rs, const Register& rn);
+
+  // Memory Set.
+  void sete(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set, non-temporal.
+  void seten(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting.
+  void setge(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting, non-temporal.
+  void setgen(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting.
+  void setgm(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting, non-temporal.
+  void setgmn(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting.
+  void setgp(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set with tag setting, non-temporal.
+  void setgpn(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set.
+  void setm(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set, non-temporal.
+  void setmn(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set.
+  void setp(const Register& rd, const Register& rn, const Register& rs);
+
+  // Memory Set, non-temporal.
+  void setpn(const Register& rd, const Register& rn, const Register& rs);
+
+  // Absolute value.
+  void abs(const Register& rd, const Register& rn);
+
+  // Count bits.
+  void cnt(const Register& rd, const Register& rn);
+
+  // Count Trailing Zeros.
+  void ctz(const Register& rd, const Register& rn);
+
+  // Signed Maximum.
+  void smax(const Register& rd, const Register& rn, const Operand& op);
+
+  // Signed Minimum.
+  void smin(const Register& rd, const Register& rn, const Operand& op);
+
+  // Unsigned Maximum.
+  void umax(const Register& rd, const Register& rn, const Operand& op);
+
+  // Unsigned Minimum.
+  void umin(const Register& rd, const Register& rn, const Operand& op);
+
+  // Check feature status.
+  void chkfeat(const Register& rd);
+
+  // Guarded Control Stack Push.
+  void gcspushm(const Register& rt);
+
+  // Guarded Control Stack Pop.
+  void gcspopm(const Register& rt);
+
+  // Guarded Control Stack Switch Stack 1.
+  void gcsss1(const Register& rt);
+
+  // Guarded Control Stack Switch Stack 2.
+  void gcsss2(const Register& rt);
+
   // Emit generic instructions.
 
   // Emit raw instructions into the instruction stream.
@@ -7219,8 +7525,9 @@
   }
 
   static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
-    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
-    int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
+    const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;
+    VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes));
+    int64_t scaled_imm7 = imm7 / access_size_in_bytes;
     VIXL_ASSERT(IsInt7(scaled_imm7));
     return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
   }
@@ -7345,8 +7652,14 @@
   static bool IsImmAddSub(int64_t immediate);
   static bool IsImmConditionalCompare(int64_t immediate);
   static bool IsImmFP16(Float16 imm);
-  static bool IsImmFP32(float imm);
-  static bool IsImmFP64(double imm);
+
+  static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); }
+
+  static bool IsImmFP32(uint32_t bits);
+
+  static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); }
+
+  static bool IsImmFP64(uint64_t bits);
   static bool IsImmLogical(uint64_t value,
                            unsigned width,
                            unsigned* n = NULL,
@@ -7362,6 +7675,8 @@
   static Instr VFormat(VRegister vd) {
     if (vd.Is64Bits()) {
       switch (vd.GetLanes()) {
+        case 1:
+          return NEON_1D;
         case 2:
           return NEON_2S;
         case 4:
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index 84d4d51..0c3c7f8 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -6505,7 +6505,7 @@
 
 void Assembler::fmov(const ZRegister& zd, double imm) {
   if (IsPositiveZero(imm)) {
-    dup(zd, imm);
+    dup(zd, 0);
   } else {
     fdup(zd, imm);
   }
@@ -7410,13 +7410,13 @@
   //  size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
 
   VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kSVEPmull128) || !zd.IsLaneSizeQ());
   VIXL_ASSERT(AreSameLaneSize(zn, zm));
   VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
   VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
-  // SVEPmull128 is not supported
-  VIXL_ASSERT(!zd.IsLaneSizeQ());
+  Instr size = zd.IsLaneSizeQ() ? 0 : SVESize(zd);
 
-  Emit(0x45006800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+  Emit(0x45006800 | size | Rd(zd) | Rn(zn) | Rm(zm));
 }
 
 void Assembler::pmullt(const ZRegister& zd,
@@ -7427,13 +7427,13 @@
   //  size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
 
   VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kSVEPmull128) || !zd.IsLaneSizeQ());
   VIXL_ASSERT(AreSameLaneSize(zn, zm));
   VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
   VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
-  // SVEPmull128 is not supported
-  VIXL_ASSERT(!zd.IsLaneSizeQ());
+  Instr size = zd.IsLaneSizeQ() ? 0 : SVESize(zd);
 
-  Emit(0x45006c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+  Emit(0x45006c00 | size | Rd(zd) | Rn(zn) | Rm(zm));
 }
 
 void Assembler::raddhnb(const ZRegister& zd,
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index f7512b2..279587c 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -389,7 +389,8 @@
   BTI    = 32,
   BTI_c  = 34,
   BTI_j  = 36,
-  BTI_jc = 38
+  BTI_jc = 38,
+  CHKFEAT = 40
 };
 
 enum BranchTargetIdentifier {
@@ -500,7 +501,8 @@
   NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value,
   FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value,
   RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value,    // Random number.
-  RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value   // Reseeded random number.
+  RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value,  // Reseeded random number.
+  DCZID_EL0 = SystemRegisterEncoder<3, 3, 0, 0, 7>::value
 };
 
 template<int op1, int crn, int crm, int op2>
@@ -523,7 +525,22 @@
   CVAP = CacheOpEncoder<3, 7, 12, 1>::value,
   CVADP = CacheOpEncoder<3, 7, 13, 1>::value,
   CIVAC = CacheOpEncoder<3, 7, 14, 1>::value,
-  ZVA = CacheOpEncoder<3, 7, 4, 1>::value
+  ZVA = CacheOpEncoder<3, 7, 4, 1>::value,
+  GVA = CacheOpEncoder<3, 7, 4, 3>::value,
+  GZVA = CacheOpEncoder<3, 7, 4, 4>::value,
+  CGVAC = CacheOpEncoder<3, 7, 10, 3>::value,
+  CGDVAC = CacheOpEncoder<3, 7, 10, 5>::value,
+  CGVAP = CacheOpEncoder<3, 7, 12, 3>::value,
+  CGDVAP = CacheOpEncoder<3, 7, 12, 5>::value,
+  CIGVAC = CacheOpEncoder<3, 7, 14, 3>::value,
+  CIGDVAC = CacheOpEncoder<3, 7, 14, 5>::value
+};
+
+enum GCSOp {
+  GCSPUSHM = CacheOpEncoder<3, 7, 7, 0>::value,
+  GCSPOPM = CacheOpEncoder<3, 7, 7, 1>::value,
+  GCSSS1 = CacheOpEncoder<3, 7, 7, 2>::value,
+  GCSSS2 = CacheOpEncoder<3, 7, 7, 3>::value
 };
 
 // Some SVE instructions support a predicate constraint pattern. This is
@@ -934,7 +951,8 @@
   SystemSysFixed  = 0xD5080000,
   SystemSysFMask  = 0xFFF80000,
   SystemSysMask   = 0xFFF80000,
-  SYS             = SystemSysFixed | 0x00000000
+  SYS             = SystemSysFixed | 0x00000000,
+  SYSL            = SystemSysFixed | 0x00200000
 };
 
 // Exception.
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index ae51992..3b70cfc 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -48,6 +48,7 @@
 const IDRegister::Field AA64PFR1::kBT(0);
 const IDRegister::Field AA64PFR1::kSSBS(4);
 const IDRegister::Field AA64PFR1::kMTE(8);
+const IDRegister::Field AA64PFR1::kSME(24);
 
 const IDRegister::Field AA64ISAR0::kAES(4);
 const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -78,7 +79,10 @@
 const IDRegister::Field AA64ISAR1::kDGH(48);
 const IDRegister::Field AA64ISAR1::kI8MM(52);
 
+const IDRegister::Field AA64ISAR2::kWFXT(0);
 const IDRegister::Field AA64ISAR2::kRPRES(4);
+const IDRegister::Field AA64ISAR2::kMOPS(16);
+const IDRegister::Field AA64ISAR2::kCSSC(52);
 
 const IDRegister::Field AA64MMFR0::kECV(60);
 
@@ -97,6 +101,14 @@
 const IDRegister::Field AA64ZFR0::kF32MM(52);
 const IDRegister::Field AA64ZFR0::kF64MM(56);
 
+const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
+const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
+const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
+const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
+const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
+const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
+const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
+
 CPUFeatures AA64PFR0::GetCPUFeatures() const {
   CPUFeatures f;
   if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
@@ -119,6 +131,8 @@
   if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
   if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
   if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
+  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
+  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
   return f;
 }
 
@@ -155,6 +169,7 @@
   if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
   if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
   if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
+  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
   if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
   if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
 
@@ -180,7 +195,10 @@
 
 CPUFeatures AA64ISAR2::GetCPUFeatures() const {
   CPUFeatures f;
+  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
   if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
+  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
+  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
   return f;
 }
 
@@ -213,6 +231,7 @@
   if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
   if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
   if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
+  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
   if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
   if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
   if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
@@ -220,6 +239,18 @@
   return f;
 }
 
+CPUFeatures AA64SMFR0::GetCPUFeatures() const {  // Decode the SME fields.
+  CPUFeatures f;
+  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
+  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
+  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
+  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);  // 0b1111.
+  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
+  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);  // 0b1111.
+  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
+  return f;
+}
+
 int IDRegister::Get(IDRegister::Field field) const {
   int msb = field.GetMsb();
   int lsb = field.GetLsb();
@@ -248,11 +279,11 @@
     CPUFeatures::QueryIDRegistersOption option) {
   CPUFeatures features;
 
-#if VIXL_USE_LINUX_HWCAP
+#ifdef VIXL_USE_LINUX_HWCAP
   // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
   // than explicit bits, but explicit bits allow us to identify features that
   // the toolchain doesn't know about.
-  static const CPUFeatures::Feature kFeatureBits[] =
+  static const CPUFeatures::Feature kFeatureBitsLow[] =
       {// Bits 0-7
        CPUFeatures::kFP,
        CPUFeatures::kNEON,
@@ -288,8 +319,11 @@
        CPUFeatures::kSSBSControl,
        CPUFeatures::kSB,
        CPUFeatures::kPAuth,
-       CPUFeatures::kPAuthGeneric,
-       // Bits 32-39
+       CPUFeatures::kPAuthGeneric};
+  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
+
+  static const CPUFeatures::Feature kFeatureBitsHigh[] =
+      {// Bits 0-7
        CPUFeatures::kDCCVADP,
        CPUFeatures::kSVE2,
        CPUFeatures::kSVEAES,
@@ -298,7 +332,7 @@
        CPUFeatures::kSVESHA3,
        CPUFeatures::kSVESM4,
        CPUFeatures::kAXFlag,
-       // Bits 40-47
+       // Bits 8-15
        CPUFeatures::kFrintToFixedSizedInt,
        CPUFeatures::kSVEI8MM,
        CPUFeatures::kSVEF32MM,
@@ -307,24 +341,43 @@
        CPUFeatures::kI8MM,
        CPUFeatures::kBF16,
        CPUFeatures::kDGH,
-       // Bits 48+
+       // Bits 16-23
        CPUFeatures::kRNG,
        CPUFeatures::kBTI,
        CPUFeatures::kMTE,
        CPUFeatures::kECV,
        CPUFeatures::kAFP,
-       CPUFeatures::kRPRES};
+       CPUFeatures::kRPRES,
+       CPUFeatures::kMTE3,
+       CPUFeatures::kSME,
+       // Bits 24-31
+       CPUFeatures::kSMEi16i64,
+       CPUFeatures::kSMEf64f64,
+       CPUFeatures::kSMEi8i32,
+       CPUFeatures::kSMEf16f32,
+       CPUFeatures::kSMEb16f32,
+       CPUFeatures::kSMEf32f32,
+       CPUFeatures::kSMEfa64,
+       CPUFeatures::kWFXT,
+       // Bits 32-39
+       CPUFeatures::kEBF16,
+       CPUFeatures::kSVE_EBF16};
+  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
 
-  uint64_t hwcap_low32 = getauxval(AT_HWCAP);
-  uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
-  VIXL_ASSERT(IsUint32(hwcap_low32));
-  VIXL_ASSERT(IsUint32(hwcap_high32));
-  uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);
+  auto combine_features = [&features](uint64_t hwcap,
+                                      const CPUFeatures::Feature* feature_array,
+                                      size_t features_size) {
+    for (size_t i = 0; i < features_size; i++) {
+      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
+    }
+  };
 
-  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
-  for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
-    if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
-  }
+  uint64_t hwcap_low = getauxval(AT_HWCAP);
+  uint64_t hwcap_high = getauxval(AT_HWCAP2);
+
+  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
+  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
+
   // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
   if (features.Has(CPUFeatures::kMTE)) {
     features.Combine(CPUFeatures::kMTEInstructions);
@@ -425,7 +478,7 @@
 }
 
 
-void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
 #ifdef __aarch64__
   // Implement the cache synchronisation for all targets where AArch64 is the
   // host, even if we're building the simulator for an AAarch64 host. This
@@ -523,5 +576,6 @@
 #endif
 }
 
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index 892f48f..d5a5f8c 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -31,6 +31,7 @@
 #include "../globals-vixl.h"
 
 #include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
 
 #ifndef VIXL_INCLUDE_TARGET_AARCH64
 // The supporting .cc file is only compiled when the A64 target is selected.
@@ -56,24 +57,24 @@
    public:
     enum Type { kUnsigned, kSigned };
 
+    static const int kMaxWidthInBits = 4;
+
     // This needs to be constexpr so that fields have "constant initialisation".
     // This avoids initialisation order problems when these values are used to
     // (dynamically) initialise static variables, etc.
-    explicit constexpr Field(int lsb, Type type = kUnsigned)
-        : lsb_(lsb), type_(type) {}
+    explicit constexpr Field(int lsb,
+                             int bitWidth = kMaxWidthInBits,
+                             Type type = kUnsigned)
+        : lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
 
-    static const int kMaxWidthInBits = 4;
-
-    int GetWidthInBits() const {
-      // All current ID fields have four bits.
-      return kMaxWidthInBits;
-    }
+    int GetWidthInBits() const { return bitWidth_; }
     int GetLsb() const { return lsb_; }
     int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
     Type GetType() const { return type_; }
 
    private:
     int lsb_;
+    int bitWidth_;
     Type type_;
   };
 
@@ -113,6 +114,7 @@
   static const Field kBT;
   static const Field kSSBS;
   static const Field kMTE;
+  static const Field kSME;
 };
 
 class AA64ISAR0 : public IDRegister {
@@ -167,7 +169,10 @@
   CPUFeatures GetCPUFeatures() const;
 
  private:
+  static const Field kWFXT;
   static const Field kRPRES;
+  static const Field kMOPS;
+  static const Field kCSSC;
 };
 
 class AA64MMFR0 : public IDRegister {
@@ -219,6 +224,22 @@
   static const Field kF64MM;
 };
 
+class AA64SMFR0 : public IDRegister {  // Decodes the SME feature fields.
+ public:
+  explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;  // Features implied by the raw value.
+
+ private:
+  static const Field kSMEf32f32;
+  static const Field kSMEb16f32;
+  static const Field kSMEf16f32;
+  static const Field kSMEi8i32;
+  static const Field kSMEf64f64;
+  static const Field kSMEi16i64;
+  static const Field kSMEfa64;
+};
+
 class CPU {
  public:
   // Initialise CPU support.
@@ -285,6 +306,7 @@
   V(AA64MMFR1, "ID_AA64MMFR1_EL1")                                            \
   /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
   /* read them, but some compilers don't accept the symbolic names. */        \
+  V(AA64SMFR0, "S3_0_C0_C4_5")                                                \
   V(AA64ISAR2, "S3_0_C0_C6_2")                                                \
   V(AA64MMFR2, "S3_0_C0_C7_2")                                                \
   V(AA64ZFR0, "S3_0_C0_C4_4")
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 63249b0..972bf03 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -24,12 +24,13 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "cpu-features-auditor-aarch64.h"
+
 #include "cpu-features.h"
 #include "globals-vixl.h"
 #include "utils-vixl.h"
-#include "decoder-aarch64.h"
 
-#include "cpu-features-auditor-aarch64.h"
+#include "decoder-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -246,16 +247,47 @@
 
 void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
+  if (form_hash_ == "sha256su0_vv_cryptosha2"_h) {
+    scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
+  } else {
+    scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+  }
   USE(instr);
 }
 
 void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
+  switch (form_hash_) {
+    case "sha1c_qsv_cryptosha3"_h:
+    case "sha1m_qsv_cryptosha3"_h:
+    case "sha1p_qsv_cryptosha3"_h:
+    case "sha1su0_vvv_cryptosha3"_h:
+      scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+      break;
+    case "sha256h_qqv_cryptosha3"_h:
+    case "sha256h2_qqv_cryptosha3"_h:
+    case "sha256su1_vvv_cryptosha3"_h:
+      scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
+      break;
+  }
   USE(instr);
 }
 
 void CPUFeaturesAuditor::VisitCryptoAES(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
+  scope.Record(CPUFeatures::kNEON, CPUFeatures::kAES);
+  USE(instr);
+}
+
+void CPUFeaturesAuditor::VisitCryptoSM3(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  scope.Record(CPUFeatures::kNEON, CPUFeatures::kSM3);
+  USE(instr);
+}
+
+void CPUFeaturesAuditor::VisitCryptoSM4(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  scope.Record(CPUFeatures::kNEON, CPUFeatures::kSM4);
   USE(instr);
 }
 
@@ -507,8 +539,6 @@
 
 void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
-  // All of these instructions require FP.
-  scope.Record(CPUFeatures::kFP);
   switch (instr->Mask(FPIntegerConvertMask)) {
     case FCVTAS_wh:
     case FCVTAS_xh:
@@ -538,17 +568,23 @@
     case SCVTF_hx:
     case UCVTF_hw:
     case UCVTF_hx:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kFPHalf);
       return;
+    case FMOV_dx:
+      scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON);
+      return;
     case FMOV_d1_x:
     case FMOV_x_d1:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kNEON);
       return;
     case FJCVTZS:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kJSCVT);
       return;
     default:
-      // No special CPU features.
+      scope.Record(CPUFeatures::kFP);
       return;
   }
 }
@@ -731,6 +767,12 @@
   RecordInstructionFeaturesScope scope(this);
   // All of these instructions require NEON.
   scope.Record(CPUFeatures::kNEON);
+  if (form_hash_ == "pmull_asimddiff_l"_h) {
+    if (instr->GetNEONSize() == 3) {
+      // Source is 1D or 2D, destination is 1Q.
+      scope.Record(CPUFeatures::kPmull1Q);
+    }
+  }
   USE(instr);
 }
 
@@ -1265,80 +1307,93 @@
 
 void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
-  if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
-    CPUFeatures required;
-    switch (instr->GetInstructionBits()) {
-      case PACIA1716:
-      case PACIB1716:
-      case AUTIA1716:
-      case AUTIB1716:
-      case PACIAZ:
-      case PACIASP:
-      case PACIBZ:
-      case PACIBSP:
-      case AUTIAZ:
-      case AUTIASP:
-      case AUTIBZ:
-      case AUTIBSP:
-      case XPACLRI:
-        required.Combine(CPUFeatures::kPAuth);
-        break;
-      default:
-        switch (instr->GetImmHint()) {
-          case ESB:
-            required.Combine(CPUFeatures::kRAS);
-            break;
-          case BTI:
-          case BTI_j:
-          case BTI_c:
-          case BTI_jc:
-            required.Combine(CPUFeatures::kBTI);
-            break;
-          default:
-            break;
-        }
-        break;
-    }
 
-    // These are all HINT instructions, and behave as NOPs if the corresponding
-    // features are not implemented, so we record the corresponding features
-    // only if they are available.
-    if (available_.Has(required)) scope.Record(required);
-  } else if (instr->Mask(SystemSysMask) == SYS) {
-    switch (instr->GetSysOp()) {
-      // DC instruction variants.
-      case CVAP:
-        scope.Record(CPUFeatures::kDCPoP);
-        break;
-      case CVADP:
-        scope.Record(CPUFeatures::kDCCVADP);
-        break;
-      case IVAU:
-      case CVAC:
-      case CVAU:
-      case CIVAC:
-        // No special CPU features.
-        break;
-    }
-  } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
-    switch (instr->Mask(SystemPStateMask)) {
-      case CFINV:
-        scope.Record(CPUFeatures::kFlagM);
-        break;
-      case AXFLAG:
-      case XAFLAG:
-        scope.Record(CPUFeatures::kAXFlag);
-        break;
-    }
-  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
-    if (instr->Mask(SystemSysRegMask) == MRS) {
+  CPUFeatures required;
+  switch (form_hash_) {
+    case "pacib1716_hi_hints"_h:
+    case "pacia1716_hi_hints"_h:
+    case "pacibsp_hi_hints"_h:
+    case "paciasp_hi_hints"_h:
+    case "pacibz_hi_hints"_h:
+    case "paciaz_hi_hints"_h:
+    case "autib1716_hi_hints"_h:
+    case "autia1716_hi_hints"_h:
+    case "autibsp_hi_hints"_h:
+    case "autiasp_hi_hints"_h:
+    case "autibz_hi_hints"_h:
+    case "autiaz_hi_hints"_h:
+    case "xpaclri_hi_hints"_h:
+      required.Combine(CPUFeatures::kPAuth);
+      break;
+    case "esb_hi_hints"_h:
+      required.Combine(CPUFeatures::kRAS);
+      break;
+    case "bti_hb_hints"_h:
+      required.Combine(CPUFeatures::kBTI);
+      break;
+  }
+
+  // The instructions above are all HINTs and behave as NOPs if the
+  // corresponding features are not implemented, so we record the corresponding
+  // features only if they are available.
+  if (available_.Has(required)) scope.Record(required);
+
+  switch (form_hash_) {
+    case "cfinv_m_pstate"_h:
+      scope.Record(CPUFeatures::kFlagM);
+      break;
+    case "axflag_m_pstate"_h:
+    case "xaflag_m_pstate"_h:
+      scope.Record(CPUFeatures::kAXFlag);
+      break;
+    case "mrs_rs_systemmove"_h:
       switch (instr->GetImmSystemRegister()) {
         case RNDR:
         case RNDRRS:
           scope.Record(CPUFeatures::kRNG);
           break;
       }
-    }
+      break;
+    case "sys_cr_systeminstrs"_h:
+      switch (instr->GetSysOp()) {
+        // DC instruction variants.
+        case CGVAC:
+        case CGDVAC:
+        case CGVAP:
+        case CGDVAP:
+        case CIGVAC:
+        case CIGDVAC:
+        case GVA:
+        case GZVA:
+          scope.Record(CPUFeatures::kMTE);
+          break;
+        case CVAP:
+          scope.Record(CPUFeatures::kDCPoP);
+          break;
+        case CVADP:
+          scope.Record(CPUFeatures::kDCCVADP);
+          break;
+        case IVAU:
+        case CVAC:
+        case CVAU:
+        case CIVAC:
+        case ZVA:
+          // No special CPU features.
+          break;
+        case GCSPUSHM:
+        case GCSSS1:
+          scope.Record(CPUFeatures::kGCS);
+          break;
+      }
+      break;
+    case "sysl_rc_systeminstrs"_h:
+      switch (instr->GetSysOp()) {
+        case GCSPOPM:
+        case GCSSS2:
+          scope.Record(CPUFeatures::kGCS);
+          break;
+      }
+      break;
   }
 }
 
@@ -1392,9 +1447,9 @@
 void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
   VIXL_ASSERT(metadata->count("form") > 0);
   const std::string& form = (*metadata)["form"];
-  uint32_t form_hash = Hash(form.c_str());
+  form_hash_ = Hash(form.c_str());
   const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
-  FormToVisitorFnMap::const_iterator it = fv->find(form_hash);
+  FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
   if (it == fv->end()) {
     RecordInstructionFeaturesScope scope(this);
     std::map<uint32_t, const CPUFeatures> features = {
@@ -1725,10 +1780,116 @@
          CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
         {"sudot_z_zzzi_s"_h,
          CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"addg_64_addsub_immtags"_h, CPUFeatures::kMTE},
+        {"gmi_64g_dp_2src"_h, CPUFeatures::kMTE},
+        {"irg_64i_dp_2src"_h, CPUFeatures::kMTE},
+        {"ldg_64loffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_off"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_post"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_pre"_h, CPUFeatures::kMTE},
+        {"stg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stg_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stg_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"subg_64_addsub_immtags"_h, CPUFeatures::kMTE},
+        {"subps_64s_dp_2src"_h, CPUFeatures::kMTE},
+        {"subp_64s_dp_2src"_h, CPUFeatures::kMTE},
+        {"cpyen_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyern_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyewn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpye_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfen_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfern_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfewn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfe_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmrn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfm_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfpn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfprn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfpwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfp_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymrn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpym_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpypn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyprn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpypwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyp_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"seten_set_memcms"_h, CPUFeatures::kMOPS},
+        {"sete_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setgen_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setge_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgmn_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgm_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgpn_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgp_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setmn_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setm_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setpn_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setp_set_memcms"_h, CPUFeatures::kMOPS},
+        {"abs_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"abs_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"cnt_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"cnt_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"ctz_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"ctz_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"smax_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smax_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smin_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smin_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umax_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umax_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umin_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umin_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smax_32_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smax_64_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smin_32_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smin_64_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"bcax_vvv16_crypto4"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+        {"eor3_vvv16_crypto4"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+        {"rax1_vvv2_cryptosha512_3"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+        {"xar_vvv2_crypto3_imm6"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+        {"sha512h_qqv_cryptosha512_3"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
+        {"sha512h2_qqv_cryptosha512_3"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
+        {"sha512su0_vv2_cryptosha512_2"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
+        {"sha512su1_vvv2_cryptosha512_3"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512)},
+        {"pmullb_z_zz_q"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
+        {"pmullt_z_zz_q"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128)},
     };
 
-    if (features.count(form_hash) > 0) {
-      scope.Record(features[form_hash]);
+    if (features.count(form_hash_) > 0) {
+      scope.Record(features[form_hash_]);
     }
   } else {
     (it->second)(this, instr);
diff --git a/src/aarch64/cpu-features-auditor-aarch64.h b/src/aarch64/cpu-features-auditor-aarch64.h
index 041bc88..489083a 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.h
+++ b/src/aarch64/cpu-features-auditor-aarch64.h
@@ -31,7 +31,8 @@
 #include <iostream>
 #include <unordered_map>
 
-#include "cpu-features.h"
+#include "../cpu-features.h"
+
 #include "decoder-aarch64.h"
 #include "decoder-visitor-map-aarch64.h"
 
@@ -112,6 +113,8 @@
 #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
   VISITOR_LIST(DECLARE)
 #undef DECLARE
+  void VisitCryptoSM3(const Instruction* instr);
+  void VisitCryptoSM4(const Instruction* instr);
 
   void LoadStoreHelper(const Instruction* instr);
   void LoadStorePairHelper(const Instruction* instr);
@@ -126,6 +129,7 @@
       uint32_t,
       std::function<void(CPUFeaturesAuditor*, const Instruction*)>>;
   static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+  uint32_t form_hash_;
 };
 
 }  // namespace aarch64
diff --git a/src/aarch64/debugger-aarch64.cc b/src/aarch64/debugger-aarch64.cc
new file mode 100644
index 0000000..1b060fd
--- /dev/null
+++ b/src/aarch64/debugger-aarch64.cc
@@ -0,0 +1,498 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#include "debugger-aarch64.h"
+
+#include <cerrno>
+#include <cmath>
+#include <cstring>
+#include <errno.h>
+#include <limits>
+
+namespace vixl {
+namespace aarch64 {
+
+
+Debugger::Debugger(Simulator* sim)
+    : sim_(sim), input_stream_(&std::cin), ostream_(sim->GetOutputStream()) {
+  // Register the built-in commands; PrintUsage() lists them in this order.
+  RegisterCmd<HelpCmd>();
+  RegisterCmd<BreakCmd>();
+  RegisterCmd<StepCmd>();
+  RegisterCmd<ContinueCmd>();
+  RegisterCmd<PrintCmd>();
+  RegisterCmd<TraceCmd>();
+  RegisterCmd<GdbCmd>();
+}
+
+
+template <class T>
+void Debugger::RegisterCmd() {
+  auto candidate = std::make_unique<T>(sim_);
+
+  // The candidate's word and alias must not clash with any registered command.
+  const std::string_view word = candidate->GetCommandWord();
+  const std::string_view alias = candidate->GetCommandAlias();
+  for (const auto& existing : debugger_cmds_) {
+    const std::string_view taken_word = existing->GetCommandWord();
+    const std::string_view taken_alias = existing->GetCommandAlias();
+
+    if (word == taken_word) {
+      VIXL_ABORT_WITH_MSG("Command word matches an existing command word.");
+    } else if (word == taken_alias) {
+      VIXL_ABORT_WITH_MSG("Command word matches an existing command alias.");
+    }
+
+    // An empty alias means the candidate has none, so it cannot clash.
+    if (alias.empty()) continue;
+    if (alias == taken_word) {
+      VIXL_ABORT_WITH_MSG("Command alias matches an existing command word.");
+    } else if (alias == taken_alias) {
+      VIXL_ABORT_WITH_MSG("Command alias matches an existing command alias.");
+    }
+  }
+
+  debugger_cmds_.push_back(std::move(candidate));
+}
+
+
+bool Debugger::IsAtBreakpoint() const {  // Is the simulated PC a breakpoint?
+  return IsBreakpoint(reinterpret_cast<uint64_t>(sim_->ReadPc()));
+}
+
+
+void Debugger::Debug() {
+  DebugReturn done = DebugContinue;
+  while (done == DebugContinue) {
+    // Disassemble the next instruction to execute.
+    PrintDisassembler print_disasm = PrintDisassembler(ostream_);
+    print_disasm.Disassemble(sim_->ReadPc());
+
+    // Read the command line. Once the input is exhausted (or has failed) no
+    // command can ever arrive, so leave rather than prompt forever.
+    fprintf(ostream_, "sim> ");
+    std::string line;
+    if (!std::getline(*input_stream_, line)) break;
+
+    // Remove all control characters from the command string. <cctype>
+    // functions are only defined for unsigned char values (and EOF).
+    line.erase(std::remove_if(line.begin(),
+                              line.end(),
+                              [](unsigned char c) { return std::iscntrl(c); }),
+               line.end());
+
+    // Assume input from std::cin has already been output (e.g: by a terminal)
+    // but input from elsewhere (e.g: from a testing input stream) has not.
+    if (input_stream_ != &std::cin) fprintf(ostream_, "%s\n", line.c_str());
+
+    // Parse the command into tokens.
+    std::vector<std::string> tokenized_cmd = Tokenize(line);
+    if (!tokenized_cmd.empty()) {
+      done = ExecDebugCommand(tokenized_cmd);
+    }
+  }
+}
+
+
+std::optional<uint64_t> Debugger::ParseUint64String(std::string_view uint64_str,
+                                                    int base) {
+  // strtoull uses 0 to indicate that no conversion was possible so first
+  // check that the string isn't zero.
+  if (IsZeroUint64String(uint64_str, base)) {
+    return 0;
+  }
+
+  // Cannot use stoi as it might not be possible to use exceptions. strtoull
+  // needs a NUL-terminated buffer, so parse an owned copy of the view.
+  const std::string digits(uint64_str);
+  char* end;
+  errno = 0;  // Clear any previous errors so ERANGE is from this call.
+  uint64_t value = std::strtoull(digits.c_str(), &end, base);
+  if (value == 0 || *end != '\0' || errno == ERANGE) {
+    return std::nullopt;
+  }
+
+  return value;
+}
+
+
+// Parse "lr", "sp", or a prefix letter (W, X, V, Z or P, either case) followed
+// by a base-10 register number. Returns std::nullopt on malformed input.
+std::optional<Debugger::RegisterParsedFormat> Debugger::ParseRegString(
+    std::string_view reg_str) {
+  // A register should only have 2 (e.g: X0) or 3 (e.g: X31) characters.
+  if (reg_str.size() < 2 || reg_str.size() > 3) {
+    return std::nullopt;
+  }
+
+  // Check for aliases of registers.
+  if (reg_str == "lr") {
+    return {{'X', kLinkRegCode}};
+  } else if (reg_str == "sp") {
+    return {{'X', kSpRegCode}};
+  }
+
+  // <cctype> functions are undefined for negative values, so go via unsigned
+  // char in case the input holds a byte outside the ASCII range.
+  unsigned max_reg_num;
+  char reg_prefix = std::toupper(static_cast<unsigned char>(reg_str.front()));
+  switch (reg_prefix) {
+    case 'W':
+      VIXL_FALLTHROUGH();
+    case 'X':
+      max_reg_num = kNumberOfRegisters - 1;
+      break;
+    case 'V':
+      max_reg_num = kNumberOfVRegisters - 1;
+      break;
+    case 'Z':
+      max_reg_num = kNumberOfZRegisters - 1;
+      break;
+    case 'P':
+      max_reg_num = kNumberOfPRegisters - 1;
+      break;
+    default:
+      return std::nullopt;
+  }
+
+  std::string_view str_code = reg_str.substr(1, reg_str.size());
+  auto reg_code = ParseUint64String(str_code, 10);
+  if (!reg_code || *reg_code > max_reg_num) {
+    return std::nullopt;
+  }
+
+  return {{reg_prefix, static_cast<unsigned int>(*reg_code)}};
+}
+
+
+void Debugger::PrintUsage() {
+  // Each registered command is listed as:
+  //  foo / f
+  //      foo <arg>
+  //      A description of the foo command.
+  // where the alias, the argument line and the description are each left out
+  // when empty.
+  for (const auto& entry : debugger_cmds_) {
+    const std::string_view word = entry->GetCommandWord();
+    const std::string_view alias = entry->GetCommandAlias();
+    if (alias.empty()) {
+      fprintf(ostream_, "%s\n", word.data());
+    } else {
+      fprintf(ostream_, "%s / %s\n", word.data(), alias.data());
+    }
+
+    const std::string_view arguments = entry->GetArgsString();
+    if (!arguments.empty()) {
+      fprintf(ostream_, "\t%s %s\n", word.data(), arguments.data());
+    }
+
+    const std::string_view summary = entry->GetDescription();
+    if (!summary.empty()) {
+      fprintf(ostream_, "\t%s\n", summary.data());
+    }
+  }
+}
+
+
+// Split `input_line` on `separator`; leading, trailing and repeated
+// separators never produce empty tokens.
+std::vector<std::string> Debugger::Tokenize(std::string_view input_line,
+                                            char separator) {
+  std::vector<std::string> words;
+
+  for (auto separator_pos = input_line.find(separator);
+       separator_pos != input_line.npos;
+       separator_pos = input_line.find(separator)) {
+    // Skip consecutive, repeated separators.
+    if (separator_pos != 0) {
+      words.push_back(std::string{input_line.substr(0, separator_pos)});
+    }
+
+    // Remove characters up to and including the separator.
+    input_line.remove_prefix(separator_pos + 1);
+  }
+
+  // Add the tail, unless a trailing separator (or an empty line) left none.
+  if (!input_line.empty()) {
+    words.push_back(std::string{input_line});
+  }
+
+  return words;
+}
+
+
+DebugReturn Debugger::ExecDebugCommand(
+    const std::vector<std::string>& tokenized_cmd) {
+  // The first token selects the command; the rest are its arguments.
+  const std::string& name = tokenized_cmd.front();
+  for (const auto& candidate : debugger_cmds_) {
+    const bool is_word = (name == candidate->GetCommandWord());
+    if (!is_word && (name != candidate->GetCommandAlias())) continue;
+
+    // Hand everything after the command word to the matching handler.
+    const std::vector<std::string> arguments(tokenized_cmd.begin() + 1,
+                                             tokenized_cmd.end());
+    return candidate->Action(arguments);
+  }
+
+  fprintf(ostream_, "Error: command '%s' not found\n", name.c_str());
+  return DebugContinue;
+}
+
+
+// Report whether `uint64_str` spells the value zero in `base`: one or more '0'
+// digits, optionally preceded by "0x"/"0X" when base is 0 or 16.
+bool Debugger::IsZeroUint64String(std::string_view uint64_str, int base) {
+  // Strip a hexadecimal prefix where the base permits one.
+  const bool hex_allowed = (base == 0) || (base == 16);
+  if (hex_allowed) {
+    const std::string_view head = uint64_str.substr(0, 2);
+    if (head == "0x" || head == "0X") {
+      uint64_str.remove_prefix(2);
+    }
+  }
+
+  // With no digits left (an empty string, or a bare prefix) it is not zero.
+  if (uint64_str.empty()) {
+    return false;
+  }
+
+  // Zero means that every remaining character is the digit '0'.
+  const auto first_other = uint64_str.find_first_not_of('0');
+  const bool only_zeros = (first_other == std::string_view::npos);
+  return only_zeros;
+}
+
+
+DebuggerCmd::DebuggerCmd(Simulator* sim,
+                         std::string cmd_word,
+                         std::string cmd_alias,
+                         std::string args_str,
+                         std::string description)
+    : sim_(sim),
+      ostream_(sim->GetOutputStream()),  // Dereferences sim: must be non-null.
+      command_word_(cmd_word),
+      command_alias_(cmd_alias),
+      args_str_(args_str),
+      description_(description) {}
+
+
+DebugReturn HelpCmd::Action(const std::vector<std::string>& args) {
+  USE(args);  // `help` ignores any arguments.
+  sim_->GetDebugger()->PrintUsage();
+  return DebugContinue;
+}
+
+
+DebugReturn BreakCmd::Action(const std::vector<std::string>& args) {
+  // Toggle a breakpoint: exactly one argument is required and it must parse
+  // as an address; any other input reports the usage error.
+  std::optional<uint64_t> break_addr;
+  if (args.size() == 1) {
+    break_addr = Debugger::ParseUint64String(args.front());
+  }
+  if (!break_addr) {
+    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
+    return DebugContinue;
+  }
+
+  const uint64_t addr = *break_addr;
+  if (!sim_->GetDebugger()->IsBreakpoint(addr)) {
+    sim_->GetDebugger()->RegisterBreakpoint(addr);
+    fprintf(ostream_,
+            "Breakpoint successfully added at: 0x%" PRIx64 "\n",
+            addr);
+  } else {
+    sim_->GetDebugger()->RemoveBreakpoint(addr);
+    fprintf(ostream_,
+            "Breakpoint successfully removed at: 0x%" PRIx64 "\n",
+            addr);
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn StepCmd::Action(const std::vector<std::string>& args) {
+  // Handle `step [number]`: execute up to `number` instructions, or a single
+  // instruction when no count is given, stopping early if the simulation
+  // finishes.
+
+  // Step 1 instruction by default.
+  uint64_t remaining = 1;
+
+  // At most one argument is accepted and, if present, it must parse as an
+  // unsigned integer.
+  bool valid_args = (args.size() <= 1);
+  if (args.size() == 1) {
+    const auto requested = Debugger::ParseUint64String(args.front());
+    valid_args = requested.has_value();
+    if (valid_args) {
+      remaining = *requested;
+    }
+  }
+
+  if (!valid_args) {
+    fprintf(ostream_,
+            "Error: use `step [number]` to step an optional number of"
+            " instructions\n");
+    return DebugContinue;
+  }
+
+  for (; !sim_->IsSimulationFinished() && (remaining > 0); remaining--) {
+    sim_->ExecuteInstruction();
+
+    // The first instruction has already been printed by Debug(), so
+    // instruction tracing is only enabled once it has been executed.
+    sim_->SetTraceParameters(sim_->GetTraceParameters() | LOG_DISASM);
+  }
+
+  // Disable instruction tracing after all instructions have been executed.
+  sim_->SetTraceParameters(sim_->GetTraceParameters() & ~LOG_DISASM);
+
+  if (!sim_->IsSimulationFinished()) {
+    return DebugContinue;
+  }
+
+  fprintf(ostream_,
+          "Debugger at the end of simulation, leaving simulator...\n");
+  return DebugExit;
+}
+
+
+DebugReturn ContinueCmd::Action(const std::vector<std::string>& args) {
+  USE(args);
+  // Leave the interactive debugger; any arguments are ignored.
+  fprintf(ostream_, "Continuing...\n");
+
+  if (sim_->GetDebugger()->IsAtBreakpoint()) {
+    // This breakpoint has already been hit, so execute it before continuing.
+    sim_->ExecuteInstruction();
+  }
+
+  return DebugExit;
+}
+
+
+DebugReturn PrintCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() != 1) {
+    fprintf(ostream_,
+            "Error: use `print <register|all>` to print the contents of a"
+            " specific register or all registers.\n");
+    return DebugContinue;
+  }
+  // The argument is a register group keyword or the name of one register.
+  if (args.front() == "all") {
+    sim_->PrintRegisters();
+    sim_->PrintZRegisters();
+  } else if (args.front() == "system") {
+    sim_->PrintSystemRegisters();
+  } else if (args.front() == "ffr") {
+    sim_->PrintFFR();
+  } else {
+    auto reg = Debugger::ParseRegString(args.front());
+    if (!reg) {
+      fprintf(ostream_,
+              "Error: incorrect register format, use e.g: X0, x0, etc...\n");
+      return DebugContinue;
+    }
+
+    // Print SP instead of the zero register; only W and X registers alias it.
+    if (((*reg).first == 'W' || (*reg).first == 'X') &&
+        (*reg).second == kSpRegCode) {
+      (*reg).second = kSPRegInternalCode;
+    }
+    // Registers are printed in different ways depending on their type.
+    switch ((*reg).first) {
+      case 'W':
+        sim_->PrintRegister(
+            (*reg).second,
+            static_cast<Simulator::PrintRegisterFormat>(
+                Simulator::PrintRegisterFormat::kPrintWReg |
+                Simulator::PrintRegisterFormat::kPrintRegPartial));
+        break;
+      case 'X':
+        sim_->PrintRegister((*reg).second,
+                            Simulator::PrintRegisterFormat::kPrintXReg);
+        break;
+      case 'V':
+        sim_->PrintVRegister((*reg).second);
+        break;
+      case 'Z':
+        sim_->PrintZRegister((*reg).second);
+        break;
+      case 'P':
+        sim_->PrintPRegister((*reg).second);
+        break;
+      default:
+        // ParseRegString should only allow valid register characters.
+        VIXL_UNREACHABLE();
+    }
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn TraceCmd::Action(const std::vector<std::string>& args) {
+  if (!args.empty()) {
+    fprintf(ostream_, "Error: use `trace` to toggle tracing of registers.\n");
+    return DebugContinue;
+  }
+  // Toggle: switch LOG_ALL off if it is fully set, otherwise switch it on.
+  const int current_params = sim_->GetTraceParameters();
+  if ((current_params & LOG_ALL) == LOG_ALL) {
+    fprintf(ostream_,
+            "Disabling disassembly, registers and memory write tracing\n");
+    sim_->SetTraceParameters(current_params & ~LOG_ALL);
+  } else {
+    fprintf(ostream_,
+            "Enabling disassembly, registers and memory write tracing\n");
+    sim_->SetTraceParameters(current_params | LOG_ALL);
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn GdbCmd::Action(const std::vector<std::string>& args) {
+  // `gdb` takes no arguments: reject any, otherwise call HostBreakpoint().
+  if (args.empty()) {
+    HostBreakpoint();
+  } else {
+    fprintf(ostream_,
+            "Error: use `gdb` to enter GDB from the simulator debugger.\n");
+  }
+  return DebugContinue;
+}
+
+
+}  // namespace aarch64
+}  // namespace vixl
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
diff --git a/src/aarch64/debugger-aarch64.h b/src/aarch64/debugger-aarch64.h
new file mode 100644
index 0000000..ee5fa24
--- /dev/null
+++ b/src/aarch64/debugger-aarch64.h
@@ -0,0 +1,276 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_DEBUGGER_AARCH64_H_
+#define VIXL_AARCH64_DEBUGGER_AARCH64_H_
+
+#include <optional>
+#include <unordered_set>
+#include <vector>
+
+#include "../cpu-features.h"
+#include "../globals-vixl.h"
+#include "../utils-vixl.h"
+
+#include "abi-aarch64.h"
+#include "cpu-features-auditor-aarch64.h"
+#include "disasm-aarch64.h"
+#include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
+#include "simulator-constants-aarch64.h"
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+namespace vixl {
+namespace aarch64 {
+
+class Simulator;
+
+enum DebugReturn { DebugContinue, DebugExit };
+
+
+// A debugger command that performs some action when used by the simulator
+// debugger.
+class DebuggerCmd {
+ public:
+  DebuggerCmd(Simulator* sim,
+              std::string cmd_word,
+              std::string cmd_alias,
+              std::string usage,
+              std::string description);
+  virtual ~DebuggerCmd() {}
+
+  // Perform some action based on the arguments passed in. Returns DebugExit if
+  // the debugger should exit after the action, DebugContinue otherwise.
+  virtual DebugReturn Action(const std::vector<std::string>& args) = 0;
+
+  // Return the command word.
+  std::string_view GetCommandWord() { return command_word_; }
+  // Return the alias for this command. Returns an empty string if this command
+  // has no alias.
+  std::string_view GetCommandAlias() { return command_alias_; }
+  // Return the string showing the arguments this command accepts.
+  std::string_view GetArgsString() { return args_str_; }
+  // Return this command's description.
+  std::string_view GetDescription() { return description_; }
+
+ protected:
+  // Simulator which this command will be performed on.
+  Simulator* sim_;
+  // Stream to output the result of the command to.
+  FILE* ostream_;
+  // Command word that, when given to the interactive debugger, calls Action.
+  std::string command_word_;
+  // Optional alias for the command_word.
+  std::string command_alias_;
+  // Optional string showing the arguments that can be passed to the command.
+  std::string args_str_;
+  // Optional description of the command.
+  std::string description_;
+};
+
+
+//
+// Base debugger command handlers:
+//
+
+
+class HelpCmd : public DebuggerCmd {
+ public:
+  HelpCmd(Simulator* sim)
+      : DebuggerCmd(sim, "help", "h", "", "Display this help message.") {}
+  // Print the usage of each debugger command; arguments are ignored.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class BreakCmd : public DebuggerCmd {
+ public:
+  BreakCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "break",
+                    "b",
+                    "<address>",
+                    "Set or remove a breakpoint.") {}
+  // Toggle the breakpoint at <address>: remove it if set, otherwise add it.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class StepCmd : public DebuggerCmd {
+ public:
+  StepCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "step",
+                    "s",
+                    "[<n>]",
+                    "Step n instructions, default step 1 instruction.") {}
+  // Execute up to n instructions; returns DebugExit if the simulation ends.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class ContinueCmd : public DebuggerCmd {
+ public:
+  ContinueCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "continue",
+                    "c",
+                    "",
+                    "Exit the debugger and continue executing instructions.") {}
+  // Execute the instruction at the current breakpoint, if any, then exit.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class PrintCmd : public DebuggerCmd {
+ public:
+  PrintCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "print",
+                    "p",
+                    "<register|all|system>",
+                    "Print the contents of a register, all registers or all"
+                    " system registers.") {}
+  // Print one register, or the `all`, `system` or `ffr` register groups.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class TraceCmd : public DebuggerCmd {
+ public:
+  TraceCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "trace",
+                    "t",
+                    "",
+                    "Start/stop memory and register tracing.") {}
+  // Toggle the LOG_ALL trace parameters on the simulator; takes no arguments.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class GdbCmd : public DebuggerCmd {
+ public:
+  GdbCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "gdb",
+                    "g",
+                    "",
+                    "Enter an already running instance of gdb.") {}
+  // Call HostBreakpoint() and keep the debugger running; takes no arguments.
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+// A debugger for the Simulator which takes input from the user in order to
+// control the running of the Simulator.
+class Debugger {
+ public:
+  // A pair consisting of a register character (e.g: W, X, V) and a register
+  // code (e.g: 0, 1 ...31) which represents a single parsed register.
+  //
+  // Note: the register character is guaranteed to be upper case.
+  using RegisterParsedFormat = std::pair<char, unsigned>;
+
+  Debugger(Simulator* sim);
+
+  // Set the input stream, from which commands are read, to a custom stream.
+  void SetInputStream(std::istream* stream) { input_stream_ = stream; }
+
+  // Register a new command for the debugger.
+  template <class T>
+  void RegisterCmd();
+
+  // Set a breakpoint at the given address.
+  void RegisterBreakpoint(uint64_t addr) { breakpoints_.insert(addr); }
+  // Remove a breakpoint at the given address.
+  void RemoveBreakpoint(uint64_t addr) { breakpoints_.erase(addr); }
+  // Return true if the address is the location of a breakpoint.
+  bool IsBreakpoint(uint64_t addr) const {
+    return (breakpoints_.find(addr) != breakpoints_.end());
+  }
+  // Return true if the simulator pc is a breakpoint.
+  bool IsAtBreakpoint() const;
+
+  // Main loop for the debugger. Keep prompting for user inputted debugger
+  // commands and try to execute them until a command is given that exits the
+  // interactive debugger.
+  void Debug();
+
+  // Parse an unsigned integer value from a string. Base is used to determine
+  // the numeric base of the number to be read, i.e: 8 for octal, 10 for
+  // decimal, 16 for hexadecimal and 0 for auto-detect. Return the value, or
+  // an empty optional if no integer value was found.
+  static std::optional<uint64_t> ParseUint64String(std::string_view uint64_str,
+                                                   int base = 0);
+
+  // Parse a register from a string. Return the register character and code
+  // (e.g: W0, X29, V31) if a valid register was found, or an empty optional
+  // otherwise.
+  static std::optional<RegisterParsedFormat> ParseRegString(
+      std::string_view reg_str);
+
+  // Print the usage of each debugger command.
+  void PrintUsage();
+
+ private:
+  // Split a string based on the separator given (a single space character by
+  // default) and return as a std::vector of strings.
+  static std::vector<std::string> Tokenize(std::string_view input_line,
+                                           char separator = ' ');
+
+  // Try to execute a single debugger command.
+  DebugReturn ExecDebugCommand(const std::vector<std::string>& tokenized_cmd);
+
+  // Return true if the string is zero, i.e: all characters in the string
+  // (other than prefixes) are zero.
+  static bool IsZeroUint64String(std::string_view uint64_str, int base);
+
+  // The simulator that this debugger acts on.
+  Simulator* sim_;
+
+  // A vector of all commands recognised by the debugger.
+  std::vector<std::unique_ptr<DebuggerCmd>> debugger_cmds_;
+
+  // Input stream from which commands are read. Default is std::cin.
+  std::istream* input_stream_;
+
+  // Output stream from the simulator.
+  FILE* ostream_;
+
+  // A list of all instruction addresses that, when executed by the
+  // simulator, will start the interactive debugger if it hasn't already.
+  std::unordered_set<uint64_t> breakpoints_;
+};
+
+
+}  // namespace aarch64
+}  // namespace vixl
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#endif  // VIXL_AARCH64_DEBUGGER_AARCH64_H_
diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc
index a4e2989..4ff02c1 100644
--- a/src/aarch64/decoder-aarch64.cc
+++ b/src/aarch64/decoder-aarch64.cc
@@ -24,12 +24,13 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "decoder-aarch64.h"
+
 #include <string>
 
 #include "../globals-vixl.h"
 #include "../utils-vixl.h"
 
-#include "decoder-aarch64.h"
 #include "decoder-constants-aarch64.h"
 
 namespace vixl {
@@ -179,10 +180,10 @@
   // masked result.
   uint64_t signature = (static_cast<uint64_t>(y) << 32) | x;
   switch (signature) {
-    INSTANTIATE_TEMPLATE_M(00000001);
+    INSTANTIATE_TEMPLATE_M(00000002);
     INSTANTIATE_TEMPLATE_M(00000010);
-    INSTANTIATE_TEMPLATE_M(0000001f);
     INSTANTIATE_TEMPLATE_M(00000060);
+    INSTANTIATE_TEMPLATE_M(000000df);
     INSTANTIATE_TEMPLATE_M(00000100);
     INSTANTIATE_TEMPLATE_M(00000200);
     INSTANTIATE_TEMPLATE_M(00000400);
@@ -203,10 +204,10 @@
     INSTANTIATE_TEMPLATE_M(00003800);
     INSTANTIATE_TEMPLATE_M(00003c00);
     INSTANTIATE_TEMPLATE_M(00013000);
-    INSTANTIATE_TEMPLATE_M(00020000);
-    INSTANTIATE_TEMPLATE_M(00020010);
     INSTANTIATE_TEMPLATE_M(000203e0);
     INSTANTIATE_TEMPLATE_M(000303e0);
+    INSTANTIATE_TEMPLATE_M(00040000);
+    INSTANTIATE_TEMPLATE_M(00040010);
     INSTANTIATE_TEMPLATE_M(00060000);
     INSTANTIATE_TEMPLATE_M(00061000);
     INSTANTIATE_TEMPLATE_M(00070000);
@@ -217,19 +218,22 @@
     INSTANTIATE_TEMPLATE_M(000f0010);
     INSTANTIATE_TEMPLATE_M(00100000);
     INSTANTIATE_TEMPLATE_M(00180000);
-    INSTANTIATE_TEMPLATE_M(001d1c00);
+    INSTANTIATE_TEMPLATE_M(001b1c00);
     INSTANTIATE_TEMPLATE_M(001f0000);
+    INSTANTIATE_TEMPLATE_M(001f0018);
     INSTANTIATE_TEMPLATE_M(001f2000);
     INSTANTIATE_TEMPLATE_M(001f3000);
     INSTANTIATE_TEMPLATE_M(00400000);
+    INSTANTIATE_TEMPLATE_M(00400018);
     INSTANTIATE_TEMPLATE_M(00400800);
     INSTANTIATE_TEMPLATE_M(00403000);
+    INSTANTIATE_TEMPLATE_M(00500000);
     INSTANTIATE_TEMPLATE_M(00500800);
     INSTANTIATE_TEMPLATE_M(00583000);
     INSTANTIATE_TEMPLATE_M(005f0000);
     INSTANTIATE_TEMPLATE_M(00800000);
     INSTANTIATE_TEMPLATE_M(00800400);
-    INSTANTIATE_TEMPLATE_M(00800c1e);
+    INSTANTIATE_TEMPLATE_M(00800c1d);
     INSTANTIATE_TEMPLATE_M(0080101f);
     INSTANTIATE_TEMPLATE_M(00801c00);
     INSTANTIATE_TEMPLATE_M(00803000);
@@ -242,15 +246,15 @@
     INSTANTIATE_TEMPLATE_M(00c00200);
     INSTANTIATE_TEMPLATE_M(00c00400);
     INSTANTIATE_TEMPLATE_M(00c00c00);
-    INSTANTIATE_TEMPLATE_M(00c00c1c);
+    INSTANTIATE_TEMPLATE_M(00c00c19);
     INSTANTIATE_TEMPLATE_M(00c01000);
     INSTANTIATE_TEMPLATE_M(00c01400);
     INSTANTIATE_TEMPLATE_M(00c01c00);
     INSTANTIATE_TEMPLATE_M(00c02000);
     INSTANTIATE_TEMPLATE_M(00c03000);
     INSTANTIATE_TEMPLATE_M(00c03c00);
+    INSTANTIATE_TEMPLATE_M(00c70000);
     INSTANTIATE_TEMPLATE_M(00c83000);
-    INSTANTIATE_TEMPLATE_M(00cf0000);
     INSTANTIATE_TEMPLATE_M(00d00200);
     INSTANTIATE_TEMPLATE_M(00d80800);
     INSTANTIATE_TEMPLATE_M(00d81800);
@@ -260,9 +264,9 @@
     INSTANTIATE_TEMPLATE_M(00d92400);
     INSTANTIATE_TEMPLATE_M(00d93000);
     INSTANTIATE_TEMPLATE_M(00db0000);
+    INSTANTIATE_TEMPLATE_M(00db2000);
     INSTANTIATE_TEMPLATE_M(00dc0000);
     INSTANTIATE_TEMPLATE_M(00dc2000);
-    INSTANTIATE_TEMPLATE_M(00dd2000);
     INSTANTIATE_TEMPLATE_M(00df0000);
     INSTANTIATE_TEMPLATE_M(40000000);
     INSTANTIATE_TEMPLATE_M(40000010);
@@ -271,12 +275,11 @@
     INSTANTIATE_TEMPLATE_M(40002010);
     INSTANTIATE_TEMPLATE_M(40003000);
     INSTANTIATE_TEMPLATE_M(40003c00);
-    INSTANTIATE_TEMPLATE_M(400f0000);
-    INSTANTIATE_TEMPLATE_M(400f0400);
     INSTANTIATE_TEMPLATE_M(401f2000);
     INSTANTIATE_TEMPLATE_M(40400800);
     INSTANTIATE_TEMPLATE_M(40400c00);
     INSTANTIATE_TEMPLATE_M(40403c00);
+    INSTANTIATE_TEMPLATE_M(405f0000);
     INSTANTIATE_TEMPLATE_M(40800000);
     INSTANTIATE_TEMPLATE_M(40800c00);
     INSTANTIATE_TEMPLATE_M(40802000);
@@ -284,9 +287,10 @@
     INSTANTIATE_TEMPLATE_M(40803400);
     INSTANTIATE_TEMPLATE_M(40803c00);
     INSTANTIATE_TEMPLATE_M(40c00000);
+    INSTANTIATE_TEMPLATE_M(40c00400);
+    INSTANTIATE_TEMPLATE_M(40c00800);
     INSTANTIATE_TEMPLATE_M(40c00c00);
     INSTANTIATE_TEMPLATE_M(40c00c10);
-    INSTANTIATE_TEMPLATE_M(40c01c00);
     INSTANTIATE_TEMPLATE_M(40c02000);
     INSTANTIATE_TEMPLATE_M(40c02010);
     INSTANTIATE_TEMPLATE_M(40c02c00);
@@ -298,16 +302,18 @@
     INSTANTIATE_TEMPLATE_M(40d02010);
     INSTANTIATE_TEMPLATE_M(40d80000);
     INSTANTIATE_TEMPLATE_M(40d81800);
+    INSTANTIATE_TEMPLATE_M(40dc0000);
     INSTANTIATE_TEMPLATE_M(bf20c000);
-    INSTANTIATE_TEMPLATE_MV(00000003, 00000000);
-    INSTANTIATE_TEMPLATE_MV(00000003, 00000003);
+    INSTANTIATE_TEMPLATE_MV(00000006, 00000000);
+    INSTANTIATE_TEMPLATE_MV(00000006, 00000006);
+    INSTANTIATE_TEMPLATE_MV(00000007, 00000000);
     INSTANTIATE_TEMPLATE_MV(0000001f, 0000001f);
     INSTANTIATE_TEMPLATE_MV(00000210, 00000000);
     INSTANTIATE_TEMPLATE_MV(000003e0, 00000000);
     INSTANTIATE_TEMPLATE_MV(000003e0, 000003e0);
-    INSTANTIATE_TEMPLATE_MV(000003e1, 000003e0);
-    INSTANTIATE_TEMPLATE_MV(000003e3, 000003e0);
-    INSTANTIATE_TEMPLATE_MV(000003e3, 000003e3);
+    INSTANTIATE_TEMPLATE_MV(000003e2, 000003e0);
+    INSTANTIATE_TEMPLATE_MV(000003e6, 000003e0);
+    INSTANTIATE_TEMPLATE_MV(000003e6, 000003e6);
     INSTANTIATE_TEMPLATE_MV(00000c00, 00000000);
     INSTANTIATE_TEMPLATE_MV(00000fc0, 00000000);
     INSTANTIATE_TEMPLATE_MV(000013e0, 00001000);
@@ -318,11 +324,13 @@
     INSTANTIATE_TEMPLATE_MV(00003000, 00002000);
     INSTANTIATE_TEMPLATE_MV(00003000, 00003000);
     INSTANTIATE_TEMPLATE_MV(00003010, 00000000);
+    INSTANTIATE_TEMPLATE_MV(00003c00, 00003c00);
+    INSTANTIATE_TEMPLATE_MV(00040010, 00000000);
     INSTANTIATE_TEMPLATE_MV(00060000, 00000000);
     INSTANTIATE_TEMPLATE_MV(00061000, 00000000);
     INSTANTIATE_TEMPLATE_MV(00070000, 00030000);
-    INSTANTIATE_TEMPLATE_MV(0007309f, 0000001f);
     INSTANTIATE_TEMPLATE_MV(00073ee0, 00033060);
+    INSTANTIATE_TEMPLATE_MV(00073f9f, 0000001f);
     INSTANTIATE_TEMPLATE_MV(000f0000, 00000000);
     INSTANTIATE_TEMPLATE_MV(000f0010, 00000000);
     INSTANTIATE_TEMPLATE_MV(00100200, 00000000);
@@ -337,13 +345,13 @@
     INSTANTIATE_TEMPLATE_MV(001f0000, 00100000);
     INSTANTIATE_TEMPLATE_MV(001f0000, 001f0000);
     INSTANTIATE_TEMPLATE_MV(001f3000, 00000000);
+    INSTANTIATE_TEMPLATE_MV(001f3000, 00001000);
     INSTANTIATE_TEMPLATE_MV(001f3000, 001f0000);
     INSTANTIATE_TEMPLATE_MV(001f300f, 0000000d);
     INSTANTIATE_TEMPLATE_MV(001f301f, 0000000d);
     INSTANTIATE_TEMPLATE_MV(001f33e0, 000103e0);
     INSTANTIATE_TEMPLATE_MV(001f3800, 00000000);
     INSTANTIATE_TEMPLATE_MV(00401000, 00400000);
-    INSTANTIATE_TEMPLATE_MV(00403000, 00000000);
     INSTANTIATE_TEMPLATE_MV(005f3000, 001f0000);
     INSTANTIATE_TEMPLATE_MV(005f3000, 001f1000);
     INSTANTIATE_TEMPLATE_MV(00800010, 00000000);
@@ -366,6 +374,7 @@
     INSTANTIATE_TEMPLATE_MV(40002000, 40000000);
     INSTANTIATE_TEMPLATE_MV(40003c00, 00000000);
     INSTANTIATE_TEMPLATE_MV(40040000, 00000000);
+    INSTANTIATE_TEMPLATE_MV(401f2000, 401f0000);
     INSTANTIATE_TEMPLATE_MV(40800c00, 40000400);
     INSTANTIATE_TEMPLATE_MV(40c00000, 00000000);
     INSTANTIATE_TEMPLATE_MV(40c00000, 00400000);
@@ -466,7 +475,8 @@
 
     // Create a compiled node that contains a table with an entry for every bit
     // pattern.
-    CreateCompiledNode(bit_extract_fn, 1U << GetSampledBitsCount());
+    CreateCompiledNode(bit_extract_fn,
+                       static_cast<size_t>(1) << GetSampledBitsCount());
     VIXL_ASSERT(compiled_node_ != NULL);
 
     // When we find a pattern matches the representation, set the node's decode
diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h
index ddfdff6..af50a55 100644
--- a/src/aarch64/decoder-constants-aarch64.h
+++ b/src/aarch64/decoder-constants-aarch64.h
@@ -49,75 +49,49 @@
 
 // clang-format off
 static const DecodeMapping kDecodeMapping[] = {
-  { "_gggyqx",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtnu_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtnu_asimdmisc_r"},
-      {"1111001"_b, "fcvtpu_asimdmiscfp16_r"},
-      {"1x00001"_b, "fcvtpu_asimdmisc_r"},
-      {"xx10000"_b, "umaxv_asimdall_only"},
-      {"xx10001"_b, "uminv_asimdall_only"},
+  { "_ggvlym",
+    {13, 12},
+    { {"00"_b, "adc_32_addsub_carry"},
     },
   },
 
-  { "_ggvztl",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_qpzynz"},
+  { "_ghmtnl",
+    {18, 17},
+    { {"0x"_b, "ld1_asisdlsep_r3_r3"},
+      {"10"_b, "ld1_asisdlsep_r3_r3"},
+      {"11"_b, "ld1_asisdlsep_i3_i3"},
     },
   },
 
-  { "_ghmzhr",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "rbit_32_dp_1src"},
-      {"0000001"_b, "clz_32_dp_1src"},
+  { "_ghpxms",
+    {23, 22},
+    { {"01"_b, "fmla_z_p_zzz"},
+      {"1x"_b, "fmla_z_p_zzz"},
     },
   },
 
-  { "_ghnljt",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0000000"_b, "fcvtns_64s_float2int"},
-      {"0000001"_b, "fcvtnu_64s_float2int"},
-      {"0000010"_b, "scvtf_s64_float2int"},
-      {"0000011"_b, "ucvtf_s64_float2int"},
-      {"0000100"_b, "fcvtas_64s_float2int"},
-      {"0000101"_b, "fcvtau_64s_float2int"},
-      {"0001000"_b, "fcvtps_64s_float2int"},
-      {"0001001"_b, "fcvtpu_64s_float2int"},
-      {"0010000"_b, "fcvtms_64s_float2int"},
-      {"0010001"_b, "fcvtmu_64s_float2int"},
-      {"0011000"_b, "fcvtzs_64s_float2int"},
-      {"0011001"_b, "fcvtzu_64s_float2int"},
-      {"0100000"_b, "fcvtns_64d_float2int"},
-      {"0100001"_b, "fcvtnu_64d_float2int"},
-      {"0100010"_b, "scvtf_d64_float2int"},
-      {"0100011"_b, "ucvtf_d64_float2int"},
-      {"0100100"_b, "fcvtas_64d_float2int"},
-      {"0100101"_b, "fcvtau_64d_float2int"},
-      {"0100110"_b, "fmov_64d_float2int"},
-      {"0100111"_b, "fmov_d64_float2int"},
-      {"0101000"_b, "fcvtps_64d_float2int"},
-      {"0101001"_b, "fcvtpu_64d_float2int"},
-      {"0110000"_b, "fcvtms_64d_float2int"},
-      {"0110001"_b, "fcvtmu_64d_float2int"},
-      {"0111000"_b, "fcvtzs_64d_float2int"},
-      {"0111001"_b, "fcvtzu_64d_float2int"},
-      {"1001110"_b, "fmov_64vx_float2int"},
-      {"1001111"_b, "fmov_v64i_float2int"},
-      {"1100000"_b, "fcvtns_64h_float2int"},
-      {"1100001"_b, "fcvtnu_64h_float2int"},
-      {"1100010"_b, "scvtf_h64_float2int"},
-      {"1100011"_b, "ucvtf_h64_float2int"},
-      {"1100100"_b, "fcvtas_64h_float2int"},
-      {"1100101"_b, "fcvtau_64h_float2int"},
-      {"1100110"_b, "fmov_64h_float2int"},
-      {"1100111"_b, "fmov_h64_float2int"},
-      {"1101000"_b, "fcvtps_64h_float2int"},
-      {"1101001"_b, "fcvtpu_64h_float2int"},
-      {"1110000"_b, "fcvtms_64h_float2int"},
-      {"1110001"_b, "fcvtmu_64h_float2int"},
-      {"1111000"_b, "fcvtzs_64h_float2int"},
-      {"1111001"_b, "fcvtzu_64h_float2int"},
+  { "_ghqqzy",
+    {11},
+    { {"0"_b, "_qrsxzp"},
+    },
+  },
+
+  { "_ghrnmz",
+    {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
+    { {"000010011111"_b, "xpacd_64z_dp_1src"},
+    },
+  },
+
+  { "_gjprgr",
+    {22, 13, 12},
+    { {"000"_b, "ldsmax_64_memop"},
+      {"001"_b, "ldsmin_64_memop"},
+      {"010"_b, "ldumax_64_memop"},
+      {"011"_b, "ldumin_64_memop"},
+      {"100"_b, "ldsmaxl_64_memop"},
+      {"101"_b, "ldsminl_64_memop"},
+      {"110"_b, "ldumaxl_64_memop"},
+      {"111"_b, "lduminl_64_memop"},
     },
   },
 
@@ -127,150 +101,106 @@
     },
   },
 
-  { "_gjsnly",
-    {16, 13, 12},
-    { {"000"_b, "rev16_64_dp_1src"},
-      {"001"_b, "cls_64_dp_1src"},
-      {"100"_b, "pacib_64p_dp_1src"},
-      {"101"_b, "autib_64p_dp_1src"},
-      {"110"_b, "_ksvxxm"},
-      {"111"_b, "_xsgxyy"},
+  { "_gjtmjg",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "bic_asimdimm_l_hl"},
+      {"00x100"_b, "uqshrn_asimdshf_n"},
+      {"00x101"_b, "uqrshrn_asimdshf_n"},
+      {"010x00"_b, "uqshrn_asimdshf_n"},
+      {"010x01"_b, "uqrshrn_asimdshf_n"},
+      {"011100"_b, "uqshrn_asimdshf_n"},
+      {"011101"_b, "uqrshrn_asimdshf_n"},
+      {"0x1000"_b, "uqshrn_asimdshf_n"},
+      {"0x1001"_b, "uqrshrn_asimdshf_n"},
     },
   },
 
-  { "_gjylrt",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "fcvtns_32h_float2int"},
-      {"00001"_b, "fcvtnu_32h_float2int"},
-      {"00010"_b, "scvtf_h32_float2int"},
-      {"00011"_b, "ucvtf_h32_float2int"},
-      {"00100"_b, "fcvtas_32h_float2int"},
-      {"00101"_b, "fcvtau_32h_float2int"},
-      {"00110"_b, "fmov_32h_float2int"},
-      {"00111"_b, "fmov_h32_float2int"},
-      {"01000"_b, "fcvtps_32h_float2int"},
-      {"01001"_b, "fcvtpu_32h_float2int"},
-      {"10000"_b, "fcvtms_32h_float2int"},
-      {"10001"_b, "fcvtmu_32h_float2int"},
-      {"11000"_b, "fcvtzs_32h_float2int"},
-      {"11001"_b, "fcvtzu_32h_float2int"},
+  { "_gjxsrn",
+    {2, 1, 0},
+    { {"000"_b, "_sqttsv"},
     },
   },
 
-  { "_gkhhjm",
-    {30, 23, 22},
-    { {"000"_b, "sbfm_32m_bitfield"},
-      {"100"_b, "ubfm_32m_bitfield"},
+  { "_gknljg",
+    {11, 10, 9, 8, 7, 6},
+    { {"000000"_b, "wfet_only_systeminstrswithreg"},
     },
   },
 
-  { "_gkkpjz",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtmu_asisdmiscfp16_r"},
-      {"0x00001"_b, "fcvtmu_asisdmisc_r"},
-      {"1111001"_b, "fcvtzu_asisdmiscfp16_r"},
-      {"1x00001"_b, "fcvtzu_asisdmisc_r"},
-      {"xx00000"_b, "neg_asisdmisc_r"},
+  { "_gkqhyz",
+    {23, 22},
+    { {"00"_b, "fmsub_s_floatdp3"},
+      {"01"_b, "fmsub_d_floatdp3"},
+      {"11"_b, "fmsub_h_floatdp3"},
     },
   },
 
-  { "_gkpvxz",
-    {10},
-    { {"0"_b, "blraa_64p_branch_reg"},
-      {"1"_b, "blrab_64p_branch_reg"},
+  { "_glgznt",
+    {20, 19, 18, 17, 16, 4, 3},
+    { {"0000001"_b, "fcmp_dz_floatcmp"},
+      {"0000011"_b, "fcmpe_dz_floatcmp"},
+      {"xxxxx00"_b, "fcmp_d_floatcmp"},
+      {"xxxxx10"_b, "fcmpe_d_floatcmp"},
     },
   },
 
-  { "_gkpzhr",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "fnmsub_s_floatdp3"},
-      {"001xxxx"_b, "fnmsub_d_floatdp3"},
-      {"011xxxx"_b, "fnmsub_h_floatdp3"},
-      {"10001x0"_b, "fmul_asisdelem_rh_h"},
-      {"10x0101"_b, "sqshrn_asisdshf_n"},
-      {"10x0111"_b, "sqrshrn_asisdshf_n"},
-      {"11x01x0"_b, "fmul_asisdelem_r_sd"},
-      {"1xx11x0"_b, "sqdmull_asisdelem_l"},
+  { "_gljqng",
+    {22, 13, 12},
+    { {"000"_b, "ldsmaxa_32_memop"},
+      {"001"_b, "ldsmina_32_memop"},
+      {"010"_b, "ldumaxa_32_memop"},
+      {"011"_b, "ldumina_32_memop"},
+      {"100"_b, "ldsmaxal_32_memop"},
+      {"101"_b, "ldsminal_32_memop"},
+      {"110"_b, "ldumaxal_32_memop"},
+      {"111"_b, "lduminal_32_memop"},
     },
   },
 
-  { "_gkxgsn",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "stlur_32_ldapstl_unscaled"},
-      {"00100"_b, "ldapur_32_ldapstl_unscaled"},
-      {"01000"_b, "ldapursw_64_ldapstl_unscaled"},
-      {"10000"_b, "stlur_64_ldapstl_unscaled"},
-      {"10100"_b, "ldapur_64_ldapstl_unscaled"},
-    },
-  },
-
-  { "_glgrjy",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0000000"_b, "not_asimdmisc_r"},
-      {"0100000"_b, "rbit_asimdmisc_r"},
-    },
-  },
-
-  { "_glhxyj",
-    {17},
-    { {"0"_b, "ld3_asisdlsop_bx3_r3b"},
-      {"1"_b, "ld3_asisdlsop_b3_i3b"},
-    },
-  },
-
-  { "_glkzlv",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "rev16_asimdmisc_r"},
-    },
-  },
-
-  { "_gmjhll",
-    {17},
-    { {"0"_b, "st1_asisdlsep_r4_r4"},
-      {"1"_b, "st1_asisdlsep_i4_i4"},
-    },
-  },
-
-  { "_gmrxlp",
+  { "_glkvkr",
     {30},
-    { {"0"_b, "orr_32_log_shift"},
-      {"1"_b, "ands_32_log_shift"},
+    { {"0"_b, "adds_32_addsub_shift"},
+      {"1"_b, "subs_32_addsub_shift"},
     },
   },
 
-  { "_gmrxqq",
-    {30, 23, 22},
-    { {"000"_b, "stp_q_ldstpair_off"},
-      {"001"_b, "ldp_q_ldstpair_off"},
-      {"010"_b, "stp_q_ldstpair_pre"},
-      {"011"_b, "ldp_q_ldstpair_pre"},
+  { "_glpxty",
+    {20, 19, 18, 17, 16},
+    { {"00010"_b, "scvtf_s32_float2fix"},
+      {"00011"_b, "ucvtf_s32_float2fix"},
+      {"11000"_b, "fcvtzs_32s_float2fix"},
+      {"11001"_b, "fcvtzu_32s_float2fix"},
     },
   },
 
-  { "_gmsgqz",
-    {30, 23, 22},
-    { {"100"_b, "eor3_vvv16_crypto4"},
-      {"101"_b, "sm3ss1_vvv4_crypto4"},
-      {"110"_b, "xar_vvv2_crypto3_imm6"},
+  { "_gmqyjv",
+    {30, 20, 19, 18, 17, 16, 13},
+    { {"1111110"_b, "_nvkxzs"},
     },
   },
 
-  { "_gmvjgn",
+  { "_gmsmls",
+    {13},
+    { {"0"_b, "mls_asimdelem_r"},
+      {"1"_b, "umlsl_asimdelem_l"},
+    },
+  },
+
+  { "_gmsqqz",
     {23},
-    { {"0"_b, "fmax_asimdsame_only"},
-      {"1"_b, "fmin_asimdsame_only"},
+    { {"0"_b, "facge_asimdsame_only"},
+      {"1"_b, "facgt_asimdsame_only"},
     },
   },
 
-  { "_gmvrxn",
-    {18, 17, 12},
-    { {"000"_b, "st4_asisdlso_d4_4d"},
-    },
-  },
-
-  { "_gmvtss",
-    {30},
-    { {"0"_b, "ldr_q_loadlit"},
+  { "_gmtjvr",
+    {16, 13, 12},
+    { {"000"_b, "rev_64_dp_1src"},
+      {"001"_b, "cnt_64_dp_1src"},
+      {"100"_b, "pacdb_64p_dp_1src"},
+      {"101"_b, "autdb_64p_dp_1src"},
+      {"110"_b, "_rlxhxz"},
+      {"111"_b, "_phjkhr"},
     },
   },
 
@@ -281,6 +211,25 @@
     },
   },
 
+  { "_gnhjkl",
+    {16, 13, 12},
+    { {"000"_b, "rbit_64_dp_1src"},
+      {"001"_b, "clz_64_dp_1src"},
+      {"010"_b, "abs_64_dp_1src"},
+      {"100"_b, "pacia_64p_dp_1src"},
+      {"101"_b, "autia_64p_dp_1src"},
+      {"110"_b, "_yzxjnk"},
+      {"111"_b, "_prxyhr"},
+    },
+  },
+
+  { "_gnpgsg",
+    {22},
+    { {"0"_b, "str_64_ldst_regoff"},
+      {"1"_b, "ldr_64_ldst_regoff"},
+    },
+  },
+
   { "_gnqhsl",
     {23, 22, 20, 19, 18, 17, 16},
     { {"0010000"_b, "punpklo_p_p"},
@@ -290,38 +239,23 @@
     },
   },
 
-  { "_gnqjhz",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "rev16_32_dp_1src"},
-      {"0000001"_b, "cls_32_dp_1src"},
+  { "_gnxrlr",
+    {23, 22, 13, 12, 11, 10},
+    { {"0011x0"_b, "sudot_asimdelem_d"},
+      {"0111x0"_b, "bfdot_asimdelem_e"},
+      {"0x1001"_b, "scvtf_asimdshf_c"},
+      {"0x1111"_b, "fcvtzs_asimdshf_c"},
+      {"1011x0"_b, "usdot_asimdelem_d"},
+      {"1111x0"_b, "bfmlal_asimdelem_f"},
+      {"xx00x0"_b, "sqdmulh_asimdelem_r"},
+      {"xx01x0"_b, "sqrdmulh_asimdelem_r"},
+      {"xx10x0"_b, "sdot_asimdelem_d"},
     },
   },
 
-  { "_gntpyh",
-    {23, 13, 12, 11, 10},
-    { {"00010"_b, "_gqspys"},
-      {"00110"_b, "_ymgrgx"},
-      {"01001"_b, "fcmge_asisdsame_only"},
-      {"01011"_b, "facge_asisdsame_only"},
-      {"01110"_b, "_kjyphv"},
-      {"10010"_b, "_myjqrl"},
-      {"10101"_b, "fabd_asisdsame_only"},
-      {"10110"_b, "_vlsmsn"},
-      {"11001"_b, "fcmgt_asisdsame_only"},
-      {"11011"_b, "facgt_asisdsame_only"},
-      {"11110"_b, "_pxtsvn"},
-    },
-  },
-
-  { "_gnxgxs",
-    {30, 18},
-    { {"00"_b, "_krlpjl"},
-    },
-  },
-
-  { "_gnytkh",
-    {1, 0},
-    { {"11"_b, "braaz_64_branch_reg"},
+  { "_gplkxy",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "sqneg_asimdmisc_r"},
     },
   },
 
@@ -331,55 +265,95 @@
     },
   },
 
-  { "_gqspys",
-    {22, 20, 19, 18, 17, 16},
-    { {"111001"_b, "fcvtau_asisdmiscfp16_r"},
-      {"x00001"_b, "fcvtau_asisdmisc_r"},
-      {"x10000"_b, "fmaxnmp_asisdpair_only_sd"},
+  { "_gqmjys",
+    {18, 17},
+    { {"0x"_b, "st1_asisdlsop_sx1_r1s"},
+      {"10"_b, "st1_asisdlsop_sx1_r1s"},
+      {"11"_b, "st1_asisdlsop_s1_i1s"},
     },
   },
 
-  { "_gqykqv",
-    {23, 22, 12},
-    { {"000"_b, "_rjmyyl"},
-      {"001"_b, "_zqltpy"},
-      {"010"_b, "_hstvrp"},
-      {"011"_b, "_yhqyzj"},
-      {"110"_b, "_mxtskk"},
-      {"111"_b, "_qmjqhq"},
+  { "_grgrpt",
+    {18},
+    { {"1"_b, "fmaxv_v_p_z"},
     },
   },
 
-  { "_grqnlm",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "fnmadd_s_floatdp3"},
-      {"001xxxx"_b, "fnmadd_d_floatdp3"},
-      {"011xxxx"_b, "fnmadd_h_floatdp3"},
-      {"10001x0"_b, "fmla_asisdelem_rh_h"},
-      {"10x0001"_b, "sshr_asisdshf_r"},
-      {"10x0101"_b, "ssra_asisdshf_r"},
-      {"10x1001"_b, "srshr_asisdshf_r"},
-      {"10x1101"_b, "srsra_asisdshf_r"},
-      {"11x01x0"_b, "fmla_asisdelem_r_sd"},
-      {"1xx11x0"_b, "sqdmlal_asisdelem_l"},
-    },
-  },
-
-  { "_grrjlh",
+  { "_grjzyl",
     {30},
-    { {"1"_b, "_jlqxvj"},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_hjtsgj"},
     },
   },
 
-  { "_grxzzg",
-    {23, 22},
-    { {"00"_b, "tbx_asimdtbl_l2_2"},
+  { "_grktgm",
+    {30, 23, 22, 19},
+    { {"1001"_b, "aesd_b_cryptoaes"},
+      {"xxx0"_b, "cnt_asimdmisc_r"},
     },
   },
 
-  { "_gsgzpg",
-    {17},
-    { {"0"_b, "ld2_asisdlso_h2_2h"},
+  { "_grmpht",
+    {20, 18, 17},
+    { {"000"_b, "_mjjhqj"},
+    },
+  },
+
+  { "_grprpj",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldarb_lr32_ldstexcl"},
+    },
+  },
+
+  { "_grqsgp",
+    {23, 22, 4, 3, 2, 1, 0},
+    { {"0000001"_b, "svc_ex_exception"},
+      {"0000010"_b, "hvc_ex_exception"},
+      {"0000011"_b, "smc_ex_exception"},
+      {"0100000"_b, "hlt_ex_exception"},
+    },
+  },
+
+  { "_grsnms",
+    {20, 19, 18, 17, 16},
+    { {"00010"_b, "scvtf_h32_float2fix"},
+      {"00011"_b, "ucvtf_h32_float2fix"},
+      {"11000"_b, "fcvtzs_32h_float2fix"},
+      {"11001"_b, "fcvtzu_32h_float2fix"},
+    },
+  },
+
+  { "_grsslr",
+    {30, 23, 22, 11, 10, 4},
+    { {"001000"_b, "ccmn_32_condcmp_reg"},
+      {"001100"_b, "ccmn_32_condcmp_imm"},
+      {"101000"_b, "ccmp_32_condcmp_reg"},
+      {"101100"_b, "ccmp_32_condcmp_imm"},
+    },
+  },
+
+  { "_grvxrm",
+    {12},
+    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+    },
+  },
+
+  { "_gshlgj",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"100xxx1"_b, "ins_asimdins_iv_v"},
+      {"x00xxx0"_b, "ext_asimdext_only"},
+      {"x010001"_b, "fmaxnmp_asimdsamefp16_only"},
+      {"x010101"_b, "faddp_asimdsamefp16_only"},
+      {"x010111"_b, "fmul_asimdsamefp16_only"},
+      {"x011001"_b, "fcmge_asimdsamefp16_only"},
+      {"x011011"_b, "facge_asimdsamefp16_only"},
+      {"x011101"_b, "fmaxp_asimdsamefp16_only"},
+      {"x011111"_b, "fdiv_asimdsamefp16_only"},
+      {"x110001"_b, "fminnmp_asimdsamefp16_only"},
+      {"x110101"_b, "fabd_asimdsamefp16_only"},
+      {"x111001"_b, "fcmgt_asimdsamefp16_only"},
+      {"x111011"_b, "facgt_asimdsamefp16_only"},
+      {"x111101"_b, "fminp_asimdsamefp16_only"},
     },
   },
 
@@ -390,53 +364,69 @@
     },
   },
 
-  { "_gskkxk",
-    {17},
-    { {"0"_b, "st1_asisdlso_h1_1h"},
-    },
-  },
-
-  { "_gsttpm",
+  { "_gsjvmx",
     {12},
-    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
     },
   },
 
-  { "_gszlvl",
-    {30},
-    { {"0"_b, "_tvsszp"},
-      {"1"_b, "_njtngm"},
+  { "_gslmjl",
+    {23, 22},
+    { {"00"_b, "fcsel_s_floatsel"},
+      {"01"_b, "fcsel_d_floatsel"},
+      {"11"_b, "fcsel_h_floatsel"},
     },
   },
 
-  { "_gszxkp",
-    {13, 12},
-    { {"11"_b, "cmgt_asisdsame_only"},
-    },
-  },
-
-  { "_gtjskz",
+  { "_gsnnnt",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"1011011"_b, "bfmmla_asimdsame2_e"},
-      {"x011111"_b, "bfdot_asimdsame2_d"},
-      {"x111111"_b, "bfmlal_asimdsame2_f"},
-      {"xxx0xx1"_b, "fcmla_asimdsame2_c"},
-      {"xxx1x01"_b, "fcadd_asimdsame2_c"},
+    { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"},
+      {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"},
+      {"100xx00"_b, "stlurh_32_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"},
+      {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"},
+      {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfprn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtrn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtrn_cpy_memcms"},
+      {"x001101"_b, "cpyfptrn_cpy_memcms"},
+      {"x010001"_b, "cpyfmrn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtrn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtrn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtrn_cpy_memcms"},
+      {"x100001"_b, "cpyfern_cpy_memcms"},
+      {"x100101"_b, "cpyfewtrn_cpy_memcms"},
+      {"x101001"_b, "cpyfertrn_cpy_memcms"},
+      {"x101101"_b, "cpyfetrn_cpy_memcms"},
+      {"x110001"_b, "sete_set_memcms"},
+      {"x110101"_b, "setet_set_memcms"},
+      {"x111001"_b, "seten_set_memcms"},
+      {"x111101"_b, "setetn_set_memcms"},
     },
   },
 
-  { "_gttglx",
-    {17},
-    { {"0"_b, "st4_asisdlso_h4_4h"},
+  { "_gsvlph",
+    {22, 4, 3},
+    { {"00x"_b, "prfm_p_ldst_regoff"},
+      {"010"_b, "prfm_p_ldst_regoff"},
+      {"011"_b, "rprfm_r_ldst_regoff"},
     },
   },
 
-  { "_gtvhmp",
-    {30, 13},
-    { {"00"_b, "_rjyrnt"},
-      {"01"_b, "_mzhsrq"},
-      {"10"_b, "_xtzlzy"},
-      {"11"_b, "_kqxhzx"},
+  { "_gtqnvr",
+    {30, 23, 22},
+    { {"000"_b, "msub_32a_dp_3src"},
+    },
+  },
+
+  { "_gtsglj",
+    {11, 10, 9, 8, 7, 6},
+    { {"000001"_b, "tcommit_only_barriers"},
+      {"000011"_b, "sb_only_barriers"},
+      {"xx1000"_b, "dsb_bon_barriers"},
+      {"xxxx10"_b, "dmb_bo_barriers"},
     },
   },
 
@@ -453,40 +443,21 @@
     },
   },
 
-  { "_gvjgyp",
-    {23, 22, 13, 12, 11, 10},
-    { {"0001x0"_b, "fmls_asimdelem_rh_h"},
-      {"0x0101"_b, "shl_asimdshf_r"},
-      {"0x1101"_b, "sqshl_asimdshf_r"},
-      {"1000x0"_b, "fmlsl_asimdelem_lh"},
-      {"1x01x0"_b, "fmls_asimdelem_r_sd"},
-      {"xx10x0"_b, "smlsl_asimdelem_l"},
-      {"xx11x0"_b, "sqdmlsl_asimdelem_l"},
+  { "_gvpvjn",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "rev_32_dp_1src"},
+      {"0000001"_b, "ctz_32_dp_1src"},
     },
   },
 
-  { "_gvstrp",
-    {17},
-    { {"0"_b, "ld2_asisdlsop_bx2_r2b"},
-      {"1"_b, "ld2_asisdlsop_b2_i2b"},
-    },
-  },
-
-  { "_gvykrp",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"10001x0"_b, "fmulx_asisdelem_rh_h"},
-      {"10x0001"_b, "sqshrun_asisdshf_n"},
-      {"10x0011"_b, "sqrshrun_asisdshf_n"},
-      {"10x0101"_b, "uqshrn_asisdshf_n"},
-      {"10x0111"_b, "uqrshrn_asisdshf_n"},
-      {"11x01x0"_b, "fmulx_asisdelem_r_sd"},
-    },
-  },
-
-  { "_gxlvsg",
-    {13},
-    { {"0"_b, "_vpxvjs"},
-      {"1"_b, "_lpslrz"},
+  { "_gvxjvz",
+    {23, 22, 12},
+    { {"000"_b, "_tgvkhm"},
+      {"001"_b, "_ktyrgy"},
+      {"010"_b, "_gxzgtk"},
+      {"011"_b, "_vlxrps"},
+      {"110"_b, "_jqrmyp"},
+      {"111"_b, "_ssypmm"},
     },
   },
 
@@ -497,39 +468,57 @@
     },
   },
 
-  { "_gxnlxg",
-    {20, 19, 18, 17, 16},
-    { {"00001"_b, "uqxtn_asisdmisc_n"},
+  { "_gxqnph",
+    {23, 22, 13, 12, 11, 10},
+    { {"0x1001"_b, "ucvtf_asimdshf_c"},
+      {"0x1111"_b, "fcvtzu_asimdshf_c"},
+      {"1000x0"_b, "fmlsl2_asimdelem_lh"},
+      {"xx01x0"_b, "sqrdmlah_asimdelem_r"},
+      {"xx10x0"_b, "udot_asimdelem_d"},
+      {"xx11x0"_b, "sqrdmlsh_asimdelem_r"},
     },
   },
 
-  { "_gxslgq",
-    {23, 22, 20, 19, 17, 16},
-    { {"000010"_b, "scvtf_s32_float2fix"},
-      {"000011"_b, "ucvtf_s32_float2fix"},
-      {"001100"_b, "fcvtzs_32s_float2fix"},
-      {"001101"_b, "fcvtzu_32s_float2fix"},
-      {"010010"_b, "scvtf_d32_float2fix"},
-      {"010011"_b, "ucvtf_d32_float2fix"},
-      {"011100"_b, "fcvtzs_32d_float2fix"},
-      {"011101"_b, "fcvtzu_32d_float2fix"},
-      {"110010"_b, "scvtf_h32_float2fix"},
-      {"110011"_b, "ucvtf_h32_float2fix"},
-      {"111100"_b, "fcvtzs_32h_float2fix"},
-      {"111101"_b, "fcvtzu_32h_float2fix"},
+  { "_gxzgtk",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fabs_d_floatdp1"},
+      {"000010"_b, "fsqrt_d_floatdp1"},
+      {"000110"_b, "fcvt_hd_floatdp1"},
+      {"001000"_b, "frintp_d_floatdp1"},
+      {"001010"_b, "frintz_d_floatdp1"},
+      {"001110"_b, "frinti_d_floatdp1"},
+      {"010000"_b, "frint32x_d_floatdp1"},
+      {"010010"_b, "frint64x_d_floatdp1"},
     },
   },
 
-  { "_gygnsz",
-    {17},
-    { {"0"_b, "ld2_asisdlsop_hx2_r2h"},
-      {"1"_b, "ld2_asisdlsop_h2_i2h"},
+  { "_gyjphh",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "_plgrmv"},
+      {"00001"_b, "_xmxhhg"},
+      {"00100"_b, "_lmmjvx"},
+      {"00110"_b, "_tmtgqm"},
+      {"01100"_b, "_hvmyjz"},
+      {"10000"_b, "_mgtxyt"},
+      {"10100"_b, "_rkzlpp"},
+      {"10110"_b, "_xqrgjj"},
     },
   },
 
-  { "_gymljg",
+  { "_gyllxt",
     {23},
-    { {"0"_b, "fmulx_asimdsame_only"},
+    { {"0"_b, "_hzkxht"},
+    },
+  },
+
+  { "_gylmmr",
+    {30, 23, 22, 11, 10},
+    { {"00010"_b, "str_b_ldst_regoff"},
+      {"00110"_b, "ldr_b_ldst_regoff"},
+      {"01010"_b, "str_q_ldst_regoff"},
+      {"01110"_b, "ldr_q_ldst_regoff"},
+      {"10010"_b, "str_h_ldst_regoff"},
+      {"10110"_b, "ldr_h_ldst_regoff"},
     },
   },
 
@@ -552,11 +541,84 @@
     },
   },
 
-  { "_gznnvh",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "frinta_asimdmiscfp16_r"},
-      {"0x00001"_b, "frinta_asimdmisc_r"},
-      {"xx00000"_b, "cmge_asimdmisc_z"},
+  { "_gyrkkz",
+    {30, 22, 11},
+    { {"000"_b, "_nqjvmr"},
+      {"001"_b, "_jjnvrv"},
+      {"010"_b, "_yptgjg"},
+      {"011"_b, "_vsyjql"},
+      {"100"_b, "_lzqxgt"},
+      {"110"_b, "_xvrvhv"},
+      {"111"_b, "_ptstkz"},
+    },
+  },
+
+  { "_gyymmx",
+    {30, 13, 12},
+    { {"000"_b, "stilp_32se_ldiappstilp"},
+      {"001"_b, "stilp_32s_ldiappstilp"},
+      {"100"_b, "stilp_64ss_ldiappstilp"},
+      {"101"_b, "stilp_64s_ldiappstilp"},
+    },
+  },
+
+  { "_gzgpjp",
+    {23},
+    { {"0"_b, "fmaxp_asimdsame_only"},
+      {"1"_b, "fminp_asimdsame_only"},
+    },
+  },
+
+  { "_gznrjv",
+    {30, 23, 22, 19, 16},
+    { {"10010"_b, "aese_b_cryptoaes"},
+      {"xxx00"_b, "cls_asimdmisc_r"},
+      {"xxx01"_b, "sqxtn_asimdmisc_n"},
+    },
+  },
+
+  { "_gzpkvm",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000000"_b, "swpb_32_memop"},
+      {"0000100"_b, "rcwclr_64_memop"},
+      {"0001000"_b, "rcwswp_64_memop"},
+      {"0001100"_b, "rcwset_64_memop"},
+      {"000xx10"_b, "strb_32b_ldst_regoff"},
+      {"0010000"_b, "swplb_32_memop"},
+      {"0010100"_b, "rcwclrl_64_memop"},
+      {"0011000"_b, "rcwswpl_64_memop"},
+      {"0011100"_b, "rcwsetl_64_memop"},
+      {"001xx10"_b, "ldrb_32b_ldst_regoff"},
+      {"0100000"_b, "swpab_32_memop"},
+      {"0100100"_b, "rcwclra_64_memop"},
+      {"0101000"_b, "rcwswpa_64_memop"},
+      {"0101100"_b, "rcwseta_64_memop"},
+      {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
+      {"0110000"_b, "swpalb_32_memop"},
+      {"0110100"_b, "rcwclral_64_memop"},
+      {"0111000"_b, "rcwswpal_64_memop"},
+      {"0111100"_b, "rcwsetal_64_memop"},
+      {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
+      {"1000000"_b, "swph_32_memop"},
+      {"1000100"_b, "rcwsclr_64_memop"},
+      {"1001000"_b, "rcwsswp_64_memop"},
+      {"1001100"_b, "rcwsset_64_memop"},
+      {"100xx10"_b, "strh_32_ldst_regoff"},
+      {"1010000"_b, "swplh_32_memop"},
+      {"1010100"_b, "rcwsclrl_64_memop"},
+      {"1011000"_b, "rcwsswpl_64_memop"},
+      {"1011100"_b, "rcwssetl_64_memop"},
+      {"101xx10"_b, "ldrh_32_ldst_regoff"},
+      {"1100000"_b, "swpah_32_memop"},
+      {"1100100"_b, "rcwsclra_64_memop"},
+      {"1101000"_b, "rcwsswpa_64_memop"},
+      {"1101100"_b, "rcwsseta_64_memop"},
+      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+      {"1110000"_b, "swpalh_32_memop"},
+      {"1110100"_b, "rcwsclral_64_memop"},
+      {"1111000"_b, "rcwsswpal_64_memop"},
+      {"1111100"_b, "rcwssetal_64_memop"},
+      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
     },
   },
 
@@ -569,30 +631,33 @@
     },
   },
 
-  { "_gzvgmh",
-    {18, 17, 12},
-    { {"0x0"_b, "ld4_asisdlsop_dx4_r4d"},
-      {"100"_b, "ld4_asisdlsop_dx4_r4d"},
-      {"110"_b, "ld4_asisdlsop_d4_i4d"},
-    },
-  },
-
-  { "_gzylzp",
-    {17},
-    { {"0"_b, "st3_asisdlsop_hx3_r3h"},
-      {"1"_b, "st3_asisdlsop_h3_i3h"},
-    },
-  },
-
-  { "_hggmnk",
-    {13, 12},
-    { {"10"_b, "lslv_32_dp_2src"},
-    },
-  },
-
-  { "_hgxqpp",
+  { "_gzrtkk",
     {18, 17},
-    { {"00"_b, "st3_asisdlso_s3_3s"},
+    { {"0x"_b, "ld1_asisdlsep_r1_r1"},
+      {"10"_b, "ld1_asisdlsep_r1_r1"},
+      {"11"_b, "ld1_asisdlsep_i1_i1"},
+    },
+  },
+
+  { "_gzvylr",
+    {30, 13},
+    { {"00"_b, "_rjyrnt"},
+      {"01"_b, "_mzhsrq"},
+      {"10"_b, "_prtvjm"},
+      {"11"_b, "_zspprz"},
+    },
+  },
+
+  { "_gzzsgh",
+    {18},
+    { {"0"_b, "ld3_asisdlso_b3_3b"},
+    },
+  },
+
+  { "_hgjgpm",
+    {30},
+    { {"0"_b, "bic_64_log_shift"},
+      {"1"_b, "eon_64_log_shift"},
     },
   },
 
@@ -609,18 +674,6 @@
     },
   },
 
-  { "_hhhqjk",
-    {4, 3, 2, 1, 0},
-    { {"11111"_b, "_pqpzkt"},
-    },
-  },
-
-  { "_hhkhkk",
-    {30, 23, 11, 10},
-    { {"1001"_b, "_lkvynm"},
-    },
-  },
-
   { "_hhkqtn",
     {20, 19, 18, 17, 16},
     { {"00000"_b, "lasta_r_p_z"},
@@ -631,52 +684,191 @@
     },
   },
 
-  { "_hhnjjk",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "pacdzb_64z_dp_1src"},
+  { "_hhlmrg",
+    {23, 20, 19, 18, 17, 16, 13},
+    { {"0000000"_b, "ld2r_asisdlso_r2"},
+      {"0000001"_b, "ld4r_asisdlso_r4"},
+      {"10xxxx0"_b, "ld2r_asisdlsop_rx2_r"},
+      {"10xxxx1"_b, "ld4r_asisdlsop_rx4_r"},
+      {"110xxx0"_b, "ld2r_asisdlsop_rx2_r"},
+      {"110xxx1"_b, "ld4r_asisdlsop_rx4_r"},
+      {"1110xx0"_b, "ld2r_asisdlsop_rx2_r"},
+      {"1110xx1"_b, "ld4r_asisdlsop_rx4_r"},
+      {"11110x0"_b, "ld2r_asisdlsop_rx2_r"},
+      {"11110x1"_b, "ld4r_asisdlsop_rx4_r"},
+      {"1111100"_b, "ld2r_asisdlsop_rx2_r"},
+      {"1111101"_b, "ld4r_asisdlsop_rx4_r"},
+      {"1111110"_b, "ld2r_asisdlsop_r2_i"},
+      {"1111111"_b, "ld4r_asisdlsop_r4_i"},
     },
   },
 
-  { "_hhymvj",
+  { "_hhxpjz",
+    {18},
+    { {"0"_b, "ld2_asisdlso_b2_2b"},
+    },
+  },
+
+  { "_hhxpyt",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_b_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_b_ldapstl_simd"},
+      {"010xx10"_b, "stlur_q_ldapstl_simd"},
+      {"011xx10"_b, "ldapur_q_ldapstl_simd"},
+      {"100xx10"_b, "stlur_h_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_h_ldapstl_simd"},
+      {"x000001"_b, "cpyp_cpy_memcms"},
+      {"x000101"_b, "cpypwt_cpy_memcms"},
+      {"x001001"_b, "cpyprt_cpy_memcms"},
+      {"x001101"_b, "cpypt_cpy_memcms"},
+      {"x010001"_b, "cpym_cpy_memcms"},
+      {"x010101"_b, "cpymwt_cpy_memcms"},
+      {"x011001"_b, "cpymrt_cpy_memcms"},
+      {"x011101"_b, "cpymt_cpy_memcms"},
+      {"x100001"_b, "cpye_cpy_memcms"},
+      {"x100101"_b, "cpyewt_cpy_memcms"},
+      {"x101001"_b, "cpyert_cpy_memcms"},
+      {"x101101"_b, "cpyet_cpy_memcms"},
+      {"x110001"_b, "setgp_set_memcms"},
+      {"x110101"_b, "setgpt_set_memcms"},
+      {"x111001"_b, "setgpn_set_memcms"},
+      {"x111101"_b, "setgptn_set_memcms"},
+    },
+  },
+
+  { "_hjplhs",
     {20, 19, 18, 17, 16, 13, 12},
-    { {"0000011"_b, "sqabs_asisdmisc_r"},
-      {"0000100"_b, "sqxtn_asisdmisc_n"},
+    { {"1111100"_b, "ldaprb_32l_memop"},
     },
   },
 
-  { "_hjgylh",
-    {30, 23, 22},
-    { {"000"_b, "str_s_ldst_pos"},
-      {"001"_b, "ldr_s_ldst_pos"},
-      {"100"_b, "str_d_ldst_pos"},
-      {"101"_b, "ldr_d_ldst_pos"},
+  { "_hjqryy",
+    {11, 10, 9, 8, 7, 6},
+    { {"000000"_b, "wfit_only_systeminstrswithreg"},
     },
   },
 
-  { "_hjqtrt",
+  { "_hjtsgj",
+    {23},
+    { {"0"_b, "_pnkxsr"},
+    },
+  },
+
+  { "_hjvkkq",
+    {18},
+    { {"0"_b, "ld4_asisdlsep_r4_r"},
+      {"1"_b, "ld4_asisdlsep_i4_i"},
+    },
+  },
+
+  { "_hkgzsh",
+    {13, 12, 11, 10},
+    { {"1111"_b, "_qvzvmq"},
+    },
+  },
+
+  { "_hkjjsr",
     {12},
-    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
+    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
     },
   },
 
-  { "_hjtvvm",
-    {13, 12},
-    { {"00"_b, "sdiv_64_dp_2src"},
-      {"10"_b, "rorv_64_dp_2src"},
+  { "_hkpjqm",
+    {30},
+    { {"1"_b, "_qgyppr"},
     },
   },
 
-  { "_hljrqn",
-    {22},
-    { {"0"_b, "str_32_ldst_regoff"},
-      {"1"_b, "ldr_32_ldst_regoff"},
+  { "_hkxlsm",
+    {18},
+    { {"0"_b, "st4_asisdlsop_hx4_r4h"},
+      {"1"_b, "st4_asisdlsop_h4_i4h"},
     },
   },
 
-  { "_hlshjk",
-    {23, 22},
-    { {"00"_b, "fmlal_asimdsame_f"},
-      {"10"_b, "fmlsl_asimdsame_f"},
+  { "_hkxzqg",
+    {2, 1},
+    { {"00"_b, "br_64_branch_reg"},
+    },
+  },
+
+  { "_hljttg",
+    {12},
+    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+    },
+  },
+
+  { "_hlljqz",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "stur_s_ldst_unscaled"},
+      {"00001"_b, "str_s_ldst_immpost"},
+      {"00011"_b, "str_s_ldst_immpre"},
+      {"00100"_b, "ldur_s_ldst_unscaled"},
+      {"00101"_b, "ldr_s_ldst_immpost"},
+      {"00111"_b, "ldr_s_ldst_immpre"},
+      {"10000"_b, "stur_d_ldst_unscaled"},
+      {"10001"_b, "str_d_ldst_immpost"},
+      {"10011"_b, "str_d_ldst_immpre"},
+      {"10100"_b, "ldur_d_ldst_unscaled"},
+      {"10101"_b, "ldr_d_ldst_immpost"},
+      {"10111"_b, "ldr_d_ldst_immpre"},
+    },
+  },
+
+  { "_hlqvmm",
+    {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
+    { {"000010011111"_b, "xpaci_64z_dp_1src"},
+    },
+  },
+
+  { "_hlxmpy",
+    {13, 12, 11, 10},
+    { {"0000"_b, "umlal_asimddiff_l"},
+      {"0001"_b, "sub_asimdsame_only"},
+      {"0010"_b, "_hytrnv"},
+      {"0011"_b, "cmeq_asimdsame_only"},
+      {"0101"_b, "mls_asimdsame_only"},
+      {"0110"_b, "_vjhrzl"},
+      {"0111"_b, "pmul_asimdsame_only"},
+      {"1000"_b, "umlsl_asimddiff_l"},
+      {"1001"_b, "umaxp_asimdsame_only"},
+      {"1010"_b, "_zpjzst"},
+      {"1011"_b, "uminp_asimdsame_only"},
+      {"1101"_b, "sqrdmulh_asimdsame_only"},
+      {"1110"_b, "_jztlrz"},
+    },
+  },
+
+  { "_hlypvy",
+    {30, 23, 22},
+    { {"000"_b, "smaddl_64wa_dp_3src"},
+      {"010"_b, "umaddl_64wa_dp_3src"},
+    },
+  },
+
+  { "_hmgzjl",
+    {18},
+    { {"0"_b, "st3_asisdlso_h3_3h"},
+    },
+  },
+
+  { "_hmjrmm",
+    {30, 23, 22, 20, 19, 18},
+    { {"00xxxx"_b, "add_32_addsub_imm"},
+      {"011000"_b, "smax_32_minmax_imm"},
+      {"011001"_b, "umax_32u_minmax_imm"},
+      {"011010"_b, "smin_32_minmax_imm"},
+      {"011011"_b, "umin_32u_minmax_imm"},
+      {"10xxxx"_b, "sub_32_addsub_imm"},
+    },
+  },
+
+  { "_hmpzzg",
+    {22, 20, 19, 18, 17, 16},
+    { {"111000"_b, "fcmle_asisdmiscfp16_fz"},
+      {"111001"_b, "frsqrte_asisdmiscfp16_r"},
+      {"x00000"_b, "fcmle_asisdmisc_fz"},
+      {"x00001"_b, "frsqrte_asisdmisc_r"},
     },
   },
 
@@ -696,45 +888,6 @@
     },
   },
 
-  { "_hmtxlh",
-    {9, 8, 7, 6, 5, 1, 0},
-    { {"1111111"_b, "retaa_64e_branch_reg"},
-    },
-  },
-
-  { "_hmxlny",
-    {13, 12, 11, 10},
-    { {"0000"_b, "addhn_asimddiff_n"},
-      {"0001"_b, "sshl_asimdsame_only"},
-      {"0010"_b, "_lyghyg"},
-      {"0011"_b, "sqshl_asimdsame_only"},
-      {"0100"_b, "sabal_asimddiff_l"},
-      {"0101"_b, "srshl_asimdsame_only"},
-      {"0110"_b, "_htgzzx"},
-      {"0111"_b, "sqrshl_asimdsame_only"},
-      {"1000"_b, "subhn_asimddiff_n"},
-      {"1001"_b, "smax_asimdsame_only"},
-      {"1010"_b, "_sqpjtr"},
-      {"1011"_b, "smin_asimdsame_only"},
-      {"1100"_b, "sabdl_asimddiff_l"},
-      {"1101"_b, "sabd_asimdsame_only"},
-      {"1110"_b, "_rnrzsj"},
-      {"1111"_b, "saba_asimdsame_only"},
-    },
-  },
-
-  { "_hngpgx",
-    {23, 10, 4},
-    { {"000"_b, "_vxsjgg"},
-    },
-  },
-
-  { "_hngpxg",
-    {1, 0},
-    { {"00"_b, "br_64_branch_reg"},
-    },
-  },
-
   { "_hnjrmp",
     {4},
     { {"0"_b, "cmplo_p_p_zi"},
@@ -742,79 +895,105 @@
     },
   },
 
-  { "_hnzzkj",
-    {30, 18},
-    { {"00"_b, "_gxslgq"},
-    },
-  },
-
-  { "_hpgqlp",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_s_floatimm"},
-    },
-  },
-
-  { "_hqhzgj",
-    {17},
-    { {"0"_b, "ld2_asisdlso_b2_2b"},
-    },
-  },
-
-  { "_hqlskj",
-    {18, 17},
-    { {"00"_b, "ld1_asisdlse_r1_1v"},
-    },
-  },
-
-  { "_hqnxvt",
-    {13, 12, 11, 10},
-    { {"0000"_b, "saddl_asimddiff_l"},
-      {"0001"_b, "shadd_asimdsame_only"},
-      {"0010"_b, "_rykykh"},
-      {"0011"_b, "sqadd_asimdsame_only"},
-      {"0100"_b, "saddw_asimddiff_w"},
-      {"0101"_b, "srhadd_asimdsame_only"},
-      {"0110"_b, "_glkzlv"},
-      {"0111"_b, "_rnktts"},
-      {"1000"_b, "ssubl_asimddiff_l"},
-      {"1001"_b, "shsub_asimdsame_only"},
-      {"1010"_b, "_rgztzl"},
-      {"1011"_b, "sqsub_asimdsame_only"},
-      {"1100"_b, "ssubw_asimddiff_w"},
-      {"1101"_b, "cmgt_asimdsame_only"},
-      {"1110"_b, "_nyxxks"},
-      {"1111"_b, "cmge_asimdsame_only"},
-    },
-  },
-
-  { "_hqsvmh",
-    {18, 17},
-    { {"00"_b, "st4_asisdlso_s4_4s"},
-    },
-  },
-
-  { "_hrhzqy",
-    {17},
-    { {"0"_b, "ld4_asisdlse_r4"},
-    },
-  },
-
-  { "_hrktgs",
-    {12},
-    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
-    },
-  },
-
-  { "_hrllsn",
+  { "_hnkyxy",
     {18, 17, 16},
-    { {"000"_b, "fadd_z_p_zz"},
-      {"001"_b, "fsub_z_p_zz"},
-      {"010"_b, "fmul_z_p_zz"},
-      {"011"_b, "fsubr_z_p_zz"},
-      {"100"_b, "fmaxnm_z_p_zz"},
-      {"101"_b, "fminnm_z_p_zz"},
-      {"110"_b, "fmax_z_p_zz"},
-      {"111"_b, "fmin_z_p_zz"},
+    { {"011"_b, "_ykpgyh"},
+    },
+  },
+
+  { "_hnsvjh",
+    {19},
+    { {"0"_b, "_ntjrlg"},
+      {"1"_b, "sysl_rc_systeminstrs"},
+    },
+  },
+
+  { "_hpmvzr",
+    {11, 10, 9, 8, 7, 6},
+    { {"000000"_b, "yield_hi_hints"},
+      {"000001"_b, "wfi_hi_hints"},
+      {"000010"_b, "sevl_hi_hints"},
+      {"000011"_b, "xpaclri_hi_hints"},
+      {"001000"_b, "psb_hc_hints"},
+      {"001001"_b, "gcsb_hd_hints"},
+      {"001100"_b, "paciasp_hi_hints"},
+      {"001101"_b, "pacibsp_hi_hints"},
+      {"001110"_b, "autiasp_hi_hints"},
+      {"001111"_b, "autibsp_hi_hints"},
+      {"0x01xx"_b, "hint_hm_hints"},
+      {"0x101x"_b, "hint_hm_hints"},
+      {"10x0xx"_b, "hint_hm_hints"},
+      {"10x1xx"_b, "hint_hm_hints"},
+      {"1101xx"_b, "hint_hm_hints"},
+      {"11101x"_b, "hint_hm_hints"},
+      {"x100xx"_b, "hint_hm_hints"},
+      {"x1100x"_b, "hint_hm_hints"},
+      {"x111xx"_b, "hint_hm_hints"},
+    },
+  },
+
+  { "_hpqkhv",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxp_lp64_ldstexcl"},
+    },
+  },
+
+  { "_hptkrj",
+    {30, 22, 13, 12, 11, 10},
+    { {"000001"_b, "rmif_only_rmif"},
+      {"01xx00"_b, "ccmn_64_condcmp_reg"},
+      {"01xx10"_b, "ccmn_64_condcmp_imm"},
+      {"11xx00"_b, "ccmp_64_condcmp_reg"},
+      {"11xx10"_b, "ccmp_64_condcmp_imm"},
+    },
+  },
+
+  { "_hqkhsy",
+    {12},
+    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
+    },
+  },
+
+  { "_hqkljv",
+    {30, 23, 22},
+    { {"000"_b, "and_32_log_imm"},
+      {"010"_b, "movn_32_movewide"},
+      {"100"_b, "eor_32_log_imm"},
+      {"110"_b, "movz_32_movewide"},
+    },
+  },
+
+  { "_hqnsvg",
+    {30},
+    { {"0"_b, "add_64_addsub_shift"},
+      {"1"_b, "sub_64_addsub_shift"},
+    },
+  },
+
+  { "_hqvhjp",
+    {22},
+    { {"0"_b, "str_32_ldst_regoff"},
+      {"1"_b, "ldr_32_ldst_regoff"},
+    },
+  },
+
+  { "_hrmsnk",
+    {9, 8, 7, 6, 5, 2, 1},
+    { {"1111111"_b, "eretaa_64e_branch_reg"},
+    },
+  },
+
+  { "_hrpkqg",
+    {18, 17, 12},
+    { {"000"_b, "st4_asisdlso_d4_4d"},
+    },
+  },
+
+  { "_hrxtnj",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"1010000"_b, "sm3partw1_vvv4_cryptosha512_3"},
+      {"1010001"_b, "sm3partw2_vvv4_cryptosha512_3"},
+      {"1010010"_b, "sm4ekey_vvv4_cryptosha512_3"},
     },
   },
 
@@ -828,39 +1007,36 @@
     },
   },
 
-  { "_hsjynv",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
+  { "_hrymnk",
+    {18},
+    { {"0"_b, "st1_asisdlso_h1_1h"},
     },
   },
 
-  { "_hstvrp",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fmov_d_floatdp1"},
-      {"000010"_b, "fneg_d_floatdp1"},
-      {"000100"_b, "fcvt_sd_floatdp1"},
-      {"000110"_b, "bfcvt_bs_floatdp1"},
-      {"001000"_b, "frintn_d_floatdp1"},
-      {"001010"_b, "frintm_d_floatdp1"},
-      {"001100"_b, "frinta_d_floatdp1"},
-      {"001110"_b, "frintx_d_floatdp1"},
-      {"010000"_b, "frint32z_d_floatdp1"},
-      {"010010"_b, "frint64z_d_floatdp1"},
+  { "_hspyhv",
+    {13, 12},
+    { {"10"_b, "umax_64_dp_2src"},
     },
   },
 
-  { "_hsvgnt",
-    {23, 22, 4, 3, 2, 1, 0},
-    { {"0000001"_b, "svc_ex_exception"},
-      {"0000010"_b, "hvc_ex_exception"},
-      {"0000011"_b, "smc_ex_exception"},
-      {"0100000"_b, "hlt_ex_exception"},
-    },
-  },
-
-  { "_htgzzx",
-    {20, 18, 17, 16},
-    { {"0000"_b, "_mqgtsq"},
+  { "_hsrkqt",
+    {13, 12, 11, 10},
+    { {"0000"_b, "addhn_asimddiff_n"},
+      {"0001"_b, "sshl_asimdsame_only"},
+      {"0010"_b, "_qtgrzv"},
+      {"0011"_b, "sqshl_asimdsame_only"},
+      {"0100"_b, "sabal_asimddiff_l"},
+      {"0101"_b, "srshl_asimdsame_only"},
+      {"0110"_b, "_vhkpvn"},
+      {"0111"_b, "sqrshl_asimdsame_only"},
+      {"1000"_b, "subhn_asimddiff_n"},
+      {"1001"_b, "smax_asimdsame_only"},
+      {"1010"_b, "_rgztgm"},
+      {"1011"_b, "smin_asimdsame_only"},
+      {"1100"_b, "sabdl_asimddiff_l"},
+      {"1101"_b, "sabd_asimdsame_only"},
+      {"1110"_b, "_grmpht"},
+      {"1111"_b, "saba_asimdsame_only"},
     },
   },
 
@@ -870,15 +1046,16 @@
     },
   },
 
-  { "_htmthz",
-    {22, 20, 19, 18, 17, 16, 13, 12},
-    { {"01111100"_b, "_msztzv"},
+  { "_htjmmx",
+    {30},
+    { {"0"_b, "tbnz_only_testbranch"},
     },
   },
 
-  { "_htnmls",
-    {22, 13, 12},
-    { {"000"_b, "ldapr_32l_memop"},
+  { "_htkpks",
+    {30, 23, 22},
+    { {"000"_b, "add_32_addsub_ext"},
+      {"100"_b, "sub_32_addsub_ext"},
     },
   },
 
@@ -889,12 +1066,6 @@
     },
   },
 
-  { "_htppjj",
-    {30, 23, 22},
-    { {"000"_b, "msub_64a_dp_3src"},
-    },
-  },
-
   { "_htqpks",
     {30, 20, 19, 18, 17, 16, 13},
     { {"000000x"_b, "add_z_zi"},
@@ -916,11 +1087,109 @@
     },
   },
 
-  { "_hvvyhl",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0x00001"_b, "frint32z_asimdmisc_r"},
-      {"1111000"_b, "fcmlt_asimdmiscfp16_fz"},
-      {"1x00000"_b, "fcmlt_asimdmisc_fz"},
+  { "_htrtzz",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_b_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_b_ldapstl_simd"},
+      {"010xx10"_b, "stlur_q_ldapstl_simd"},
+      {"011xx10"_b, "ldapur_q_ldapstl_simd"},
+      {"100xx10"_b, "stlur_h_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_h_ldapstl_simd"},
+      {"x000001"_b, "cpypwn_cpy_memcms"},
+      {"x000101"_b, "cpypwtwn_cpy_memcms"},
+      {"x001001"_b, "cpyprtwn_cpy_memcms"},
+      {"x001101"_b, "cpyptwn_cpy_memcms"},
+      {"x010001"_b, "cpymwn_cpy_memcms"},
+      {"x010101"_b, "cpymwtwn_cpy_memcms"},
+      {"x011001"_b, "cpymrtwn_cpy_memcms"},
+      {"x011101"_b, "cpymtwn_cpy_memcms"},
+      {"x100001"_b, "cpyewn_cpy_memcms"},
+      {"x100101"_b, "cpyewtwn_cpy_memcms"},
+      {"x101001"_b, "cpyertwn_cpy_memcms"},
+      {"x101101"_b, "cpyetwn_cpy_memcms"},
+      {"x110001"_b, "setgm_set_memcms"},
+      {"x110101"_b, "setgmt_set_memcms"},
+      {"x111001"_b, "setgmn_set_memcms"},
+      {"x111101"_b, "setgmtn_set_memcms"},
+    },
+  },
+
+  { "_htsjxj",
+    {23, 22, 13, 12, 11, 10},
+    { {"001010"_b, "pmullb_z_zz_q"},
+      {"001011"_b, "pmullt_z_zz_q"},
+      {"101010"_b, "pmullb_z_zz"},
+      {"101011"_b, "pmullt_z_zz"},
+      {"x11010"_b, "pmullb_z_zz"},
+      {"x11011"_b, "pmullt_z_zz"},
+      {"xx0000"_b, "saddwb_z_zz"},
+      {"xx0001"_b, "saddwt_z_zz"},
+      {"xx0010"_b, "uaddwb_z_zz"},
+      {"xx0011"_b, "uaddwt_z_zz"},
+      {"xx0100"_b, "ssubwb_z_zz"},
+      {"xx0101"_b, "ssubwt_z_zz"},
+      {"xx0110"_b, "usubwb_z_zz"},
+      {"xx0111"_b, "usubwt_z_zz"},
+      {"xx1000"_b, "sqdmullb_z_zz"},
+      {"xx1001"_b, "sqdmullt_z_zz"},
+      {"xx1100"_b, "smullb_z_zz"},
+      {"xx1101"_b, "smullt_z_zz"},
+      {"xx1110"_b, "umullb_z_zz"},
+      {"xx1111"_b, "umullt_z_zz"},
+    },
+  },
+
+  { "_hvhrsq",
+    {30, 23, 22},
+    { {"000"_b, "str_32_ldst_pos"},
+      {"001"_b, "ldr_32_ldst_pos"},
+      {"010"_b, "ldrsw_64_ldst_pos"},
+      {"100"_b, "str_64_ldst_pos"},
+      {"101"_b, "ldr_64_ldst_pos"},
+      {"110"_b, "prfm_p_ldst_pos"},
+    },
+  },
+
+  { "_hvmyjz",
+    {13, 12},
+    { {"00"_b, "subps_64s_dp_2src"},
+    },
+  },
+
+  { "_hvnhmh",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"},
+      {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"},
+      {"100xx00"_b, "stlurh_32_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"},
+      {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"},
+      {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfpwn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtwn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtwn_cpy_memcms"},
+      {"x001101"_b, "cpyfptwn_cpy_memcms"},
+      {"x010001"_b, "cpyfmwn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtwn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtwn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtwn_cpy_memcms"},
+      {"x100001"_b, "cpyfewn_cpy_memcms"},
+      {"x100101"_b, "cpyfewtwn_cpy_memcms"},
+      {"x101001"_b, "cpyfertwn_cpy_memcms"},
+      {"x101101"_b, "cpyfetwn_cpy_memcms"},
+      {"x110001"_b, "setm_set_memcms"},
+      {"x110101"_b, "setmt_set_memcms"},
+      {"x111001"_b, "setmn_set_memcms"},
+      {"x111101"_b, "setmtn_set_memcms"},
+    },
+  },
+
+  { "_hvrjyt",
+    {30, 23, 22},
+    { {"000"_b, "sbfm_32m_bitfield"},
+      {"010"_b, "extr_32_extract"},
+      {"100"_b, "ubfm_32m_bitfield"},
     },
   },
 
@@ -930,60 +1199,63 @@
     },
   },
 
-  { "_hxglyp",
-    {17},
-    { {"0"_b, "ld4_asisdlsep_r4_r"},
-      {"1"_b, "ld4_asisdlsep_i4_i"},
+  { "_hxgngr",
+    {23, 22, 13},
+    { {"100"_b, "fmlsl_asimdelem_lh"},
+      {"xx1"_b, "smlsl_asimdelem_l"},
     },
   },
 
-  { "_hxmjhn",
-    {30, 23, 22, 19, 16},
-    { {"10010"_b, "aese_b_cryptoaes"},
-      {"xxx00"_b, "cls_asimdmisc_r"},
-      {"xxx01"_b, "sqxtn_asimdmisc_n"},
+  { "_hxlznn",
+    {30, 23, 22, 13},
+    { {"0000"_b, "ld1sh_z_p_br_s32"},
+      {"0001"_b, "ldff1sh_z_p_br_s32"},
+      {"0010"_b, "ld1w_z_p_br_u64"},
+      {"0011"_b, "ldff1w_z_p_br_u64"},
+      {"0100"_b, "ld1sb_z_p_br_s32"},
+      {"0101"_b, "ldff1sb_z_p_br_s32"},
+      {"0110"_b, "ld1d_z_p_br_u64"},
+      {"0111"_b, "ldff1d_z_p_br_u64"},
+      {"1001"_b, "st2w_z_p_br_contiguous"},
+      {"1010"_b, "st1w_z_p_br"},
+      {"1011"_b, "st4w_z_p_br_contiguous"},
+      {"1100"_b, "str_z_bi"},
+      {"1101"_b, "st2d_z_p_br_contiguous"},
+      {"1110"_b, "st1d_z_p_br"},
+      {"1111"_b, "st4d_z_p_br_contiguous"},
     },
   },
 
-  { "_hxnmsl",
-    {30, 23, 22, 20, 13},
-    { {"00001"_b, "ld2w_z_p_bi_contiguous"},
-      {"000x0"_b, "ld2w_z_p_br_contiguous"},
-      {"00101"_b, "ld4w_z_p_bi_contiguous"},
-      {"001x0"_b, "ld4w_z_p_br_contiguous"},
-      {"01001"_b, "ld2d_z_p_bi_contiguous"},
-      {"010x0"_b, "ld2d_z_p_br_contiguous"},
-      {"01101"_b, "ld4d_z_p_bi_contiguous"},
-      {"011x0"_b, "ld4d_z_p_br_contiguous"},
-      {"10011"_b, "st2w_z_p_bi_contiguous"},
-      {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"},
-      {"10111"_b, "st4w_z_p_bi_contiguous"},
-      {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"},
-      {"10x01"_b, "st1w_z_p_bi"},
-      {"11011"_b, "st2d_z_p_bi_contiguous"},
-      {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"},
-      {"11111"_b, "st4d_z_p_bi_contiguous"},
-      {"11x01"_b, "st1d_z_p_bi"},
+  { "_hxrnns",
+    {23, 22, 13, 12},
+    { {"0000"_b, "fmul_s_floatdp2"},
+      {"0001"_b, "fdiv_s_floatdp2"},
+      {"0010"_b, "fadd_s_floatdp2"},
+      {"0011"_b, "fsub_s_floatdp2"},
+      {"0100"_b, "fmul_d_floatdp2"},
+      {"0101"_b, "fdiv_d_floatdp2"},
+      {"0110"_b, "fadd_d_floatdp2"},
+      {"0111"_b, "fsub_d_floatdp2"},
+      {"1100"_b, "fmul_h_floatdp2"},
+      {"1101"_b, "fdiv_h_floatdp2"},
+      {"1110"_b, "fadd_h_floatdp2"},
+      {"1111"_b, "fsub_h_floatdp2"},
     },
   },
 
-  { "_hxrtsq",
-    {23, 22, 12},
-    { {"000"_b, "_gxlvsg"},
-      {"001"_b, "_kxhjtk"},
-      {"010"_b, "_hyxhpl"},
-      {"011"_b, "_kvgjzh"},
-      {"110"_b, "_tpsylx"},
-      {"111"_b, "_zhpxqz"},
+  { "_hxxqks",
+    {23},
+    { {"0"_b, "fmla_asimdsame_only"},
+      {"1"_b, "fmls_asimdsame_only"},
     },
   },
 
-  { "_hxzlmm",
-    {30, 23, 22},
-    { {"000"_b, "stxp_sp32_ldstexcl"},
-      {"001"_b, "ldxp_lp32_ldstexcl"},
-      {"100"_b, "stxp_sp64_ldstexcl"},
-      {"101"_b, "ldxp_lp64_ldstexcl"},
+  { "_hxxxyy",
+    {13, 12},
+    { {"00"_b, "cpyfm_cpy_memcms"},
+      {"01"_b, "cpyfmwt_cpy_memcms"},
+      {"10"_b, "cpyfmrt_cpy_memcms"},
+      {"11"_b, "cpyfmt_cpy_memcms"},
     },
   },
 
@@ -1002,18 +1274,24 @@
     },
   },
 
-  { "_hyxhpl",
-    {13},
-    { {"0"_b, "_yrrppk"},
-      {"1"_b, "_pnxggm"},
+  { "_hynprk",
+    {18},
+    { {"0"_b, "st2_asisdlso_h2_2h"},
     },
   },
 
-  { "_hyymjs",
-    {18, 17, 12},
-    { {"0x0"_b, "ld2_asisdlsop_dx2_r2d"},
-      {"100"_b, "ld2_asisdlsop_dx2_r2d"},
-      {"110"_b, "ld2_asisdlsop_d2_i2d"},
+  { "_hyskth",
+    {22},
+    { {"0"_b, "str_64_ldst_regoff"},
+      {"1"_b, "ldr_64_ldst_regoff"},
+    },
+  },
+
+  { "_hytrnv",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "frinta_asimdmiscfp16_r"},
+      {"0x00001"_b, "frinta_asimdmisc_r"},
+      {"xx00000"_b, "cmge_asimdmisc_z"},
     },
   },
 
@@ -1036,68 +1314,34 @@
     },
   },
 
-  { "_hzllgl",
-    {17},
-    { {"0"_b, "st1_asisdlse_r4_4v"},
+  { "_hzkxht",
+    {22, 20},
+    { {"00"_b, "_zrxhzq"},
+      {"01"_b, "msr_sr_systemmove"},
+      {"10"_b, "_krllsy"},
+      {"11"_b, "msrr_sr_systemmovepr"},
     },
   },
 
-  { "_hzmlps",
-    {19},
-    { {"0"_b, "_rpqgjl"},
-      {"1"_b, "sys_cr_systeminstrs"},
+  { "_hzsxkp",
+    {30, 13},
+    { {"00"_b, "_jlrrlt"},
+      {"01"_b, "_jrlynj"},
+      {"10"_b, "_ghpxms"},
+      {"11"_b, "_nyjtng"},
     },
   },
 
-  { "_hzxjsp",
-    {23, 22, 20, 19, 16, 13, 10},
-    { {"0000000"_b, "_shgkvq"},
-      {"0000001"_b, "_vytxll"},
-      {"0000010"_b, "_hqsvmh"},
-      {"0000011"_b, "_gmvrxn"},
-      {"0100000"_b, "_ygyxvx"},
-      {"0100001"_b, "_tszvvk"},
-      {"0100010"_b, "_tyjqvt"},
-      {"0100011"_b, "_ylqnqt"},
-      {"100xx00"_b, "st2_asisdlsop_sx2_r2s"},
-      {"100xx01"_b, "_hrktgs"},
-      {"100xx10"_b, "st4_asisdlsop_sx4_r4s"},
-      {"100xx11"_b, "_mmrtvz"},
-      {"1010x00"_b, "st2_asisdlsop_sx2_r2s"},
-      {"1010x01"_b, "_lmtnzv"},
-      {"1010x10"_b, "st4_asisdlsop_sx4_r4s"},
-      {"1010x11"_b, "_qrykhm"},
-      {"1011000"_b, "st2_asisdlsop_sx2_r2s"},
-      {"1011001"_b, "_nyssqn"},
-      {"1011010"_b, "st4_asisdlsop_sx4_r4s"},
-      {"1011011"_b, "_kpqgsn"},
-      {"1011100"_b, "_knpsmq"},
-      {"1011101"_b, "_jzyzjh"},
-      {"1011110"_b, "_vhhktl"},
-      {"1011111"_b, "_yjxvkp"},
-      {"110xx00"_b, "ld2_asisdlsop_sx2_r2s"},
-      {"110xx01"_b, "_zppjvk"},
-      {"110xx10"_b, "ld4_asisdlsop_sx4_r4s"},
-      {"110xx11"_b, "_kqjmvy"},
-      {"1110x00"_b, "ld2_asisdlsop_sx2_r2s"},
-      {"1110x01"_b, "_ptkrvg"},
-      {"1110x10"_b, "ld4_asisdlsop_sx4_r4s"},
-      {"1110x11"_b, "_kjryvx"},
-      {"1111000"_b, "ld2_asisdlsop_sx2_r2s"},
-      {"1111001"_b, "_mlvpxh"},
-      {"1111010"_b, "ld4_asisdlsop_sx4_r4s"},
-      {"1111011"_b, "_xqjrgk"},
-      {"1111100"_b, "_msgqps"},
-      {"1111101"_b, "_hyymjs"},
-      {"1111110"_b, "_qsnqpz"},
-      {"1111111"_b, "_gzvgmh"},
+  { "_jggxjz",
+    {13, 12},
+    { {"00"_b, "cmtst_asisdsame_only"},
     },
   },
 
-  { "_jggvph",
+  { "_jgklkt",
     {30},
-    { {"0"_b, "bic_64_log_shift"},
-      {"1"_b, "eon_64_log_shift"},
+    { {"0"_b, "ldrsw_64_loadlit"},
+      {"1"_b, "prfm_p_loadlit"},
     },
   },
 
@@ -1108,6 +1352,32 @@
     },
   },
 
+  { "_jgsryt",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxrh_lr32_ldstexcl"},
+    },
+  },
+
+  { "_jgxqzr",
+    {13, 12, 11, 10},
+    { {"0000"_b, "_xzjvkv"},
+      {"0001"_b, "_nqjtqn"},
+      {"0011"_b, "_qzmrnj"},
+      {"0100"_b, "_xptsns"},
+      {"0101"_b, "_qpgxxr"},
+      {"0110"_b, "uzp1_asimdperm_only"},
+      {"0111"_b, "_rsnvnr"},
+      {"1000"_b, "_yszlqj"},
+      {"1001"_b, "_lzvxxj"},
+      {"1010"_b, "trn1_asimdperm_only"},
+      {"1011"_b, "_zmrhxx"},
+      {"1100"_b, "_skytvx"},
+      {"1101"_b, "_smptxh"},
+      {"1110"_b, "zip1_asimdperm_only"},
+      {"1111"_b, "_rjvgkl"},
+    },
+  },
+
   { "_jgyhrh",
     {4},
     { {"0"_b, "cmplo_p_p_zi"},
@@ -1115,9 +1385,9 @@
     },
   },
 
-  { "_jhkglp",
-    {30, 23, 22},
-    { {"110"_b, "xar_vvv2_crypto3_imm6"},
+  { "_jhkkgv",
+    {10},
+    { {"0"_b, "_qvgtlh"},
     },
   },
 
@@ -1128,29 +1398,21 @@
     },
   },
 
-  { "_jhqlkv",
-    {30, 23, 22},
-    { {"000"_b, "stxr_sr32_ldstexcl"},
-      {"001"_b, "ldxr_lr32_ldstexcl"},
-      {"010"_b, "stllr_sl32_ldstexcl"},
-      {"011"_b, "ldlar_lr32_ldstexcl"},
-      {"100"_b, "stxr_sr64_ldstexcl"},
-      {"101"_b, "ldxr_lr64_ldstexcl"},
-      {"110"_b, "stllr_sl64_ldstexcl"},
-      {"111"_b, "ldlar_lr64_ldstexcl"},
+  { "_jhltlz",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxr_lr64_ldstexcl"},
     },
   },
 
-  { "_jhytlg",
-    {30, 23, 22, 13, 11, 10},
-    { {"000010"_b, "str_b_ldst_regoff"},
-      {"000110"_b, "str_bl_ldst_regoff"},
-      {"001010"_b, "ldr_b_ldst_regoff"},
-      {"001110"_b, "ldr_bl_ldst_regoff"},
-      {"010x10"_b, "str_q_ldst_regoff"},
-      {"011x10"_b, "ldr_q_ldst_regoff"},
-      {"100x10"_b, "str_h_ldst_regoff"},
-      {"101x10"_b, "ldr_h_ldst_regoff"},
+  { "_jjgpxz",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_h_floatimm"},
+    },
+  },
+
+  { "_jjnvrv",
+    {20, 19, 18, 17, 16, 13, 12, 4, 3, 2, 1, 0},
+    { {"000000001101"_b, "setf8_only_setf"},
     },
   },
 
@@ -1162,67 +1424,51 @@
     },
   },
 
-  { "_jkpsxk",
-    {20},
-    { {"0"_b, "_kyygzs"},
-      {"1"_b, "msr_sr_systemmove"},
+  { "_jkvsxy",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000010"_b, "rcwcas_c64_rcwcomswap"},
+      {"0000011"_b, "rcwcasp_c64_rcwcomswappr"},
+      {"0000100"_b, "ldclrp_128_memop_128"},
+      {"0001100"_b, "ldsetp_128_memop_128"},
+      {"0010010"_b, "rcwcasl_c64_rcwcomswap"},
+      {"0010011"_b, "rcwcaspl_c64_rcwcomswappr"},
+      {"0010100"_b, "ldclrpl_128_memop_128"},
+      {"0011100"_b, "ldsetpl_128_memop_128"},
+      {"0100010"_b, "rcwcasa_c64_rcwcomswap"},
+      {"0100011"_b, "rcwcaspa_c64_rcwcomswappr"},
+      {"0100100"_b, "ldclrpa_128_memop_128"},
+      {"0101100"_b, "ldsetpa_128_memop_128"},
+      {"0110010"_b, "rcwcasal_c64_rcwcomswap"},
+      {"0110011"_b, "rcwcaspal_c64_rcwcomswappr"},
+      {"0110100"_b, "ldclrpal_128_memop_128"},
+      {"0111100"_b, "ldsetpal_128_memop_128"},
+      {"1000010"_b, "rcwscas_c64_rcwcomswap"},
+      {"1000011"_b, "rcwscasp_c64_rcwcomswappr"},
+      {"1010010"_b, "rcwscasl_c64_rcwcomswap"},
+      {"1010011"_b, "rcwscaspl_c64_rcwcomswappr"},
+      {"1100010"_b, "rcwscasa_c64_rcwcomswap"},
+      {"1100011"_b, "rcwscaspa_c64_rcwcomswappr"},
+      {"1110010"_b, "rcwscasal_c64_rcwcomswap"},
+      {"1110011"_b, "rcwscaspal_c64_rcwcomswappr"},
     },
   },
 
-  { "_jkqktg",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "sqneg_asimdmisc_r"},
+  { "_jkvvtp",
+    {30, 23, 22},
+    { {"100"_b, "bcax_vvv16_crypto4"},
     },
   },
 
-  { "_jkrlsg",
-    {23, 22},
-    { {"00"_b, "fmsub_s_floatdp3"},
-      {"01"_b, "fmsub_d_floatdp3"},
-      {"11"_b, "fmsub_h_floatdp3"},
+  { "_jkxyvn",
+    {23},
+    { {"0"_b, "fadd_asimdsame_only"},
+      {"1"_b, "fsub_asimdsame_only"},
     },
   },
 
-  { "_jksztq",
-    {22, 20, 19, 13, 12},
-    { {"0x100"_b, "sri_asisdshf_r"},
-      {"0x101"_b, "sli_asisdshf_r"},
-      {"0x110"_b, "sqshlu_asisdshf_r"},
-      {"0x111"_b, "uqshl_asisdshf_r"},
-      {"10x00"_b, "sri_asisdshf_r"},
-      {"10x01"_b, "sli_asisdshf_r"},
-      {"10x10"_b, "sqshlu_asisdshf_r"},
-      {"10x11"_b, "uqshl_asisdshf_r"},
-      {"11100"_b, "sri_asisdshf_r"},
-      {"11101"_b, "sli_asisdshf_r"},
-      {"11110"_b, "sqshlu_asisdshf_r"},
-      {"11111"_b, "uqshl_asisdshf_r"},
-      {"x1000"_b, "sri_asisdshf_r"},
-      {"x1001"_b, "sli_asisdshf_r"},
-      {"x1010"_b, "sqshlu_asisdshf_r"},
-      {"x1011"_b, "uqshl_asisdshf_r"},
-    },
-  },
-
-  { "_jkxlnq",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_nhzyvv"},
-    },
-  },
-
-  { "_jlqjzr",
-    {30, 23},
-    { {"00"_b, "adds_64s_addsub_imm"},
-      {"10"_b, "subs_64s_addsub_imm"},
-    },
-  },
-
-  { "_jlqxvj",
-    {23, 22},
-    { {"01"_b, "_mplgqv"},
-      {"10"_b, "xar_vvv2_crypto3_imm6"},
-      {"11"_b, "_ljhtkq"},
+  { "_jlnjsy",
+    {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
+    { {"0011111001"_b, "_ssjnph"},
     },
   },
 
@@ -1239,19 +1485,6 @@
     },
   },
 
-  { "_jlrvpl",
-    {17},
-    { {"0"_b, "st2_asisdlse_r2"},
-    },
-  },
-
-  { "_jmgkrl",
-    {30},
-    { {"0"_b, "orn_32_log_shift"},
-      {"1"_b, "bics_32_log_shift"},
-    },
-  },
-
   { "_jmvgsp",
     {22, 20, 11},
     { {"100"_b, "sqinch_z_zs"},
@@ -1268,29 +1501,13 @@
     },
   },
 
-  { "_jmyslr",
-    {17},
-    { {"0"_b, "ld1_asisdlsep_r4_r4"},
-      {"1"_b, "ld1_asisdlsep_i4_i4"},
+  { "_jnktqs",
+    {18, 17},
+    { {"00"_b, "ld1_asisdlso_s1_1s"},
     },
   },
 
-  { "_jnjlsh",
-    {12},
-    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
-    },
-  },
-
-  { "_jnmgrh",
-    {30, 19, 18, 17, 16},
-    { {"11000"_b, "ins_asimdins_iv_v"},
-      {"1x100"_b, "ins_asimdins_iv_v"},
-      {"1xx10"_b, "ins_asimdins_iv_v"},
-      {"1xxx1"_b, "ins_asimdins_iv_v"},
-    },
-  },
-
-  { "_jplmmr",
+  { "_jnnmjk",
     {23, 22, 20, 19, 16, 13, 12},
     { {"0111100"_b, "fcvtas_asisdmiscfp16_r"},
       {"0111101"_b, "scvtf_asisdmiscfp16_r"},
@@ -1314,52 +1531,107 @@
     },
   },
 
-  { "_jpvljz",
-    {23, 22},
-    { {"01"_b, "fcmeq_asimdsamefp16_only"},
-    },
-  },
-
-  { "_jpxgqh",
-    {30, 23, 22},
-    { {"000"_b, "sbfm_32m_bitfield"},
-      {"100"_b, "ubfm_32m_bitfield"},
-    },
-  },
-
-  { "_jqjnrv",
+  { "_jpvmkz",
     {18, 17},
-    { {"00"_b, "st1_asisdlso_s1_1s"},
+    { {"00"_b, "_jnnmjk"},
     },
   },
 
-  { "_jqnglz",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "ucvtf_asisdshf_c"},
-      {"001x0"_b, "ucvtf_asisdshf_c"},
-      {"01xx0"_b, "ucvtf_asisdshf_c"},
+  { "_jqhvhn",
+    {30, 23, 11, 10},
+    { {"0000"_b, "_ygtpyl"},
+      {"0010"_b, "_hqvhjp"},
+      {"0100"_b, "_xkylhh"},
+      {"0110"_b, "_mnxgml"},
+      {"1000"_b, "_qyyrqq"},
+      {"1001"_b, "ldraa_64_ldst_pac"},
+      {"1010"_b, "_kpsnsk"},
+      {"1011"_b, "ldraa_64w_ldst_pac"},
+      {"1100"_b, "_tyzpxk"},
+      {"1101"_b, "ldrab_64_ldst_pac"},
+      {"1111"_b, "ldrab_64w_ldst_pac"},
     },
   },
 
-  { "_jqnhrj",
-    {12, 10},
-    { {"00"_b, "_mzynlp"},
-      {"01"_b, "_mvglql"},
-      {"10"_b, "_tylqpt"},
-      {"11"_b, "_lrjyhr"},
+  { "_jqlgts",
+    {30, 23, 22},
+    { {"000"_b, "str_s_ldst_pos"},
+      {"001"_b, "ldr_s_ldst_pos"},
+      {"100"_b, "str_d_ldst_pos"},
+      {"101"_b, "ldr_d_ldst_pos"},
     },
   },
 
-  { "_jqplxx",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"1111100"_b, "_xpvpqq"},
+  { "_jqrmyp",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fabs_h_floatdp1"},
+      {"000010"_b, "fsqrt_h_floatdp1"},
+      {"000100"_b, "fcvt_dh_floatdp1"},
+      {"001000"_b, "frintp_h_floatdp1"},
+      {"001010"_b, "frintz_h_floatdp1"},
+      {"001110"_b, "frinti_h_floatdp1"},
     },
   },
 
-  { "_jqtltz",
-    {13},
-    { {"0"_b, "mul_asimdelem_r"},
-      {"1"_b, "smull_asimdelem_l"},
+  { "_jqsjtj",
+    {18},
+    { {"0"_b, "st2_asisdlse_r2"},
+    },
+  },
+
+  { "_jqtksx",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000000"_b, "ldsmaxb_32_memop"},
+      {"0000100"_b, "ldsminb_32_memop"},
+      {"0000x10"_b, "strb_32b_ldst_regoff"},
+      {"0001000"_b, "ldumaxb_32_memop"},
+      {"0001100"_b, "lduminb_32_memop"},
+      {"0001x10"_b, "strb_32bl_ldst_regoff"},
+      {"0010000"_b, "ldsmaxlb_32_memop"},
+      {"0010100"_b, "ldsminlb_32_memop"},
+      {"0010x10"_b, "ldrb_32b_ldst_regoff"},
+      {"0011000"_b, "ldumaxlb_32_memop"},
+      {"0011100"_b, "lduminlb_32_memop"},
+      {"0011x10"_b, "ldrb_32bl_ldst_regoff"},
+      {"0100000"_b, "ldsmaxab_32_memop"},
+      {"0100100"_b, "ldsminab_32_memop"},
+      {"0100x10"_b, "ldrsb_64b_ldst_regoff"},
+      {"0101000"_b, "ldumaxab_32_memop"},
+      {"0101100"_b, "lduminab_32_memop"},
+      {"0101x10"_b, "ldrsb_64bl_ldst_regoff"},
+      {"0110000"_b, "ldsmaxalb_32_memop"},
+      {"0110100"_b, "ldsminalb_32_memop"},
+      {"0110x10"_b, "ldrsb_32b_ldst_regoff"},
+      {"0111000"_b, "ldumaxalb_32_memop"},
+      {"0111100"_b, "lduminalb_32_memop"},
+      {"0111x10"_b, "ldrsb_32bl_ldst_regoff"},
+      {"1000000"_b, "ldsmaxh_32_memop"},
+      {"1000100"_b, "ldsminh_32_memop"},
+      {"1001000"_b, "ldumaxh_32_memop"},
+      {"1001100"_b, "lduminh_32_memop"},
+      {"100xx10"_b, "strh_32_ldst_regoff"},
+      {"1010000"_b, "ldsmaxlh_32_memop"},
+      {"1010100"_b, "ldsminlh_32_memop"},
+      {"1011000"_b, "ldumaxlh_32_memop"},
+      {"1011100"_b, "lduminlh_32_memop"},
+      {"101xx10"_b, "ldrh_32_ldst_regoff"},
+      {"1100000"_b, "ldsmaxah_32_memop"},
+      {"1100100"_b, "ldsminah_32_memop"},
+      {"1101000"_b, "ldumaxah_32_memop"},
+      {"1101100"_b, "lduminah_32_memop"},
+      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+      {"1110000"_b, "ldsmaxalh_32_memop"},
+      {"1110100"_b, "ldsminalh_32_memop"},
+      {"1111000"_b, "ldumaxalh_32_memop"},
+      {"1111100"_b, "lduminalh_32_memop"},
+      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+    },
+  },
+
+  { "_jqvpqx",
+    {23, 22},
+    { {"00"_b, "fmlal_asimdsame_f"},
+      {"10"_b, "fmlsl_asimdsame_f"},
     },
   },
 
@@ -1374,28 +1646,12 @@
     },
   },
 
-  { "_jrgzxt",
-    {18, 17},
-    { {"00"_b, "ld3_asisdlse_r3"},
-    },
-  },
-
   { "_jrlynj",
     {11, 10},
     { {"00"_b, "_gzqvnk"},
     },
   },
 
-  { "_jrnlzs",
-    {13, 12, 11},
-    { {"000"_b, "fminnmp_asimdsamefp16_only"},
-      {"010"_b, "fabd_asimdsamefp16_only"},
-      {"100"_b, "fcmgt_asimdsamefp16_only"},
-      {"101"_b, "facgt_asimdsamefp16_only"},
-      {"110"_b, "fminp_asimdsamefp16_only"},
-    },
-  },
-
   { "_jrnxzh",
     {12},
     { {"0"_b, "cmla_z_zzz"},
@@ -1403,72 +1659,95 @@
     },
   },
 
-  { "_jrsptt",
+  { "_jrqxvn",
+    {23, 22, 13, 12, 11, 10},
+    { {"000000"_b, "tbl_asimdtbl_l3_3"},
+      {"000100"_b, "tbx_asimdtbl_l3_3"},
+      {"001000"_b, "tbl_asimdtbl_l4_4"},
+      {"001100"_b, "tbx_asimdtbl_l4_4"},
+      {"xx0110"_b, "uzp2_asimdperm_only"},
+      {"xx1010"_b, "trn2_asimdperm_only"},
+      {"xx1110"_b, "zip2_asimdperm_only"},
+    },
+  },
+
+  { "_jrxtzg",
+    {30, 23, 22, 11, 10},
+    { {"10001"_b, "stg_64spost_ldsttags"},
+      {"10010"_b, "stg_64soffset_ldsttags"},
+      {"10011"_b, "stg_64spre_ldsttags"},
+      {"10100"_b, "ldg_64loffset_ldsttags"},
+      {"10101"_b, "stzg_64spost_ldsttags"},
+      {"10110"_b, "stzg_64soffset_ldsttags"},
+      {"10111"_b, "stzg_64spre_ldsttags"},
+      {"11001"_b, "st2g_64spost_ldsttags"},
+      {"11010"_b, "st2g_64soffset_ldsttags"},
+      {"11011"_b, "st2g_64spre_ldsttags"},
+      {"11101"_b, "stz2g_64spost_ldsttags"},
+      {"11110"_b, "stz2g_64soffset_ldsttags"},
+      {"11111"_b, "stz2g_64spre_ldsttags"},
+    },
+  },
+
+  { "_jsqvtn",
+    {23, 22, 11, 10},
+    { {"0000"_b, "_lnsjqy"},
+      {"0001"_b, "stg_64spost_ldsttags"},
+      {"0010"_b, "stg_64soffset_ldsttags"},
+      {"0011"_b, "stg_64spre_ldsttags"},
+      {"0100"_b, "ldg_64loffset_ldsttags"},
+      {"0101"_b, "stzg_64spost_ldsttags"},
+      {"0110"_b, "stzg_64soffset_ldsttags"},
+      {"0111"_b, "stzg_64spre_ldsttags"},
+      {"1000"_b, "_myzhml"},
+      {"1001"_b, "st2g_64spost_ldsttags"},
+      {"1010"_b, "st2g_64soffset_ldsttags"},
+      {"1011"_b, "st2g_64spre_ldsttags"},
+      {"1100"_b, "_mjstgz"},
+      {"1101"_b, "stz2g_64spost_ldsttags"},
+      {"1110"_b, "stz2g_64soffset_ldsttags"},
+      {"1111"_b, "stz2g_64spre_ldsttags"},
+    },
+  },
+
+  { "_jvkxtj",
+    {30, 23, 22},
+    { {"000"_b, "stnp_q_ldstnapair_offs"},
+      {"001"_b, "ldnp_q_ldstnapair_offs"},
+      {"010"_b, "stp_q_ldstpair_post"},
+      {"011"_b, "ldp_q_ldstpair_post"},
+    },
+  },
+
+  { "_jvnsgt",
+    {18},
+    { {"0"_b, "ld4_asisdlsop_bx4_r4b"},
+      {"1"_b, "ld4_asisdlsop_b4_i4b"},
+    },
+  },
+
+  { "_jvpjsm",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "_xrnqyn"},
+    },
+  },
+
+  { "_jxgpgg",
     {13, 12},
-    { {"00"_b, "sqadd_asisdsame_only"},
-      {"10"_b, "sqsub_asisdsame_only"},
-      {"11"_b, "cmge_asisdsame_only"},
+    { {"00"_b, "udiv_64_dp_2src"},
+      {"10"_b, "asrv_64_dp_2src"},
     },
   },
 
-  { "_jryylt",
-    {30, 23, 22, 19, 18, 17, 16},
-    { {"00000x1"_b, "smov_asimdins_w_w"},
-      {"0000x10"_b, "smov_asimdins_w_w"},
-      {"00010xx"_b, "smov_asimdins_w_w"},
-      {"0001110"_b, "smov_asimdins_w_w"},
-      {"000x10x"_b, "smov_asimdins_w_w"},
-      {"000x111"_b, "smov_asimdins_w_w"},
-      {"10000x1"_b, "smov_asimdins_x_x"},
-      {"1000x10"_b, "smov_asimdins_x_x"},
-      {"10010xx"_b, "smov_asimdins_x_x"},
-      {"1001110"_b, "smov_asimdins_x_x"},
-      {"100x10x"_b, "smov_asimdins_x_x"},
-      {"100x111"_b, "smov_asimdins_x_x"},
+  { "_jxgqqz",
+    {30},
+    { {"0"_b, "cbz_64_compbranch"},
     },
   },
 
-  { "_jsygzs",
-    {30, 23, 22, 12, 11, 10},
-    { {"0000xx"_b, "add_64_addsub_ext"},
-      {"000100"_b, "add_64_addsub_ext"},
-      {"1000xx"_b, "sub_64_addsub_ext"},
-      {"100100"_b, "sub_64_addsub_ext"},
-    },
-  },
-
-  { "_jtqlhs",
-    {22},
-    { {"0"_b, "str_64_ldst_regoff"},
-      {"1"_b, "ldr_64_ldst_regoff"},
-    },
-  },
-
-  { "_jvhnxl",
-    {23},
-    { {"0"_b, "fcmge_asimdsame_only"},
-      {"1"_b, "fcmgt_asimdsame_only"},
-    },
-  },
-
-  { "_jvpqrp",
-    {23, 22},
-    { {"00"_b, "fmla_asisdelem_rh_h"},
-      {"1x"_b, "fmla_asisdelem_r_sd"},
-    },
-  },
-
-  { "_jvvzjq",
-    {23, 22},
-    { {"00"_b, "fcsel_s_floatsel"},
-      {"01"_b, "fcsel_d_floatsel"},
-      {"11"_b, "fcsel_h_floatsel"},
-    },
-  },
-
-  { "_jxrlyh",
-    {12},
-    { {"0"_b, "_mtgksl"},
+  { "_jxltqm",
+    {13, 12},
+    { {"01"_b, "sqdmull_asisddiff_only"},
     },
   },
 
@@ -1478,17 +1757,6 @@
     },
   },
 
-  { "_jxtgtx",
-    {30, 23, 22},
-    { {"000"_b, "str_b_ldst_pos"},
-      {"001"_b, "ldr_b_ldst_pos"},
-      {"010"_b, "str_q_ldst_pos"},
-      {"011"_b, "ldr_q_ldst_pos"},
-      {"100"_b, "str_h_ldst_pos"},
-      {"101"_b, "ldr_h_ldst_pos"},
-    },
-  },
-
   { "_jxyskn",
     {13, 12, 11, 10},
     { {"0000"_b, "uqincp_z_p_z"},
@@ -1497,12 +1765,6 @@
     },
   },
 
-  { "_jxzrxm",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "usqadd_asisdmisc_r"},
-    },
-  },
-
   { "_jymnkk",
     {23, 22, 12, 11, 10},
     { {"01000"_b, "bfdot_z_zzzi"},
@@ -1513,10 +1775,10 @@
     },
   },
 
-  { "_jyxszq",
-    {30, 4},
-    { {"0x"_b, "b_only_branch_imm"},
-      {"10"_b, "b_only_condbranch"},
+  { "_jyzhnh",
+    {18},
+    { {"0"_b, "st1_asisdlsop_hx1_r1h"},
+      {"1"_b, "st1_asisdlsop_h1_i1h"},
     },
   },
 
@@ -1533,11 +1795,19 @@
     },
   },
 
-  { "_jzyzjh",
-    {18, 17, 12},
-    { {"0x0"_b, "st2_asisdlsop_dx2_r2d"},
-      {"100"_b, "st2_asisdlsop_dx2_r2d"},
-      {"110"_b, "st2_asisdlsop_d2_i2d"},
+  { "_jztlrz",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtmu_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtmu_asimdmisc_r"},
+      {"1111001"_b, "fcvtzu_asimdmiscfp16_r"},
+      {"1x00001"_b, "fcvtzu_asimdmisc_r"},
+      {"xx00000"_b, "neg_asimdmisc_r"},
+    },
+  },
+
+  { "_jztspt",
+    {18, 17},
+    { {"00"_b, "st4_asisdlso_s4_4s"},
     },
   },
 
@@ -1569,6 +1839,34 @@
     },
   },
 
+  { "_kgpsjz",
+    {13, 12, 11, 10},
+    { {"0000"_b, "saddl_asimddiff_l"},
+      {"0001"_b, "shadd_asimdsame_only"},
+      {"0010"_b, "_rkrlsy"},
+      {"0011"_b, "sqadd_asimdsame_only"},
+      {"0100"_b, "saddw_asimddiff_w"},
+      {"0101"_b, "srhadd_asimdsame_only"},
+      {"0110"_b, "_vypgrt"},
+      {"0111"_b, "_xygvjp"},
+      {"1000"_b, "ssubl_asimddiff_l"},
+      {"1001"_b, "shsub_asimdsame_only"},
+      {"1010"_b, "_pjhmvy"},
+      {"1011"_b, "sqsub_asimdsame_only"},
+      {"1100"_b, "ssubw_asimddiff_w"},
+      {"1101"_b, "cmgt_asimdsame_only"},
+      {"1110"_b, "_ygghnn"},
+      {"1111"_b, "cmge_asimdsame_only"},
+    },
+  },
+
+  { "_kgygky",
+    {30, 23, 22},
+    { {"000"_b, "sbfm_32m_bitfield"},
+      {"100"_b, "ubfm_32m_bitfield"},
+    },
+  },
+
   { "_khjvqq",
     {22, 11},
     { {"00"_b, "sqrdmulh_z_zzi_s"},
@@ -1576,23 +1874,38 @@
     },
   },
 
-  { "_kjghlk",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "orr_asimdimm_l_sl"},
-      {"00x100"_b, "ssra_asimdshf_r"},
-      {"00x110"_b, "srsra_asimdshf_r"},
-      {"010x00"_b, "ssra_asimdshf_r"},
-      {"010x10"_b, "srsra_asimdshf_r"},
-      {"011100"_b, "ssra_asimdshf_r"},
-      {"011110"_b, "srsra_asimdshf_r"},
-      {"0x1000"_b, "ssra_asimdshf_r"},
-      {"0x1010"_b, "srsra_asimdshf_r"},
+  { "_khrsgv",
+    {22, 20, 19, 13, 12},
+    { {"0x100"_b, "sri_asisdshf_r"},
+      {"0x101"_b, "sli_asisdshf_r"},
+      {"0x110"_b, "sqshlu_asisdshf_r"},
+      {"0x111"_b, "uqshl_asisdshf_r"},
+      {"10x00"_b, "sri_asisdshf_r"},
+      {"10x01"_b, "sli_asisdshf_r"},
+      {"10x10"_b, "sqshlu_asisdshf_r"},
+      {"10x11"_b, "uqshl_asisdshf_r"},
+      {"11100"_b, "sri_asisdshf_r"},
+      {"11101"_b, "sli_asisdshf_r"},
+      {"11110"_b, "sqshlu_asisdshf_r"},
+      {"11111"_b, "uqshl_asisdshf_r"},
+      {"x1000"_b, "sri_asisdshf_r"},
+      {"x1001"_b, "sli_asisdshf_r"},
+      {"x1010"_b, "sqshlu_asisdshf_r"},
+      {"x1011"_b, "uqshl_asisdshf_r"},
     },
   },
 
-  { "_kjngjl",
-    {23, 22},
-    { {"00"_b, "tbx_asimdtbl_l1_1"},
+  { "_khtsmx",
+    {18},
+    { {"0"_b, "ld4_asisdlsop_hx4_r4h"},
+      {"1"_b, "ld4_asisdlsop_h4_i4h"},
+    },
+  },
+
+  { "_khvvtr",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "rev16_32_dp_1src"},
+      {"0000001"_b, "cls_32_dp_1src"},
     },
   },
 
@@ -1609,67 +1922,22 @@
     },
   },
 
-  { "_kjrxpx",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "ucvtf_asimdmiscfp16_r"},
-      {"0x00001"_b, "ucvtf_asimdmisc_r"},
-      {"1111000"_b, "fcmle_asimdmiscfp16_fz"},
-      {"1111001"_b, "frsqrte_asimdmiscfp16_r"},
-      {"1x00000"_b, "fcmle_asimdmisc_fz"},
-      {"1x00001"_b, "frsqrte_asimdmisc_r"},
+  { "_kjsrkm",
+    {18, 17, 16, 13, 12, 11, 10, 9, 8, 7, 4, 3, 2, 1, 0},
+    { {"000000000011111"_b, "_zztypv"},
     },
   },
 
-  { "_kjryvx",
-    {12},
-    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
-    },
-  },
-
-  { "_kjyphv",
-    {20, 19, 18, 17, 16},
-    { {"10000"_b, "fmaxp_asisdpair_only_sd"},
-    },
-  },
-
-  { "_kkgpjl",
-    {20, 19, 18, 17},
-    { {"0000"_b, "_msqkyy"},
-    },
-  },
-
-  { "_kkgzst",
-    {23, 22, 13, 12, 11, 10},
-    { {"0001x0"_b, "fmla_asimdelem_rh_h"},
-      {"0x0001"_b, "sshr_asimdshf_r"},
-      {"0x0101"_b, "ssra_asimdshf_r"},
-      {"0x1001"_b, "srshr_asimdshf_r"},
-      {"0x1101"_b, "srsra_asimdshf_r"},
-      {"1000x0"_b, "fmlal_asimdelem_lh"},
-      {"1x01x0"_b, "fmla_asimdelem_r_sd"},
-      {"xx10x0"_b, "smlal_asimdelem_l"},
-      {"xx11x0"_b, "sqdmlal_asimdelem_l"},
-    },
-  },
-
-  { "_kkmjyr",
-    {0},
-    { {"1"_b, "blrabz_64_branch_reg"},
-    },
-  },
-
-  { "_kkmxxx",
+  { "_kkkltp",
     {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_jqplxx"},
+    { {"1"_b, "_sqkkqy"},
     },
   },
 
-  { "_kknjng",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "ssra_asisdshf_r"},
-      {"001x0"_b, "ssra_asisdshf_r"},
-      {"01xx0"_b, "ssra_asisdshf_r"},
+  { "_kkpxth",
+    {18},
+    { {"0"_b, "ld1_asisdlsop_bx1_r1b"},
+      {"1"_b, "ld1_asisdlsop_b1_i1b"},
     },
   },
 
@@ -1685,47 +1953,71 @@
     },
   },
 
+  { "_kktzst",
+    {13, 12, 11, 10},
+    { {"1111"_b, "frsqrts_asisdsamefp16_only"},
+    },
+  },
+
   { "_kkvrzq",
     {23, 22, 9, 8, 7, 6, 5},
     { {"0000000"_b, "pfalse_p"},
     },
   },
 
-  { "_klkgqk",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtms_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtms_asimdmisc_r"},
-      {"1111001"_b, "fcvtzs_asimdmiscfp16_r"},
-      {"1x00001"_b, "fcvtzs_asimdmisc_r"},
-      {"xx00000"_b, "abs_asimdmisc_r"},
-      {"xx10001"_b, "addv_asimdall_only"},
+  { "_klrksl",
+    {30, 23, 22, 19, 16},
+    { {"10010"_b, "aesmc_b_cryptoaes"},
+      {"x0x01"_b, "fcvtn_asimdmisc_n"},
+      {"x1001"_b, "bfcvtn_asimdmisc_4s"},
+      {"xxx00"_b, "sadalp_asimdmisc_p"},
     },
   },
 
-  { "_klnhpj",
-    {9, 8, 7, 6, 5, 1, 0},
-    { {"1111111"_b, "eretab_64e_branch_reg"},
+  { "_klsmsv",
+    {30, 23, 22, 10},
+    { {"1001"_b, "ins_asimdins_iv_v"},
+      {"x000"_b, "ext_asimdext_only"},
     },
   },
 
-  { "_klthpn",
-    {30, 23, 22, 11, 10},
-    { {"01000"_b, "csel_64_condsel"},
-      {"01001"_b, "csinc_64_condsel"},
-      {"11000"_b, "csinv_64_condsel"},
-      {"11001"_b, "csneg_64_condsel"},
+  { "_kltlmp",
+    {22, 20, 19, 13, 12},
+    { {"0x100"_b, "ushr_asisdshf_r"},
+      {"0x101"_b, "usra_asisdshf_r"},
+      {"0x110"_b, "urshr_asisdshf_r"},
+      {"0x111"_b, "ursra_asisdshf_r"},
+      {"10x00"_b, "ushr_asisdshf_r"},
+      {"10x01"_b, "usra_asisdshf_r"},
+      {"10x10"_b, "urshr_asisdshf_r"},
+      {"10x11"_b, "ursra_asisdshf_r"},
+      {"11100"_b, "ushr_asisdshf_r"},
+      {"11101"_b, "usra_asisdshf_r"},
+      {"11110"_b, "urshr_asisdshf_r"},
+      {"11111"_b, "ursra_asisdshf_r"},
+      {"x1000"_b, "ushr_asisdshf_r"},
+      {"x1001"_b, "usra_asisdshf_r"},
+      {"x1010"_b, "urshr_asisdshf_r"},
+      {"x1011"_b, "ursra_asisdshf_r"},
     },
   },
 
-  { "_kmhtqp",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
+  { "_klxxgx",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fmov_s_floatdp1"},
+      {"000010"_b, "fneg_s_floatdp1"},
+      {"001000"_b, "frintn_s_floatdp1"},
+      {"001010"_b, "frintm_s_floatdp1"},
+      {"001100"_b, "frinta_s_floatdp1"},
+      {"001110"_b, "frintx_s_floatdp1"},
+      {"010000"_b, "frint32z_s_floatdp1"},
+      {"010010"_b, "frint64z_s_floatdp1"},
     },
   },
 
-  { "_kmkpnj",
-    {17},
-    { {"0"_b, "ld3_asisdlso_h3_3h"},
+  { "_kmqlmz",
+    {18},
+    { {"0"_b, "st1_asisdlso_b1_1b"},
     },
   },
 
@@ -1748,66 +2040,67 @@
     },
   },
 
-  { "_knpsmq",
-    {18, 17},
-    { {"0x"_b, "st2_asisdlsop_sx2_r2s"},
-      {"10"_b, "st2_asisdlsop_sx2_r2s"},
-      {"11"_b, "st2_asisdlsop_s2_i2s"},
+  { "_knpjtt",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxrh_lr32_ldstexcl"},
     },
   },
 
-  { "_kpmvkn",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "stur_b_ldst_unscaled"},
-      {"00001"_b, "str_b_ldst_immpost"},
-      {"00011"_b, "str_b_ldst_immpre"},
-      {"00100"_b, "ldur_b_ldst_unscaled"},
-      {"00101"_b, "ldr_b_ldst_immpost"},
-      {"00111"_b, "ldr_b_ldst_immpre"},
-      {"01000"_b, "stur_q_ldst_unscaled"},
-      {"01001"_b, "str_q_ldst_immpost"},
-      {"01011"_b, "str_q_ldst_immpre"},
-      {"01100"_b, "ldur_q_ldst_unscaled"},
-      {"01101"_b, "ldr_q_ldst_immpost"},
-      {"01111"_b, "ldr_q_ldst_immpre"},
-      {"10000"_b, "stur_h_ldst_unscaled"},
-      {"10001"_b, "str_h_ldst_immpost"},
-      {"10011"_b, "str_h_ldst_immpre"},
-      {"10100"_b, "ldur_h_ldst_unscaled"},
-      {"10101"_b, "ldr_h_ldst_immpost"},
-      {"10111"_b, "ldr_h_ldst_immpre"},
+  { "_kpgghm",
+    {22, 20, 19, 18, 17, 16, 13, 12},
+    { {"01111100"_b, "ldapr_64l_memop"},
     },
   },
 
-  { "_kpqgsn",
-    {12},
-    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+  { "_kpnlmr",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "clz_asimdmisc_r"},
+      {"00001"_b, "uqxtn_asimdmisc_n"},
     },
   },
 
-  { "_kpxtsp",
-    {6, 5},
-    { {"00"_b, "cfinv_m_pstate"},
-      {"01"_b, "xaflag_m_pstate"},
-      {"10"_b, "axflag_m_pstate"},
+  { "_kppzvh",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_s_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_s_ldapstl_simd"},
+      {"100xx10"_b, "stlur_d_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_d_ldapstl_simd"},
+      {"x000001"_b, "cpyprn_cpy_memcms"},
+      {"x000101"_b, "cpypwtrn_cpy_memcms"},
+      {"x001001"_b, "cpyprtrn_cpy_memcms"},
+      {"x001101"_b, "cpyptrn_cpy_memcms"},
+      {"x010001"_b, "cpymrn_cpy_memcms"},
+      {"x010101"_b, "cpymwtrn_cpy_memcms"},
+      {"x011001"_b, "cpymrtrn_cpy_memcms"},
+      {"x011101"_b, "cpymtrn_cpy_memcms"},
+      {"x100001"_b, "cpyern_cpy_memcms"},
+      {"x100101"_b, "cpyewtrn_cpy_memcms"},
+      {"x101001"_b, "cpyertrn_cpy_memcms"},
+      {"x101101"_b, "cpyetrn_cpy_memcms"},
+      {"x110001"_b, "setge_set_memcms"},
+      {"x110101"_b, "setget_set_memcms"},
+      {"x111001"_b, "setgen_set_memcms"},
+      {"x111101"_b, "setgetn_set_memcms"},
     },
   },
 
-  { "_kpyqyv",
-    {12},
-    { {"0"_b, "_vjxqhp"},
+  { "_kpsnsk",
+    {22},
+    { {"0"_b, "str_64_ldst_regoff"},
+      {"1"_b, "ldr_64_ldst_regoff"},
     },
   },
 
-  { "_kqjmvy",
-    {12},
-    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
+  { "_kqsqly",
+    {18},
+    { {"0"_b, "st1_asisdlsep_r2_r2"},
+      {"1"_b, "st1_asisdlsep_i2_i2"},
     },
   },
 
-  { "_kqkhtz",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "autiza_64z_dp_1src"},
+  { "_kqstrr",
+    {18, 17, 12},
+    { {"000"_b, "st3_asisdlso_d3_3d"},
     },
   },
 
@@ -1821,19 +2114,6 @@
     },
   },
 
-  { "_kqxhzx",
-    {20, 19, 18, 16, 12, 11, 10},
-    { {"0000xxx"_b, "_zmzxjm"},
-      {"0010xxx"_b, "_tmshps"},
-      {"0011xxx"_b, "_tsksxr"},
-      {"0110100"_b, "_pnzphx"},
-      {"0111100"_b, "_xpkkpn"},
-      {"1000xxx"_b, "_psqpkp"},
-      {"1001xxx"_b, "_phxkzh"},
-      {"1100xxx"_b, "_vsvrgt"},
-    },
-  },
-
   { "_kqzmtr",
     {30, 23, 22, 20, 13},
     { {"00001"_b, "ld1b_z_p_bi_u16"},
@@ -1852,62 +2132,107 @@
     },
   },
 
-  { "_krhrrr",
+  { "_krllsy",
+    {19},
+    { {"1"_b, "sysp_cr_syspairinstrs"},
+    },
+  },
+
+  { "_krtvhr",
     {12, 10},
-    { {"00"_b, "_xyzpvp"},
-      {"01"_b, "_nlyntn"},
-      {"10"_b, "_zhkjzg"},
-      {"11"_b, "_zmpzkg"},
+    { {"00"_b, "_xvmxrg"},
+      {"01"_b, "_mvvngm"},
+      {"10"_b, "_mkyyng"},
+      {"11"_b, "_vvzsmg"},
     },
   },
 
-  { "_krlpjl",
-    {23, 22, 20, 19, 17, 16},
-    { {"000010"_b, "scvtf_s64_float2fix"},
-      {"000011"_b, "ucvtf_s64_float2fix"},
-      {"001100"_b, "fcvtzs_64s_float2fix"},
-      {"001101"_b, "fcvtzu_64s_float2fix"},
-      {"010010"_b, "scvtf_d64_float2fix"},
-      {"010011"_b, "ucvtf_d64_float2fix"},
-      {"011100"_b, "fcvtzs_64d_float2fix"},
-      {"011101"_b, "fcvtzu_64d_float2fix"},
-      {"110010"_b, "scvtf_h64_float2fix"},
-      {"110011"_b, "ucvtf_h64_float2fix"},
-      {"111100"_b, "fcvtzs_64h_float2fix"},
-      {"111101"_b, "fcvtzu_64h_float2fix"},
+  { "_krvxxx",
+    {12, 9, 8, 7, 6, 5},
+    { {"100000"_b, "_skjqrx"},
     },
   },
 
-  { "_kstltt",
-    {18, 17, 12},
-    { {"0x0"_b, "ld3_asisdlsop_dx3_r3d"},
-      {"100"_b, "ld3_asisdlsop_dx3_r3d"},
-      {"110"_b, "ld3_asisdlsop_d3_i3d"},
+  { "_ksgpqz",
+    {30},
+    { {"1"_b, "_trjmmn"},
     },
   },
 
-  { "_ksvxxm",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "pacizb_64z_dp_1src"},
+  { "_kshtnj",
+    {23, 22, 13, 12, 11, 10},
+    { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
+      {"0x0001"_b, "sri_asimdshf_r"},
+      {"0x0101"_b, "sli_asimdshf_r"},
+      {"0x1001"_b, "sqshlu_asimdshf_r"},
+      {"0x1101"_b, "uqshl_asimdshf_r"},
+      {"10x1x0"_b, "fcmla_asimdelem_c_s"},
+      {"xx00x0"_b, "mls_asimdelem_r"},
+      {"xx10x0"_b, "umlsl_asimdelem_l"},
     },
   },
 
-  { "_ktnjrx",
+  { "_kskqmz",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000011"_b, "sqabs_asisdmisc_r"},
+      {"0000100"_b, "sqxtn_asisdmisc_n"},
+    },
+  },
+
+  { "_ksrkkn",
+    {22},
+    { {"0"_b, "str_32_ldst_regoff"},
+      {"1"_b, "ldr_32_ldst_regoff"},
+    },
+  },
+
+  { "_kssltr",
+    {13, 12, 11, 10},
+    { {"0000"_b, "smull_asimddiff_l"},
+      {"0001"_b, "_pstgvl"},
+      {"0010"_b, "_ztlysk"},
+      {"0011"_b, "_hxxqks"},
+      {"0100"_b, "sqdmull_asimddiff_l"},
+      {"0101"_b, "_jkxyvn"},
+      {"0110"_b, "_lvsrnj"},
+      {"0111"_b, "_vvgnhm"},
+      {"1000"_b, "pmull_asimddiff_l"},
+      {"1001"_b, "_skqzyg"},
+      {"1010"_b, "_szqlsn"},
+      {"1011"_b, "_jqvpqx"},
+      {"1101"_b, "_yyvjqv"},
+      {"1110"_b, "_xlyppq"},
+      {"1111"_b, "_mhljkp"},
+    },
+  },
+
+  { "_ktngnm",
+    {12, 10},
+    { {"00"_b, "_hxgngr"},
+      {"01"_b, "_ngkgsg"},
+      {"10"_b, "_plrggq"},
+      {"11"_b, "_kxztps"},
+    },
+  },
+
+  { "_ktpxrr",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "fnmadd_s_floatdp3"},
-      {"001xxxx"_b, "fnmadd_d_floatdp3"},
-      {"011xxxx"_b, "fnmadd_h_floatdp3"},
-      {"10001x0"_b, "fmls_asisdelem_rh_h"},
-      {"10x0101"_b, "shl_asisdshf_r"},
-      {"10x1101"_b, "sqshl_asisdshf_r"},
-      {"11x01x0"_b, "fmls_asisdelem_r_sd"},
-      {"1xx11x0"_b, "sqdmlsl_asisdelem_l"},
+    { {"0001111"_b, "casp_cp32_ldstexcl"},
+      {"0011111"_b, "caspa_cp32_ldstexcl"},
+      {"0101111"_b, "casb_c32_ldstexcl"},
+      {"0111111"_b, "casab_c32_ldstexcl"},
+      {"1001111"_b, "casp_cp64_ldstexcl"},
+      {"1011111"_b, "caspa_cp64_ldstexcl"},
+      {"1101111"_b, "cash_c32_ldstexcl"},
+      {"1111111"_b, "casah_c32_ldstexcl"},
     },
   },
 
-  { "_ktrkrp",
-    {17},
-    { {"0"_b, "st3_asisdlso_h3_3h"},
+  { "_ktsgth",
+    {23, 22},
+    { {"00"_b, "fcsel_s_floatsel"},
+      {"01"_b, "fcsel_d_floatsel"},
+      {"11"_b, "fcsel_h_floatsel"},
     },
   },
 
@@ -1919,15 +2244,9 @@
     },
   },
 
-  { "_kvgjzh",
+  { "_ktyrgy",
     {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_d_floatimm"},
-    },
-  },
-
-  { "_kvmrng",
-    {23, 22},
-    { {"00"_b, "tbl_asimdtbl_l1_1"},
+    { {"00000"_b, "fmov_s_floatimm"},
     },
   },
 
@@ -1944,55 +2263,51 @@
     },
   },
 
-  { "_kvyysq",
-    {12, 9, 8, 7, 6, 5},
-    { {"100000"_b, "_sjrqth"},
+  { "_kxhmlx",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtnu_asisdmiscfp16_r"},
+      {"0x00001"_b, "fcvtnu_asisdmisc_r"},
+      {"1111001"_b, "fcvtpu_asisdmiscfp16_r"},
+      {"1x00001"_b, "fcvtpu_asisdmisc_r"},
     },
   },
 
-  { "_kxhjtk",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_s_floatimm"},
+  { "_kxmjsh",
+    {20, 19, 18, 17, 16},
+    { {"10000"_b, "fmaxp_asisdpair_only_sd"},
     },
   },
 
-  { "_kxjgsz",
-    {23, 22, 20, 19, 11},
-    { {"00000"_b, "movi_asimdimm_m_sm"},
+  { "_kxmxxm",
+    {23},
+    { {"0"_b, "fcmge_asimdsame_only"},
+      {"1"_b, "fcmgt_asimdsame_only"},
     },
   },
 
-  { "_kxkyqr",
-    {17},
-    { {"0"_b, "ld4_asisdlsop_hx4_r4h"},
-      {"1"_b, "ld4_asisdlsop_h4_i4h"},
+  { "_kxpqhv",
+    {30, 23, 22, 11, 10},
+    { {"10001"_b, "stg_64spost_ldsttags"},
+      {"10010"_b, "stg_64soffset_ldsttags"},
+      {"10011"_b, "stg_64spre_ldsttags"},
+      {"10100"_b, "ldg_64loffset_ldsttags"},
+      {"10101"_b, "stzg_64spost_ldsttags"},
+      {"10110"_b, "stzg_64soffset_ldsttags"},
+      {"10111"_b, "stzg_64spre_ldsttags"},
+      {"11001"_b, "st2g_64spost_ldsttags"},
+      {"11010"_b, "st2g_64soffset_ldsttags"},
+      {"11011"_b, "st2g_64spre_ldsttags"},
+      {"11101"_b, "stz2g_64spost_ldsttags"},
+      {"11110"_b, "stz2g_64soffset_ldsttags"},
+      {"11111"_b, "stz2g_64spre_ldsttags"},
     },
   },
 
-  { "_kxprqm",
-    {13, 12, 11, 10},
-    { {"0000"_b, "raddhn_asimddiff_n"},
-      {"0001"_b, "ushl_asimdsame_only"},
-      {"0010"_b, "_mmknzp"},
-      {"0011"_b, "uqshl_asimdsame_only"},
-      {"0100"_b, "uabal_asimddiff_l"},
-      {"0101"_b, "urshl_asimdsame_only"},
-      {"0110"_b, "_glgrjy"},
-      {"0111"_b, "uqrshl_asimdsame_only"},
-      {"1000"_b, "rsubhn_asimddiff_n"},
-      {"1001"_b, "umax_asimdsame_only"},
-      {"1010"_b, "_pxlnhs"},
-      {"1011"_b, "umin_asimdsame_only"},
-      {"1100"_b, "uabdl_asimddiff_l"},
-      {"1101"_b, "uabd_asimdsame_only"},
-      {"1110"_b, "_jkqktg"},
-      {"1111"_b, "uaba_asimdsame_only"},
-    },
-  },
-
-  { "_kxsysq",
-    {30},
-    { {"0"_b, "tbnz_only_testbranch"},
+  { "_kxtqjh",
+    {23, 22},
+    { {"01"_b, "_mhnlsy"},
+      {"10"_b, "xar_vvv2_crypto3_imm6"},
+      {"11"_b, "_spxvlt"},
     },
   },
 
@@ -2009,6 +2324,39 @@
     },
   },
 
+  { "_kxztps",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "orr_asimdimm_l_sl"},
+      {"00x100"_b, "shl_asimdshf_r"},
+      {"00x110"_b, "sqshl_asimdshf_r"},
+      {"010x00"_b, "shl_asimdshf_r"},
+      {"010x10"_b, "sqshl_asimdshf_r"},
+      {"011100"_b, "shl_asimdshf_r"},
+      {"011110"_b, "sqshl_asimdshf_r"},
+      {"0x1000"_b, "shl_asimdshf_r"},
+      {"0x1010"_b, "sqshl_asimdshf_r"},
+    },
+  },
+
+  { "_kyhhqt",
+    {23, 20, 19, 18, 17, 16, 13},
+    { {"0000000"_b, "ld1r_asisdlso_r1"},
+      {"0000001"_b, "ld3r_asisdlso_r3"},
+      {"10xxxx0"_b, "ld1r_asisdlsop_rx1_r"},
+      {"10xxxx1"_b, "ld3r_asisdlsop_rx3_r"},
+      {"110xxx0"_b, "ld1r_asisdlsop_rx1_r"},
+      {"110xxx1"_b, "ld3r_asisdlsop_rx3_r"},
+      {"1110xx0"_b, "ld1r_asisdlsop_rx1_r"},
+      {"1110xx1"_b, "ld3r_asisdlsop_rx3_r"},
+      {"11110x0"_b, "ld1r_asisdlsop_rx1_r"},
+      {"11110x1"_b, "ld3r_asisdlsop_rx3_r"},
+      {"1111100"_b, "ld1r_asisdlsop_rx1_r"},
+      {"1111101"_b, "ld3r_asisdlsop_rx3_r"},
+      {"1111110"_b, "ld1r_asisdlsop_r1_i"},
+      {"1111111"_b, "ld3r_asisdlsop_r3_i"},
+    },
+  },
+
   { "_kyjxrr",
     {30, 13},
     { {"00"_b, "_qtxpky"},
@@ -2017,17 +2365,12 @@
     },
   },
 
-  { "_kykymg",
-    {30},
-    { {"1"_b, "_rsyhtj"},
-    },
-  },
-
-  { "_kypqpy",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"1010000"_b, "sm3partw1_vvv4_cryptosha512_3"},
-      {"1010001"_b, "sm3partw2_vvv4_cryptosha512_3"},
-      {"1010010"_b, "sm4ekey_vvv4_cryptosha512_3"},
+  { "_kynxnz",
+    {30, 23, 22, 20, 19},
+    { {"0xxxx"_b, "bl_only_branch_imm"},
+      {"10001"_b, "sysl_rc_systeminstrs"},
+      {"1001x"_b, "mrs_rs_systemmove"},
+      {"1011x"_b, "mrrs_rs_systemmovepr"},
     },
   },
 
@@ -2038,12 +2381,6 @@
     },
   },
 
-  { "_kyxqgg",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "stgm_64bulk_ldsttags"},
-    },
-  },
-
   { "_kyxrqg",
     {10},
     { {"0"_b, "uabalb_z_zzz"},
@@ -2051,17 +2388,18 @@
     },
   },
 
-  { "_kyygzs",
-    {19},
-    { {"0"_b, "_nnkyzr"},
-      {"1"_b, "sys_cr_systeminstrs"},
+  { "_kzjxxk",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "rbit_32_dp_1src"},
+      {"0000001"_b, "clz_32_dp_1src"},
+      {"0000010"_b, "abs_32_dp_1src"},
     },
   },
 
-  { "_kyyzks",
+  { "_kzksnv",
     {13, 12},
-    { {"00"_b, "sdiv_32_dp_2src"},
-      {"10"_b, "rorv_32_dp_2src"},
+    { {"00"_b, "sqshl_asisdsame_only"},
+      {"01"_b, "sqrshl_asisdsame_only"},
     },
   },
 
@@ -2074,83 +2412,59 @@
     },
   },
 
-  { "_kzrklp",
-    {17},
-    { {"0"_b, "ld4_asisdlso_b4_4b"},
+  { "_kzprzt",
+    {9, 8, 7, 6, 5, 2, 1},
+    { {"1111111"_b, "retaa_64e_branch_reg"},
     },
   },
 
-  { "_lgglzy",
-    {30, 23, 22, 19, 16},
-    { {"10010"_b, "aesimc_b_cryptoaes"},
-      {"x0x01"_b, "fcvtl_asimdmisc_l"},
-      {"xxx00"_b, "sqabs_asimdmisc_r"},
+  { "_kzpyzy",
+    {30, 23, 22, 13},
+    { {"0000"_b, "ld1sh_z_p_br_s64"},
+      {"0001"_b, "ldff1sh_z_p_br_s64"},
+      {"0010"_b, "ld1w_z_p_br_u32"},
+      {"0011"_b, "ldff1w_z_p_br_u32"},
+      {"0100"_b, "ld1sb_z_p_br_s64"},
+      {"0101"_b, "ldff1sb_z_p_br_s64"},
+      {"0110"_b, "ld1sb_z_p_br_s16"},
+      {"0111"_b, "ldff1sb_z_p_br_s16"},
+      {"1001"_b, "stnt1w_z_p_br_contiguous"},
+      {"1010"_b, "st1w_z_p_br"},
+      {"1011"_b, "st3w_z_p_br_contiguous"},
+      {"1100"_b, "str_z_bi"},
+      {"1101"_b, "stnt1d_z_p_br_contiguous"},
+      {"1111"_b, "st3d_z_p_br_contiguous"},
     },
   },
 
-  { "_lhmlrj",
-    {30, 23, 22, 20, 19},
-    { {"0xxxx"_b, "bl_only_branch_imm"},
-      {"10001"_b, "sysl_rc_systeminstrs"},
-      {"1001x"_b, "mrs_rs_systemmove"},
+  { "_kzyzrh",
+    {16, 13, 12},
+    { {"000"_b, "rev16_64_dp_1src"},
+      {"001"_b, "cls_64_dp_1src"},
+      {"100"_b, "pacib_64p_dp_1src"},
+      {"101"_b, "autib_64p_dp_1src"},
+      {"110"_b, "_vpyvjr"},
+      {"111"_b, "_sntnsm"},
     },
   },
 
-  { "_lhpgsn",
-    {13, 12, 10},
-    { {"000"_b, "sqdmulh_asisdelem_r"},
-      {"010"_b, "sqrdmulh_asisdelem_r"},
-      {"101"_b, "_mxkgnq"},
-      {"111"_b, "_sgnknz"},
+  { "_lgmlmt",
+    {18, 17},
+    { {"00"_b, "ld3_asisdlse_r3"},
     },
   },
 
-  { "_lhtyjq",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_gskkxk"},
-      {"0000001"_b, "_ktrkrp"},
-      {"0100000"_b, "_nmtkjv"},
-      {"0100001"_b, "_kmkpnj"},
-      {"100xxx0"_b, "st1_asisdlsop_hx1_r1h"},
-      {"100xxx1"_b, "st3_asisdlsop_hx3_r3h"},
-      {"1010xx0"_b, "st1_asisdlsop_hx1_r1h"},
-      {"1010xx1"_b, "st3_asisdlsop_hx3_r3h"},
-      {"10110x0"_b, "st1_asisdlsop_hx1_r1h"},
-      {"10110x1"_b, "st3_asisdlsop_hx3_r3h"},
-      {"1011100"_b, "st1_asisdlsop_hx1_r1h"},
-      {"1011101"_b, "st3_asisdlsop_hx3_r3h"},
-      {"1011110"_b, "_mgmgqh"},
-      {"1011111"_b, "_gzylzp"},
-      {"110xxx0"_b, "ld1_asisdlsop_hx1_r1h"},
-      {"110xxx1"_b, "ld3_asisdlsop_hx3_r3h"},
-      {"1110xx0"_b, "ld1_asisdlsop_hx1_r1h"},
-      {"1110xx1"_b, "ld3_asisdlsop_hx3_r3h"},
-      {"11110x0"_b, "ld1_asisdlsop_hx1_r1h"},
-      {"11110x1"_b, "ld3_asisdlsop_hx3_r3h"},
-      {"1111100"_b, "ld1_asisdlsop_hx1_r1h"},
-      {"1111101"_b, "ld3_asisdlsop_hx3_r3h"},
-      {"1111110"_b, "_mrkkps"},
-      {"1111111"_b, "_xygxsv"},
+  { "_lgyqpk",
+    {18, 17},
+    { {"0x"_b, "st2_asisdlsop_sx2_r2s"},
+      {"10"_b, "st2_asisdlsop_sx2_r2s"},
+      {"11"_b, "st2_asisdlsop_s2_i2s"},
     },
   },
 
-  { "_lhvtrp",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "orr_asimdimm_l_hl"},
-      {"00x100"_b, "sqshrn_asimdshf_n"},
-      {"00x101"_b, "sqrshrn_asimdshf_n"},
-      {"010x00"_b, "sqshrn_asimdshf_n"},
-      {"010x01"_b, "sqrshrn_asimdshf_n"},
-      {"011100"_b, "sqshrn_asimdshf_n"},
-      {"011101"_b, "sqrshrn_asimdshf_n"},
-      {"0x1000"_b, "sqshrn_asimdshf_n"},
-      {"0x1001"_b, "sqrshrn_asimdshf_n"},
-    },
-  },
-
-  { "_ljhtkq",
-    {20, 19, 18, 17, 16, 13, 12, 11},
-    { {"00000000"_b, "_yvyxkx"},
+  { "_lgzlyq",
+    {30, 23, 11, 10},
+    { {"1001"_b, "_kltlmp"},
     },
   },
 
@@ -2202,9 +2516,21 @@
     },
   },
 
-  { "_ljxhnq",
-    {12},
-    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
+  { "_ljtvgz",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "ucvtf_asimdmiscfp16_r"},
+      {"0x00001"_b, "ucvtf_asimdmisc_r"},
+      {"1111000"_b, "fcmle_asimdmiscfp16_fz"},
+      {"1111001"_b, "frsqrte_asimdmiscfp16_r"},
+      {"1x00000"_b, "fcmle_asimdmisc_fz"},
+      {"1x00001"_b, "frsqrte_asimdmisc_r"},
+    },
+  },
+
+  { "_lkpprr",
+    {30, 23, 22},
+    { {"000"_b, "sbfm_32m_bitfield"},
+      {"100"_b, "ubfm_32m_bitfield"},
     },
   },
 
@@ -2215,27 +2541,6 @@
     },
   },
 
-  { "_lkvynm",
-    {22, 20, 19, 13, 12},
-    { {"0x100"_b, "ushr_asisdshf_r"},
-      {"0x101"_b, "usra_asisdshf_r"},
-      {"0x110"_b, "urshr_asisdshf_r"},
-      {"0x111"_b, "ursra_asisdshf_r"},
-      {"10x00"_b, "ushr_asisdshf_r"},
-      {"10x01"_b, "usra_asisdshf_r"},
-      {"10x10"_b, "urshr_asisdshf_r"},
-      {"10x11"_b, "ursra_asisdshf_r"},
-      {"11100"_b, "ushr_asisdshf_r"},
-      {"11101"_b, "usra_asisdshf_r"},
-      {"11110"_b, "urshr_asisdshf_r"},
-      {"11111"_b, "ursra_asisdshf_r"},
-      {"x1000"_b, "ushr_asisdshf_r"},
-      {"x1001"_b, "usra_asisdshf_r"},
-      {"x1010"_b, "urshr_asisdshf_r"},
-      {"x1011"_b, "ursra_asisdshf_r"},
-    },
-  },
-
   { "_lkxgjy",
     {23, 22},
     { {"10"_b, "cmla_z_zzzi_h"},
@@ -2243,9 +2548,16 @@
     },
   },
 
-  { "_llnzlv",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "sqneg_asisdmisc_r"},
+  { "_lkzyzv",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_vgxtvy"},
+    },
+  },
+
+  { "_lljxgp",
+    {1},
+    { {"1"_b, "blrabz_64_branch_reg"},
     },
   },
 
@@ -2257,9 +2569,17 @@
     },
   },
 
-  { "_llqjlh",
-    {10},
-    { {"0"_b, "_lhtyjq"},
+  { "_llqtkj",
+    {18, 17},
+    { {"00"_b, "ld2_asisdlso_s2_2s"},
+    },
+  },
+
+  { "_lltzjg",
+    {18, 17, 12},
+    { {"0x0"_b, "ld2_asisdlsop_dx2_r2d"},
+      {"100"_b, "ld2_asisdlsop_dx2_r2d"},
+      {"110"_b, "ld2_asisdlsop_d2_i2d"},
     },
   },
 
@@ -2269,15 +2589,15 @@
     },
   },
 
-  { "_llxlqz",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "cmge_asisdmisc_z"},
+  { "_lmmjvx",
+    {4},
+    { {"0"_b, "ccmn_64_condcmp_reg"},
     },
   },
 
-  { "_lmtnzv",
-    {12},
-    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
+  { "_lmmkzh",
+    {4, 3, 2, 1, 0},
+    { {"11111"_b, "_nntvzj"},
     },
   },
 
@@ -2287,66 +2607,104 @@
     },
   },
 
-  { "_lnjpjs",
+  { "_lnkrzt",
+    {18, 4},
+    { {"00"_b, "fcmne_p_p_z0"},
+    },
+  },
+
+  { "_lnmhqq",
+    {22, 13, 12},
+    { {"000"_b, "ldsmaxa_64_memop"},
+      {"001"_b, "ldsmina_64_memop"},
+      {"010"_b, "ldumaxa_64_memop"},
+      {"011"_b, "ldumina_64_memop"},
+      {"100"_b, "ldsmaxal_64_memop"},
+      {"101"_b, "ldsminal_64_memop"},
+      {"110"_b, "ldumaxal_64_memop"},
+      {"111"_b, "lduminal_64_memop"},
+    },
+  },
+
+  { "_lnntps",
+    {30, 11, 10},
+    { {"000"_b, "_gvxjvz"},
+      {"001"_b, "_ypzllm"},
+      {"011"_b, "_gslmjl"},
+      {"100"_b, "_jxltqm"},
+      {"101"_b, "_shqyqv"},
+      {"110"_b, "_jpvmkz"},
+      {"111"_b, "_pxnyvl"},
+    },
+  },
+
+  { "_lnsjqy",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "stzgm_64bulk_ldsttags"},
+    },
+  },
+
+  { "_lplpkk",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"1101001"_b, "smmla_asimdsame2_g"},
+      {"1101011"_b, "usmmla_asimdsame2_g"},
+      {"x100111"_b, "usdot_asimdsame2_d"},
+      {"xxx0101"_b, "sdot_asimdsame2_d"},
+    },
+  },
+
+  { "_lplzxv",
+    {13, 12, 11, 10},
+    { {"0000"_b, "umull_asimddiff_l"},
+      {"0001"_b, "_yxgmrs"},
+      {"0010"_b, "_vyqxyz"},
+      {"0011"_b, "_snzvtt"},
+      {"0101"_b, "_svgvjm"},
+      {"0110"_b, "_ljtvgz"},
+      {"0111"_b, "_snhmgn"},
+      {"1001"_b, "_kxmxxm"},
+      {"1010"_b, "_nkpyjg"},
+      {"1011"_b, "_gmsqqz"},
+      {"1101"_b, "_gzgpjp"},
+      {"1110"_b, "_nzmqhv"},
+      {"1111"_b, "_xgxtlr"},
+    },
+  },
+
+  { "_lptrlg",
+    {13, 12},
+    { {"00"_b, "sqadd_asisdsame_only"},
+      {"10"_b, "sqsub_asisdsame_only"},
+      {"11"_b, "cmge_asisdsame_only"},
+    },
+  },
+
+  { "_lpzgvs",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stllr_sl32_ldstexcl"},
+    },
+  },
+
+  { "_lqjlkj",
+    {13, 12},
+    { {"00"_b, "cpyfp_cpy_memcms"},
+      {"01"_b, "cpyfpwt_cpy_memcms"},
+      {"10"_b, "cpyfprt_cpy_memcms"},
+      {"11"_b, "cpyfpt_cpy_memcms"},
+    },
+  },
+
+  { "_lqknkn",
     {18, 17},
-    { {"0x"_b, "ld3_asisdlsop_sx3_r3s"},
-      {"10"_b, "ld3_asisdlsop_sx3_r3s"},
-      {"11"_b, "ld3_asisdlsop_s3_i3s"},
+    { {"0x"_b, "st4_asisdlsop_sx4_r4s"},
+      {"10"_b, "st4_asisdlsop_sx4_r4s"},
+      {"11"_b, "st4_asisdlsop_s4_i4s"},
     },
   },
 
-  { "_lnkqjp",
-    {18, 17, 12},
-    { {"000"_b, "ld3_asisdlso_d3_3d"},
-    },
-  },
-
-  { "_lnnyzt",
-    {23, 22},
-    { {"01"_b, "fmax_asimdsamefp16_only"},
-      {"11"_b, "fmin_asimdsamefp16_only"},
-    },
-  },
-
-  { "_lnpvky",
-    {23, 22, 19, 13, 12},
-    { {"00100"_b, "sha1h_ss_cryptosha2"},
-      {"00101"_b, "sha1su1_vv_cryptosha2"},
-      {"00110"_b, "sha256su0_vv_cryptosha2"},
-      {"xx011"_b, "suqadd_asisdmisc_r"},
-    },
-  },
-
-  { "_lpkqzl",
-    {30, 23, 22, 12, 11, 10},
-    { {"0000xx"_b, "adds_64s_addsub_ext"},
-      {"000100"_b, "adds_64s_addsub_ext"},
-      {"1000xx"_b, "subs_64s_addsub_ext"},
-      {"100100"_b, "subs_64s_addsub_ext"},
-    },
-  },
-
-  { "_lpslrz",
-    {4, 3, 2, 1, 0},
-    { {"00000"_b, "fcmp_s_floatcmp"},
-      {"01000"_b, "fcmp_sz_floatcmp"},
-      {"10000"_b, "fcmpe_s_floatcmp"},
-      {"11000"_b, "fcmpe_sz_floatcmp"},
-    },
-  },
-
-  { "_lpsvyy",
-    {30, 13},
-    { {"00"_b, "_jlrrlt"},
-      {"01"_b, "_jrlynj"},
-      {"10"_b, "fmla_z_p_zzz"},
-      {"11"_b, "fmls_z_p_zzz"},
-    },
-  },
-
-  { "_lpsxhz",
-    {22, 20, 19, 18, 17, 16, 13, 12},
-    { {"01111101"_b, "ld64b_64l_memop"},
+  { "_lqlrxp",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stlrb_sl32_ldstexcl"},
     },
   },
 
@@ -2360,24 +2718,12 @@
     },
   },
 
-  { "_lqnvvj",
-    {22, 13, 12},
-    { {"000"_b, "swp_32_memop"},
-      {"100"_b, "swpl_32_memop"},
-    },
-  },
-
-  { "_lrjyhr",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "bic_asimdimm_l_hl"},
-      {"00x100"_b, "uqshrn_asimdshf_n"},
-      {"00x101"_b, "uqrshrn_asimdshf_n"},
-      {"010x00"_b, "uqshrn_asimdshf_n"},
-      {"010x01"_b, "uqrshrn_asimdshf_n"},
-      {"011100"_b, "uqshrn_asimdshf_n"},
-      {"011101"_b, "uqrshrn_asimdshf_n"},
-      {"0x1000"_b, "uqshrn_asimdshf_n"},
-      {"0x1001"_b, "uqrshrn_asimdshf_n"},
+  { "_lrmgmq",
+    {30, 23, 22},
+    { {"00x"_b, "add_64_addsub_imm"},
+      {"010"_b, "addg_64_addsub_immtags"},
+      {"10x"_b, "sub_64_addsub_imm"},
+      {"110"_b, "subg_64_addsub_immtags"},
     },
   },
 
@@ -2398,50 +2744,18 @@
     },
   },
 
-  { "_lrqkvp",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0000000"_b, "ldadd_32_memop"},
-      {"0000100"_b, "ldclr_32_memop"},
-      {"0001000"_b, "ldeor_32_memop"},
-      {"0001100"_b, "ldset_32_memop"},
-      {"000xx10"_b, "str_32_ldst_regoff"},
-      {"0010000"_b, "ldaddl_32_memop"},
-      {"0010100"_b, "ldclrl_32_memop"},
-      {"0011000"_b, "ldeorl_32_memop"},
-      {"0011100"_b, "ldsetl_32_memop"},
-      {"001xx10"_b, "ldr_32_ldst_regoff"},
-      {"0100000"_b, "ldadda_32_memop"},
-      {"0100100"_b, "ldclra_32_memop"},
-      {"0101000"_b, "ldeora_32_memop"},
-      {"0101100"_b, "ldseta_32_memop"},
-      {"010xx10"_b, "ldrsw_64_ldst_regoff"},
-      {"0110000"_b, "ldaddal_32_memop"},
-      {"0110100"_b, "ldclral_32_memop"},
-      {"0111000"_b, "ldeoral_32_memop"},
-      {"0111100"_b, "ldsetal_32_memop"},
-      {"1000000"_b, "ldadd_64_memop"},
-      {"1000100"_b, "ldclr_64_memop"},
-      {"1001000"_b, "ldeor_64_memop"},
-      {"1001100"_b, "ldset_64_memop"},
-      {"100xx10"_b, "str_64_ldst_regoff"},
-      {"1010000"_b, "ldaddl_64_memop"},
-      {"1010100"_b, "ldclrl_64_memop"},
-      {"1011000"_b, "ldeorl_64_memop"},
-      {"1011100"_b, "ldsetl_64_memop"},
-      {"101xx10"_b, "ldr_64_ldst_regoff"},
-      {"10xxx01"_b, "ldraa_64_ldst_pac"},
-      {"10xxx11"_b, "ldraa_64w_ldst_pac"},
-      {"1100000"_b, "ldadda_64_memop"},
-      {"1100100"_b, "ldclra_64_memop"},
-      {"1101000"_b, "ldeora_64_memop"},
-      {"1101100"_b, "ldseta_64_memop"},
-      {"110xx10"_b, "prfm_p_ldst_regoff"},
-      {"1110000"_b, "ldaddal_64_memop"},
-      {"1110100"_b, "ldclral_64_memop"},
-      {"1111000"_b, "ldeoral_64_memop"},
-      {"1111100"_b, "ldsetal_64_memop"},
-      {"11xxx01"_b, "ldrab_64_ldst_pac"},
-      {"11xxx11"_b, "ldrab_64w_ldst_pac"},
+  { "_lrptrn",
+    {30, 23, 13, 12, 11, 10},
+    { {"100001"_b, "sri_asisdshf_r"},
+      {"100101"_b, "sli_asisdshf_r"},
+      {"101001"_b, "sqshlu_asisdshf_r"},
+      {"101101"_b, "uqshl_asisdshf_r"},
+    },
+  },
+
+  { "_lrqlrg",
+    {30},
+    { {"1"_b, "_ylhgrh"},
     },
   },
 
@@ -2458,29 +2772,71 @@
     },
   },
 
-  { "_ltvrrg",
+  { "_lsqgkk",
     {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_htmthz"},
+    { {"1"_b, "_jsqvtn"},
     },
   },
 
-  { "_lvshqt",
-    {23, 22},
-    { {"00"_b, "_qtkpxg"},
+  { "_lssjyz",
+    {30},
+    { {"1"_b, "_kxtqjh"},
     },
   },
 
-  { "_lxgltj",
-    {30, 23, 22},
-    { {"000"_b, "stlxr_sr32_ldstexcl"},
-      {"001"_b, "ldaxr_lr32_ldstexcl"},
-      {"010"_b, "stlr_sl32_ldstexcl"},
-      {"011"_b, "ldar_lr32_ldstexcl"},
-      {"100"_b, "stlxr_sr64_ldstexcl"},
-      {"101"_b, "ldaxr_lr64_ldstexcl"},
-      {"110"_b, "stlr_sl64_ldstexcl"},
-      {"111"_b, "ldar_lr64_ldstexcl"},
+  { "_lszlkq",
+    {22, 20, 19, 18, 17, 16, 13, 12},
+    { {"01111100"_b, "_xtgmvr"},
+    },
+  },
+
+  { "_ltrntg",
+    {12},
+    { {"0"_b, "udot_asimdelem_d"},
+      {"1"_b, "sqrdmlsh_asimdelem_r"},
+    },
+  },
+
+  { "_lvjtlg",
+    {30, 11, 10},
+    { {"000"_b, "_krvxxx"},
+      {"001"_b, "_rpjrhs"},
+      {"010"_b, "_tsypsz"},
+      {"011"_b, "_ktsgth"},
+      {"100"_b, "_yhnqyy"},
+      {"101"_b, "_xzqmkv"},
+      {"110"_b, "_vxqtkl"},
+      {"111"_b, "_jggxjz"},
+    },
+  },
+
+  { "_lvryvp",
+    {30},
+    { {"0"_b, "_gkqhyz"},
+      {"1"_b, "_nzqxrj"},
+    },
+  },
+
+  { "_lvsrnj",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "scvtf_asimdmiscfp16_r"},
+      {"0x00001"_b, "scvtf_asimdmisc_r"},
+      {"1111000"_b, "fcmeq_asimdmiscfp16_fz"},
+      {"1111001"_b, "frecpe_asimdmiscfp16_r"},
+      {"1x00000"_b, "fcmeq_asimdmisc_fz"},
+      {"1x00001"_b, "frecpe_asimdmisc_r"},
+    },
+  },
+
+  { "_lvszgj",
+    {2, 1},
+    { {"11"_b, "brabz_64_branch_reg"},
+    },
+  },
+
+  { "_lxggmz",
+    {30},
+    { {"0"_b, "b_only_branch_imm"},
     },
   },
 
@@ -2490,46 +2846,9 @@
     },
   },
 
-  { "_lxmyjh",
-    {30, 23, 11, 10},
-    { {"0000"_b, "_lqnvvj"},
-      {"0010"_b, "_tmthqm"},
-      {"0100"_b, "_rxjrmn"},
-      {"0110"_b, "_ypqgyp"},
-      {"1000"_b, "_zpsymj"},
-      {"1001"_b, "ldraa_64_ldst_pac"},
-      {"1010"_b, "_rsyzrs"},
-      {"1011"_b, "ldraa_64w_ldst_pac"},
-      {"1100"_b, "_nrrmtx"},
-      {"1101"_b, "ldrab_64_ldst_pac"},
-      {"1110"_b, "_tgqsyg"},
-      {"1111"_b, "ldrab_64w_ldst_pac"},
-    },
-  },
-
-  { "_lxqynh",
-    {23, 22, 19, 18, 17, 16},
-    { {"0000x1"_b, "dup_asimdins_dr_r"},
-      {"000x10"_b, "dup_asimdins_dr_r"},
-      {"0010xx"_b, "dup_asimdins_dr_r"},
-      {"001110"_b, "dup_asimdins_dr_r"},
-      {"00x10x"_b, "dup_asimdins_dr_r"},
-      {"00x111"_b, "dup_asimdins_dr_r"},
-      {"01xxxx"_b, "fmla_asimdsamefp16_only"},
-      {"11xxxx"_b, "fmls_asimdsamefp16_only"},
-    },
-  },
-
-  { "_lxvnxm",
-    {23, 22, 12},
-    { {"100"_b, "fmlsl2_asimdelem_lh"},
-      {"xx1"_b, "sqrdmlah_asimdelem_r"},
-    },
-  },
-
-  { "_lyghyg",
-    {20, 18, 17},
-    { {"000"_b, "_hxmjhn"},
+  { "_lxlqks",
+    {19},
+    { {"1"_b, "sysp_cr_syspairinstrs"},
     },
   },
 
@@ -2540,6 +2859,13 @@
     },
   },
 
+  { "_lymhlk",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_vpgxgk"},
+    },
+  },
+
   { "_lynsgm",
     {13},
     { {"0"_b, "_ttplgp"},
@@ -2556,42 +2882,95 @@
     },
   },
 
-  { "_lyzxhr",
-    {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
-    { {"0011111001"_b, "_smplhv"},
+  { "_lyzhrq",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtms_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtms_asimdmisc_r"},
+      {"1111001"_b, "fcvtzs_asimdmiscfp16_r"},
+      {"1x00001"_b, "fcvtzs_asimdmisc_r"},
+      {"xx00000"_b, "abs_asimdmisc_r"},
+      {"xx10001"_b, "addv_asimdall_only"},
     },
   },
 
-  { "_lzpykk",
-    {30, 23, 22},
-    { {"000"_b, "bfm_32m_bitfield"},
+  { "_lzjyhm",
+    {30},
+    { {"0"_b, "ldapursw_64_ldapstl_unscaled"},
     },
   },
 
-  { "_mgmgqh",
-    {17},
-    { {"0"_b, "st1_asisdlsop_hx1_r1h"},
-      {"1"_b, "st1_asisdlsop_h1_i1h"},
-    },
-  },
-
-  { "_mgmkyq",
-    {23},
-    { {"0"_b, "fmaxp_asimdsame_only"},
-      {"1"_b, "fminp_asimdsame_only"},
-    },
-  },
-
-  { "_mgqvvn",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "pacdza_64z_dp_1src"},
-    },
-  },
-
-  { "_mgsvlj",
+  { "_lzqxgt",
     {13, 12},
-    { {"00"_b, "udiv_32_dp_2src"},
-      {"10"_b, "asrv_32_dp_2src"},
+    { {"00"_b, "sbcs_32_addsub_carry"},
+    },
+  },
+
+  { "_lzvxxj",
+    {23, 22},
+    { {"01"_b, "fcmeq_asimdsamefp16_only"},
+    },
+  },
+
+  { "_lzzsyj",
+    {18, 17},
+    { {"0x"_b, "st3_asisdlsep_r3_r"},
+      {"10"_b, "st3_asisdlsep_r3_r"},
+      {"11"_b, "st3_asisdlsep_i3_i"},
+    },
+  },
+
+  { "_mgjhts",
+    {13, 12, 10},
+    { {"001"_b, "_rvtxys"},
+      {"010"_b, "_ppyynh"},
+      {"011"_b, "_vvyjmh"},
+      {"101"_b, "_rpplns"},
+      {"110"_b, "sqdmlal_asisdelem_l"},
+      {"111"_b, "_ymmhtq"},
+    },
+  },
+
+  { "_mgspnm",
+    {30, 23},
+    { {"00"_b, "orr_64_log_imm"},
+      {"10"_b, "ands_64s_log_imm"},
+      {"11"_b, "movk_64_movewide"},
+    },
+  },
+
+  { "_mgtxyt",
+    {13, 12},
+    { {"00"_b, "sbcs_64_addsub_carry"},
+    },
+  },
+
+  { "_mhksnq",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "ucvtf_asisdshf_c"},
+      {"001x0"_b, "ucvtf_asisdshf_c"},
+      {"01xx0"_b, "ucvtf_asisdshf_c"},
+    },
+  },
+
+  { "_mhljkp",
+    {23},
+    { {"0"_b, "frecps_asimdsame_only"},
+      {"1"_b, "frsqrts_asimdsame_only"},
+    },
+  },
+
+  { "_mhnlsy",
+    {11, 10},
+    { {"00"_b, "sm3tt1a_vvv4_crypto3_imm2"},
+      {"01"_b, "sm3tt1b_vvv4_crypto3_imm2"},
+      {"10"_b, "sm3tt2a_vvv4_crypto3_imm2"},
+      {"11"_b, "sm3tt2b_vvv_crypto3_imm2"},
+    },
+  },
+
+  { "_mhpgjx",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stlr_sl64_ldstexcl"},
     },
   },
 
@@ -2604,24 +2983,49 @@
     },
   },
 
-  { "_mjqvxq",
-    {23, 22, 13, 12, 11, 10},
-    { {"0001x0"_b, "fmul_asimdelem_rh_h"},
-      {"0x0001"_b, "shrn_asimdshf_n"},
-      {"0x0011"_b, "rshrn_asimdshf_n"},
-      {"0x0101"_b, "sqshrn_asimdshf_n"},
-      {"0x0111"_b, "sqrshrn_asimdshf_n"},
-      {"0x1001"_b, "sshll_asimdshf_l"},
-      {"1x01x0"_b, "fmul_asimdelem_r_sd"},
-      {"xx00x0"_b, "mul_asimdelem_r"},
-      {"xx10x0"_b, "smull_asimdelem_l"},
-      {"xx11x0"_b, "sqdmull_asimdelem_l"},
+  { "_mjjhqj",
+    {30, 23, 22, 19, 16},
+    { {"10010"_b, "aesimc_b_cryptoaes"},
+      {"x0x01"_b, "fcvtl_asimdmisc_l"},
+      {"xxx00"_b, "sqabs_asimdmisc_r"},
     },
   },
 
-  { "_mjxzks",
-    {4},
-    { {"0"_b, "ccmp_64_condcmp_reg"},
+  { "_mjrlkp",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "movi_asimdimm_l_hl"},
+      {"00x100"_b, "shrn_asimdshf_n"},
+      {"00x101"_b, "rshrn_asimdshf_n"},
+      {"00x110"_b, "sshll_asimdshf_l"},
+      {"010x00"_b, "shrn_asimdshf_n"},
+      {"010x01"_b, "rshrn_asimdshf_n"},
+      {"010x10"_b, "sshll_asimdshf_l"},
+      {"011100"_b, "shrn_asimdshf_n"},
+      {"011101"_b, "rshrn_asimdshf_n"},
+      {"011110"_b, "sshll_asimdshf_l"},
+      {"0x1000"_b, "shrn_asimdshf_n"},
+      {"0x1001"_b, "rshrn_asimdshf_n"},
+      {"0x1010"_b, "sshll_asimdshf_l"},
+    },
+  },
+
+  { "_mjrqhl",
+    {18, 17},
+    { {"0x"_b, "st3_asisdlsop_sx3_r3s"},
+      {"10"_b, "st3_asisdlsop_sx3_r3s"},
+      {"11"_b, "st3_asisdlsop_s3_i3s"},
+    },
+  },
+
+  { "_mjstgz",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "ldgm_64bulk_ldsttags"},
+    },
+  },
+
+  { "_mjyhsl",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxp_lp32_ldstexcl"},
     },
   },
 
@@ -2632,42 +3036,33 @@
     },
   },
 
-  { "_mkklrm",
+  { "_mkrgxr",
+    {23, 4},
+    { {"00"_b, "_hptkrj"},
+    },
+  },
+
+  { "_mkyyng",
+    {23, 22},
+    { {"01"_b, "fcmla_asimdelem_c_h"},
+      {"10"_b, "fcmla_asimdelem_c_s"},
+    },
+  },
+
+  { "_mkzysy",
+    {30, 23, 22},
+    { {"000"_b, "str_b_ldst_pos"},
+      {"001"_b, "ldr_b_ldst_pos"},
+      {"010"_b, "str_q_ldst_pos"},
+      {"011"_b, "ldr_q_ldst_pos"},
+      {"100"_b, "str_h_ldst_pos"},
+      {"101"_b, "ldr_h_ldst_pos"},
+    },
+  },
+
+  { "_mlgmqm",
     {18, 17},
-    { {"00"_b, "ld3_asisdlso_s3_3s"},
-    },
-  },
-
-  { "_mkskxj",
-    {30, 23, 22, 13},
-    { {"0000"_b, "ld1sh_z_p_br_s32"},
-      {"0001"_b, "ldff1sh_z_p_br_s32"},
-      {"0010"_b, "ld1w_z_p_br_u64"},
-      {"0011"_b, "ldff1w_z_p_br_u64"},
-      {"0100"_b, "ld1sb_z_p_br_s32"},
-      {"0101"_b, "ldff1sb_z_p_br_s32"},
-      {"0110"_b, "ld1d_z_p_br_u64"},
-      {"0111"_b, "ldff1d_z_p_br_u64"},
-      {"1001"_b, "st2w_z_p_br_contiguous"},
-      {"1011"_b, "st4w_z_p_br_contiguous"},
-      {"10x0"_b, "st1w_z_p_br"},
-      {"1100"_b, "str_z_bi"},
-      {"1101"_b, "st2d_z_p_br_contiguous"},
-      {"1110"_b, "st1d_z_p_br"},
-      {"1111"_b, "st4d_z_p_br_contiguous"},
-    },
-  },
-
-  { "_mlnqrm",
-    {30},
-    { {"0"_b, "_nhzrqr"},
-      {"1"_b, "_zpmkvt"},
-    },
-  },
-
-  { "_mlvpxh",
-    {12},
-    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+    { {"00"_b, "st2_asisdlso_s2_2s"},
     },
   },
 
@@ -2678,36 +3073,15 @@
     },
   },
 
-  { "_mlyynz",
-    {12},
-    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
+  { "_mmgpkx",
+    {13, 12},
+    { {"11"_b, "cmgt_asisdsame_only"},
     },
   },
 
-  { "_mmhkmp",
-    {18, 17},
-    { {"0x"_b, "ld1_asisdlsop_sx1_r1s"},
-      {"10"_b, "ld1_asisdlsop_sx1_r1s"},
-      {"11"_b, "ld1_asisdlsop_s1_i1s"},
-    },
-  },
-
-  { "_mmknzp",
+  { "_mmxgrt",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "clz_asimdmisc_r"},
-      {"00001"_b, "uqxtn_asimdmisc_n"},
-    },
-  },
-
-  { "_mmmjkx",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "rev_32_dp_1src"},
-    },
-  },
-
-  { "_mmrtvz",
-    {12},
-    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+    { {"00000"_b, "rev32_asimdmisc_r"},
     },
   },
 
@@ -2718,41 +3092,46 @@
     },
   },
 
-  { "_mnxmst",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtns_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtns_asimdmisc_r"},
-      {"1111001"_b, "fcvtps_asimdmiscfp16_r"},
-      {"1x00001"_b, "fcvtps_asimdmisc_r"},
-      {"xx00000"_b, "cmlt_asimdmisc_z"},
-      {"xx10000"_b, "smaxv_asimdall_only"},
-      {"xx10001"_b, "sminv_asimdall_only"},
+  { "_mntnlr",
+    {18},
+    { {"0"_b, "ld1_asisdlse_r4_4v"},
     },
   },
 
-  { "_mpgrgp",
-    {30, 22, 13, 12, 11, 10},
-    { {"000001"_b, "rmif_only_rmif"},
-      {"01xx00"_b, "ccmn_64_condcmp_reg"},
-      {"01xx10"_b, "ccmn_64_condcmp_imm"},
-      {"11xx00"_b, "ccmp_64_condcmp_reg"},
-      {"11xx10"_b, "ccmp_64_condcmp_imm"},
+  { "_mnxgml",
+    {22},
+    { {"0"_b, "ldrsw_64_ldst_regoff"},
     },
   },
 
-  { "_mplgqv",
-    {11, 10},
-    { {"00"_b, "sm3tt1a_vvv4_crypto3_imm2"},
-      {"01"_b, "sm3tt1b_vvv4_crypto3_imm2"},
-      {"10"_b, "sm3tt2a_vvv4_crypto3_imm2"},
-      {"11"_b, "sm3tt2b_vvv_crypto3_imm2"},
+  { "_mnxgqm",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xxxx"_b, "fnmadd_s_floatdp3"},
+      {"001xxxx"_b, "fnmadd_d_floatdp3"},
+      {"011xxxx"_b, "fnmadd_h_floatdp3"},
+      {"10001x0"_b, "fmls_asisdelem_rh_h"},
+      {"10x0101"_b, "shl_asisdshf_r"},
+      {"10x1101"_b, "sqshl_asisdshf_r"},
+      {"11x01x0"_b, "fmls_asisdelem_r_sd"},
+      {"1xx11x0"_b, "sqdmlsl_asisdelem_l"},
     },
   },
 
-  { "_mplskr",
-    {13, 12},
-    { {"00"_b, "add_asisdsame_only"},
-      {"11"_b, "sqdmulh_asisdsame_only"},
+  { "_mnzgkx",
+    {12},
+    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
+    },
+  },
+
+  { "_mnzzhk",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stlr_sl32_ldstexcl"},
+    },
+  },
+
+  { "_mphkpq",
+    {12},
+    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
     },
   },
 
@@ -2769,103 +3148,23 @@
     },
   },
 
-  { "_mpyhkm",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "fnmsub_s_floatdp3"},
-      {"001xxxx"_b, "fnmsub_d_floatdp3"},
-      {"011xxxx"_b, "fnmsub_h_floatdp3"},
-      {"10x1001"_b, "scvtf_asisdshf_c"},
-      {"10x1111"_b, "fcvtzs_asisdshf_c"},
-      {"1xx00x0"_b, "sqdmulh_asisdelem_r"},
-      {"1xx01x0"_b, "sqrdmulh_asisdelem_r"},
+  { "_mpytmv",
+    {23, 22, 20, 19, 11},
+    { {"00011"_b, "fcvtzu_asisdshf_c"},
+      {"001x1"_b, "fcvtzu_asisdshf_c"},
+      {"01xx1"_b, "fcvtzu_asisdshf_c"},
     },
   },
 
-  { "_mpyklp",
-    {23, 22, 20, 19, 16, 13, 10},
-    { {"0000000"_b, "_jqjnrv"},
-      {"0000001"_b, "_yqmqzp"},
-      {"0000010"_b, "_hgxqpp"},
-      {"0000011"_b, "_rvzhhx"},
-      {"0100000"_b, "_nnllqy"},
-      {"0100001"_b, "_vhmsgj"},
-      {"0100010"_b, "_mkklrm"},
-      {"0100011"_b, "_lnkqjp"},
-      {"100xx00"_b, "st1_asisdlsop_sx1_r1s"},
-      {"100xx01"_b, "_yxmkzr"},
-      {"100xx10"_b, "st3_asisdlsop_sx3_r3s"},
-      {"100xx11"_b, "_mlyynz"},
-      {"1010x00"_b, "st1_asisdlsop_sx1_r1s"},
-      {"1010x01"_b, "_jnjlsh"},
-      {"1010x10"_b, "st3_asisdlsop_sx3_r3s"},
-      {"1010x11"_b, "_svrnxq"},
-      {"1011000"_b, "st1_asisdlsop_sx1_r1s"},
-      {"1011001"_b, "_hjqtrt"},
-      {"1011010"_b, "st3_asisdlsop_sx3_r3s"},
-      {"1011011"_b, "_vqlytp"},
-      {"1011100"_b, "_qqpqnm"},
-      {"1011101"_b, "_thvvzp"},
-      {"1011110"_b, "_srglgl"},
-      {"1011111"_b, "_qzrjss"},
-      {"110xx00"_b, "ld1_asisdlsop_sx1_r1s"},
-      {"110xx01"_b, "_ljxhnq"},
-      {"110xx10"_b, "ld3_asisdlsop_sx3_r3s"},
-      {"110xx11"_b, "_nkrqgn"},
-      {"1110x00"_b, "ld1_asisdlsop_sx1_r1s"},
-      {"1110x01"_b, "_vmplgv"},
-      {"1110x10"_b, "ld3_asisdlsop_sx3_r3s"},
-      {"1110x11"_b, "_gsttpm"},
-      {"1111000"_b, "ld1_asisdlsop_sx1_r1s"},
-      {"1111001"_b, "_xmqvpl"},
-      {"1111010"_b, "ld3_asisdlsop_sx3_r3s"},
-      {"1111011"_b, "_stqmps"},
-      {"1111100"_b, "_mmhkmp"},
-      {"1111101"_b, "_srvnql"},
-      {"1111110"_b, "_lnjpjs"},
-      {"1111111"_b, "_kstltt"},
+  { "_mqljmr",
+    {2, 1, 0},
+    { {"000"_b, "_rnphqp"},
     },
   },
 
-  { "_mpzqxm",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_vpkhvh"},
-      {"0000001"_b, "_gttglx"},
-      {"0100000"_b, "_gsgzpg"},
-      {"0100001"_b, "_ynqsgl"},
-      {"100xxx0"_b, "st2_asisdlsop_hx2_r2h"},
-      {"100xxx1"_b, "st4_asisdlsop_hx4_r4h"},
-      {"1010xx0"_b, "st2_asisdlsop_hx2_r2h"},
-      {"1010xx1"_b, "st4_asisdlsop_hx4_r4h"},
-      {"10110x0"_b, "st2_asisdlsop_hx2_r2h"},
-      {"10110x1"_b, "st4_asisdlsop_hx4_r4h"},
-      {"1011100"_b, "st2_asisdlsop_hx2_r2h"},
-      {"1011101"_b, "st4_asisdlsop_hx4_r4h"},
-      {"1011110"_b, "_sjsltg"},
-      {"1011111"_b, "_xrpmzt"},
-      {"110xxx0"_b, "ld2_asisdlsop_hx2_r2h"},
-      {"110xxx1"_b, "ld4_asisdlsop_hx4_r4h"},
-      {"1110xx0"_b, "ld2_asisdlsop_hx2_r2h"},
-      {"1110xx1"_b, "ld4_asisdlsop_hx4_r4h"},
-      {"11110x0"_b, "ld2_asisdlsop_hx2_r2h"},
-      {"11110x1"_b, "ld4_asisdlsop_hx4_r4h"},
-      {"1111100"_b, "ld2_asisdlsop_hx2_r2h"},
-      {"1111101"_b, "ld4_asisdlsop_hx4_r4h"},
-      {"1111110"_b, "_gygnsz"},
-      {"1111111"_b, "_kxkyqr"},
-    },
-  },
-
-  { "_mqgtsq",
-    {30, 23, 22, 19},
-    { {"1001"_b, "aesd_b_cryptoaes"},
-      {"xxx0"_b, "cnt_asimdmisc_r"},
-    },
-  },
-
-  { "_mqkjxj",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_lyzxhr"},
+  { "_mqmrng",
+    {9, 8, 7, 6, 5, 2, 1},
+    { {"1111100"_b, "eret_64e_branch_reg"},
     },
   },
 
@@ -2880,6 +3179,42 @@
     },
   },
 
+  { "_mqssgy",
+    {30},
+    { {"0"_b, "_slzrtr"},
+      {"1"_b, "_nsgxlz"},
+    },
+  },
+
+  { "_mqtgvk",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"},
+      {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"},
+      {"100xx00"_b, "stlurh_32_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"},
+      {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"},
+      {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfp_cpy_memcms"},
+      {"x000101"_b, "cpyfpwt_cpy_memcms"},
+      {"x001001"_b, "cpyfprt_cpy_memcms"},
+      {"x001101"_b, "cpyfpt_cpy_memcms"},
+      {"x010001"_b, "cpyfm_cpy_memcms"},
+      {"x010101"_b, "cpyfmwt_cpy_memcms"},
+      {"x011001"_b, "cpyfmrt_cpy_memcms"},
+      {"x011101"_b, "cpyfmt_cpy_memcms"},
+      {"x100001"_b, "cpyfe_cpy_memcms"},
+      {"x100101"_b, "cpyfewt_cpy_memcms"},
+      {"x101001"_b, "cpyfert_cpy_memcms"},
+      {"x101101"_b, "cpyfet_cpy_memcms"},
+      {"x110001"_b, "setp_set_memcms"},
+      {"x110101"_b, "setpt_set_memcms"},
+      {"x111001"_b, "setpn_set_memcms"},
+      {"x111101"_b, "setptn_set_memcms"},
+    },
+  },
+
   { "_mrhtxt",
     {23, 22, 20, 9},
     { {"0000"_b, "brkpb_p_p_pp"},
@@ -2887,204 +3222,102 @@
     },
   },
 
-  { "_mrkkps",
-    {17},
-    { {"0"_b, "ld1_asisdlsop_hx1_r1h"},
-      {"1"_b, "ld1_asisdlsop_h1_i1h"},
+  { "_mrlpxr",
+    {30, 23, 22},
+    { {"000"_b, "_vqzsgg"},
+      {"001"_b, "_tzjyhy"},
+      {"011"_b, "_grsnms"},
+      {"100"_b, "_sknvhk"},
+      {"101"_b, "_ptqtmp"},
+      {"111"_b, "_kktzst"},
     },
   },
 
-  { "_mrmpgh",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "stlxp_sp32_ldstexcl"},
-      {"001xxxx"_b, "ldaxp_lp32_ldstexcl"},
-      {"0101111"_b, "casl_c32_ldstexcl"},
-      {"0111111"_b, "casal_c32_ldstexcl"},
-      {"100xxxx"_b, "stlxp_sp64_ldstexcl"},
-      {"101xxxx"_b, "ldaxp_lp64_ldstexcl"},
-      {"1101111"_b, "casl_c64_ldstexcl"},
-      {"1111111"_b, "casal_c64_ldstexcl"},
+  { "_msnshr",
+    {23, 22, 13, 12, 11, 10},
+    { {"0001x0"_b, "fmls_asimdelem_rh_h"},
+      {"0x0101"_b, "shl_asimdshf_r"},
+      {"0x1101"_b, "sqshl_asimdshf_r"},
+      {"1000x0"_b, "fmlsl_asimdelem_lh"},
+      {"1x01x0"_b, "fmls_asimdelem_r_sd"},
+      {"xx10x0"_b, "smlsl_asimdelem_l"},
+      {"xx11x0"_b, "sqdmlsl_asimdelem_l"},
     },
   },
 
-  { "_mrqqlp",
-    {30, 11, 10},
-    { {"000"_b, "_gqykqv"},
-      {"001"_b, "_xgvgmk"},
-      {"010"_b, "_tjpjng"},
-      {"011"_b, "_pjkylt"},
-      {"101"_b, "_yrgnqz"},
-      {"110"_b, "_hhymvj"},
-      {"111"_b, "_xpmvjv"},
+  { "_msvhjv",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_d_floatimm"},
     },
   },
 
-  { "_msgqps",
-    {18, 17},
-    { {"0x"_b, "ld2_asisdlsop_sx2_r2s"},
-      {"10"_b, "ld2_asisdlsop_sx2_r2s"},
-      {"11"_b, "ld2_asisdlsop_s2_i2s"},
-    },
-  },
-
-  { "_msnsjp",
-    {23, 20, 19, 18, 17, 16},
-    { {"000001"_b, "fcvtxn_asisdmisc_n"},
-    },
-  },
-
-  { "_msqkyy",
-    {16, 13, 12},
-    { {"000"_b, "rbit_64_dp_1src"},
-      {"001"_b, "clz_64_dp_1src"},
-      {"100"_b, "pacia_64p_dp_1src"},
-      {"101"_b, "autia_64p_dp_1src"},
-      {"110"_b, "_sqgxzn"},
-      {"111"_b, "_kqkhtz"},
-    },
-  },
-
-  { "_mstthg",
-    {13, 12, 11, 10},
-    { {"0000"_b, "umull_asimddiff_l"},
-      {"0001"_b, "_qptvrm"},
-      {"0010"_b, "_qqzrhz"},
-      {"0011"_b, "_yxhrpk"},
-      {"0101"_b, "_vsqpzr"},
-      {"0110"_b, "_kjrxpx"},
-      {"0111"_b, "_qnvgmh"},
-      {"1001"_b, "_jvhnxl"},
-      {"1010"_b, "_zyzzhm"},
-      {"1011"_b, "_slhpgp"},
-      {"1101"_b, "_mgmkyq"},
-      {"1110"_b, "_qvlytr"},
-      {"1111"_b, "_qtmjkr"},
-    },
-  },
-
-  { "_msztzv",
-    {23, 11, 10, 4, 3, 2, 1},
-    { {"0000000"_b, "_vvprhx"},
-      {"0101111"_b, "_nqysxy"},
-      {"0111111"_b, "_kkmjyr"},
-      {"1000000"_b, "_ypjyqh"},
-    },
-  },
-
-  { "_mtgksl",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_vnrnmg"},
-      {"0000001"_b, "_hzllgl"},
-      {"0100000"_b, "_hrhzqy"},
-      {"0100001"_b, "_qtjzhs"},
-      {"100xxx0"_b, "st4_asisdlsep_r4_r"},
-      {"100xxx1"_b, "st1_asisdlsep_r4_r4"},
-      {"1010xx0"_b, "st4_asisdlsep_r4_r"},
-      {"1010xx1"_b, "st1_asisdlsep_r4_r4"},
-      {"10110x0"_b, "st4_asisdlsep_r4_r"},
-      {"10110x1"_b, "st1_asisdlsep_r4_r4"},
-      {"1011100"_b, "st4_asisdlsep_r4_r"},
-      {"1011101"_b, "st1_asisdlsep_r4_r4"},
-      {"1011110"_b, "_nzkhrj"},
-      {"1011111"_b, "_gmjhll"},
-      {"110xxx0"_b, "ld4_asisdlsep_r4_r"},
-      {"110xxx1"_b, "ld1_asisdlsep_r4_r4"},
-      {"1110xx0"_b, "ld4_asisdlsep_r4_r"},
-      {"1110xx1"_b, "ld1_asisdlsep_r4_r4"},
-      {"11110x0"_b, "ld4_asisdlsep_r4_r"},
-      {"11110x1"_b, "ld1_asisdlsep_r4_r4"},
-      {"1111100"_b, "ld4_asisdlsep_r4_r"},
-      {"1111101"_b, "ld1_asisdlsep_r4_r4"},
-      {"1111110"_b, "_hxglyp"},
-      {"1111111"_b, "_jmyslr"},
-    },
-  },
-
-  { "_mthzvm",
-    {30, 23, 13, 12, 11, 10},
-    { {"100001"_b, "ushr_asisdshf_r"},
-      {"100101"_b, "usra_asisdshf_r"},
-      {"101001"_b, "urshr_asisdshf_r"},
-      {"101101"_b, "ursra_asisdshf_r"},
-    },
-  },
-
-  { "_mtjrtt",
-    {13, 12},
-    { {"00"_b, "subps_64s_dp_2src"},
-    },
-  },
-
-  { "_mtlhnl",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "movi_asimdimm_l_sl"},
-      {"00x100"_b, "sshr_asimdshf_r"},
-      {"00x110"_b, "srshr_asimdshf_r"},
-      {"010x00"_b, "sshr_asimdshf_r"},
-      {"010x10"_b, "srshr_asimdshf_r"},
-      {"011100"_b, "sshr_asimdshf_r"},
-      {"011110"_b, "srshr_asimdshf_r"},
-      {"0x1000"_b, "sshr_asimdshf_r"},
-      {"0x1010"_b, "srshr_asimdshf_r"},
-    },
-  },
-
-  { "_mtnpmr",
-    {13, 12, 11, 10},
-    { {"0000"_b, "smull_asimddiff_l"},
-      {"0001"_b, "_ypznsm"},
-      {"0010"_b, "_sgztlj"},
-      {"0011"_b, "_nsnyxt"},
-      {"0100"_b, "sqdmull_asimddiff_l"},
-      {"0101"_b, "_plltlx"},
-      {"0110"_b, "_qtystr"},
-      {"0111"_b, "_gymljg"},
-      {"1000"_b, "pmull_asimddiff_l"},
-      {"1001"_b, "_rpmrkq"},
-      {"1010"_b, "_hvvyhl"},
-      {"1011"_b, "_hlshjk"},
-      {"1101"_b, "_gmvjgn"},
-      {"1110"_b, "_rsyjqj"},
-      {"1111"_b, "_yvlhjg"},
-    },
-  },
-
-  { "_mtzgpn",
-    {30},
-    { {"0"_b, "cbz_32_compbranch"},
-    },
-  },
-
-  { "_mvglql",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "mvni_asimdimm_l_hl"},
-      {"00x100"_b, "sqshrun_asimdshf_n"},
-      {"00x101"_b, "sqrshrun_asimdshf_n"},
-      {"00x110"_b, "ushll_asimdshf_l"},
-      {"010x00"_b, "sqshrun_asimdshf_n"},
-      {"010x01"_b, "sqrshrun_asimdshf_n"},
-      {"010x10"_b, "ushll_asimdshf_l"},
-      {"011100"_b, "sqshrun_asimdshf_n"},
-      {"011101"_b, "sqrshrun_asimdshf_n"},
-      {"011110"_b, "ushll_asimdshf_l"},
-      {"0x1000"_b, "sqshrun_asimdshf_n"},
-      {"0x1001"_b, "sqrshrun_asimdshf_n"},
-      {"0x1010"_b, "ushll_asimdshf_l"},
-    },
-  },
-
-  { "_mvgsjr",
+  { "_msvjxq",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "usqadd_asimdmisc_r"},
-      {"00001"_b, "shll_asimdmisc_s"},
-      {"10000"_b, "uaddlv_asimdall_only"},
+    { {"00001"_b, "sqxtun_asisdmisc_n"},
     },
   },
 
-  { "_mvzvpk",
-    {30},
-    { {"0"_b, "orn_64_log_shift"},
-      {"1"_b, "bics_64_log_shift"},
+  { "_msyrjz",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casal_c64_ldstexcl"},
+    },
+  },
+
+  { "_mthlnv",
+    {18},
+    { {"0"_b, "ld1_asisdlsep_r4_r4"},
+      {"1"_b, "ld1_asisdlsep_i4_i4"},
+    },
+  },
+
+  { "_mtkhgz",
+    {10},
+    { {"0"_b, "sha512su0_vv2_cryptosha512_2"},
+      {"1"_b, "sm4e_vv4_cryptosha512_2"},
+    },
+  },
+
+  { "_mtlxqp",
+    {30, 23, 22},
+    { {"000"_b, "stnp_64_ldstnapair_offs"},
+      {"001"_b, "ldnp_64_ldstnapair_offs"},
+      {"010"_b, "stp_64_ldstpair_post"},
+      {"011"_b, "ldp_64_ldstpair_post"},
+    },
+  },
+
+  { "_mtshvn",
+    {18},
+    { {"0"_b, "ld1_asisdlso_b1_1b"},
+    },
+  },
+
+  { "_mtzhrn",
+    {30, 23, 22, 11, 10, 4},
+    { {"001000"_b, "ccmn_64_condcmp_reg"},
+      {"001100"_b, "ccmn_64_condcmp_imm"},
+      {"101000"_b, "ccmp_64_condcmp_reg"},
+      {"101100"_b, "ccmp_64_condcmp_imm"},
+    },
+  },
+
+  { "_mvqkzv",
+    {18, 17, 12},
+    { {"000"_b, "st2_asisdlso_d2_2d"},
+    },
+  },
+
+  { "_mvvngm",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "mvni_asimdimm_l_sl"},
+      {"00x100"_b, "ushr_asimdshf_r"},
+      {"00x110"_b, "urshr_asimdshf_r"},
+      {"010x00"_b, "ushr_asimdshf_r"},
+      {"010x10"_b, "urshr_asimdshf_r"},
+      {"011100"_b, "ushr_asimdshf_r"},
+      {"011110"_b, "urshr_asimdshf_r"},
+      {"0x1000"_b, "ushr_asimdshf_r"},
+      {"0x1010"_b, "urshr_asimdshf_r"},
     },
   },
 
@@ -3100,11 +3333,10 @@
     },
   },
 
-  { "_mxkgnq",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "scvtf_asisdshf_c"},
-      {"001x0"_b, "scvtf_asisdshf_c"},
-      {"01xx0"_b, "scvtf_asisdshf_c"},
+  { "_mxnzst",
+    {30},
+    { {"0"_b, "_vghjnt"},
+      {"1"_b, "_pkqvxk"},
     },
   },
 
@@ -3116,15 +3348,16 @@
     },
   },
 
-  { "_mxtskk",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fmov_h_floatdp1"},
-      {"000010"_b, "fneg_h_floatdp1"},
-      {"000100"_b, "fcvt_sh_floatdp1"},
-      {"001000"_b, "frintn_h_floatdp1"},
-      {"001010"_b, "frintm_h_floatdp1"},
-      {"001100"_b, "frinta_h_floatdp1"},
-      {"001110"_b, "frintx_h_floatdp1"},
+  { "_mxplnn",
+    {30, 23, 22},
+    { {"000"_b, "stnp_s_ldstnapair_offs"},
+      {"001"_b, "ldnp_s_ldstnapair_offs"},
+      {"010"_b, "stp_s_ldstpair_post"},
+      {"011"_b, "ldp_s_ldstpair_post"},
+      {"100"_b, "stnp_d_ldstnapair_offs"},
+      {"101"_b, "ldnp_d_ldstnapair_offs"},
+      {"110"_b, "stp_d_ldstpair_post"},
+      {"111"_b, "ldp_d_ldstpair_post"},
     },
   },
 
@@ -3134,21 +3367,6 @@
     },
   },
 
-  { "_myjqrl",
-    {22, 20, 19, 18, 17, 16},
-    { {"111000"_b, "fcmge_asisdmiscfp16_fz"},
-      {"x00000"_b, "fcmge_asisdmisc_fz"},
-      {"x10000"_b, "fminnmp_asisdpair_only_sd"},
-    },
-  },
-
-  { "_mykjss",
-    {17},
-    { {"0"_b, "st2_asisdlsop_bx2_r2b"},
-      {"1"_b, "st2_asisdlsop_b2_i2b"},
-    },
-  },
-
   { "_mylphg",
     {30, 13, 4},
     { {"000"_b, "cmpge_p_p_zw"},
@@ -3159,16 +3377,26 @@
     },
   },
 
-  { "_myrshl",
-    {4},
-    { {"0"_b, "ccmn_32_condcmp_imm"},
+  { "_myrkmk",
+    {16, 13, 12},
+    { {"000"_b, "rev32_64_dp_1src"},
+      {"001"_b, "ctz_64_dp_1src"},
+      {"100"_b, "pacda_64p_dp_1src"},
+      {"101"_b, "autda_64p_dp_1src"},
+      {"110"_b, "_tnjhxp"},
+      {"111"_b, "_qqjtpm"},
     },
   },
 
-  { "_myxhpq",
+  { "_myvqtn",
     {12},
-    { {"0"_b, "udot_asimdelem_d"},
-      {"1"_b, "sqrdmlsh_asimdelem_r"},
+    { {"0"_b, "_yrgzqr"},
+    },
+  },
+
+  { "_myzhml",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "stgm_64bulk_ldsttags"},
     },
   },
 
@@ -3179,16 +3407,35 @@
     },
   },
 
-  { "_mzqzhq",
-    {23, 22, 20, 19, 11},
-    { {"00000"_b, "mvni_asimdimm_m_sm"},
+  { "_mzkxzm",
+    {1},
+    { {"0"_b, "blr_64_branch_reg"},
     },
   },
 
-  { "_mzynlp",
-    {23, 22, 13},
-    { {"100"_b, "fmlal2_asimdelem_lh"},
-      {"xx1"_b, "umull_asimdelem_l"},
+  { "_nghmrp",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casal_c32_ldstexcl"},
+    },
+  },
+
+  { "_ngkgsg",
+    {23, 22, 20, 19, 11},
+    { {"00000"_b, "movi_asimdimm_l_sl"},
+    },
+  },
+
+  { "_ngnxrx",
+    {18},
+    { {"0"_b, "ld1_asisdlse_r2_2v"},
+    },
+  },
+
+  { "_ngtlpz",
+    {18, 17, 12},
+    { {"0x0"_b, "st3_asisdlsop_dx3_r3d"},
+      {"100"_b, "st3_asisdlsop_dx3_r3d"},
+      {"110"_b, "st3_asisdlsop_d3_i3d"},
     },
   },
 
@@ -3211,11 +3458,15 @@
     },
   },
 
-  { "_ngxkmp",
-    {18, 17},
-    { {"0x"_b, "st3_asisdlsep_r3_r"},
-      {"10"_b, "st3_asisdlsep_r3_r"},
-      {"11"_b, "st3_asisdlsep_i3_i"},
+  { "_ngvqhs",
+    {13, 12, 11, 10},
+    { {"0001"_b, "ushl_asisdsame_only"},
+      {"0010"_b, "_vrxhss"},
+      {"0011"_b, "uqshl_asisdsame_only"},
+      {"0101"_b, "urshl_asisdsame_only"},
+      {"0111"_b, "uqrshl_asisdsame_only"},
+      {"1010"_b, "_xprqgs"},
+      {"1110"_b, "_yskyrg"},
     },
   },
 
@@ -3227,29 +3478,40 @@
     },
   },
 
-  { "_nhhpqz",
-    {23, 22, 13, 12},
-    { {"0000"_b, "fmul_s_floatdp2"},
-      {"0001"_b, "fdiv_s_floatdp2"},
-      {"0010"_b, "fadd_s_floatdp2"},
-      {"0011"_b, "fsub_s_floatdp2"},
-      {"0100"_b, "fmul_d_floatdp2"},
-      {"0101"_b, "fdiv_d_floatdp2"},
-      {"0110"_b, "fadd_d_floatdp2"},
-      {"0111"_b, "fsub_d_floatdp2"},
-      {"1100"_b, "fmul_h_floatdp2"},
-      {"1101"_b, "fdiv_h_floatdp2"},
-      {"1110"_b, "fadd_h_floatdp2"},
-      {"1111"_b, "fsub_h_floatdp2"},
+  { "_nhnhzp",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_hrymnk"},
+      {"0000001"_b, "_hmgzjl"},
+      {"0100000"_b, "_nxmgqz"},
+      {"0100001"_b, "_ssjrxs"},
+      {"100xxx0"_b, "st1_asisdlsop_hx1_r1h"},
+      {"100xxx1"_b, "st3_asisdlsop_hx3_r3h"},
+      {"1010xx0"_b, "st1_asisdlsop_hx1_r1h"},
+      {"1010xx1"_b, "st3_asisdlsop_hx3_r3h"},
+      {"10110x0"_b, "st1_asisdlsop_hx1_r1h"},
+      {"10110x1"_b, "st3_asisdlsop_hx3_r3h"},
+      {"1011100"_b, "st1_asisdlsop_hx1_r1h"},
+      {"1011101"_b, "st3_asisdlsop_hx3_r3h"},
+      {"1011110"_b, "_jyzhnh"},
+      {"1011111"_b, "_qzlvkm"},
+      {"110xxx0"_b, "ld1_asisdlsop_hx1_r1h"},
+      {"110xxx1"_b, "ld3_asisdlsop_hx3_r3h"},
+      {"1110xx0"_b, "ld1_asisdlsop_hx1_r1h"},
+      {"1110xx1"_b, "ld3_asisdlsop_hx3_r3h"},
+      {"11110x0"_b, "ld1_asisdlsop_hx1_r1h"},
+      {"11110x1"_b, "ld3_asisdlsop_hx3_r3h"},
+      {"1111100"_b, "ld1_asisdlsop_hx1_r1h"},
+      {"1111101"_b, "ld3_asisdlsop_hx3_r3h"},
+      {"1111110"_b, "_zmkntq"},
+      {"1111111"_b, "_rxhssh"},
     },
   },
 
-  { "_nhkstj",
-    {30, 23, 22},
-    { {"00x"_b, "add_64_addsub_shift"},
-      {"010"_b, "add_64_addsub_shift"},
-      {"10x"_b, "sub_64_addsub_shift"},
-      {"110"_b, "sub_64_addsub_shift"},
+  { "_nhrkqm",
+    {22, 20, 19, 18, 17, 16},
+    { {"111001"_b, "ucvtf_asisdmiscfp16_r"},
+      {"x00001"_b, "ucvtf_asisdmisc_r"},
+      {"x10000"_b, "faddp_asisdpair_only_sd"},
     },
   },
 
@@ -3259,34 +3521,16 @@
     },
   },
 
-  { "_nhzrqr",
-    {23, 22},
-    { {"00"_b, "fmadd_s_floatdp3"},
-      {"01"_b, "fmadd_d_floatdp3"},
-      {"11"_b, "fmadd_h_floatdp3"},
-    },
-  },
-
-  { "_nhzyvv",
-    {23, 22, 4, 3, 2, 1, 0},
-    { {"0000000"_b, "brk_ex_exception"},
-      {"0100000"_b, "tcancel_ex_exception"},
-      {"1000001"_b, "dcps1_dc_exception"},
-      {"1000010"_b, "dcps2_dc_exception"},
-      {"1000011"_b, "dcps3_dc_exception"},
-    },
-  },
-
-  { "_njgmvx",
-    {18, 17},
-    { {"00"_b, "_rzqzlq"},
-    },
-  },
-
-  { "_njgxlz",
-    {30},
-    { {"0"_b, "_txzxzs"},
-      {"1"_b, "_vprkpq"},
+  { "_njjlxy",
+    {30, 23, 22},
+    { {"000"_b, "stlxp_sp32_ldstexcl"},
+      {"001"_b, "_ymvzyh"},
+      {"010"_b, "_nxttqn"},
+      {"011"_b, "_nghmrp"},
+      {"100"_b, "stlxp_sp64_ldstexcl"},
+      {"101"_b, "_hpqkhv"},
+      {"110"_b, "_xspjzn"},
+      {"111"_b, "_msyrjz"},
     },
   },
 
@@ -3296,14 +3540,13 @@
     },
   },
 
-  { "_njtngm",
-    {13, 12, 10},
-    { {"001"_b, "_qkzlkj"},
-      {"010"_b, "_jvpqrp"},
-      {"011"_b, "_kknjng"},
-      {"101"_b, "_xmtlmj"},
-      {"110"_b, "sqdmlal_asisdelem_l"},
-      {"111"_b, "_zgjpym"},
+  { "_njnsqm",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "frintn_asimdmiscfp16_r"},
+      {"0x00001"_b, "frintn_asimdmisc_r"},
+      {"1111001"_b, "frintp_asimdmiscfp16_r"},
+      {"1x00001"_b, "frintp_asimdmisc_r"},
+      {"xx00000"_b, "cmgt_asimdmisc_z"},
     },
   },
 
@@ -3316,37 +3559,62 @@
     },
   },
 
-  { "_njxtpv",
-    {30, 23, 22, 11, 10, 4},
-    { {"001000"_b, "ccmn_32_condcmp_reg"},
-      {"001100"_b, "ccmn_32_condcmp_imm"},
-      {"101000"_b, "ccmp_32_condcmp_reg"},
-      {"101100"_b, "ccmp_32_condcmp_imm"},
+  { "_nklqly",
+    {13, 12, 11, 10},
+    { {"0000"_b, "sha256h_qqv_cryptosha3"},
+      {"0100"_b, "sha256h2_qqv_cryptosha3"},
+      {"1000"_b, "sha256su1_vvv_cryptosha3"},
     },
   },
 
-  { "_nkjgpq",
-    {23, 20, 19, 18, 17, 16, 13},
-    { {"0000000"_b, "ld1r_asisdlso_r1"},
-      {"0000001"_b, "ld3r_asisdlso_r3"},
-      {"10xxxx0"_b, "ld1r_asisdlsop_rx1_r"},
-      {"10xxxx1"_b, "ld3r_asisdlsop_rx3_r"},
-      {"110xxx0"_b, "ld1r_asisdlsop_rx1_r"},
-      {"110xxx1"_b, "ld3r_asisdlsop_rx3_r"},
-      {"1110xx0"_b, "ld1r_asisdlsop_rx1_r"},
-      {"1110xx1"_b, "ld3r_asisdlsop_rx3_r"},
-      {"11110x0"_b, "ld1r_asisdlsop_rx1_r"},
-      {"11110x1"_b, "ld3r_asisdlsop_rx3_r"},
-      {"1111100"_b, "ld1r_asisdlsop_rx1_r"},
-      {"1111101"_b, "ld3r_asisdlsop_rx3_r"},
-      {"1111110"_b, "ld1r_asisdlsop_r1_i"},
-      {"1111111"_b, "ld3r_asisdlsop_r3_i"},
+  { "_nklvmv",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"1011001"_b, "fcmge_asisdsamefp16_only"},
+      {"1011011"_b, "facge_asisdsamefp16_only"},
+      {"1110101"_b, "fabd_asisdsamefp16_only"},
+      {"1111001"_b, "fcmgt_asisdsamefp16_only"},
+      {"1111011"_b, "facgt_asisdsamefp16_only"},
     },
   },
 
-  { "_nkrqgn",
-    {12},
-    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+  { "_nklyky",
+    {18, 17, 12},
+    { {"000"_b, "st1_asisdlso_d1_1d"},
+    },
+  },
+
+  { "_nkmkvz",
+    {18},
+    { {"0"_b, "st3_asisdlsop_bx3_r3b"},
+      {"1"_b, "st3_asisdlsop_b3_i3b"},
+    },
+  },
+
+  { "_nknntn",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtns_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtns_asimdmisc_r"},
+      {"1111001"_b, "fcvtps_asimdmiscfp16_r"},
+      {"1x00001"_b, "fcvtps_asimdmisc_r"},
+      {"xx00000"_b, "cmlt_asimdmisc_z"},
+      {"xx10000"_b, "smaxv_asimdall_only"},
+      {"xx10001"_b, "sminv_asimdall_only"},
+    },
+  },
+
+  { "_nkpyjg",
+    {23, 20, 19, 18, 17, 16},
+    { {"000001"_b, "frint32x_asimdmisc_r"},
+    },
+  },
+
+  { "_nktrpj",
+    {23, 22, 12},
+    { {"001"_b, "sudot_asimdelem_d"},
+      {"011"_b, "bfdot_asimdelem_e"},
+      {"101"_b, "usdot_asimdelem_d"},
+      {"111"_b, "bfmlal_asimdelem_f"},
+      {"xx0"_b, "sdot_asimdelem_d"},
     },
   },
 
@@ -3359,29 +3627,29 @@
     },
   },
 
-  { "_nlgqsk",
+  { "_nkyrpv",
     {30, 23, 13, 12, 11, 10},
-    { {"100001"_b, "sri_asisdshf_r"},
-      {"100101"_b, "sli_asisdshf_r"},
-      {"101001"_b, "sqshlu_asisdshf_r"},
-      {"101101"_b, "uqshl_asisdshf_r"},
+    { {"101001"_b, "ucvtf_asisdshf_c"},
+      {"101111"_b, "fcvtzu_asisdshf_c"},
+      {"1x01x0"_b, "sqrdmlah_asisdelem_r"},
+      {"1x11x0"_b, "sqrdmlsh_asisdelem_r"},
     },
   },
 
-  { "_nlkkyx",
-    {23, 13, 12},
-    { {"001"_b, "fmulx_asisdsame_only"},
-      {"011"_b, "frecps_asisdsame_only"},
-      {"111"_b, "frsqrts_asisdsame_only"},
-    },
-  },
-
-  { "_nllnsg",
-    {30, 23, 22, 19, 16},
-    { {"10010"_b, "aesmc_b_cryptoaes"},
-      {"x0x01"_b, "fcvtn_asimdmisc_n"},
-      {"x1001"_b, "bfcvtn_asimdmisc_4s"},
-      {"xxx00"_b, "sadalp_asimdmisc_p"},
+  { "_nkyynq",
+    {23, 22, 20, 19, 17, 16},
+    { {"000010"_b, "scvtf_s32_float2fix"},
+      {"000011"_b, "ucvtf_s32_float2fix"},
+      {"001100"_b, "fcvtzs_32s_float2fix"},
+      {"001101"_b, "fcvtzu_32s_float2fix"},
+      {"010010"_b, "scvtf_d32_float2fix"},
+      {"010011"_b, "ucvtf_d32_float2fix"},
+      {"011100"_b, "fcvtzs_32d_float2fix"},
+      {"011101"_b, "fcvtzu_32d_float2fix"},
+      {"110010"_b, "scvtf_h32_float2fix"},
+      {"110011"_b, "ucvtf_h32_float2fix"},
+      {"111100"_b, "fcvtzs_32h_float2fix"},
+      {"111101"_b, "fcvtzu_32h_float2fix"},
     },
   },
 
@@ -3392,85 +3660,74 @@
     },
   },
 
-  { "_nlqglq",
-    {13, 10},
-    { {"00"_b, "_lxvnxm"},
-      {"01"_b, "_mzqzhq"},
-      {"10"_b, "_myxhpq"},
-      {"11"_b, "_pslllp"},
-    },
-  },
-
-  { "_nlyntn",
-    {23, 22, 20, 19, 11},
-    { {"00000"_b, "movi_asimdimm_l_sl"},
-    },
-  },
-
-  { "_nmkqzt",
-    {20, 19, 18, 17},
-    { {"0000"_b, "_nvqlyn"},
-    },
-  },
-
-  { "_nmtkjv",
-    {17},
-    { {"0"_b, "ld1_asisdlso_h1_1h"},
-    },
-  },
-
-  { "_nmzyvt",
+  { "_nlrjsj",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"0000000"_b, "ldsmaxb_32_memop"},
-      {"0000100"_b, "ldsminb_32_memop"},
-      {"0000x10"_b, "strb_32b_ldst_regoff"},
-      {"0001000"_b, "ldumaxb_32_memop"},
-      {"0001100"_b, "lduminb_32_memop"},
-      {"0001x10"_b, "strb_32bl_ldst_regoff"},
-      {"0010000"_b, "ldsmaxlb_32_memop"},
-      {"0010100"_b, "ldsminlb_32_memop"},
-      {"0010x10"_b, "ldrb_32b_ldst_regoff"},
-      {"0011000"_b, "ldumaxlb_32_memop"},
-      {"0011100"_b, "lduminlb_32_memop"},
-      {"0011x10"_b, "ldrb_32bl_ldst_regoff"},
-      {"0100000"_b, "ldsmaxab_32_memop"},
-      {"0100100"_b, "ldsminab_32_memop"},
-      {"0100x10"_b, "ldrsb_64b_ldst_regoff"},
-      {"0101000"_b, "ldumaxab_32_memop"},
-      {"0101100"_b, "lduminab_32_memop"},
-      {"0101x10"_b, "ldrsb_64bl_ldst_regoff"},
-      {"0110000"_b, "ldsmaxalb_32_memop"},
-      {"0110100"_b, "ldsminalb_32_memop"},
-      {"0110x10"_b, "ldrsb_32b_ldst_regoff"},
-      {"0111000"_b, "ldumaxalb_32_memop"},
-      {"0111100"_b, "lduminalb_32_memop"},
-      {"0111x10"_b, "ldrsb_32bl_ldst_regoff"},
-      {"1000000"_b, "ldsmaxh_32_memop"},
-      {"1000100"_b, "ldsminh_32_memop"},
-      {"1001000"_b, "ldumaxh_32_memop"},
-      {"1001100"_b, "lduminh_32_memop"},
-      {"100xx10"_b, "strh_32_ldst_regoff"},
-      {"1010000"_b, "ldsmaxlh_32_memop"},
-      {"1010100"_b, "ldsminlh_32_memop"},
-      {"1011000"_b, "ldumaxlh_32_memop"},
-      {"1011100"_b, "lduminlh_32_memop"},
-      {"101xx10"_b, "ldrh_32_ldst_regoff"},
-      {"1100000"_b, "ldsmaxah_32_memop"},
-      {"1100100"_b, "ldsminah_32_memop"},
-      {"1101000"_b, "ldumaxah_32_memop"},
-      {"1101100"_b, "lduminah_32_memop"},
-      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
-      {"1110000"_b, "ldsmaxalh_32_memop"},
-      {"1110100"_b, "ldsminalh_32_memop"},
-      {"1111000"_b, "ldumaxalh_32_memop"},
-      {"1111100"_b, "lduminalh_32_memop"},
-      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+    { {"000xx10"_b, "stlur_s_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_s_ldapstl_simd"},
+      {"100xx10"_b, "stlur_d_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_d_ldapstl_simd"},
+      {"x000001"_b, "cpypn_cpy_memcms"},
+      {"x000101"_b, "cpypwtn_cpy_memcms"},
+      {"x001001"_b, "cpyprtn_cpy_memcms"},
+      {"x001101"_b, "cpyptn_cpy_memcms"},
+      {"x010001"_b, "cpymn_cpy_memcms"},
+      {"x010101"_b, "cpymwtn_cpy_memcms"},
+      {"x011001"_b, "cpymrtn_cpy_memcms"},
+      {"x011101"_b, "cpymtn_cpy_memcms"},
+      {"x100001"_b, "cpyen_cpy_memcms"},
+      {"x100101"_b, "cpyewtn_cpy_memcms"},
+      {"x101001"_b, "cpyertn_cpy_memcms"},
+      {"x101101"_b, "cpyetn_cpy_memcms"},
     },
   },
 
-  { "_nnhprs",
-    {1, 0},
-    { {"00"_b, "ret_64r_branch_reg"},
+  { "_nmqrtr",
+    {23, 22, 13, 12, 11, 10},
+    { {"0001x0"_b, "fmul_asimdelem_rh_h"},
+      {"0x0001"_b, "shrn_asimdshf_n"},
+      {"0x0011"_b, "rshrn_asimdshf_n"},
+      {"0x0101"_b, "sqshrn_asimdshf_n"},
+      {"0x0111"_b, "sqrshrn_asimdshf_n"},
+      {"0x1001"_b, "sshll_asimdshf_l"},
+      {"1x01x0"_b, "fmul_asimdelem_r_sd"},
+      {"xx00x0"_b, "mul_asimdelem_r"},
+      {"xx10x0"_b, "smull_asimdelem_l"},
+      {"xx11x0"_b, "sqdmull_asimdelem_l"},
+    },
+  },
+
+  { "_nmqskh",
+    {23, 22, 20, 19, 16, 13, 12},
+    { {"0000000"_b, "_xkznrh"},
+      {"0000010"_b, "_svlrvy"},
+      {"0000011"_b, "_prmjlz"},
+      {"0100000"_b, "_lgmlmt"},
+      {"0100010"_b, "_qhpkhm"},
+      {"0100011"_b, "_sqlsyr"},
+      {"100xx00"_b, "st3_asisdlsep_r3_r"},
+      {"100xx10"_b, "st1_asisdlsep_r3_r3"},
+      {"100xx11"_b, "st1_asisdlsep_r1_r1"},
+      {"1010x00"_b, "st3_asisdlsep_r3_r"},
+      {"1010x10"_b, "st1_asisdlsep_r3_r3"},
+      {"1010x11"_b, "st1_asisdlsep_r1_r1"},
+      {"1011000"_b, "st3_asisdlsep_r3_r"},
+      {"1011010"_b, "st1_asisdlsep_r3_r3"},
+      {"1011011"_b, "st1_asisdlsep_r1_r1"},
+      {"1011100"_b, "_lzzsyj"},
+      {"1011110"_b, "_xqvzvl"},
+      {"1011111"_b, "_vxrnyh"},
+      {"110xx00"_b, "ld3_asisdlsep_r3_r"},
+      {"110xx10"_b, "ld1_asisdlsep_r3_r3"},
+      {"110xx11"_b, "ld1_asisdlsep_r1_r1"},
+      {"1110x00"_b, "ld3_asisdlsep_r3_r"},
+      {"1110x10"_b, "ld1_asisdlsep_r3_r3"},
+      {"1110x11"_b, "ld1_asisdlsep_r1_r1"},
+      {"1111000"_b, "ld3_asisdlsep_r3_r"},
+      {"1111010"_b, "ld1_asisdlsep_r3_r3"},
+      {"1111011"_b, "ld1_asisdlsep_r1_r1"},
+      {"1111100"_b, "_ntxnpq"},
+      {"1111110"_b, "_ghmtnl"},
+      {"1111111"_b, "_gzrtkk"},
     },
   },
 
@@ -3482,21 +3739,40 @@
     },
   },
 
-  { "_nnkyzr",
-    {18, 17, 16},
-    { {"011"_b, "_yvgqjx"},
+  { "_nnrtpm",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stllrb_sl32_ldstexcl"},
     },
   },
 
-  { "_nnllqy",
-    {18, 17},
-    { {"00"_b, "ld1_asisdlso_s1_1s"},
-    },
-  },
-
-  { "_nnlvqz",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_d_floatimm"},
+  { "_nntvzj",
+    {11, 10, 9, 8, 7, 6},
+    { {"000000"_b, "nop_hi_hints"},
+      {"000001"_b, "wfe_hi_hints"},
+      {"000010"_b, "sev_hi_hints"},
+      {"000011"_b, "dgh_hi_hints"},
+      {"000100"_b, "pacia1716_hi_hints"},
+      {"000101"_b, "pacib1716_hi_hints"},
+      {"000110"_b, "autia1716_hi_hints"},
+      {"000111"_b, "autib1716_hi_hints"},
+      {"001000"_b, "esb_hi_hints"},
+      {"001001"_b, "tsb_hc_hints"},
+      {"001010"_b, "csdb_hi_hints"},
+      {"001011"_b, "clrbhb_hi_hints"},
+      {"001100"_b, "paciaz_hi_hints"},
+      {"001101"_b, "pacibz_hi_hints"},
+      {"001110"_b, "autiaz_hi_hints"},
+      {"001111"_b, "autibz_hi_hints"},
+      {"0100xx"_b, "bti_hb_hints"},
+      {"010100"_b, "chkfeat_hf_hints"},
+      {"0101x1"_b, "hint_hm_hints"},
+      {"01x110"_b, "hint_hm_hints"},
+      {"10xxxx"_b, "hint_hm_hints"},
+      {"110xxx"_b, "hint_hm_hints"},
+      {"111110"_b, "hint_hm_hints"},
+      {"x110xx"_b, "hint_hm_hints"},
+      {"x1110x"_b, "hint_hm_hints"},
+      {"x11111"_b, "hint_hm_hints"},
     },
   },
 
@@ -3507,11 +3783,29 @@
     },
   },
 
-  { "_nqgqjh",
-    {30, 23, 22, 20, 19},
-    { {"0xxxx"_b, "bl_only_branch_imm"},
-      {"10001"_b, "sys_cr_systeminstrs"},
-      {"1001x"_b, "msr_sr_systemmove"},
+  { "_npjnlv",
+    {20, 19, 18, 17},
+    { {"0000"_b, "_kzyzrh"},
+    },
+  },
+
+  { "_npxkzq",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000000"_b, "_tykvnx"},
+    },
+  },
+
+  { "_nqjtqn",
+    {23, 22},
+    { {"00"_b, "dup_asimdins_dv_v"},
+      {"01"_b, "fmaxnm_asimdsamefp16_only"},
+      {"11"_b, "fminnm_asimdsamefp16_only"},
+    },
+  },
+
+  { "_nqjvmr",
+    {13, 12},
+    { {"00"_b, "adcs_32_addsub_carry"},
     },
   },
 
@@ -3522,41 +3816,44 @@
     },
   },
 
-  { "_nqlgtn",
-    {23, 20, 19, 18, 17, 16, 13},
-    { {"0000000"_b, "ld2r_asisdlso_r2"},
-      {"0000001"_b, "ld4r_asisdlso_r4"},
-      {"10xxxx0"_b, "ld2r_asisdlsop_rx2_r"},
-      {"10xxxx1"_b, "ld4r_asisdlsop_rx4_r"},
-      {"110xxx0"_b, "ld2r_asisdlsop_rx2_r"},
-      {"110xxx1"_b, "ld4r_asisdlsop_rx4_r"},
-      {"1110xx0"_b, "ld2r_asisdlsop_rx2_r"},
-      {"1110xx1"_b, "ld4r_asisdlsop_rx4_r"},
-      {"11110x0"_b, "ld2r_asisdlsop_rx2_r"},
-      {"11110x1"_b, "ld4r_asisdlsop_rx4_r"},
-      {"1111100"_b, "ld2r_asisdlsop_rx2_r"},
-      {"1111101"_b, "ld4r_asisdlsop_rx4_r"},
-      {"1111110"_b, "ld2r_asisdlsop_r2_i"},
-      {"1111111"_b, "ld4r_asisdlsop_r4_i"},
+  { "_nqlrmv",
+    {30, 23, 22},
+    { {"000"_b, "bfm_32m_bitfield"},
     },
   },
 
-  { "_nqysxy",
-    {0},
-    { {"1"_b, "blraaz_64_branch_reg"},
+  { "_nqmnzp",
+    {30, 23, 22, 20, 19, 18, 17, 16},
+    { {"00000000"_b, "udf_only_perm_undef"},
     },
   },
 
-  { "_nrrmtx",
-    {22, 13, 12},
-    { {"000"_b, "swpa_64_memop"},
-      {"100"_b, "swpal_64_memop"},
+  { "_nrmlqv",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "orr_asimdimm_l_sl"},
+      {"00x100"_b, "ssra_asimdshf_r"},
+      {"00x110"_b, "srsra_asimdshf_r"},
+      {"010x00"_b, "ssra_asimdshf_r"},
+      {"010x10"_b, "srsra_asimdshf_r"},
+      {"011100"_b, "ssra_asimdshf_r"},
+      {"011110"_b, "srsra_asimdshf_r"},
+      {"0x1000"_b, "ssra_asimdshf_r"},
+      {"0x1010"_b, "srsra_asimdshf_r"},
     },
   },
 
-  { "_nrssjz",
-    {17},
-    { {"0"_b, "ld3_asisdlso_b3_3b"},
+  { "_nsgvsv",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_h_floatimm"},
+    },
+  },
+
+  { "_nsgxlz",
+    {13, 12, 10},
+    { {"000"_b, "sqdmulh_asisdelem_r"},
+      {"010"_b, "sqrdmulh_asisdelem_r"},
+      {"101"_b, "_rkjjtp"},
+      {"111"_b, "_pzpxxv"},
     },
   },
 
@@ -3576,35 +3873,6 @@
     },
   },
 
-  { "_nsnyxt",
-    {23},
-    { {"0"_b, "fmla_asimdsame_only"},
-      {"1"_b, "fmls_asimdsame_only"},
-    },
-  },
-
-  { "_nssrnm",
-    {20, 18, 17, 16},
-    { {"0000"_b, "_lnpvky"},
-    },
-  },
-
-  { "_nszhhy",
-    {17},
-    { {"0"_b, "ld2_asisdlsep_r2_r"},
-      {"1"_b, "ld2_asisdlsep_i2_i"},
-    },
-  },
-
-  { "_nthvqx",
-    {23, 22},
-    { {"00"_b, "eor_asimdsame_only"},
-      {"01"_b, "bsl_asimdsame_only"},
-      {"10"_b, "bit_asimdsame_only"},
-      {"11"_b, "bif_asimdsame_only"},
-    },
-  },
-
   { "_ntjpsx",
     {22, 20, 11},
     { {"000"_b, "uqincb_r_rs_uw"},
@@ -3618,34 +3886,17 @@
     },
   },
 
-  { "_ntkhsm",
-    {13, 12},
-    { {"00"_b, "cmtst_asisdsame_only"},
+  { "_ntjrlg",
+    {18, 17, 16, 13, 12, 11, 10, 9, 7, 6, 5},
+    { {"01111000011"_b, "_vsslrs"},
     },
   },
 
-  { "_ntkqhk",
-    {11, 10, 9, 8, 7, 6},
-    { {"000000"_b, "yield_hi_hints"},
-      {"000001"_b, "wfi_hi_hints"},
-      {"000010"_b, "sevl_hi_hints"},
-      {"000011"_b, "xpaclri_hi_hints"},
-      {"001000"_b, "psb_hc_hints"},
-      {"0010x1"_b, "hint_hm_hints"},
-      {"001100"_b, "paciasp_hi_hints"},
-      {"001101"_b, "pacibsp_hi_hints"},
-      {"001110"_b, "autiasp_hi_hints"},
-      {"001111"_b, "autibsp_hi_hints"},
-      {"0x01xx"_b, "hint_hm_hints"},
-      {"0x1010"_b, "hint_hm_hints"},
-      {"10x0xx"_b, "hint_hm_hints"},
-      {"10x1xx"_b, "hint_hm_hints"},
-      {"1101xx"_b, "hint_hm_hints"},
-      {"111010"_b, "hint_hm_hints"},
-      {"x100xx"_b, "hint_hm_hints"},
-      {"x1100x"_b, "hint_hm_hints"},
-      {"x11011"_b, "hint_hm_hints"},
-      {"x111xx"_b, "hint_hm_hints"},
+  { "_ntxnpq",
+    {18, 17},
+    { {"0x"_b, "ld3_asisdlsep_r3_r"},
+      {"10"_b, "ld3_asisdlsep_r3_r"},
+      {"11"_b, "ld3_asisdlsep_i3_i"},
     },
   },
 
@@ -3658,19 +3909,16 @@
     },
   },
 
-  { "_nvqlyn",
-    {16, 13, 12},
-    { {"000"_b, "rev_64_dp_1src"},
-      {"100"_b, "pacdb_64p_dp_1src"},
-      {"101"_b, "autdb_64p_dp_1src"},
-      {"110"_b, "_hhnjjk"},
-      {"111"_b, "_yvnjkr"},
+  { "_nvkxzs",
+    {12},
+    { {"0"_b, "gcsstr_64_ldst_gcs"},
+      {"1"_b, "gcssttr_64_ldst_gcs"},
     },
   },
 
-  { "_nvthzh",
-    {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
-    { {"000010011111"_b, "xpacd_64z_dp_1src"},
+  { "_nvnjyp",
+    {23, 22, 20, 19, 11},
+    { {"00000"_b, "mvni_asimdimm_m_sm"},
     },
   },
 
@@ -3699,10 +3947,9 @@
     },
   },
 
-  { "_nxjgmm",
-    {17},
-    { {"0"_b, "st3_asisdlsop_bx3_r3b"},
-      {"1"_b, "st3_asisdlsop_b3_i3b"},
+  { "_nvzsxn",
+    {18, 17, 12},
+    { {"000"_b, "stl1_asisdlso_d1"},
     },
   },
 
@@ -3719,65 +3966,118 @@
     },
   },
 
-  { "_nxmjvy",
-    {30, 23, 11, 10},
-    { {"1001"_b, "_jksztq"},
+  { "_nxlmhz",
+    {30, 23},
+    { {"00"_b, "add_32_addsub_imm"},
+      {"10"_b, "sub_32_addsub_imm"},
     },
   },
 
-  { "_nxqygl",
-    {13},
-    { {"0"_b, "mla_asimdelem_r"},
-      {"1"_b, "umlal_asimdelem_l"},
+  { "_nxlsjm",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxrb_lr32_ldstexcl"},
     },
   },
 
-  { "_nxyhyv",
-    {30, 11, 10},
-    { {"000"_b, "_kvyysq"},
-      {"001"_b, "_rvjzgt"},
-      {"010"_b, "_vjlnqj"},
-      {"011"_b, "_jvvzjq"},
-      {"100"_b, "_tzzhsk"},
-      {"101"_b, "_mplskr"},
-      {"110"_b, "_njgmvx"},
-      {"111"_b, "_ntkhsm"},
+  { "_nxmgqz",
+    {18},
+    { {"0"_b, "ld1_asisdlso_h1_1h"},
     },
   },
 
-  { "_nykvly",
-    {16, 13, 12},
-    { {"000"_b, "rev32_64_dp_1src"},
-      {"100"_b, "pacda_64p_dp_1src"},
-      {"101"_b, "autda_64p_dp_1src"},
-      {"110"_b, "_mgqvvn"},
-      {"111"_b, "_xvlnmy"},
+  { "_nxrqmg",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx00"_b, "stlur_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapur_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"},
+      {"100xx00"_b, "stlur_64_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapur_64_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfpn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtn_cpy_memcms"},
+      {"x001101"_b, "cpyfptn_cpy_memcms"},
+      {"x010001"_b, "cpyfmn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtn_cpy_memcms"},
+      {"x100001"_b, "cpyfen_cpy_memcms"},
+      {"x100101"_b, "cpyfewtn_cpy_memcms"},
+      {"x101001"_b, "cpyfertn_cpy_memcms"},
+      {"x101101"_b, "cpyfetn_cpy_memcms"},
     },
   },
 
-  { "_nyssqn",
+  { "_nxttqn",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casl_c32_ldstexcl"},
+    },
+  },
+
+  { "_nygsjm",
+    {18},
+    { {"0"_b, "st2_asisdlso_b2_2b"},
+    },
+  },
+
+  { "_nyjtng",
+    {23, 22},
+    { {"01"_b, "fmls_z_p_zzz"},
+      {"1x"_b, "fmls_z_p_zzz"},
+    },
+  },
+
+  { "_nynrns",
+    {23, 22, 12},
+    { {"000"_b, "_klxxgx"},
+      {"001"_b, "_pglvnj"},
+      {"010"_b, "_pzttrn"},
+      {"011"_b, "_svyszp"},
+      {"110"_b, "_prrkzv"},
+      {"111"_b, "_nsgvsv"},
+    },
+  },
+
+  { "_nzmqhv",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0x00001"_b, "frint64x_asimdmisc_r"},
+      {"0x10000"_b, "fmaxv_asimdall_only_sd"},
+      {"1111000"_b, "fneg_asimdmiscfp16_r"},
+      {"1111001"_b, "fsqrt_asimdmiscfp16_r"},
+      {"1x00000"_b, "fneg_asimdmisc_r"},
+      {"1x00001"_b, "fsqrt_asimdmisc_r"},
+      {"1x10000"_b, "fminv_asimdall_only_sd"},
+    },
+  },
+
+  { "_nzqxrj",
     {12},
-    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
+    { {"1"_b, "_qgvtrn"},
     },
   },
 
-  { "_nyxxks",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "suqadd_asimdmisc_r"},
-      {"10000"_b, "saddlv_asimdall_only"},
+  { "_nzskzl",
+    {13, 12, 11, 10},
+    { {"0000"_b, "uaddl_asimddiff_l"},
+      {"0001"_b, "uhadd_asimdsame_only"},
+      {"0010"_b, "_mmxgrt"},
+      {"0011"_b, "uqadd_asimdsame_only"},
+      {"0100"_b, "uaddw_asimddiff_w"},
+      {"0101"_b, "urhadd_asimdsame_only"},
+      {"0111"_b, "_yyvnrp"},
+      {"1000"_b, "usubl_asimddiff_l"},
+      {"1001"_b, "uhsub_asimdsame_only"},
+      {"1010"_b, "_vlhkgr"},
+      {"1011"_b, "uqsub_asimdsame_only"},
+      {"1100"_b, "usubw_asimddiff_w"},
+      {"1101"_b, "cmhi_asimdsame_only"},
+      {"1110"_b, "_srpptk"},
+      {"1111"_b, "cmhs_asimdsame_only"},
     },
   },
 
-  { "_nzkhrj",
-    {17},
-    { {"0"_b, "st4_asisdlsep_r4_r"},
-      {"1"_b, "st4_asisdlsep_i4_i"},
-    },
-  },
-
-  { "_nzqkky",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "rev32_asimdmisc_r"},
+  { "_nzvlzt",
+    {18},
+    { {"0"_b, "st1_asisdlse_r4_4v"},
     },
   },
 
@@ -3812,6 +4112,85 @@
     },
   },
 
+  { "_pglvnj",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_s_floatimm"},
+    },
+  },
+
+  { "_pgmlrt",
+    {30, 23, 22},
+    { {"000"_b, "stxrb_sr32_ldstexcl"},
+      {"001"_b, "_nxlsjm"},
+      {"010"_b, "_nnrtpm"},
+      {"011"_b, "_sksvrn"},
+      {"100"_b, "stxrh_sr32_ldstexcl"},
+      {"101"_b, "_knpjtt"},
+      {"110"_b, "_zqhhlq"},
+      {"111"_b, "_xtzykp"},
+    },
+  },
+
+  { "_pgvjgs",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_s_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_s_ldapstl_simd"},
+      {"100xx10"_b, "stlur_d_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_d_ldapstl_simd"},
+      {"x000001"_b, "cpypwn_cpy_memcms"},
+      {"x000101"_b, "cpypwtwn_cpy_memcms"},
+      {"x001001"_b, "cpyprtwn_cpy_memcms"},
+      {"x001101"_b, "cpyptwn_cpy_memcms"},
+      {"x010001"_b, "cpymwn_cpy_memcms"},
+      {"x010101"_b, "cpymwtwn_cpy_memcms"},
+      {"x011001"_b, "cpymrtwn_cpy_memcms"},
+      {"x011101"_b, "cpymtwn_cpy_memcms"},
+      {"x100001"_b, "cpyewn_cpy_memcms"},
+      {"x100101"_b, "cpyewtwn_cpy_memcms"},
+      {"x101001"_b, "cpyertwn_cpy_memcms"},
+      {"x101101"_b, "cpyetwn_cpy_memcms"},
+      {"x110001"_b, "setgm_set_memcms"},
+      {"x110101"_b, "setgmt_set_memcms"},
+      {"x111001"_b, "setgmn_set_memcms"},
+      {"x111101"_b, "setgmtn_set_memcms"},
+    },
+  },
+
+  { "_phjkhr",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "autdzb_64z_dp_1src"},
+    },
+  },
+
+  { "_phktvp",
+    {7, 6, 4, 3, 2, 1, 0},
+    { {"0111111"_b, "clrex_bn_barriers"},
+      {"1011111"_b, "dsb_bo_barriers"},
+      {"1111111"_b, "isb_bi_barriers"},
+    },
+  },
+
+  { "_phpphm",
+    {18},
+    { {"0"_b, "st4_asisdlso_h4_4h"},
+    },
+  },
+
+  { "_phrqqx",
+    {23, 22, 13},
+    { {"100"_b, "fmlal_asimdelem_lh"},
+      {"xx1"_b, "smlal_asimdelem_l"},
+    },
+  },
+
+  { "_phsrlk",
+    {23, 22, 13},
+    { {"000"_b, "fmla_asimdelem_rh_h"},
+      {"1x0"_b, "fmla_asimdelem_r_sd"},
+      {"xx1"_b, "sqdmlal_asimdelem_l"},
+    },
+  },
+
   { "_phthqj",
     {30, 13},
     { {"00"_b, "_sntyqy"},
@@ -3821,26 +4200,12 @@
     },
   },
 
-  { "_phtnny",
-    {18, 17},
-    { {"0x"_b, "ld1_asisdlsep_r3_r3"},
-      {"10"_b, "ld1_asisdlsep_r3_r3"},
-      {"11"_b, "ld1_asisdlsep_i3_i3"},
-    },
-  },
-
-  { "_phvnqh",
-    {30},
-    { {"0"_b, "bic_32_log_shift"},
-      {"1"_b, "eon_32_log_shift"},
-    },
-  },
-
-  { "_phxkzh",
-    {17, 4},
-    { {"00"_b, "fcmlt_p_p_z0"},
-      {"01"_b, "fcmle_p_p_z0"},
-      {"10"_b, "fcmne_p_p_z0"},
+  { "_phtxqg",
+    {13, 10},
+    { {"00"_b, "_vrjhtm"},
+      {"01"_b, "_spktyg"},
+      {"10"_b, "_nktrpj"},
+      {"11"_b, "_vzvstm"},
     },
   },
 
@@ -3850,38 +4215,132 @@
     },
   },
 
-  { "_pjkylt",
-    {23, 22},
-    { {"00"_b, "fcsel_s_floatsel"},
-      {"01"_b, "fcsel_d_floatsel"},
-      {"11"_b, "fcsel_h_floatsel"},
+  { "_pjhmvy",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "saddlp_asimdmisc_p"},
+      {"00001"_b, "xtn_asimdmisc_n"},
     },
   },
 
-  { "_plktrh",
+  { "_pjlnhh",
     {30, 23},
-    { {"00"_b, "adds_32s_addsub_imm"},
-      {"10"_b, "subs_32s_addsub_imm"},
+    { {"00"_b, "and_64_log_imm"},
+      {"01"_b, "movn_64_movewide"},
+      {"10"_b, "eor_64_log_imm"},
+      {"11"_b, "movz_64_movewide"},
     },
   },
 
-  { "_plltlx",
-    {23},
-    { {"0"_b, "fadd_asimdsame_only"},
-      {"1"_b, "fsub_asimdsame_only"},
+  { "_pjskhr",
+    {18, 17},
+    { {"00"_b, "st3_asisdlso_s3_3s"},
     },
   },
 
-  { "_pmkxlj",
-    {17},
-    { {"0"_b, "st1_asisdlse_r2_2v"},
+  { "_pjvkjz",
+    {13, 12},
+    { {"00"_b, "sbc_64_addsub_carry"},
     },
   },
 
-  { "_pmrngh",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_snkqvp"},
+  { "_pkjqsy",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"0000001"_b, "cnt_32_dp_1src"},
+    },
+  },
+
+  { "_pkpvmj",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casa_c64_ldstexcl"},
+    },
+  },
+
+  { "_pkqvxk",
+    {12},
+    { {"1"_b, "_ynsytg"},
+    },
+  },
+
+  { "_pkskpp",
+    {30, 23},
+    { {"00"_b, "adds_64s_addsub_imm"},
+      {"10"_b, "subs_64s_addsub_imm"},
+    },
+  },
+
+  { "_plgrmv",
+    {13, 12},
+    { {"00"_b, "adcs_64_addsub_carry"},
+    },
+  },
+
+  { "_plrggq",
+    {23, 22, 13},
+    { {"000"_b, "fmls_asimdelem_rh_h"},
+      {"1x0"_b, "fmls_asimdelem_r_sd"},
+      {"xx1"_b, "sqdmlsl_asimdelem_l"},
+    },
+  },
+
+  { "_plyhhz",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "cmge_asisdmisc_z"},
+    },
+  },
+
+  { "_plymgg",
+    {18},
+    { {"1"_b, "frsqrte_z_z"},
+    },
+  },
+
+  { "_plytvr",
+    {22},
+    { {"0"_b, "str_32_ldst_regoff"},
+      {"1"_b, "ldr_32_ldst_regoff"},
+    },
+  },
+
+  { "_plyxlq",
+    {30, 18},
+    { {"00"_b, "_nkyynq"},
+    },
+  },
+
+  { "_plzqrv",
+    {23, 22, 20, 19, 12, 11, 10},
+    { {"00x1001"_b, "sqshrun_asisdshf_n"},
+      {"00x1011"_b, "sqrshrun_asisdshf_n"},
+      {"00x1101"_b, "uqshrn_asisdshf_n"},
+      {"00x1111"_b, "uqrshrn_asisdshf_n"},
+      {"00xx1x0"_b, "fmulx_asisdelem_rh_h"},
+      {"010x001"_b, "sqshrun_asisdshf_n"},
+      {"010x011"_b, "sqrshrun_asisdshf_n"},
+      {"010x101"_b, "uqshrn_asisdshf_n"},
+      {"010x111"_b, "uqrshrn_asisdshf_n"},
+      {"0111001"_b, "sqshrun_asisdshf_n"},
+      {"0111011"_b, "sqrshrun_asisdshf_n"},
+      {"0111101"_b, "uqshrn_asisdshf_n"},
+      {"0111111"_b, "uqrshrn_asisdshf_n"},
+      {"0x10001"_b, "sqshrun_asisdshf_n"},
+      {"0x10011"_b, "sqrshrun_asisdshf_n"},
+      {"0x10101"_b, "uqshrn_asisdshf_n"},
+      {"0x10111"_b, "uqrshrn_asisdshf_n"},
+      {"1xxx1x0"_b, "fmulx_asisdelem_r_sd"},
+    },
+  },
+
+  { "_pmpsvs",
+    {18, 17, 12},
+    { {"000"_b, "ld2_asisdlso_d2_2d"},
+    },
+  },
+
+  { "_pnkxsr",
+    {22, 20},
+    { {"00"_b, "_hnsvjh"},
+      {"01"_b, "mrs_rs_systemmove"},
+      {"11"_b, "mrrs_rs_systemmovepr"},
     },
   },
 
@@ -3891,48 +4350,6 @@
     },
   },
 
-  { "_pnqxjg",
-    {4},
-    { {"0"_b, "ccmn_32_condcmp_reg"},
-    },
-  },
-
-  { "_pnxggm",
-    {4, 3, 2, 1, 0},
-    { {"00000"_b, "fcmp_d_floatcmp"},
-      {"01000"_b, "fcmp_dz_floatcmp"},
-      {"10000"_b, "fcmpe_d_floatcmp"},
-      {"11000"_b, "fcmpe_dz_floatcmp"},
-    },
-  },
-
-  { "_pnxgrg",
-    {30, 23, 22},
-    { {"000"_b, "madd_32a_dp_3src"},
-    },
-  },
-
-  { "_pnzphx",
-    {17},
-    { {"1"_b, "frecpe_z_z"},
-    },
-  },
-
-  { "_pphhym",
-    {30, 23, 22},
-    { {"00x"_b, "add_32_addsub_shift"},
-      {"010"_b, "add_32_addsub_shift"},
-      {"10x"_b, "sub_32_addsub_shift"},
-      {"110"_b, "sub_32_addsub_shift"},
-    },
-  },
-
-  { "_ppllxt",
-    {18, 17},
-    { {"00"_b, "ld1_asisdlse_r3_3v"},
-    },
-  },
-
   { "_ppnssm",
     {30, 13, 12},
     { {"000"_b, "_ktyppm"},
@@ -3953,21 +4370,18 @@
     },
   },
 
-  { "_ppqkym",
-    {30, 23, 22, 11, 10},
-    { {"10001"_b, "stg_64spost_ldsttags"},
-      {"10010"_b, "stg_64soffset_ldsttags"},
-      {"10011"_b, "stg_64spre_ldsttags"},
-      {"10100"_b, "ldg_64loffset_ldsttags"},
-      {"10101"_b, "stzg_64spost_ldsttags"},
-      {"10110"_b, "stzg_64soffset_ldsttags"},
-      {"10111"_b, "stzg_64spre_ldsttags"},
-      {"11001"_b, "st2g_64spost_ldsttags"},
-      {"11010"_b, "st2g_64soffset_ldsttags"},
-      {"11011"_b, "st2g_64spre_ldsttags"},
-      {"11101"_b, "stz2g_64spost_ldsttags"},
-      {"11110"_b, "stz2g_64soffset_ldsttags"},
-      {"11111"_b, "stz2g_64spre_ldsttags"},
+  { "_ppvnly",
+    {18, 17},
+    { {"0x"_b, "ld2_asisdlsop_sx2_r2s"},
+      {"10"_b, "ld2_asisdlsop_sx2_r2s"},
+      {"11"_b, "ld2_asisdlsop_s2_i2s"},
+    },
+  },
+
+  { "_ppyynh",
+    {23, 22},
+    { {"00"_b, "fmla_asisdelem_rh_h"},
+      {"1x"_b, "fmla_asisdelem_r_sd"},
     },
   },
 
@@ -3978,45 +4392,35 @@
     },
   },
 
-  { "_pqpzkt",
-    {11, 10, 9, 8, 7, 6},
-    { {"000000"_b, "nop_hi_hints"},
-      {"000001"_b, "wfe_hi_hints"},
-      {"000010"_b, "sev_hi_hints"},
-      {"000011"_b, "dgh_hi_hints"},
-      {"000100"_b, "pacia1716_hi_hints"},
-      {"000101"_b, "pacib1716_hi_hints"},
-      {"000110"_b, "autia1716_hi_hints"},
-      {"000111"_b, "autib1716_hi_hints"},
-      {"001000"_b, "esb_hi_hints"},
-      {"001001"_b, "tsb_hc_hints"},
-      {"001010"_b, "csdb_hi_hints"},
-      {"001100"_b, "paciaz_hi_hints"},
-      {"001101"_b, "pacibz_hi_hints"},
-      {"001110"_b, "autiaz_hi_hints"},
-      {"001111"_b, "autibz_hi_hints"},
-      {"0100xx"_b, "bti_hb_hints"},
-      {"0x1011"_b, "hint_hm_hints"},
-      {"10x0xx"_b, "hint_hm_hints"},
-      {"10x1xx"_b, "hint_hm_hints"},
-      {"1100xx"_b, "hint_hm_hints"},
-      {"111011"_b, "hint_hm_hints"},
-      {"x1100x"_b, "hint_hm_hints"},
-      {"x11010"_b, "hint_hm_hints"},
-      {"x1x1xx"_b, "hint_hm_hints"},
+  { "_pqmqrg",
+    {30, 23, 22},
+    { {"000"_b, "stp_s_ldstpair_off"},
+      {"001"_b, "ldp_s_ldstpair_off"},
+      {"010"_b, "stp_s_ldstpair_pre"},
+      {"011"_b, "ldp_s_ldstpair_pre"},
+      {"100"_b, "stp_d_ldstpair_off"},
+      {"101"_b, "ldp_d_ldstpair_off"},
+      {"110"_b, "stp_d_ldstpair_pre"},
+      {"111"_b, "ldp_d_ldstpair_pre"},
     },
   },
 
-  { "_pqtjgx",
-    {23, 22, 13, 12, 11, 10},
-    { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
-      {"0x0001"_b, "sri_asimdshf_r"},
-      {"0x0101"_b, "sli_asimdshf_r"},
-      {"0x1001"_b, "sqshlu_asimdshf_r"},
-      {"0x1101"_b, "uqshl_asimdshf_r"},
-      {"10x1x0"_b, "fcmla_asimdelem_c_s"},
-      {"xx00x0"_b, "mls_asimdelem_r"},
-      {"xx10x0"_b, "umlsl_asimdelem_l"},
+  { "_pqsvty",
+    {13},
+    { {"0"_b, "_qqslmv"},
+      {"1"_b, "_gjxsrn"},
+    },
+  },
+
+  { "_prgrzz",
+    {30},
+    { {"0"_b, "cbnz_32_compbranch"},
+    },
+  },
+
+  { "_prjzxs",
+    {12},
+    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
     },
   },
 
@@ -4027,27 +4431,55 @@
     },
   },
 
-  { "_pslllp",
-    {30, 23, 22, 20, 19, 12, 11},
-    { {"0000000"_b, "movi_asimdimm_d_ds"},
-      {"1000000"_b, "movi_asimdimm_d2_d"},
-      {"1000010"_b, "fmov_asimdimm_d2_d"},
-      {"x00x100"_b, "ucvtf_asimdshf_c"},
-      {"x00x111"_b, "fcvtzu_asimdshf_c"},
-      {"x010x00"_b, "ucvtf_asimdshf_c"},
-      {"x010x11"_b, "fcvtzu_asimdshf_c"},
-      {"x011100"_b, "ucvtf_asimdshf_c"},
-      {"x011111"_b, "fcvtzu_asimdshf_c"},
-      {"x0x1000"_b, "ucvtf_asimdshf_c"},
-      {"x0x1011"_b, "fcvtzu_asimdshf_c"},
+  { "_prmjlz",
+    {18, 17},
+    { {"00"_b, "st1_asisdlse_r1_1v"},
     },
   },
 
-  { "_psqpkp",
-    {17, 4},
+  { "_prrkzv",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fmov_h_floatdp1"},
+      {"000010"_b, "fneg_h_floatdp1"},
+      {"000100"_b, "fcvt_sh_floatdp1"},
+      {"001000"_b, "frintn_h_floatdp1"},
+      {"001010"_b, "frintm_h_floatdp1"},
+      {"001100"_b, "frinta_h_floatdp1"},
+      {"001110"_b, "frintx_h_floatdp1"},
+    },
+  },
+
+  { "_prtvjm",
+    {23, 22, 12, 11, 10},
+    { {"10000"_b, "fadd_z_zz"},
+      {"10001"_b, "fsub_z_zz"},
+      {"10010"_b, "fmul_z_zz"},
+      {"x1000"_b, "fadd_z_zz"},
+      {"x1001"_b, "fsub_z_zz"},
+      {"x1010"_b, "fmul_z_zz"},
+      {"xx011"_b, "ftsmul_z_zz"},
+      {"xx110"_b, "frecps_z_zz"},
+      {"xx111"_b, "frsqrts_z_zz"},
+    },
+  },
+
+  { "_prxyhr",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "autiza_64z_dp_1src"},
+    },
+  },
+
+  { "_prytjs",
+    {18, 4},
     { {"00"_b, "fcmge_p_p_z0"},
       {"01"_b, "fcmgt_p_p_z0"},
-      {"10"_b, "fcmeq_p_p_z0"},
+    },
+  },
+
+  { "_pstgvl",
+    {23},
+    { {"0"_b, "fmaxnm_asimdsame_only"},
+      {"1"_b, "fminnm_asimdsame_only"},
     },
   },
 
@@ -4057,9 +4489,17 @@
     },
   },
 
-  { "_ptkrvg",
-    {12},
-    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+  { "_ptkgrz",
+    {22},
+    { {"0"_b, "ldrsw_64_ldst_regoff"},
+    },
+  },
+
+  { "_ptqtmp",
+    {13, 12, 11, 10},
+    { {"0111"_b, "fmulx_asisdsamefp16_only"},
+      {"1001"_b, "fcmeq_asisdsamefp16_only"},
+      {"1111"_b, "frecps_asisdsamefp16_only"},
     },
   },
 
@@ -4150,95 +4590,37 @@
     },
   },
 
-  { "_pvkmmv",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0000000"_b, "ldsmax_32_memop"},
-      {"0000100"_b, "ldsmin_32_memop"},
-      {"0001000"_b, "ldumax_32_memop"},
-      {"0001100"_b, "ldumin_32_memop"},
-      {"000xx10"_b, "str_32_ldst_regoff"},
-      {"0010000"_b, "ldsmaxl_32_memop"},
-      {"0010100"_b, "ldsminl_32_memop"},
-      {"0011000"_b, "ldumaxl_32_memop"},
-      {"0011100"_b, "lduminl_32_memop"},
-      {"001xx10"_b, "ldr_32_ldst_regoff"},
-      {"0100000"_b, "ldsmaxa_32_memop"},
-      {"0100100"_b, "ldsmina_32_memop"},
-      {"0101000"_b, "ldumaxa_32_memop"},
-      {"0101100"_b, "ldumina_32_memop"},
-      {"010xx10"_b, "ldrsw_64_ldst_regoff"},
-      {"0110000"_b, "ldsmaxal_32_memop"},
-      {"0110100"_b, "ldsminal_32_memop"},
-      {"0111000"_b, "ldumaxal_32_memop"},
-      {"0111100"_b, "lduminal_32_memop"},
-      {"1000000"_b, "ldsmax_64_memop"},
-      {"1000100"_b, "ldsmin_64_memop"},
-      {"1001000"_b, "ldumax_64_memop"},
-      {"1001100"_b, "ldumin_64_memop"},
-      {"100xx10"_b, "str_64_ldst_regoff"},
-      {"1010000"_b, "ldsmaxl_64_memop"},
-      {"1010100"_b, "ldsminl_64_memop"},
-      {"1011000"_b, "ldumaxl_64_memop"},
-      {"1011100"_b, "lduminl_64_memop"},
-      {"101xx10"_b, "ldr_64_ldst_regoff"},
-      {"10xxx01"_b, "ldraa_64_ldst_pac"},
-      {"10xxx11"_b, "ldraa_64w_ldst_pac"},
-      {"1100000"_b, "ldsmaxa_64_memop"},
-      {"1100100"_b, "ldsmina_64_memop"},
-      {"1101000"_b, "ldumaxa_64_memop"},
-      {"1101100"_b, "ldumina_64_memop"},
-      {"110xx10"_b, "prfm_p_ldst_regoff"},
-      {"1110000"_b, "ldsmaxal_64_memop"},
-      {"1110100"_b, "ldsminal_64_memop"},
-      {"1111000"_b, "ldumaxal_64_memop"},
-      {"1111100"_b, "lduminal_64_memop"},
-      {"11xxx01"_b, "ldrab_64_ldst_pac"},
-      {"11xxx11"_b, "ldrab_64w_ldst_pac"},
+  { "_ptstkz",
+    {4},
+    { {"0"_b, "ccmp_32_condcmp_imm"},
     },
   },
 
-  { "_pvrylp",
-    {13, 12},
-    { {"00"_b, "sbc_64_addsub_carry"},
+  { "_ptyynt",
+    {13, 12, 11, 10},
+    { {"1111"_b, "_stmtkr"},
     },
   },
 
-  { "_pxgztg",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "bic_asimdimm_l_sl"},
-      {"00x100"_b, "sli_asimdshf_r"},
-      {"00x110"_b, "uqshl_asimdshf_r"},
-      {"010x00"_b, "sli_asimdshf_r"},
-      {"010x10"_b, "uqshl_asimdshf_r"},
-      {"011100"_b, "sli_asimdshf_r"},
-      {"011110"_b, "uqshl_asimdshf_r"},
-      {"0x1000"_b, "sli_asimdshf_r"},
-      {"0x1010"_b, "uqshl_asimdshf_r"},
+  { "_pvtyjz",
+    {30},
+    { {"0"_b, "ldapur_32_ldapstl_unscaled"},
+      {"1"_b, "ldapur_64_ldapstl_unscaled"},
     },
   },
 
-  { "_pxkqxn",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "cmle_asisdmisc_z"},
+  { "_pxnyvl",
+    {23, 13, 12},
+    { {"001"_b, "fmulx_asisdsame_only"},
+      {"011"_b, "frecps_asisdsame_only"},
+      {"111"_b, "frsqrts_asisdsame_only"},
     },
   },
 
-  { "_pxlnhs",
-    {23, 20, 19, 18, 17, 16},
-    { {"000001"_b, "fcvtxn_asimdmisc_n"},
-      {"x00000"_b, "uadalp_asimdmisc_p"},
-    },
-  },
-
-  { "_pxnnrz",
-    {20, 19, 18, 17, 16, 13, 12, 3, 2, 1, 0},
-    { {"00000001101"_b, "setf16_only_setf"},
-    },
-  },
-
-  { "_pxtsvn",
-    {20, 19, 18, 17, 16},
-    { {"10000"_b, "fminp_asisdpair_only_sd"},
+  { "_pxvjkp",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_rmkpsk"},
     },
   },
 
@@ -4251,9 +4633,18 @@
     },
   },
 
-  { "_pxzkjy",
-    {30},
-    { {"1"_b, "_yplktv"},
+  { "_pxzvjl",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xxxx"_b, "fnmadd_s_floatdp3"},
+      {"001xxxx"_b, "fnmadd_d_floatdp3"},
+      {"011xxxx"_b, "fnmadd_h_floatdp3"},
+      {"10001x0"_b, "fmla_asisdelem_rh_h"},
+      {"10x0001"_b, "sshr_asisdshf_r"},
+      {"10x0101"_b, "ssra_asisdshf_r"},
+      {"10x1001"_b, "srshr_asisdshf_r"},
+      {"10x1101"_b, "srsra_asisdshf_r"},
+      {"11x01x0"_b, "fmla_asisdelem_r_sd"},
+      {"1xx11x0"_b, "sqdmlal_asisdelem_l"},
     },
   },
 
@@ -4263,28 +4654,137 @@
     },
   },
 
-  { "_qghmks",
-    {13, 12},
-    { {"00"_b, "subp_64s_dp_2src"},
-      {"01"_b, "irg_64i_dp_2src"},
-      {"10"_b, "lslv_64_dp_2src"},
-      {"11"_b, "pacga_64p_dp_2src"},
+  { "_pyhrrt",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"10001x0"_b, "fmulx_asisdelem_rh_h"},
+      {"10x0001"_b, "sqshrun_asisdshf_n"},
+      {"10x0011"_b, "sqrshrun_asisdshf_n"},
+      {"10x0101"_b, "uqshrn_asisdshf_n"},
+      {"10x0111"_b, "uqrshrn_asisdshf_n"},
+      {"11x01x0"_b, "fmulx_asisdelem_r_sd"},
     },
   },
 
-  { "_qgmngg",
-    {30, 23},
-    { {"00"_b, "orr_64_log_imm"},
-      {"10"_b, "ands_64s_log_imm"},
-      {"11"_b, "movk_64_movewide"},
+  { "_pyjnpz",
+    {30, 13},
+    { {"00"_b, "_xpqglq"},
+      {"10"_b, "_ryrkqt"},
+      {"11"_b, "_zjzmvh"},
     },
   },
 
-  { "_qgryzh",
-    {18, 17},
-    { {"0x"_b, "st1_asisdlsep_r3_r3"},
-      {"10"_b, "st1_asisdlsep_r3_r3"},
-      {"11"_b, "st1_asisdlsep_i3_i3"},
+  { "_pyjtyn",
+    {22, 20, 19, 18, 17, 16},
+    { {"111001"_b, "fcvtau_asisdmiscfp16_r"},
+      {"x00001"_b, "fcvtau_asisdmisc_r"},
+      {"x10000"_b, "fmaxnmp_asisdpair_only_sd"},
+    },
+  },
+
+  { "_pyttkp",
+    {30, 13, 12, 11, 10},
+    { {"10001"_b, "sqrdmlah_asisdsame2_only"},
+      {"10011"_b, "sqrdmlsh_asisdsame2_only"},
+    },
+  },
+
+  { "_pyvvqx",
+    {10},
+    { {"0"_b, "_rkrntt"},
+    },
+  },
+
+  { "_pzpxxv",
+    {23, 22, 20, 19, 11},
+    { {"00011"_b, "fcvtzs_asisdshf_c"},
+      {"001x1"_b, "fcvtzs_asisdshf_c"},
+      {"01xx1"_b, "fcvtzs_asisdshf_c"},
+    },
+  },
+
+  { "_pzttrn",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fmov_d_floatdp1"},
+      {"000010"_b, "fneg_d_floatdp1"},
+      {"000100"_b, "fcvt_sd_floatdp1"},
+      {"000110"_b, "bfcvt_bs_floatdp1"},
+      {"001000"_b, "frintn_d_floatdp1"},
+      {"001010"_b, "frintm_d_floatdp1"},
+      {"001100"_b, "frinta_d_floatdp1"},
+      {"001110"_b, "frintx_d_floatdp1"},
+      {"010000"_b, "frint32z_d_floatdp1"},
+      {"010010"_b, "frint64z_d_floatdp1"},
+    },
+  },
+
+  { "_pzzgts",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_s_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_s_ldapstl_simd"},
+      {"100xx10"_b, "stlur_d_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_d_ldapstl_simd"},
+      {"x000001"_b, "cpyp_cpy_memcms"},
+      {"x000101"_b, "cpypwt_cpy_memcms"},
+      {"x001001"_b, "cpyprt_cpy_memcms"},
+      {"x001101"_b, "cpypt_cpy_memcms"},
+      {"x010001"_b, "cpym_cpy_memcms"},
+      {"x010101"_b, "cpymwt_cpy_memcms"},
+      {"x011001"_b, "cpymrt_cpy_memcms"},
+      {"x011101"_b, "cpymt_cpy_memcms"},
+      {"x100001"_b, "cpye_cpy_memcms"},
+      {"x100101"_b, "cpyewt_cpy_memcms"},
+      {"x101001"_b, "cpyert_cpy_memcms"},
+      {"x101101"_b, "cpyet_cpy_memcms"},
+      {"x110001"_b, "setgp_set_memcms"},
+      {"x110101"_b, "setgpt_set_memcms"},
+      {"x111001"_b, "setgpn_set_memcms"},
+      {"x111101"_b, "setgptn_set_memcms"},
+    },
+  },
+
+  { "_qgqgkx",
+    {30, 23, 22},
+    { {"000"_b, "adds_32s_addsub_ext"},
+      {"100"_b, "subs_32s_addsub_ext"},
+    },
+  },
+
+  { "_qgshrr",
+    {30, 22, 20, 19, 18, 17, 16},
+    { {"00xxxxx"_b, "stlxp_sp32_ldstexcl"},
+      {"0111111"_b, "ldaxp_lp32_ldstexcl"},
+      {"10xxxxx"_b, "stlxp_sp64_ldstexcl"},
+      {"1111111"_b, "ldaxp_lp64_ldstexcl"},
+    },
+  },
+
+  { "_qgsrqq",
+    {23, 22},
+    { {"00"_b, "fmadd_s_floatdp3"},
+      {"01"_b, "fmadd_d_floatdp3"},
+      {"11"_b, "fmadd_h_floatdp3"},
+    },
+  },
+
+  { "_qgvrqy",
+    {1},
+    { {"1"_b, "blraaz_64_branch_reg"},
+    },
+  },
+
+  { "_qgvtrn",
+    {23, 22, 20, 19, 13, 11, 10},
+    { {"00x1001"_b, "sqshrn_asisdshf_n"},
+      {"00x1011"_b, "sqrshrn_asisdshf_n"},
+      {"00xx0x0"_b, "fmul_asisdelem_rh_h"},
+      {"010x001"_b, "sqshrn_asisdshf_n"},
+      {"010x011"_b, "sqrshrn_asisdshf_n"},
+      {"0111001"_b, "sqshrn_asisdshf_n"},
+      {"0111011"_b, "sqrshrn_asisdshf_n"},
+      {"0x10001"_b, "sqshrn_asisdshf_n"},
+      {"0x10011"_b, "sqrshrn_asisdshf_n"},
+      {"1xxx0x0"_b, "fmul_asisdelem_r_sd"},
+      {"xxxx1x0"_b, "sqdmull_asisdelem_l"},
     },
   },
 
@@ -4294,88 +4794,96 @@
     },
   },
 
-  { "_qhgtvk",
-    {30, 23, 22},
-    { {"00x"_b, "adds_32_addsub_shift"},
-      {"010"_b, "adds_32_addsub_shift"},
-      {"10x"_b, "subs_32_addsub_shift"},
-      {"110"_b, "subs_32_addsub_shift"},
+  { "_qgyppr",
+    {23, 13, 12, 11, 10},
+    { {"00010"_b, "_pyjtyn"},
+      {"00110"_b, "_nhrkqm"},
+      {"01001"_b, "fcmge_asisdsame_only"},
+      {"01011"_b, "facge_asisdsame_only"},
+      {"01110"_b, "_kxmjsh"},
+      {"10010"_b, "_rpjgkh"},
+      {"10101"_b, "fabd_asisdsame_only"},
+      {"10110"_b, "_hmpzzg"},
+      {"11001"_b, "fcmgt_asisdsame_only"},
+      {"11011"_b, "facgt_asisdsame_only"},
+      {"11110"_b, "_sxsxxt"},
     },
   },
 
-  { "_qhsplz",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "frintn_asimdmiscfp16_r"},
-      {"0x00001"_b, "frintn_asimdmisc_r"},
-      {"1111001"_b, "frintp_asimdmiscfp16_r"},
-      {"1x00001"_b, "frintp_asimdmisc_r"},
-      {"xx00000"_b, "cmgt_asimdmisc_z"},
+  { "_qhpkhm",
+    {18, 17},
+    { {"00"_b, "ld1_asisdlse_r3_3v"},
     },
   },
 
-  { "_qhtqrj",
-    {30, 23, 22},
-    { {"000"_b, "stnp_s_ldstnapair_offs"},
-      {"001"_b, "ldnp_s_ldstnapair_offs"},
-      {"010"_b, "stp_s_ldstpair_post"},
-      {"011"_b, "ldp_s_ldstpair_post"},
-      {"100"_b, "stnp_d_ldstnapair_offs"},
-      {"101"_b, "ldnp_d_ldstnapair_offs"},
-      {"110"_b, "stp_d_ldstpair_post"},
-      {"111"_b, "ldp_d_ldstpair_post"},
-    },
-  },
-
-  { "_qhtrnn",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "stur_32_ldst_unscaled"},
-      {"00001"_b, "str_32_ldst_immpost"},
-      {"00010"_b, "sttr_32_ldst_unpriv"},
-      {"00011"_b, "str_32_ldst_immpre"},
-      {"00100"_b, "ldur_32_ldst_unscaled"},
-      {"00101"_b, "ldr_32_ldst_immpost"},
-      {"00110"_b, "ldtr_32_ldst_unpriv"},
-      {"00111"_b, "ldr_32_ldst_immpre"},
-      {"01000"_b, "ldursw_64_ldst_unscaled"},
-      {"01001"_b, "ldrsw_64_ldst_immpost"},
-      {"01010"_b, "ldtrsw_64_ldst_unpriv"},
-      {"01011"_b, "ldrsw_64_ldst_immpre"},
-      {"10000"_b, "stur_64_ldst_unscaled"},
-      {"10001"_b, "str_64_ldst_immpost"},
-      {"10010"_b, "sttr_64_ldst_unpriv"},
-      {"10011"_b, "str_64_ldst_immpre"},
-      {"10100"_b, "ldur_64_ldst_unscaled"},
-      {"10101"_b, "ldr_64_ldst_immpost"},
-      {"10110"_b, "ldtr_64_ldst_unpriv"},
-      {"10111"_b, "ldr_64_ldst_immpre"},
-      {"11000"_b, "prfum_p_ldst_unscaled"},
-    },
-  },
-
-  { "_qhxzxl",
-    {17},
-    { {"0"_b, "ld1_asisdlse_r2_2v"},
-    },
-  },
-
-  { "_qjyvln",
-    {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
-    { {"000010011111"_b, "xpaci_64z_dp_1src"},
-    },
-  },
-
-  { "_qkyjhg",
+  { "_qhzvvh",
     {30},
-    { {"0"_b, "ldr_32_loadlit"},
-      {"1"_b, "ldr_64_loadlit"},
+    { {"0"_b, "bl_only_branch_imm"},
     },
   },
 
-  { "_qkzlkj",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "sshr_asisdshf_r"},
-      {"001x0"_b, "sshr_asisdshf_r"},
-      {"01xx0"_b, "sshr_asisdshf_r"},
+  { "_qjqrgz",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldarh_lr32_ldstexcl"},
+    },
+  },
+
+  { "_qjrllr",
+    {23, 22, 12},
+    { {"000"_b, "_pqsvty"},
+      {"001"_b, "_rjrqxt"},
+      {"010"_b, "_rnsmjq"},
+      {"011"_b, "_msvhjv"},
+      {"110"_b, "_rnlxtv"},
+      {"111"_b, "_jjgpxz"},
+    },
+  },
+
+  { "_qjstll",
+    {18, 17},
+    { {"0x"_b, "ld3_asisdlsop_sx3_r3s"},
+      {"10"_b, "ld3_asisdlsop_sx3_r3s"},
+      {"11"_b, "ld3_asisdlsop_s3_i3s"},
+    },
+  },
+
+  { "_qkhrkh",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "cmle_asisdmisc_z"},
+    },
+  },
+
+  { "_qkrnms",
+    {30},
+    { {"0"_b, "orr_32_log_shift"},
+      {"1"_b, "ands_32_log_shift"},
+    },
+  },
+
+  { "_qkxmvp",
+    {13, 12},
+    { {"10"_b, "smin_64_dp_2src"},
+    },
+  },
+
+  { "_qkzjxm",
+    {30, 23, 22, 20, 13},
+    { {"00001"_b, "ldnt1w_z_p_bi_contiguous"},
+      {"000x0"_b, "ldnt1w_z_p_br_contiguous"},
+      {"00101"_b, "ld3w_z_p_bi_contiguous"},
+      {"001x0"_b, "ld3w_z_p_br_contiguous"},
+      {"01001"_b, "ldnt1d_z_p_bi_contiguous"},
+      {"010x0"_b, "ldnt1d_z_p_br_contiguous"},
+      {"01101"_b, "ld3d_z_p_bi_contiguous"},
+      {"011x0"_b, "ld3d_z_p_br_contiguous"},
+      {"10011"_b, "stnt1w_z_p_bi_contiguous"},
+      {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"},
+      {"10101"_b, "st1w_z_p_bi"},
+      {"10111"_b, "st3w_z_p_bi_contiguous"},
+      {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"},
+      {"11011"_b, "stnt1d_z_p_bi_contiguous"},
+      {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"},
+      {"11111"_b, "st3d_z_p_bi_contiguous"},
     },
   },
 
@@ -4386,10 +4894,17 @@
     },
   },
 
-  { "_qlqhzg",
-    {20},
-    { {"0"_b, "_hzmlps"},
-      {"1"_b, "msr_sr_systemmove"},
+  { "_qlmqyx",
+    {18, 17, 12},
+    { {"0x0"_b, "ld3_asisdlsop_dx3_r3d"},
+      {"100"_b, "ld3_asisdlsop_dx3_r3d"},
+      {"110"_b, "ld3_asisdlsop_d3_i3d"},
+    },
+  },
+
+  { "_qlpnnn",
+    {23, 10, 4},
+    { {"000"_b, "_vryrnh"},
     },
   },
 
@@ -4400,37 +4915,30 @@
     },
   },
 
-  { "_qmgtyq",
-    {17},
-    { {"0"_b, "ld2_asisdlse_r2"},
+  { "_qlxlxk",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldar_lr32_ldstexcl"},
     },
   },
 
-  { "_qmjqhq",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_h_floatimm"},
-    },
-  },
-
-  { "_qmqmpj",
-    {12, 10},
-    { {"00"_b, "_nxqygl"},
-      {"01"_b, "_skglrt"},
-      {"10"_b, "_sjlpxn"},
-      {"11"_b, "_qzxvsk"},
-    },
-  },
-
-  { "_qmrgkn",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_hsvgnt"},
-    },
-  },
-
-  { "_qmzqsy",
-    {20, 19, 18, 17},
-    { {"0000"_b, "_nykvly"},
+  { "_qlzvpg",
+    {13, 12, 11, 10},
+    { {"0000"_b, "raddhn_asimddiff_n"},
+      {"0001"_b, "ushl_asimdsame_only"},
+      {"0010"_b, "_kpnlmr"},
+      {"0011"_b, "uqshl_asimdsame_only"},
+      {"0100"_b, "uabal_asimddiff_l"},
+      {"0101"_b, "urshl_asimdsame_only"},
+      {"0110"_b, "_ssqyrk"},
+      {"0111"_b, "uqrshl_asimdsame_only"},
+      {"1000"_b, "rsubhn_asimddiff_n"},
+      {"1001"_b, "umax_asimdsame_only"},
+      {"1010"_b, "_sjlqvg"},
+      {"1011"_b, "umin_asimdsame_only"},
+      {"1100"_b, "uabdl_asimddiff_l"},
+      {"1101"_b, "uabd_asimdsame_only"},
+      {"1110"_b, "_gplkxy"},
+      {"1111"_b, "uaba_asimdsame_only"},
     },
   },
 
@@ -4441,61 +4949,38 @@
     },
   },
 
-  { "_qnsxkj",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fabs_d_floatdp1"},
-      {"000010"_b, "fsqrt_d_floatdp1"},
-      {"000110"_b, "fcvt_hd_floatdp1"},
-      {"001000"_b, "frintp_d_floatdp1"},
-      {"001010"_b, "frintz_d_floatdp1"},
-      {"001110"_b, "frinti_d_floatdp1"},
-      {"010000"_b, "frint32x_d_floatdp1"},
-      {"010010"_b, "frint64x_d_floatdp1"},
+  { "_qntrvk",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xxxx"_b, "fnmsub_s_floatdp3"},
+      {"001xxxx"_b, "fnmsub_d_floatdp3"},
+      {"011xxxx"_b, "fnmsub_h_floatdp3"},
+      {"10001x0"_b, "fmul_asisdelem_rh_h"},
+      {"10x0101"_b, "sqshrn_asisdshf_n"},
+      {"10x0111"_b, "sqrshrn_asisdshf_n"},
+      {"11x01x0"_b, "fmul_asisdelem_r_sd"},
+      {"1xx11x0"_b, "sqdmull_asisdelem_l"},
     },
   },
 
-  { "_qntssm",
-    {30, 11, 10},
-    { {"000"_b, "_hxrtsq"},
-      {"001"_b, "_ygxhyg"},
-      {"010"_b, "_nhhpqz"},
-      {"011"_b, "_vjymzn"},
-      {"101"_b, "_gszxkp"},
-      {"110"_b, "_nssrnm"},
-      {"111"_b, "_jrsptt"},
+  { "_qnysqv",
+    {30},
+    { {"0"_b, "cbnz_64_compbranch"},
     },
   },
 
-  { "_qntygx",
-    {13, 12, 11, 10},
-    { {"0000"_b, "uaddl_asimddiff_l"},
-      {"0001"_b, "uhadd_asimdsame_only"},
-      {"0010"_b, "_nzqkky"},
-      {"0011"_b, "uqadd_asimdsame_only"},
-      {"0100"_b, "uaddw_asimddiff_w"},
-      {"0101"_b, "urhadd_asimdsame_only"},
-      {"0111"_b, "_nthvqx"},
-      {"1000"_b, "usubl_asimddiff_l"},
-      {"1001"_b, "uhsub_asimdsame_only"},
-      {"1010"_b, "_srmhlk"},
-      {"1011"_b, "uqsub_asimdsame_only"},
-      {"1100"_b, "usubw_asimddiff_w"},
-      {"1101"_b, "cmhi_asimdsame_only"},
-      {"1110"_b, "_mvgsjr"},
-      {"1111"_b, "cmhs_asimdsame_only"},
+  { "_qpgxxr",
+    {23, 22},
+    { {"01"_b, "fadd_asimdsamefp16_only"},
+      {"11"_b, "fsub_asimdsamefp16_only"},
     },
   },
 
-  { "_qnvgmh",
-    {23},
-    { {"0"_b, "fmul_asimdsame_only"},
-    },
-  },
-
-  { "_qptvrm",
-    {23},
-    { {"0"_b, "fmaxnmp_asimdsame_only"},
-      {"1"_b, "fminnmp_asimdsame_only"},
+  { "_qpsryx",
+    {30, 23, 22, 11, 10},
+    { {"01000"_b, "csel_64_condsel"},
+      {"01001"_b, "csinc_64_condsel"},
+      {"11000"_b, "csinv_64_condsel"},
+      {"11001"_b, "csneg_64_condsel"},
     },
   },
 
@@ -4520,36 +5005,40 @@
     },
   },
 
-  { "_qpzynz",
-    {23, 22},
-    { {"00"_b, "_jkpsxk"},
+  { "_qpyxsv",
+    {18},
+    { {"0"_b, "ld4_asisdlso_h4_4h"},
     },
   },
 
-  { "_qqpkkm",
-    {9, 8, 7, 6, 5, 1, 0},
-    { {"1111111"_b, "eretaa_64e_branch_reg"},
+  { "_qqjtpm",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "autdza_64z_dp_1src"},
     },
   },
 
-  { "_qqpqnm",
-    {18, 17},
-    { {"0x"_b, "st1_asisdlsop_sx1_r1s"},
-      {"10"_b, "st1_asisdlsop_sx1_r1s"},
-      {"11"_b, "st1_asisdlsop_s1_i1s"},
+  { "_qqslmv",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "fcvtns_32s_float2int"},
+      {"00001"_b, "fcvtnu_32s_float2int"},
+      {"00010"_b, "scvtf_s32_float2int"},
+      {"00011"_b, "ucvtf_s32_float2int"},
+      {"00100"_b, "fcvtas_32s_float2int"},
+      {"00101"_b, "fcvtau_32s_float2int"},
+      {"00110"_b, "fmov_32s_float2int"},
+      {"00111"_b, "fmov_s32_float2int"},
+      {"01000"_b, "fcvtps_32s_float2int"},
+      {"01001"_b, "fcvtpu_32s_float2int"},
+      {"10000"_b, "fcvtms_32s_float2int"},
+      {"10001"_b, "fcvtmu_32s_float2int"},
+      {"11000"_b, "fcvtzs_32s_float2int"},
+      {"11001"_b, "fcvtzu_32s_float2int"},
     },
   },
 
-  { "_qqsmlt",
-    {4},
-    { {"0"_b, "ccmp_32_condcmp_imm"},
-    },
-  },
-
-  { "_qqtpln",
-    {17},
-    { {"0"_b, "ld1_asisdlsop_bx1_r1b"},
-      {"1"_b, "ld1_asisdlsop_b1_i1b"},
+  { "_qqvgql",
+    {4, 3, 2, 1, 0},
+    { {"11111"_b, "_gtsglj"},
     },
   },
 
@@ -4567,15 +5056,66 @@
     },
   },
 
-  { "_qqzrhz",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtau_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtau_asimdmisc_r"},
-      {"0x10000"_b, "fmaxnmv_asimdall_only_sd"},
-      {"1111000"_b, "fcmge_asimdmiscfp16_fz"},
-      {"1x00000"_b, "fcmge_asimdmisc_fz"},
-      {"1x00001"_b, "ursqrte_asimdmisc_r"},
-      {"1x10000"_b, "fminnmv_asimdall_only_sd"},
+  { "_qrsxzp",
+    {23, 22, 20, 19, 16, 13, 10},
+    { {"0000000"_b, "_tjnzjl"},
+      {"0000001"_b, "_nklyky"},
+      {"0000010"_b, "_pjskhr"},
+      {"0000011"_b, "_kqstrr"},
+      {"0000101"_b, "_nvzsxn"},
+      {"0100000"_b, "_jnktqs"},
+      {"0100001"_b, "_ttzlqn"},
+      {"0100010"_b, "_sxgnmg"},
+      {"0100011"_b, "_yqzxvr"},
+      {"0100101"_b, "_tvtvkt"},
+      {"100xx00"_b, "st1_asisdlsop_sx1_r1s"},
+      {"100xx01"_b, "_mnzgkx"},
+      {"100xx10"_b, "st3_asisdlsop_sx3_r3s"},
+      {"100xx11"_b, "_tjxyky"},
+      {"1010x00"_b, "st1_asisdlsop_sx1_r1s"},
+      {"1010x01"_b, "_mphkpq"},
+      {"1010x10"_b, "st3_asisdlsop_sx3_r3s"},
+      {"1010x11"_b, "_hqkhsy"},
+      {"1011000"_b, "st1_asisdlsop_sx1_r1s"},
+      {"1011001"_b, "_qsszkx"},
+      {"1011010"_b, "st3_asisdlsop_sx3_r3s"},
+      {"1011011"_b, "_gsjvmx"},
+      {"1011100"_b, "_gqmjys"},
+      {"1011101"_b, "_qtqrmn"},
+      {"1011110"_b, "_mjrqhl"},
+      {"1011111"_b, "_ngtlpz"},
+      {"110xx00"_b, "ld1_asisdlsop_sx1_r1s"},
+      {"110xx01"_b, "_hkjjsr"},
+      {"110xx10"_b, "ld3_asisdlsop_sx3_r3s"},
+      {"110xx11"_b, "_yryygq"},
+      {"1110x00"_b, "ld1_asisdlsop_sx1_r1s"},
+      {"1110x01"_b, "_tptqjs"},
+      {"1110x10"_b, "ld3_asisdlsop_sx3_r3s"},
+      {"1110x11"_b, "_szmyzt"},
+      {"1111000"_b, "ld1_asisdlsop_sx1_r1s"},
+      {"1111001"_b, "_zxklzp"},
+      {"1111010"_b, "ld3_asisdlsop_sx3_r3s"},
+      {"1111011"_b, "_qzxgqh"},
+      {"1111100"_b, "_yzgthp"},
+      {"1111101"_b, "_rgnryt"},
+      {"1111110"_b, "_qjstll"},
+      {"1111111"_b, "_qlmqyx"},
+    },
+  },
+
+  { "_qrtjvn",
+    {30, 23, 22, 20, 19, 12, 11},
+    { {"0000000"_b, "movi_asimdimm_d_ds"},
+      {"1000000"_b, "movi_asimdimm_d2_d"},
+      {"1000010"_b, "fmov_asimdimm_d2_d"},
+      {"x00x100"_b, "ucvtf_asimdshf_c"},
+      {"x00x111"_b, "fcvtzu_asimdshf_c"},
+      {"x010x00"_b, "ucvtf_asimdshf_c"},
+      {"x010x11"_b, "fcvtzu_asimdshf_c"},
+      {"x011100"_b, "ucvtf_asimdshf_c"},
+      {"x011111"_b, "fcvtzu_asimdshf_c"},
+      {"x0x1000"_b, "ucvtf_asimdshf_c"},
+      {"x0x1011"_b, "fcvtzu_asimdshf_c"},
     },
   },
 
@@ -4600,107 +5140,52 @@
     },
   },
 
-  { "_qrykhm",
+  { "_qsszkx",
     {12},
-    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
     },
   },
 
-  { "_qsnqpz",
-    {18, 17},
-    { {"0x"_b, "ld4_asisdlsop_sx4_r4s"},
-      {"10"_b, "ld4_asisdlsop_sx4_r4s"},
-      {"11"_b, "ld4_asisdlsop_s4_i4s"},
+  { "_qtghgs",
+    {22},
+    { {"0"_b, "ldrsw_64_ldst_regoff"},
     },
   },
 
-  { "_qsqqxg",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"1010000"_b, "sha512h_qqv_cryptosha512_3"},
-      {"1010001"_b, "sha512h2_qqv_cryptosha512_3"},
-      {"1010010"_b, "sha512su1_vvv2_cryptosha512_3"},
-      {"1010011"_b, "rax1_vvv2_cryptosha512_3"},
+  { "_qtgrzv",
+    {20, 18, 17},
+    { {"000"_b, "_gznrjv"},
     },
   },
 
-  { "_qsrlql",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"010xx00"_b, "csel_32_condsel"},
-      {"010xx01"_b, "csinc_32_condsel"},
-      {"0110000"_b, "crc32b_32c_dp_2src"},
-      {"0110001"_b, "crc32h_32c_dp_2src"},
-      {"0110010"_b, "crc32w_32c_dp_2src"},
-      {"0110100"_b, "crc32cb_32c_dp_2src"},
-      {"0110101"_b, "crc32ch_32c_dp_2src"},
-      {"0110110"_b, "crc32cw_32c_dp_2src"},
-      {"110xx00"_b, "csinv_32_condsel"},
-      {"110xx01"_b, "csneg_32_condsel"},
+  { "_qtgvlx",
+    {23, 22, 20, 19, 17, 16},
+    { {"000010"_b, "scvtf_s64_float2fix"},
+      {"000011"_b, "ucvtf_s64_float2fix"},
+      {"001100"_b, "fcvtzs_64s_float2fix"},
+      {"001101"_b, "fcvtzu_64s_float2fix"},
+      {"010010"_b, "scvtf_d64_float2fix"},
+      {"010011"_b, "ucvtf_d64_float2fix"},
+      {"011100"_b, "fcvtzs_64d_float2fix"},
+      {"011101"_b, "fcvtzu_64d_float2fix"},
+      {"110010"_b, "scvtf_h64_float2fix"},
+      {"110011"_b, "ucvtf_h64_float2fix"},
+      {"111100"_b, "fcvtzs_64h_float2fix"},
+      {"111101"_b, "fcvtzu_64h_float2fix"},
     },
   },
 
-  { "_qsrtzz",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_lvshqt"},
+  { "_qtqrmn",
+    {18, 17, 12},
+    { {"0x0"_b, "st1_asisdlsop_dx1_r1d"},
+      {"100"_b, "st1_asisdlsop_dx1_r1d"},
+      {"110"_b, "st1_asisdlsop_d1_i1d"},
     },
   },
 
-  { "_qssyls",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "stzgm_64bulk_ldsttags"},
-    },
-  },
-
-  { "_qsxpyq",
-    {20, 19, 18, 17, 16, 13, 12, 4, 3, 2, 1, 0},
-    { {"000000001101"_b, "setf8_only_setf"},
-    },
-  },
-
-  { "_qsygjs",
-    {30, 23, 22, 12, 11, 10},
-    { {"0000xx"_b, "add_32_addsub_ext"},
-      {"000100"_b, "add_32_addsub_ext"},
-      {"1000xx"_b, "sub_32_addsub_ext"},
-      {"100100"_b, "sub_32_addsub_ext"},
-    },
-  },
-
-  { "_qtgvhn",
-    {17},
-    { {"0"_b, "ld4_asisdlsop_bx4_r4b"},
-      {"1"_b, "ld4_asisdlsop_b4_i4b"},
-    },
-  },
-
-  { "_qtjzhs",
-    {17},
-    { {"0"_b, "ld1_asisdlse_r4_4v"},
-    },
-  },
-
-  { "_qtknlp",
-    {30, 11, 10},
-    { {"000"_b, "_skpjrp"},
-      {"001"_b, "_sjnqvx"},
-      {"011"_b, "_rgnxpp"},
-      {"100"_b, "_rtlzxv"},
-      {"101"_b, "_zvlxrl"},
-      {"110"_b, "_ynnrny"},
-      {"111"_b, "_nlkkyx"},
-    },
-  },
-
-  { "_qtkpxg",
-    {20},
-    { {"0"_b, "_srggzy"},
-      {"1"_b, "mrs_rs_systemmove"},
-    },
-  },
-
-  { "_qtmjkr",
-    {23},
-    { {"0"_b, "fdiv_asimdsame_only"},
+  { "_qtxlsr",
+    {13, 12, 11, 10},
+    { {"1111"_b, "cas_c64_ldstexcl"},
     },
   },
 
@@ -4711,20 +5196,24 @@
     },
   },
 
-  { "_qtxypt",
-    {9, 8, 7, 6, 5, 1, 0},
-    { {"1111111"_b, "retab_64e_branch_reg"},
+  { "_qvgtlh",
+    {30, 23, 22, 11},
+    { {"0001"_b, "strb_32b_ldst_regoff"},
+      {"0011"_b, "ldrb_32b_ldst_regoff"},
+      {"0100"_b, "_hjplhs"},
+      {"0101"_b, "ldrsb_64b_ldst_regoff"},
+      {"0111"_b, "ldrsb_32b_ldst_regoff"},
+      {"1001"_b, "strh_32_ldst_regoff"},
+      {"1011"_b, "ldrh_32_ldst_regoff"},
+      {"1100"_b, "_vrzksz"},
+      {"1101"_b, "ldrsh_64_ldst_regoff"},
+      {"1111"_b, "ldrsh_32_ldst_regoff"},
     },
   },
 
-  { "_qtystr",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "scvtf_asimdmiscfp16_r"},
-      {"0x00001"_b, "scvtf_asimdmisc_r"},
-      {"1111000"_b, "fcmeq_asimdmiscfp16_fz"},
-      {"1111001"_b, "frecpe_asimdmiscfp16_r"},
-      {"1x00000"_b, "fcmeq_asimdmisc_fz"},
-      {"1x00001"_b, "frecpe_asimdmisc_r"},
+  { "_qvjmmq",
+    {30},
+    { {"0"_b, "b_only_branch_imm"},
     },
   },
 
@@ -4735,68 +5224,22 @@
     },
   },
 
-  { "_qvlytr",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0x00001"_b, "frint64x_asimdmisc_r"},
-      {"0x10000"_b, "fmaxv_asimdall_only_sd"},
-      {"1111000"_b, "fneg_asimdmiscfp16_r"},
-      {"1111001"_b, "fsqrt_asimdmiscfp16_r"},
-      {"1x00000"_b, "fneg_asimdmisc_r"},
-      {"1x00001"_b, "fsqrt_asimdmisc_r"},
-      {"1x10000"_b, "fminv_asimdall_only_sd"},
-    },
-  },
-
-  { "_qvsypn",
-    {30, 23, 22, 20, 13},
-    { {"00001"_b, "ldnt1w_z_p_bi_contiguous"},
-      {"000x0"_b, "ldnt1w_z_p_br_contiguous"},
-      {"00101"_b, "ld3w_z_p_bi_contiguous"},
-      {"001x0"_b, "ld3w_z_p_br_contiguous"},
-      {"01001"_b, "ldnt1d_z_p_bi_contiguous"},
-      {"010x0"_b, "ldnt1d_z_p_br_contiguous"},
-      {"01101"_b, "ld3d_z_p_bi_contiguous"},
-      {"011x0"_b, "ld3d_z_p_br_contiguous"},
-      {"10011"_b, "stnt1w_z_p_bi_contiguous"},
-      {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"},
-      {"10111"_b, "st3w_z_p_bi_contiguous"},
-      {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"},
-      {"10x01"_b, "st1w_z_p_bi"},
-      {"11011"_b, "stnt1d_z_p_bi_contiguous"},
-      {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"},
-      {"11111"_b, "st3d_z_p_bi_contiguous"},
-      {"11x01"_b, "st1d_z_p_bi"},
-    },
-  },
-
   { "_qvtxpr",
     {20, 9, 4},
     { {"000"_b, "uzp1_p_pp"},
     },
   },
 
-  { "_qxrzgv",
-    {17},
-    { {"0"_b, "ld1_asisdlsep_r2_r2"},
-      {"1"_b, "ld1_asisdlsep_i2_i2"},
-    },
-  },
-
-  { "_qxtvzy",
-    {13, 12, 11, 10},
-    { {"0000"_b, "umlal_asimddiff_l"},
-      {"0001"_b, "sub_asimdsame_only"},
-      {"0010"_b, "_gznnvh"},
-      {"0011"_b, "cmeq_asimdsame_only"},
-      {"0101"_b, "mls_asimdsame_only"},
-      {"0110"_b, "_vsqlkr"},
-      {"0111"_b, "pmul_asimdsame_only"},
-      {"1000"_b, "umlsl_asimddiff_l"},
-      {"1001"_b, "umaxp_asimdsame_only"},
-      {"1010"_b, "_gggyqx"},
-      {"1011"_b, "uminp_asimdsame_only"},
-      {"1101"_b, "sqrdmulh_asimdsame_only"},
-      {"1110"_b, "_slnkst"},
+  { "_qvzvmq",
+    {30, 23, 22},
+    { {"000"_b, "stlxrb_sr32_ldstexcl"},
+      {"001"_b, "_ynznxv"},
+      {"010"_b, "_lqlrxp"},
+      {"011"_b, "_grprpj"},
+      {"100"_b, "stlxrh_sr32_ldstexcl"},
+      {"101"_b, "_jgsryt"},
+      {"110"_b, "_qyrqxp"},
+      {"111"_b, "_qjqrgz"},
     },
   },
 
@@ -4806,88 +5249,76 @@
     },
   },
 
-  { "_qytrjj",
-    {30, 23, 22},
-    { {"100"_b, "bcax_vvv16_crypto4"},
-    },
-  },
-
-  { "_qzjnpr",
-    {30, 23, 22, 20, 19, 18, 17, 16},
-    { {"00000000"_b, "udf_only_perm_undef"},
-    },
-  },
-
-  { "_qzrjss",
-    {18, 17, 12},
-    { {"0x0"_b, "st3_asisdlsop_dx3_r3d"},
-      {"100"_b, "st3_asisdlsop_dx3_r3d"},
-      {"110"_b, "st3_asisdlsop_d3_i3d"},
-    },
-  },
-
-  { "_qzsthq",
-    {30, 23, 22},
-    { {"000"_b, "strb_32_ldst_pos"},
-      {"001"_b, "ldrb_32_ldst_pos"},
-      {"010"_b, "ldrsb_64_ldst_pos"},
-      {"011"_b, "ldrsb_32_ldst_pos"},
-      {"100"_b, "strh_32_ldst_pos"},
-      {"101"_b, "ldrh_32_ldst_pos"},
-      {"110"_b, "ldrsh_64_ldst_pos"},
-      {"111"_b, "ldrsh_32_ldst_pos"},
-    },
-  },
-
-  { "_qzxvsk",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "bic_asimdimm_l_sl"},
-      {"00x100"_b, "usra_asimdshf_r"},
-      {"00x110"_b, "ursra_asimdshf_r"},
-      {"010x00"_b, "usra_asimdshf_r"},
-      {"010x10"_b, "ursra_asimdshf_r"},
-      {"011100"_b, "usra_asimdshf_r"},
-      {"011110"_b, "ursra_asimdshf_r"},
-      {"0x1000"_b, "usra_asimdshf_r"},
-      {"0x1010"_b, "ursra_asimdshf_r"},
-    },
-  },
-
-  { "_qzzlhq",
-    {30, 23, 22},
-    { {"000"_b, "and_32_log_imm"},
-      {"010"_b, "movn_32_movewide"},
-      {"100"_b, "eor_32_log_imm"},
-      {"110"_b, "movz_32_movewide"},
-    },
-  },
-
-  { "_qzzlpv",
-    {13, 12},
-    { {"01"_b, "gmi_64g_dp_2src"},
-      {"10"_b, "lsrv_64_dp_2src"},
-    },
-  },
-
-  { "_rgjqzs",
-    {30, 23, 22},
-    { {"001"_b, "sbfm_64m_bitfield"},
-      {"101"_b, "ubfm_64m_bitfield"},
-    },
-  },
-
-  { "_rgnxpp",
-    {23, 22},
-    { {"00"_b, "fcsel_s_floatsel"},
-      {"01"_b, "fcsel_d_floatsel"},
-      {"11"_b, "fcsel_h_floatsel"},
-    },
-  },
-
-  { "_rgztzl",
+  { "_qyrqxp",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "saddlp_asimdmisc_p"},
-      {"00001"_b, "xtn_asimdmisc_n"},
+    { {"11111"_b, "stlrh_sl32_ldstexcl"},
+    },
+  },
+
+  { "_qyyrqq",
+    {22, 13, 12},
+    { {"000"_b, "swp_64_memop"},
+      {"001"_b, "_ymghnh"},
+      {"010"_b, "st64bv0_64_memop"},
+      {"011"_b, "st64bv_64_memop"},
+      {"100"_b, "swpl_64_memop"},
+    },
+  },
+
+  { "_qzlvkm",
+    {18},
+    { {"0"_b, "st3_asisdlsop_hx3_r3h"},
+      {"1"_b, "st3_asisdlsop_h3_i3h"},
+    },
+  },
+
+  { "_qzmrnj",
+    {23, 22},
+    { {"00"_b, "dup_asimdins_dr_r"},
+      {"01"_b, "fmla_asimdsamefp16_only"},
+      {"11"_b, "fmls_asimdsamefp16_only"},
+    },
+  },
+
+  { "_qzsyvx",
+    {30, 23, 22, 11, 10},
+    { {"00010"_b, "str_s_ldst_regoff"},
+      {"00110"_b, "ldr_s_ldst_regoff"},
+      {"10010"_b, "str_d_ldst_regoff"},
+      {"10110"_b, "ldr_d_ldst_regoff"},
+    },
+  },
+
+  { "_qzxgqh",
+    {12},
+    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+    },
+  },
+
+  { "_rgnryt",
+    {18, 17, 12},
+    { {"0x0"_b, "ld1_asisdlsop_dx1_r1d"},
+      {"100"_b, "ld1_asisdlsop_dx1_r1d"},
+      {"110"_b, "ld1_asisdlsop_d1_i1d"},
+    },
+  },
+
+  { "_rgxthl",
+    {30, 23, 22},
+    { {"000"_b, "stxp_sp32_ldstexcl"},
+      {"001"_b, "_mjyhsl"},
+      {"010"_b, "_vrsjnp"},
+      {"011"_b, "_zyxnpz"},
+      {"100"_b, "stxp_sp64_ldstexcl"},
+      {"101"_b, "_snrzky"},
+      {"110"_b, "_qtxlsr"},
+      {"111"_b, "_pkpvmj"},
+    },
+  },
+
+  { "_rgztgm",
+    {20, 18, 17},
+    { {"000"_b, "_klrksl"},
     },
   },
 
@@ -4914,40 +5345,39 @@
     },
   },
 
-  { "_rhttgj",
-    {12, 10},
-    { {"00"_b, "_xxpzrl"},
-      {"01"_b, "_vlzrlm"},
-      {"10"_b, "_vxylhh"},
-      {"11"_b, "_pxgztg"},
+  { "_rjmhxr",
+    {30},
+    { {"0"_b, "adds_64_addsub_shift"},
+      {"1"_b, "subs_64_addsub_shift"},
     },
   },
 
-  { "_rhvksm",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtnu_asisdmiscfp16_r"},
-      {"0x00001"_b, "fcvtnu_asisdmisc_r"},
-      {"1111001"_b, "fcvtpu_asisdmiscfp16_r"},
-      {"1x00001"_b, "fcvtpu_asisdmisc_r"},
+  { "_rjrqxt",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_s_floatimm"},
     },
   },
 
-  { "_rhzhyz",
-    {13, 12, 4},
-    { {"000"_b, "rmif_only_rmif"},
+  { "_rjspzr",
+    {13, 12},
+    { {"00"_b, "udiv_32_dp_2src"},
+      {"10"_b, "asrv_32_dp_2src"},
     },
   },
 
-  { "_rjmyyl",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fmov_s_floatdp1"},
-      {"000010"_b, "fneg_s_floatdp1"},
-      {"001000"_b, "frintn_s_floatdp1"},
-      {"001010"_b, "frintm_s_floatdp1"},
-      {"001100"_b, "frinta_s_floatdp1"},
-      {"001110"_b, "frintx_s_floatdp1"},
-      {"010000"_b, "frint32z_s_floatdp1"},
-      {"010010"_b, "frint64z_s_floatdp1"},
+  { "_rjthsm",
+    {30, 23, 22},
+    { {"001"_b, "sbfm_64m_bitfield"},
+      {"101"_b, "ubfm_64m_bitfield"},
+    },
+  },
+
+  { "_rjvgkl",
+    {30, 23, 22, 19, 18, 17, 16},
+    { {"000xxxx"_b, "umov_asimdins_w_w"},
+      {"1001000"_b, "umov_asimdins_x_x"},
+      {"x01xxxx"_b, "frecps_asimdsamefp16_only"},
+      {"x11xxxx"_b, "frsqrts_asimdsamefp16_only"},
     },
   },
 
@@ -4971,45 +5401,132 @@
     },
   },
 
-  { "_rkqtvs",
-    {23, 22, 13},
-    { {"100"_b, "fmlal_asimdelem_lh"},
-      {"xx1"_b, "smlal_asimdelem_l"},
+  { "_rkjjtp",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "scvtf_asisdshf_c"},
+      {"001x0"_b, "scvtf_asisdshf_c"},
+      {"01xx0"_b, "scvtf_asisdshf_c"},
     },
   },
 
-  { "_rkrltp",
-    {17},
-    { {"0"_b, "st3_asisdlso_b3_3b"},
+  { "_rknxlg",
+    {12},
+    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
     },
   },
 
-  { "_rksxpn",
-    {30, 23, 22, 11, 10},
-    { {"00010"_b, "str_b_ldst_regoff"},
-      {"00110"_b, "ldr_b_ldst_regoff"},
-      {"01010"_b, "str_q_ldst_regoff"},
-      {"01110"_b, "ldr_q_ldst_regoff"},
-      {"10010"_b, "str_h_ldst_regoff"},
-      {"10110"_b, "ldr_h_ldst_regoff"},
+  { "_rkpylh",
+    {20, 19, 18, 17, 16},
+    { {"00010"_b, "scvtf_d32_float2fix"},
+      {"00011"_b, "ucvtf_d32_float2fix"},
+      {"11000"_b, "fcvtzs_32d_float2fix"},
+      {"11001"_b, "fcvtzu_32d_float2fix"},
     },
   },
 
-  { "_rkvyqk",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "movi_asimdimm_l_hl"},
-      {"00x100"_b, "shrn_asimdshf_n"},
-      {"00x101"_b, "rshrn_asimdshf_n"},
-      {"00x110"_b, "sshll_asimdshf_l"},
-      {"010x00"_b, "shrn_asimdshf_n"},
-      {"010x01"_b, "rshrn_asimdshf_n"},
-      {"010x10"_b, "sshll_asimdshf_l"},
-      {"011100"_b, "shrn_asimdshf_n"},
-      {"011101"_b, "rshrn_asimdshf_n"},
-      {"011110"_b, "sshll_asimdshf_l"},
-      {"0x1000"_b, "shrn_asimdshf_n"},
-      {"0x1001"_b, "rshrn_asimdshf_n"},
-      {"0x1010"_b, "sshll_asimdshf_l"},
+  { "_rkrlsy",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "rev64_asimdmisc_r"},
+    },
+  },
+
+  { "_rkrntt",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_hynprk"},
+      {"0000001"_b, "_phpphm"},
+      {"0100000"_b, "_tlvmlq"},
+      {"0100001"_b, "_qpyxsv"},
+      {"100xxx0"_b, "st2_asisdlsop_hx2_r2h"},
+      {"100xxx1"_b, "st4_asisdlsop_hx4_r4h"},
+      {"1010xx0"_b, "st2_asisdlsop_hx2_r2h"},
+      {"1010xx1"_b, "st4_asisdlsop_hx4_r4h"},
+      {"10110x0"_b, "st2_asisdlsop_hx2_r2h"},
+      {"10110x1"_b, "st4_asisdlsop_hx4_r4h"},
+      {"1011100"_b, "st2_asisdlsop_hx2_r2h"},
+      {"1011101"_b, "st4_asisdlsop_hx4_r4h"},
+      {"1011110"_b, "_skmzll"},
+      {"1011111"_b, "_hkxlsm"},
+      {"110xxx0"_b, "ld2_asisdlsop_hx2_r2h"},
+      {"110xxx1"_b, "ld4_asisdlsop_hx4_r4h"},
+      {"1110xx0"_b, "ld2_asisdlsop_hx2_r2h"},
+      {"1110xx1"_b, "ld4_asisdlsop_hx4_r4h"},
+      {"11110x0"_b, "ld2_asisdlsop_hx2_r2h"},
+      {"11110x1"_b, "ld4_asisdlsop_hx4_r4h"},
+      {"1111100"_b, "ld2_asisdlsop_hx2_r2h"},
+      {"1111101"_b, "ld4_asisdlsop_hx4_r4h"},
+      {"1111110"_b, "_ykhhqq"},
+      {"1111111"_b, "_khtsmx"},
+    },
+  },
+
+  { "_rkskkv",
+    {18},
+    { {"1"_b, "fminv_v_p_z"},
+    },
+  },
+
+  { "_rktqym",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"010xx00"_b, "csel_32_condsel"},
+      {"010xx01"_b, "csinc_32_condsel"},
+      {"0110000"_b, "crc32b_32c_dp_2src"},
+      {"0110001"_b, "crc32h_32c_dp_2src"},
+      {"0110010"_b, "crc32w_32c_dp_2src"},
+      {"0110100"_b, "crc32cb_32c_dp_2src"},
+      {"0110101"_b, "crc32ch_32c_dp_2src"},
+      {"0110110"_b, "crc32cw_32c_dp_2src"},
+      {"0111000"_b, "smax_32_dp_2src"},
+      {"0111001"_b, "umax_32_dp_2src"},
+      {"0111010"_b, "smin_32_dp_2src"},
+      {"0111011"_b, "umin_32_dp_2src"},
+      {"110xx00"_b, "csinv_32_condsel"},
+      {"110xx01"_b, "csneg_32_condsel"},
+    },
+  },
+
+  { "_rkxlyj",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_b_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_b_ldapstl_simd"},
+      {"010xx10"_b, "stlur_q_ldapstl_simd"},
+      {"011xx10"_b, "ldapur_q_ldapstl_simd"},
+      {"100xx10"_b, "stlur_h_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_h_ldapstl_simd"},
+      {"x000001"_b, "cpyprn_cpy_memcms"},
+      {"x000101"_b, "cpypwtrn_cpy_memcms"},
+      {"x001001"_b, "cpyprtrn_cpy_memcms"},
+      {"x001101"_b, "cpyptrn_cpy_memcms"},
+      {"x010001"_b, "cpymrn_cpy_memcms"},
+      {"x010101"_b, "cpymwtrn_cpy_memcms"},
+      {"x011001"_b, "cpymrtrn_cpy_memcms"},
+      {"x011101"_b, "cpymtrn_cpy_memcms"},
+      {"x100001"_b, "cpyern_cpy_memcms"},
+      {"x100101"_b, "cpyewtrn_cpy_memcms"},
+      {"x101001"_b, "cpyertrn_cpy_memcms"},
+      {"x101101"_b, "cpyetrn_cpy_memcms"},
+      {"x110001"_b, "setge_set_memcms"},
+      {"x110101"_b, "setget_set_memcms"},
+      {"x111001"_b, "setgen_set_memcms"},
+      {"x111101"_b, "setgetn_set_memcms"},
+    },
+  },
+
+  { "_rkzlpp",
+    {4},
+    { {"0"_b, "ccmp_64_condcmp_reg"},
+    },
+  },
+
+  { "_rlgtnn",
+    {23},
+    { {"0"_b, "_sxsgmq"},
+    },
+  },
+
+  { "_rlpmrx",
+    {30},
+    { {"0"_b, "_txzxzs"},
+      {"1"_b, "_htsjxj"},
     },
   },
 
@@ -5022,6 +5539,19 @@
     },
   },
 
+  { "_rlxhxz",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "pacdzb_64z_dp_1src"},
+    },
+  },
+
+  { "_rlylxh",
+    {18},
+    { {"0"_b, "ld3_asisdlsop_bx3_r3b"},
+      {"1"_b, "ld3_asisdlsop_b3_i3b"},
+    },
+  },
+
   { "_rlyvpn",
     {23, 12, 11, 10},
     { {"0000"_b, "sqshrunb_z_zi"},
@@ -5035,32 +5565,51 @@
     },
   },
 
-  { "_rmltms",
-    {9, 8, 7, 6, 5, 1, 0},
-    { {"1111100"_b, "eret_64e_branch_reg"},
+  { "_rmkpsk",
+    {23},
+    { {"0"_b, "_srkslp"},
     },
   },
 
-  { "_rmmmjj",
-    {30, 23, 22},
-    { {"000"_b, "smaddl_64wa_dp_3src"},
-      {"010"_b, "umaddl_64wa_dp_3src"},
+  { "_rmmpym",
+    {2, 1, 0},
+    { {"000"_b, "_glgznt"},
     },
   },
 
-  { "_rmxjsn",
-    {30},
-    { {"0"_b, "orr_64_log_shift"},
-      {"1"_b, "ands_64_log_shift"},
+  { "_rmyzpp",
+    {20, 19, 18, 17},
+    { {"0000"_b, "_gnhjkl"},
     },
   },
 
-  { "_rnktts",
-    {23, 22},
-    { {"00"_b, "and_asimdsame_only"},
-      {"01"_b, "bic_asimdsame_only"},
-      {"10"_b, "orr_asimdsame_only"},
-      {"11"_b, "orn_asimdsame_only"},
+  { "_rnlxtv",
+    {13},
+    { {"0"_b, "_vvgpzq"},
+      {"1"_b, "_mqljmr"},
+    },
+  },
+
+  { "_rnphqp",
+    {20, 19, 18, 17, 16, 4, 3},
+    { {"0000001"_b, "fcmp_hz_floatcmp"},
+      {"0000011"_b, "fcmpe_hz_floatcmp"},
+      {"xxxxx00"_b, "fcmp_h_floatcmp"},
+      {"xxxxx10"_b, "fcmpe_h_floatcmp"},
+    },
+  },
+
+  { "_rnqmyp",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "mvni_asimdimm_l_sl"},
+      {"00x100"_b, "sri_asimdshf_r"},
+      {"00x110"_b, "sqshlu_asimdshf_r"},
+      {"010x00"_b, "sri_asimdshf_r"},
+      {"010x10"_b, "sqshlu_asimdshf_r"},
+      {"011100"_b, "sri_asimdshf_r"},
+      {"011110"_b, "sqshlu_asimdshf_r"},
+      {"0x1000"_b, "sri_asimdshf_r"},
+      {"0x1010"_b, "sqshlu_asimdshf_r"},
     },
   },
 
@@ -5071,28 +5620,37 @@
     },
   },
 
-  { "_rnrzsj",
-    {20, 18, 17},
-    { {"000"_b, "_lgglzy"},
+  { "_rnsmjq",
+    {13},
+    { {"0"_b, "_xxqzvy"},
+      {"1"_b, "_rmmpym"},
     },
   },
 
-  { "_rnypvh",
-    {17},
-    { {"0"_b, "st1_asisdlsop_bx1_r1b"},
-      {"1"_b, "st1_asisdlsop_b1_i1b"},
+  { "_rpjgkh",
+    {22, 20, 19, 18, 17, 16},
+    { {"111000"_b, "fcmge_asisdmiscfp16_fz"},
+      {"x00000"_b, "fcmge_asisdmisc_fz"},
+      {"x10000"_b, "fminnmp_asisdpair_only_sd"},
     },
   },
 
-  { "_rpmrkq",
-    {23},
-    { {"0"_b, "fcmeq_asimdsame_only"},
+  { "_rpjrhs",
+    {23, 22, 4},
+    { {"000"_b, "fccmp_s_floatccmp"},
+      {"001"_b, "fccmpe_s_floatccmp"},
+      {"010"_b, "fccmp_d_floatccmp"},
+      {"011"_b, "fccmpe_d_floatccmp"},
+      {"110"_b, "fccmp_h_floatccmp"},
+      {"111"_b, "fccmpe_h_floatccmp"},
     },
   },
 
-  { "_rpqgjl",
-    {18, 17, 16, 13, 12, 7, 4, 3, 2, 1, 0},
-    { {"00000011111"_b, "_kpxtsp"},
+  { "_rpplns",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "srshr_asisdshf_r"},
+      {"001x0"_b, "srshr_asisdshf_r"},
+      {"01xx0"_b, "srshr_asisdshf_r"},
     },
   },
 
@@ -5102,6 +5660,32 @@
     },
   },
 
+  { "_rqghyv",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "stur_32_ldst_unscaled"},
+      {"00001"_b, "str_32_ldst_immpost"},
+      {"00010"_b, "sttr_32_ldst_unpriv"},
+      {"00011"_b, "str_32_ldst_immpre"},
+      {"00100"_b, "ldur_32_ldst_unscaled"},
+      {"00101"_b, "ldr_32_ldst_immpost"},
+      {"00110"_b, "ldtr_32_ldst_unpriv"},
+      {"00111"_b, "ldr_32_ldst_immpre"},
+      {"01000"_b, "ldursw_64_ldst_unscaled"},
+      {"01001"_b, "ldrsw_64_ldst_immpost"},
+      {"01010"_b, "ldtrsw_64_ldst_unpriv"},
+      {"01011"_b, "ldrsw_64_ldst_immpre"},
+      {"10000"_b, "stur_64_ldst_unscaled"},
+      {"10001"_b, "str_64_ldst_immpost"},
+      {"10010"_b, "sttr_64_ldst_unpriv"},
+      {"10011"_b, "str_64_ldst_immpre"},
+      {"10100"_b, "ldur_64_ldst_unscaled"},
+      {"10101"_b, "ldr_64_ldst_immpost"},
+      {"10110"_b, "ldtr_64_ldst_unpriv"},
+      {"10111"_b, "ldr_64_ldst_immpre"},
+      {"11000"_b, "prfum_p_ldst_unscaled"},
+    },
+  },
+
   { "_rqhryp",
     {12, 10},
     { {"00"_b, "_kjpxvh"},
@@ -5111,6 +5695,50 @@
     },
   },
 
+  { "_rqpjjs",
+    {30, 11, 10},
+    { {"000"_b, "_qjrllr"},
+      {"001"_b, "_xlgxhn"},
+      {"010"_b, "_hxrnns"},
+      {"011"_b, "_xnhkpk"},
+      {"101"_b, "_mmgpkx"},
+      {"110"_b, "_vxhjgg"},
+      {"111"_b, "_lptrlg"},
+    },
+  },
+
+  { "_rqzpzq",
+    {23, 22, 11, 10, 4, 3, 0},
+    { {"0000000"_b, "_hkxzqg"},
+      {"0010111"_b, "_zqlzzp"},
+      {"0011111"_b, "_lvszgj"},
+      {"0100000"_b, "_tmsjzg"},
+      {"0110111"_b, "_kzprzt"},
+      {"0111111"_b, "_tzsnmy"},
+      {"1000000"_b, "_mqmrng"},
+      {"1010111"_b, "_hrmsnk"},
+      {"1011111"_b, "_tqlrzh"},
+    },
+  },
+
+  { "_rrkmyl",
+    {23, 22, 4},
+    { {"000"_b, "fccmp_s_floatccmp"},
+      {"001"_b, "fccmpe_s_floatccmp"},
+      {"010"_b, "fccmp_d_floatccmp"},
+      {"011"_b, "fccmpe_d_floatccmp"},
+      {"110"_b, "fccmp_h_floatccmp"},
+      {"111"_b, "fccmpe_h_floatccmp"},
+    },
+  },
+
+  { "_rrvltp",
+    {18, 4},
+    { {"00"_b, "fcmlt_p_p_z0"},
+      {"01"_b, "fcmle_p_p_z0"},
+    },
+  },
+
   { "_rshyht",
     {13},
     { {"0"_b, "facge_p_p_zz"},
@@ -5118,53 +5746,92 @@
     },
   },
 
+  { "_rsjgyk",
+    {30, 23, 22, 20, 13},
+    { {"00001"_b, "ld2w_z_p_bi_contiguous"},
+      {"000x0"_b, "ld2w_z_p_br_contiguous"},
+      {"00101"_b, "ld4w_z_p_bi_contiguous"},
+      {"001x0"_b, "ld4w_z_p_br_contiguous"},
+      {"01001"_b, "ld2d_z_p_bi_contiguous"},
+      {"010x0"_b, "ld2d_z_p_br_contiguous"},
+      {"01101"_b, "ld4d_z_p_bi_contiguous"},
+      {"011x0"_b, "ld4d_z_p_br_contiguous"},
+      {"10011"_b, "st2w_z_p_bi_contiguous"},
+      {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"},
+      {"10101"_b, "st1w_z_p_bi"},
+      {"10111"_b, "st4w_z_p_bi_contiguous"},
+      {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"},
+      {"11011"_b, "st2d_z_p_bi_contiguous"},
+      {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"},
+      {"11101"_b, "st1d_z_p_bi"},
+      {"11111"_b, "st4d_z_p_bi_contiguous"},
+    },
+  },
+
+  { "_rsmyth",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stllr_sl64_ldstexcl"},
+    },
+  },
+
+  { "_rsnvnr",
+    {30, 23, 22},
+    { {"100"_b, "ins_asimdins_ir_r"},
+      {"x01"_b, "fmulx_asimdsamefp16_only"},
+    },
+  },
+
+  { "_rspmth",
+    {18},
+    { {"0"_b, "st1_asisdlse_r2_2v"},
+    },
+  },
+
   { "_rsqmgk",
     {23, 22, 20, 19, 18, 17, 16},
     { {"0000000"_b, "movprfx_z_z"},
     },
   },
 
-  { "_rsyhtj",
-    {13, 12, 11, 10},
-    { {"0001"_b, "ushl_asisdsame_only"},
-      {"0010"_b, "_gxnlxg"},
-      {"0011"_b, "uqshl_asisdsame_only"},
-      {"0101"_b, "urshl_asisdsame_only"},
-      {"0111"_b, "uqrshl_asisdsame_only"},
-      {"1010"_b, "_msnsjp"},
-      {"1110"_b, "_llnzlv"},
+  { "_rsqxrs",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "_ggvlym"},
+      {"01000"_b, "csel_32_condsel"},
+      {"01001"_b, "csinc_32_condsel"},
+      {"01100"_b, "_svvylr"},
+      {"01101"_b, "_zmhqmr"},
+      {"01110"_b, "_rjspzr"},
+      {"01111"_b, "_vpknjg"},
+      {"10000"_b, "_rzymmk"},
+      {"11000"_b, "csinv_32_condsel"},
+      {"11001"_b, "csneg_32_condsel"},
+      {"11100"_b, "_kzjxxk"},
+      {"11101"_b, "_khvvtr"},
+      {"11110"_b, "_gvpvjn"},
+      {"11111"_b, "_pkjqsy"},
     },
   },
 
-  { "_rsyjqj",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0010000"_b, "fmaxv_asimdall_only_h"},
-      {"0x00001"_b, "frint64z_asimdmisc_r"},
-      {"1010000"_b, "fminv_asimdall_only_h"},
-      {"1111000"_b, "fabs_asimdmiscfp16_r"},
-      {"1x00000"_b, "fabs_asimdmisc_r"},
-    },
-  },
-
-  { "_rsyzrs",
-    {22},
-    { {"0"_b, "str_64_ldst_regoff"},
-      {"1"_b, "ldr_64_ldst_regoff"},
-    },
-  },
-
-  { "_rtgkkg",
+  { "_rssrty",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"1101001"_b, "smmla_asimdsame2_g"},
-      {"1101011"_b, "usmmla_asimdsame2_g"},
-      {"x100111"_b, "usdot_asimdsame2_d"},
-      {"xxx0101"_b, "sdot_asimdsame2_d"},
+    { {"1011011"_b, "bfmmla_asimdsame2_e"},
+      {"x011111"_b, "bfdot_asimdsame2_d"},
+      {"x111111"_b, "bfmlal_asimdsame2_f"},
+      {"xxx0xx1"_b, "fcmla_asimdsame2_c"},
+      {"xxx1x01"_b, "fcadd_asimdsame2_c"},
     },
   },
 
-  { "_rtlzxv",
-    {13, 12},
-    { {"01"_b, "sqdmull_asisddiff_only"},
+  { "_rszgzl",
+    {30, 23, 22},
+    { {"000"_b, "smsubl_64wa_dp_3src"},
+      {"010"_b, "umsubl_64wa_dp_3src"},
+    },
+  },
+
+  { "_rtlvxq",
+    {30, 23, 22},
+    { {"000"_b, "madd_32a_dp_3src"},
     },
   },
 
@@ -5185,55 +5852,97 @@
     },
   },
 
-  { "_rvjzgt",
-    {23, 22, 4},
-    { {"000"_b, "fccmp_s_floatccmp"},
-      {"001"_b, "fccmpe_s_floatccmp"},
-      {"010"_b, "fccmp_d_floatccmp"},
-      {"011"_b, "fccmpe_d_floatccmp"},
-      {"110"_b, "fccmp_h_floatccmp"},
-      {"111"_b, "fccmpe_h_floatccmp"},
+  { "_rvjkyp",
+    {13, 12},
+    { {"01"_b, "gmi_64g_dp_2src"},
+      {"10"_b, "lsrv_64_dp_2src"},
     },
   },
 
-  { "_rvzhhx",
-    {18, 17, 12},
-    { {"000"_b, "st3_asisdlso_d3_3d"},
+  { "_rvsylx",
+    {18},
+    { {"1"_b, "frecpe_z_z"},
     },
   },
 
-  { "_rxjrmn",
-    {22, 13, 12},
-    { {"000"_b, "swpa_32_memop"},
-      {"100"_b, "swpal_32_memop"},
+  { "_rvtxys",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "sshr_asisdshf_r"},
+      {"001x0"_b, "sshr_asisdshf_r"},
+      {"01xx0"_b, "sshr_asisdshf_r"},
     },
   },
 
-  { "_rxpspy",
-    {30, 23, 22, 12, 11, 10},
-    { {"0000xx"_b, "adds_32s_addsub_ext"},
-      {"000100"_b, "adds_32s_addsub_ext"},
-      {"1000xx"_b, "subs_32s_addsub_ext"},
-      {"100100"_b, "subs_32s_addsub_ext"},
+  { "_rvvshx",
+    {23, 22, 13, 12},
+    { {"0000"_b, "fmax_s_floatdp2"},
+      {"0001"_b, "fmin_s_floatdp2"},
+      {"0010"_b, "fmaxnm_s_floatdp2"},
+      {"0011"_b, "fminnm_s_floatdp2"},
+      {"0100"_b, "fmax_d_floatdp2"},
+      {"0101"_b, "fmin_d_floatdp2"},
+      {"0110"_b, "fmaxnm_d_floatdp2"},
+      {"0111"_b, "fminnm_d_floatdp2"},
+      {"1100"_b, "fmax_h_floatdp2"},
+      {"1101"_b, "fmin_h_floatdp2"},
+      {"1110"_b, "fmaxnm_h_floatdp2"},
+      {"1111"_b, "fminnm_h_floatdp2"},
     },
   },
 
-  { "_ryglvl",
-    {4},
-    { {"0"_b, "ccmp_32_condcmp_reg"},
+  { "_rxgkjn",
+    {30, 23, 22},
+    { {"000"_b, "adds_64s_addsub_ext"},
+      {"100"_b, "subs_64s_addsub_ext"},
     },
   },
 
-  { "_rykykh",
-    {20, 19, 18, 17, 16},
-    { {"00000"_b, "rev64_asimdmisc_r"},
+  { "_rxhssh",
+    {18},
+    { {"0"_b, "ld3_asisdlsop_hx3_r3h"},
+      {"1"_b, "ld3_asisdlsop_h3_i3h"},
     },
   },
 
-  { "_rzkmny",
-    {30},
-    { {"0"_b, "and_64_log_shift"},
-      {"1"_b, "eor_64_log_shift"},
+  { "_rxnnvv",
+    {23, 22, 4, 3, 2, 1, 0},
+    { {"0000000"_b, "brk_ex_exception"},
+      {"0100000"_b, "tcancel_ex_exception"},
+      {"1000001"_b, "dcps1_dc_exception"},
+      {"1000010"_b, "dcps2_dc_exception"},
+      {"1000011"_b, "dcps3_dc_exception"},
+    },
+  },
+
+  { "_rxsqhv",
+    {13, 12},
+    { {"00"_b, "adc_64_addsub_carry"},
+    },
+  },
+
+  { "_rxtklv",
+    {30, 18},
+    { {"00"_b, "_qtgvlx"},
+    },
+  },
+
+  { "_rxytqg",
+    {30, 23, 22, 20, 19, 18},
+    { {"00xxxx"_b, "add_64_addsub_imm"},
+      {"011000"_b, "smax_64_minmax_imm"},
+      {"011001"_b, "umax_64u_minmax_imm"},
+      {"011010"_b, "smin_64_minmax_imm"},
+      {"011011"_b, "umin_64u_minmax_imm"},
+      {"10xxxx"_b, "sub_64_addsub_imm"},
+    },
+  },
+
+  { "_ryrkqt",
+    {20, 19},
+    { {"00"_b, "_tsskys"},
+      {"01"_b, "_kqvljp"},
+      {"10"_b, "_lxhlkx"},
+      {"11"_b, "_rjysnh"},
     },
   },
 
@@ -5244,21 +5953,32 @@
     },
   },
 
-  { "_rzqzlq",
-    {23, 22, 20, 19, 16, 13, 12},
-    { {"0111110"_b, "fcvtns_asisdmiscfp16_r"},
-      {"0111111"_b, "fcvtms_asisdmiscfp16_r"},
-      {"0x00110"_b, "fcvtns_asisdmisc_r"},
-      {"0x00111"_b, "fcvtms_asisdmisc_r"},
-      {"1111110"_b, "fcvtps_asisdmiscfp16_r"},
-      {"1111111"_b, "fcvtzs_asisdmiscfp16_r"},
-      {"1x00110"_b, "fcvtps_asisdmisc_r"},
-      {"1x00111"_b, "fcvtzs_asisdmisc_r"},
-      {"xx00000"_b, "cmgt_asisdmisc_z"},
-      {"xx00001"_b, "cmeq_asisdmisc_z"},
-      {"xx00010"_b, "cmlt_asisdmisc_z"},
-      {"xx00011"_b, "abs_asisdmisc_r"},
-      {"xx10111"_b, "addp_asisdpair_only"},
+  { "_rzpqmm",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_nygsjm"},
+      {"0000001"_b, "_snjmrt"},
+      {"0100000"_b, "_hhxpjz"},
+      {"0100001"_b, "_tktgvg"},
+      {"100xxx0"_b, "st2_asisdlsop_bx2_r2b"},
+      {"100xxx1"_b, "st4_asisdlsop_bx4_r4b"},
+      {"1010xx0"_b, "st2_asisdlsop_bx2_r2b"},
+      {"1010xx1"_b, "st4_asisdlsop_bx4_r4b"},
+      {"10110x0"_b, "st2_asisdlsop_bx2_r2b"},
+      {"10110x1"_b, "st4_asisdlsop_bx4_r4b"},
+      {"1011100"_b, "st2_asisdlsop_bx2_r2b"},
+      {"1011101"_b, "st4_asisdlsop_bx4_r4b"},
+      {"1011110"_b, "_szjjgk"},
+      {"1011111"_b, "_tvgklq"},
+      {"110xxx0"_b, "ld2_asisdlsop_bx2_r2b"},
+      {"110xxx1"_b, "ld4_asisdlsop_bx4_r4b"},
+      {"1110xx0"_b, "ld2_asisdlsop_bx2_r2b"},
+      {"1110xx1"_b, "ld4_asisdlsop_bx4_r4b"},
+      {"11110x0"_b, "ld2_asisdlsop_bx2_r2b"},
+      {"11110x1"_b, "ld4_asisdlsop_bx4_r4b"},
+      {"1111100"_b, "ld2_asisdlsop_bx2_r2b"},
+      {"1111101"_b, "ld4_asisdlsop_bx4_r4b"},
+      {"1111110"_b, "_tzsvyv"},
+      {"1111111"_b, "_jvnsgt"},
     },
   },
 
@@ -5273,6 +5993,12 @@
     },
   },
 
+  { "_rzymmk",
+    {13, 12},
+    { {"00"_b, "sbc_32_addsub_carry"},
+    },
+  },
+
   { "_rzzxsn",
     {30, 13},
     { {"00"_b, "_nvyxmh"},
@@ -5289,29 +6015,25 @@
     },
   },
 
-  { "_sgnknz",
-    {23, 22, 20, 19, 11},
-    { {"00011"_b, "fcvtzs_asisdshf_c"},
-      {"001x1"_b, "fcvtzs_asisdshf_c"},
-      {"01xx1"_b, "fcvtzs_asisdshf_c"},
+  { "_sgmpvp",
+    {23, 22, 13},
+    { {"000"_b, "fmulx_asimdelem_rh_h"},
+      {"1x0"_b, "fmulx_asimdelem_r_sd"},
     },
   },
 
-  { "_sgztlj",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0010000"_b, "fmaxnmv_asimdall_only_h"},
-      {"0111001"_b, "fcvtas_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtas_asimdmisc_r"},
-      {"1010000"_b, "fminnmv_asimdall_only_h"},
-      {"1111000"_b, "fcmgt_asimdmiscfp16_fz"},
-      {"1x00000"_b, "fcmgt_asimdmisc_fz"},
-      {"1x00001"_b, "urecpe_asimdmisc_r"},
+  { "_shgktt",
+    {11},
+    { {"0"_b, "_tjjqpx"},
     },
   },
 
-  { "_shgkvq",
-    {18, 17},
-    { {"00"_b, "st2_asisdlso_s2_2s"},
+  { "_shgxyq",
+    {23, 22, 19, 13, 12},
+    { {"00100"_b, "sha1h_ss_cryptosha2"},
+      {"00101"_b, "sha1su1_vv_cryptosha2"},
+      {"00110"_b, "sha256su0_vv_cryptosha2"},
+      {"xx011"_b, "suqadd_asisdmisc_r"},
     },
   },
 
@@ -5324,46 +6046,22 @@
     },
   },
 
-  { "_shrsxr",
-    {30, 23, 22},
-    { {"000"_b, "stnp_64_ldstnapair_offs"},
-      {"001"_b, "ldnp_64_ldstnapair_offs"},
-      {"010"_b, "stp_64_ldstpair_post"},
-      {"011"_b, "ldp_64_ldstpair_post"},
+  { "_shqyqv",
+    {23, 13, 12},
+    { {"010"_b, "fcmeq_asisdsame_only"},
     },
   },
 
-  { "_shzysp",
-    {30, 23, 22, 19, 18, 17, 16},
-    { {"1001000"_b, "ins_asimdins_ir_r"},
-      {"100x100"_b, "ins_asimdins_ir_r"},
-      {"100xx10"_b, "ins_asimdins_ir_r"},
-      {"100xxx1"_b, "ins_asimdins_ir_r"},
-      {"x01xxxx"_b, "fmulx_asimdsamefp16_only"},
+  { "_shvqkt",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldlar_lr32_ldstexcl"},
     },
   },
 
-  { "_sjlpxn",
-    {23, 22},
-    { {"01"_b, "fcmla_asimdelem_c_h"},
-      {"10"_b, "fcmla_asimdelem_c_s"},
-    },
-  },
-
-  { "_sjlrxn",
-    {10},
-    { {"0"_b, "_mpzqxm"},
-    },
-  },
-
-  { "_sjnqvx",
-    {23, 22, 4},
-    { {"000"_b, "fccmp_s_floatccmp"},
-      {"001"_b, "fccmpe_s_floatccmp"},
-      {"010"_b, "fccmp_d_floatccmp"},
-      {"011"_b, "fccmpe_d_floatccmp"},
-      {"110"_b, "fccmp_h_floatccmp"},
-      {"111"_b, "fccmpe_h_floatccmp"},
+  { "_sjlqvg",
+    {23, 20, 19, 18, 17, 16},
+    { {"000001"_b, "fcvtxn_asimdmisc_n"},
+      {"x00000"_b, "uadalp_asimdmisc_p"},
     },
   },
 
@@ -5374,27 +6072,6 @@
     },
   },
 
-  { "_sjnxky",
-    {30},
-    { {"1"_b, "_ylyskq"},
-    },
-  },
-
-  { "_sjrqth",
-    {23, 22},
-    { {"00"_b, "fmov_s_floatimm"},
-      {"01"_b, "fmov_d_floatimm"},
-      {"11"_b, "fmov_h_floatimm"},
-    },
-  },
-
-  { "_sjsltg",
-    {17},
-    { {"0"_b, "st2_asisdlsop_hx2_r2h"},
-      {"1"_b, "st2_asisdlsop_h2_i2h"},
-    },
-  },
-
   { "_sjtrhm",
     {30, 23, 22, 20, 13},
     { {"00001"_b, "ld1rqb_z_p_bi_u8"},
@@ -5415,114 +6092,132 @@
     },
   },
 
-  { "_sjzsvv",
-    {30, 23, 13, 12, 11, 10},
-    { {"101001"_b, "ucvtf_asisdshf_c"},
-      {"101111"_b, "fcvtzu_asisdshf_c"},
-      {"1x01x0"_b, "sqrdmlah_asisdelem_r"},
-      {"1x11x0"_b, "sqrdmlsh_asisdelem_r"},
+  { "_skjqrx",
+    {23, 22},
+    { {"00"_b, "fmov_s_floatimm"},
+      {"01"_b, "fmov_d_floatimm"},
+      {"11"_b, "fmov_h_floatimm"},
     },
   },
 
-  { "_skglrt",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "mvni_asimdimm_l_sl"},
-      {"00x100"_b, "ushr_asimdshf_r"},
-      {"00x110"_b, "urshr_asimdshf_r"},
-      {"010x00"_b, "ushr_asimdshf_r"},
-      {"010x10"_b, "urshr_asimdshf_r"},
-      {"011100"_b, "ushr_asimdshf_r"},
-      {"011110"_b, "urshr_asimdshf_r"},
-      {"0x1000"_b, "ushr_asimdshf_r"},
-      {"0x1010"_b, "urshr_asimdshf_r"},
+  { "_skmzll",
+    {18},
+    { {"0"_b, "st2_asisdlsop_hx2_r2h"},
+      {"1"_b, "st2_asisdlsop_h2_i2h"},
     },
   },
 
-  { "_skpjrp",
-    {23, 22, 12},
-    { {"000"_b, "_xzyylk"},
-      {"001"_b, "_hpgqlp"},
-      {"010"_b, "_qnsxkj"},
-      {"011"_b, "_nnlvqz"},
-      {"110"_b, "_vylhvl"},
-      {"111"_b, "_stgkpy"},
+  { "_sknvhk",
+    {13, 12, 11, 10},
+    { {"0000"_b, "sha1c_qsv_cryptosha3"},
+      {"0001"_b, "dup_asisdone_only"},
+      {"0100"_b, "sha1p_qsv_cryptosha3"},
+      {"1000"_b, "sha1m_qsv_cryptosha3"},
+      {"1100"_b, "sha1su0_vvv_cryptosha3"},
     },
   },
 
-  { "_slhpgp",
+  { "_skqzyg",
     {23},
-    { {"0"_b, "facge_asimdsame_only"},
-      {"1"_b, "facgt_asimdsame_only"},
+    { {"0"_b, "fcmeq_asimdsame_only"},
     },
   },
 
-  { "_sllkpt",
-    {13, 12},
-    { {"10"_b, "lsrv_32_dp_2src"},
+  { "_sksvrn",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldlarb_lr32_ldstexcl"},
     },
   },
 
-  { "_slnkst",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "fcvtmu_asimdmiscfp16_r"},
-      {"0x00001"_b, "fcvtmu_asimdmisc_r"},
-      {"1111001"_b, "fcvtzu_asimdmiscfp16_r"},
-      {"1x00001"_b, "fcvtzu_asimdmisc_r"},
-      {"xx00000"_b, "neg_asimdmisc_r"},
+  { "_skszgm",
+    {13, 12, 11, 10},
+    { {"1111"_b, "_xzmrlg"},
     },
   },
 
-  { "_sltqpy",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"000xx10"_b, "strb_32b_ldst_regoff"},
-      {"001xx10"_b, "ldrb_32b_ldst_regoff"},
-      {"0100000"_b, "ldaprb_32l_memop"},
-      {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
-      {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
-      {"100xx10"_b, "strh_32_ldst_regoff"},
-      {"101xx10"_b, "ldrh_32_ldst_regoff"},
-      {"1100000"_b, "ldaprh_32l_memop"},
-      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
-      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+  { "_skytvx",
+    {23, 22},
+    { {"00"_b, "tbx_asimdtbl_l2_2"},
     },
   },
 
-  { "_smplhv",
-    {10},
-    { {"0"_b, "braa_64p_branch_reg"},
-      {"1"_b, "brab_64p_branch_reg"},
+  { "_slzrtr",
+    {23, 22},
+    { {"00"_b, "fmsub_s_floatdp3"},
+      {"01"_b, "fmsub_d_floatdp3"},
+      {"11"_b, "fmsub_h_floatdp3"},
     },
   },
 
-  { "_smqvrs",
-    {18, 17},
-    { {"00"_b, "st1_asisdlse_r1_1v"},
+  { "_slzvjh",
+    {30, 23, 22},
+    { {"000"_b, "orr_32_log_imm"},
+      {"100"_b, "ands_32s_log_imm"},
+      {"110"_b, "movk_32_movewide"},
     },
   },
 
-  { "_smrtxq",
-    {13, 12},
-    { {"00"_b, "sbcs_32_addsub_carry"},
+  { "_smmrpj",
+    {18},
+    { {"0"_b, "fadda_v_p_z"},
     },
   },
 
-  { "_snjpvy",
-    {23, 22, 13, 12, 11, 10},
-    { {"0001x0"_b, "fmulx_asimdelem_rh_h"},
-      {"0x0001"_b, "sqshrun_asimdshf_n"},
-      {"0x0011"_b, "sqrshrun_asimdshf_n"},
-      {"0x0101"_b, "uqshrn_asimdshf_n"},
-      {"0x0111"_b, "uqrshrn_asimdshf_n"},
-      {"0x1001"_b, "ushll_asimdshf_l"},
-      {"1000x0"_b, "fmlal2_asimdelem_lh"},
-      {"1x01x0"_b, "fmulx_asimdelem_r_sd"},
-      {"xx10x0"_b, "umull_asimdelem_l"},
+  { "_smptxh",
+    {23, 22},
+    { {"01"_b, "fmax_asimdsamefp16_only"},
+      {"11"_b, "fmin_asimdsamefp16_only"},
     },
   },
 
-  { "_snkqvp",
-    {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
-    { {"0011111001"_b, "_gkpvxz"},
+  { "_smsytm",
+    {13},
+    { {"0"_b, "mul_asimdelem_r"},
+      {"1"_b, "smull_asimdelem_l"},
+    },
+  },
+
+  { "_snhmgn",
+    {23},
+    { {"0"_b, "fmul_asimdsame_only"},
+    },
+  },
+
+  { "_snhzxr",
+    {30, 23, 22},
+    { {"001"_b, "bfm_64m_bitfield"},
+    },
+  },
+
+  { "_snjmrt",
+    {18},
+    { {"0"_b, "st4_asisdlso_b4_4b"},
+    },
+  },
+
+  { "_snnlgr",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "movi_asimdimm_l_sl"},
+      {"00x100"_b, "sshr_asimdshf_r"},
+      {"00x110"_b, "srshr_asimdshf_r"},
+      {"010x00"_b, "sshr_asimdshf_r"},
+      {"010x10"_b, "srshr_asimdshf_r"},
+      {"011100"_b, "sshr_asimdshf_r"},
+      {"011110"_b, "srshr_asimdshf_r"},
+      {"0x1000"_b, "sshr_asimdshf_r"},
+      {"0x1010"_b, "srshr_asimdshf_r"},
+    },
+  },
+
+  { "_snrzky",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxp_lp64_ldstexcl"},
+    },
+  },
+
+  { "_sntnsm",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "autizb_64z_dp_1src"},
     },
   },
 
@@ -5533,71 +6228,34 @@
     },
   },
 
-  { "_sntzjg",
-    {23, 22, 11, 10},
-    { {"0000"_b, "_qssyls"},
-      {"0001"_b, "stg_64spost_ldsttags"},
-      {"0010"_b, "stg_64soffset_ldsttags"},
-      {"0011"_b, "stg_64spre_ldsttags"},
-      {"0100"_b, "ldg_64loffset_ldsttags"},
-      {"0101"_b, "stzg_64spost_ldsttags"},
-      {"0110"_b, "stzg_64soffset_ldsttags"},
-      {"0111"_b, "stzg_64spre_ldsttags"},
-      {"1000"_b, "_kyxqgg"},
-      {"1001"_b, "st2g_64spost_ldsttags"},
-      {"1010"_b, "st2g_64soffset_ldsttags"},
-      {"1011"_b, "st2g_64spre_ldsttags"},
-      {"1100"_b, "_stjrgx"},
-      {"1101"_b, "stz2g_64spost_ldsttags"},
-      {"1110"_b, "stz2g_64soffset_ldsttags"},
-      {"1111"_b, "stz2g_64spre_ldsttags"},
+  { "_snvnjz",
+    {30, 13},
+    { {"10"_b, "_plzqrv"},
     },
   },
 
-  { "_spglxn",
-    {4, 3, 2, 1, 0},
-    { {"11111"_b, "_yqmvxk"},
+  { "_snvzjr",
+    {12},
+    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
     },
   },
 
-  { "_sphpkr",
-    {4, 3, 2, 1, 0},
-    { {"11111"_b, "_thsxvg"},
+  { "_snzvtt",
+    {23, 22},
+    { {"00"_b, "fmlal2_asimdsame_f"},
+      {"10"_b, "fmlsl2_asimdsame_f"},
     },
   },
 
-  { "_spjjkg",
-    {23, 22, 13, 12, 11, 10},
-    { {"0011x0"_b, "sudot_asimdelem_d"},
-      {"0111x0"_b, "bfdot_asimdelem_e"},
-      {"0x1001"_b, "scvtf_asimdshf_c"},
-      {"0x1111"_b, "fcvtzs_asimdshf_c"},
-      {"1011x0"_b, "usdot_asimdelem_d"},
-      {"1111x0"_b, "bfmlal_asimdelem_f"},
-      {"xx00x0"_b, "sqdmulh_asimdelem_r"},
-      {"xx01x0"_b, "sqrdmulh_asimdelem_r"},
-      {"xx10x0"_b, "sdot_asimdelem_d"},
+  { "_spktyg",
+    {23, 22, 20, 19, 11},
+    { {"00000"_b, "movi_asimdimm_m_sm"},
     },
   },
 
-  { "_spmkmm",
-    {30, 19, 18, 17, 16, 10},
-    { {"110001"_b, "ins_asimdins_iv_v"},
-      {"1x1001"_b, "ins_asimdins_iv_v"},
-      {"1xx101"_b, "ins_asimdins_iv_v"},
-      {"1xxx11"_b, "ins_asimdins_iv_v"},
-      {"xxxxx0"_b, "ext_asimdext_only"},
-    },
-  },
-
-  { "_spzgkt",
-    {23, 22, 13, 12, 11, 10},
-    { {"0x1001"_b, "ucvtf_asimdshf_c"},
-      {"0x1111"_b, "fcvtzu_asimdshf_c"},
-      {"1000x0"_b, "fmlsl2_asimdelem_lh"},
-      {"xx01x0"_b, "sqrdmlah_asimdelem_r"},
-      {"xx10x0"_b, "udot_asimdelem_d"},
-      {"xx11x0"_b, "sqrdmlsh_asimdelem_r"},
+  { "_spxvlt",
+    {20, 19, 18, 17, 16, 13, 12, 11},
+    { {"00000000"_b, "_mtkhgz"},
     },
   },
 
@@ -5607,95 +6265,171 @@
     },
   },
 
-  { "_sqgxzn",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "paciza_64z_dp_1src"},
+  { "_sqhxzj",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"1010000"_b, "sha512h_qqv_cryptosha512_3"},
+      {"1010001"_b, "sha512h2_qqv_cryptosha512_3"},
+      {"1010010"_b, "sha512su1_vvv2_cryptosha512_3"},
+      {"1010011"_b, "rax1_vvv2_cryptosha512_3"},
     },
   },
 
-  { "_sqjpsl",
-    {30, 13, 12, 11, 10},
-    { {"10001"_b, "sqrdmlah_asisdsame2_only"},
-      {"10011"_b, "sqrdmlsh_asisdsame2_only"},
+  { "_sqkkqy",
+    {13, 12, 10},
+    { {"010"_b, "sqrdmlah_asisdelem_r"},
+      {"101"_b, "_mhksnq"},
+      {"110"_b, "sqrdmlsh_asisdelem_r"},
+      {"111"_b, "_mpytmv"},
     },
   },
 
-  { "_sqpjtr",
-    {20, 18, 17},
-    { {"000"_b, "_nllnsg"},
-    },
-  },
-
-  { "_srggzy",
-    {19},
-    { {"0"_b, "_xqgxjp"},
-      {"1"_b, "sysl_rc_systeminstrs"},
-    },
-  },
-
-  { "_srglgl",
+  { "_sqlsyr",
     {18, 17},
-    { {"0x"_b, "st3_asisdlsop_sx3_r3s"},
-      {"10"_b, "st3_asisdlsop_sx3_r3s"},
-      {"11"_b, "st3_asisdlsop_s3_i3s"},
+    { {"00"_b, "ld1_asisdlse_r1_1v"},
     },
   },
 
-  { "_srmhjk",
-    {30},
-    { {"0"_b, "ldr_s_loadlit"},
-      {"1"_b, "ldr_d_loadlit"},
+  { "_sqttsv",
+    {20, 19, 18, 17, 16, 4, 3},
+    { {"0000001"_b, "fcmp_sz_floatcmp"},
+      {"0000011"_b, "fcmpe_sz_floatcmp"},
+      {"xxxxx00"_b, "fcmp_s_floatcmp"},
+      {"xxxxx10"_b, "fcmpe_s_floatcmp"},
     },
   },
 
-  { "_srmhlk",
+  { "_srkslp",
+    {22, 20},
+    { {"00"_b, "_zvynrg"},
+      {"01"_b, "msr_sr_systemmove"},
+      {"10"_b, "_lxlqks"},
+      {"11"_b, "msrr_sr_systemmovepr"},
+    },
+  },
+
+  { "_srnkng",
+    {18},
+    { {"0"_b, "faddv_v_p_z"},
+      {"1"_b, "fmaxnmv_v_p_z"},
+    },
+  },
+
+  { "_srpptk",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "uaddlp_asimdmisc_p"},
-      {"00001"_b, "sqxtun_asimdmisc_n"},
+    { {"00000"_b, "usqadd_asimdmisc_r"},
+      {"00001"_b, "shll_asimdmisc_s"},
+      {"10000"_b, "uaddlv_asimdall_only"},
     },
   },
 
-  { "_srvnql",
-    {18, 17, 12},
-    { {"0x0"_b, "ld1_asisdlsop_dx1_r1d"},
-      {"100"_b, "ld1_asisdlsop_dx1_r1d"},
-      {"110"_b, "ld1_asisdlsop_d1_i1d"},
+  { "_srpqmk",
+    {30, 23, 22},
+    { {"000"_b, "stp_q_ldstpair_off"},
+      {"001"_b, "ldp_q_ldstpair_off"},
+      {"010"_b, "stp_q_ldstpair_pre"},
+      {"011"_b, "ldp_q_ldstpair_pre"},
     },
   },
 
-  { "_stgkpy",
+  { "_srsrtk",
+    {30, 23, 22, 13, 11, 10},
+    { {"000010"_b, "str_b_ldst_regoff"},
+      {"000110"_b, "str_bl_ldst_regoff"},
+      {"001010"_b, "ldr_b_ldst_regoff"},
+      {"001110"_b, "ldr_bl_ldst_regoff"},
+      {"010x10"_b, "str_q_ldst_regoff"},
+      {"011x10"_b, "ldr_q_ldst_regoff"},
+      {"100x10"_b, "str_h_ldst_regoff"},
+      {"101x10"_b, "ldr_h_ldst_regoff"},
+    },
+  },
+
+  { "_srttng",
+    {23, 22},
+    { {"01"_b, "fcmla_asimdelem_c_h"},
+      {"10"_b, "fcmla_asimdelem_c_s"},
+    },
+  },
+
+  { "_ssjnph",
+    {10},
+    { {"0"_b, "blraa_64p_branch_reg"},
+      {"1"_b, "blrab_64p_branch_reg"},
+    },
+  },
+
+  { "_ssjrxs",
+    {18},
+    { {"0"_b, "ld3_asisdlso_h3_3h"},
+    },
+  },
+
+  { "_ssqyrk",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0000000"_b, "not_asimdmisc_r"},
+      {"0100000"_b, "rbit_asimdmisc_r"},
+    },
+  },
+
+  { "_ssvpxz",
+    {30, 23, 22},
+    { {"000"_b, "stnp_32_ldstnapair_offs"},
+      {"001"_b, "ldnp_32_ldstnapair_offs"},
+      {"010"_b, "stp_32_ldstpair_post"},
+      {"011"_b, "ldp_32_ldstpair_post"},
+      {"110"_b, "stgp_64_ldstpair_post"},
+      {"111"_b, "ldpsw_64_ldstpair_post"},
+    },
+  },
+
+  { "_ssypmm",
     {9, 8, 7, 6, 5},
     { {"00000"_b, "fmov_h_floatimm"},
     },
   },
 
-  { "_stjrgx",
-    {20, 19, 18, 17, 16, 13, 12},
-    { {"0000000"_b, "ldgm_64bulk_ldsttags"},
+  { "_stlgrr",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0001111"_b, "caspl_cp32_ldstexcl"},
+      {"0011111"_b, "caspal_cp32_ldstexcl"},
+      {"0101111"_b, "caslb_c32_ldstexcl"},
+      {"0111111"_b, "casalb_c32_ldstexcl"},
+      {"1001111"_b, "caspl_cp64_ldstexcl"},
+      {"1011111"_b, "caspal_cp64_ldstexcl"},
+      {"1101111"_b, "caslh_c32_ldstexcl"},
+      {"1111111"_b, "casalh_c32_ldstexcl"},
     },
   },
 
-  { "_stqmps",
-    {12},
-    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+  { "_stmtkr",
+    {30, 23, 22},
+    { {"000"_b, "stxr_sr32_ldstexcl"},
+      {"001"_b, "_zlvjrh"},
+      {"010"_b, "_lpzgvs"},
+      {"011"_b, "_shvqkt"},
+      {"100"_b, "stxr_sr64_ldstexcl"},
+      {"101"_b, "_jhltlz"},
+      {"110"_b, "_rsmyth"},
+      {"111"_b, "_vjtgmx"},
     },
   },
 
-  { "_strkph",
-    {23, 22},
-    { {"00"_b, "tbl_asimdtbl_l2_2"},
+  { "_svgvjm",
+    {23},
+    { {"0"_b, "faddp_asimdsame_only"},
+      {"1"_b, "fabd_asimdsame_only"},
     },
   },
 
-  { "_svnyyx",
+  { "_svlrvy",
+    {18, 17},
+    { {"00"_b, "st1_asisdlse_r3_3v"},
+    },
+  },
+
+  { "_svvylr",
     {13, 12},
-    { {"00"_b, "adcs_32_addsub_carry"},
-    },
-  },
-
-  { "_svrnxq",
-    {12},
-    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
+    { {"10"_b, "lslv_32_dp_2src"},
     },
   },
 
@@ -5707,9 +6441,32 @@
     },
   },
 
-  { "_sxnkrh",
-    {23},
-    { {"1"_b, "_xxkvsy"},
+  { "_svyszp",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_d_floatimm"},
+    },
+  },
+
+  { "_sxgnmg",
+    {18, 17},
+    { {"00"_b, "ld3_asisdlso_s3_3s"},
+    },
+  },
+
+  { "_sxptnh",
+    {23, 22, 11, 10},
+    { {"0000"_b, "_vmtkqp"},
+      {"0001"_b, "_lqjlkj"},
+      {"0010"_b, "_gyymmx"},
+      {"0011"_b, "_gmqyjv"},
+      {"0100"_b, "_pvtyjz"},
+      {"0101"_b, "_hxxxyy"},
+      {"0110"_b, "_xszmjn"},
+      {"1000"_b, "_lzjyhm"},
+      {"1001"_b, "_zlkygr"},
+      {"1010"_b, "_jvpjsm"},
+      {"1101"_b, "_vzyklr"},
+      {"1110"_b, "_npxkzq"},
     },
   },
 
@@ -5732,62 +6489,118 @@
     },
   },
 
-  { "_syktsg",
-    {13, 12},
-    { {"00"_b, "udiv_64_dp_2src"},
-      {"10"_b, "asrv_64_dp_2src"},
+  { "_sxsgmq",
+    {30, 22, 20, 19, 18, 17, 16},
+    { {"00xxxxx"_b, "stxp_sp32_ldstexcl"},
+      {"0111111"_b, "ldxp_lp32_ldstexcl"},
+      {"10xxxxx"_b, "stxp_sp64_ldstexcl"},
+      {"1111111"_b, "ldxp_lp64_ldstexcl"},
     },
   },
 
-  { "_syzjtz",
-    {13, 12, 10},
-    { {"010"_b, "sqrdmlah_asisdelem_r"},
-      {"101"_b, "_jqnglz"},
-      {"110"_b, "sqrdmlsh_asisdelem_r"},
-      {"111"_b, "_zslsvj"},
+  { "_sxsxxt",
+    {20, 19, 18, 17, 16},
+    { {"10000"_b, "fminp_asisdpair_only_sd"},
     },
   },
 
-  { "_szttjy",
-    {30, 23, 22, 19, 18, 17, 16},
-    { {"00000x1"_b, "umov_asimdins_w_w"},
-      {"0000x10"_b, "umov_asimdins_w_w"},
-      {"00010xx"_b, "umov_asimdins_w_w"},
-      {"0001110"_b, "umov_asimdins_w_w"},
-      {"000x10x"_b, "umov_asimdins_w_w"},
-      {"000x111"_b, "umov_asimdins_w_w"},
-      {"1001000"_b, "umov_asimdins_x_x"},
-      {"x01xxxx"_b, "frecps_asimdsamefp16_only"},
-      {"x11xxxx"_b, "frsqrts_asimdsamefp16_only"},
+  { "_sylkvm",
+    {23, 22, 12},
+    { {"100"_b, "fmlsl2_asimdelem_lh"},
+      {"xx1"_b, "sqrdmlah_asimdelem_r"},
     },
   },
 
-  { "_tgmljr",
-    {23, 22, 20, 19, 12, 11},
-    { {"000000"_b, "movi_asimdimm_n_b"},
-      {"000010"_b, "fmov_asimdimm_s_s"},
-      {"000011"_b, "fmov_asimdimm_h_h"},
-      {"00x100"_b, "scvtf_asimdshf_c"},
-      {"00x111"_b, "fcvtzs_asimdshf_c"},
-      {"010x00"_b, "scvtf_asimdshf_c"},
-      {"010x11"_b, "fcvtzs_asimdshf_c"},
-      {"011100"_b, "scvtf_asimdshf_c"},
-      {"011111"_b, "fcvtzs_asimdshf_c"},
-      {"0x1000"_b, "scvtf_asimdshf_c"},
-      {"0x1011"_b, "fcvtzs_asimdshf_c"},
+  { "_syrmmr",
+    {18, 4},
+    { {"00"_b, "fcmeq_p_p_z0"},
     },
   },
 
-  { "_tgqsyg",
-    {22},
-    { {"0"_b, "prfm_p_ldst_regoff"},
+  { "_szgqrr",
+    {12, 10},
+    { {"00"_b, "_xlyjsz"},
+      {"01"_b, "_yppmkl"},
+      {"10"_b, "_sgmpvp"},
+      {"11"_b, "_gjtmjg"},
     },
   },
 
-  { "_thqvrp",
-    {17},
-    { {"0"_b, "st1_asisdlsep_r2_r2"},
-      {"1"_b, "st1_asisdlsep_i2_i2"},
+  { "_szjjgk",
+    {18},
+    { {"0"_b, "st2_asisdlsop_bx2_r2b"},
+      {"1"_b, "st2_asisdlsop_b2_i2b"},
+    },
+  },
+
+  { "_szmnhg",
+    {12},
+    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+    },
+  },
+
+  { "_szmyzt",
+    {12},
+    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+    },
+  },
+
+  { "_szqlsn",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0x00001"_b, "frint32z_asimdmisc_r"},
+      {"1111000"_b, "fcmlt_asimdmiscfp16_fz"},
+      {"1x00000"_b, "fcmlt_asimdmisc_fz"},
+    },
+  },
+
+  { "_sztkhs",
+    {30, 23, 22},
+    { {"000"_b, "msub_64a_dp_3src"},
+    },
+  },
+
+  { "_szylpy",
+    {22, 12},
+    { {"10"_b, "_hhlmrg"},
+    },
+  },
+
+  { "_szysqh",
+    {22, 13, 12},
+    { {"000"_b, "ldsmax_32_memop"},
+      {"001"_b, "ldsmin_32_memop"},
+      {"010"_b, "ldumax_32_memop"},
+      {"011"_b, "ldumin_32_memop"},
+      {"100"_b, "ldsmaxl_32_memop"},
+      {"101"_b, "ldsminl_32_memop"},
+      {"110"_b, "ldumaxl_32_memop"},
+      {"111"_b, "lduminl_32_memop"},
+    },
+  },
+
+  { "_tgvkhm",
+    {20, 19, 18, 17, 16, 13},
+    { {"000000"_b, "fabs_s_floatdp1"},
+      {"000010"_b, "fsqrt_s_floatdp1"},
+      {"000100"_b, "fcvt_ds_floatdp1"},
+      {"000110"_b, "fcvt_hs_floatdp1"},
+      {"001000"_b, "frintp_s_floatdp1"},
+      {"001010"_b, "frintz_s_floatdp1"},
+      {"001110"_b, "frinti_s_floatdp1"},
+      {"010000"_b, "frint32x_s_floatdp1"},
+      {"010010"_b, "frint64x_s_floatdp1"},
+    },
+  },
+
+  { "_thkkgx",
+    {18},
+    { {"1"_b, "fminnmv_v_p_z"},
+    },
+  },
+
+  { "_thqgrq",
+    {13, 12, 11, 10},
+    { {"1111"_b, "_pgmlrt"},
     },
   },
 
@@ -5800,26 +6613,6 @@
     },
   },
 
-  { "_thsxvg",
-    {11, 10, 9, 8, 7, 6},
-    { {"000010"_b, "ssbb_only_barriers"},
-      {"010010"_b, "pssbb_only_barriers"},
-      {"0x1010"_b, "dsb_bo_barriers"},
-      {"0xx110"_b, "dsb_bo_barriers"},
-      {"1xxx10"_b, "dsb_bo_barriers"},
-      {"xxxx01"_b, "clrex_bn_barriers"},
-      {"xxxx11"_b, "isb_bi_barriers"},
-    },
-  },
-
-  { "_thvvzp",
-    {18, 17, 12},
-    { {"0x0"_b, "st1_asisdlsop_dx1_r1d"},
-      {"100"_b, "st1_asisdlsop_dx1_r1d"},
-      {"110"_b, "st1_asisdlsop_d1_i1d"},
-    },
-  },
-
   { "_thvxym",
     {20},
     { {"0"_b, "_prkmty"},
@@ -5827,40 +6620,79 @@
     },
   },
 
-  { "_tjktkm",
-    {30},
-    { {"1"_b, "_gntpyh"},
+  { "_tjjqpx",
+    {23, 22, 20, 19, 16, 13, 10},
+    { {"0000000"_b, "_mlgmqm"},
+      {"0000001"_b, "_mvqkzv"},
+      {"0000010"_b, "_jztspt"},
+      {"0000011"_b, "_hrpkqg"},
+      {"0100000"_b, "_llqtkj"},
+      {"0100001"_b, "_pmpsvs"},
+      {"0100010"_b, "_vhrkvk"},
+      {"0100011"_b, "_xsvpzx"},
+      {"100xx00"_b, "st2_asisdlsop_sx2_r2s"},
+      {"100xx01"_b, "_ynyqky"},
+      {"100xx10"_b, "st4_asisdlsop_sx4_r4s"},
+      {"100xx11"_b, "_grvxrm"},
+      {"1010x00"_b, "st2_asisdlsop_sx2_r2s"},
+      {"1010x01"_b, "_snvzjr"},
+      {"1010x10"_b, "st4_asisdlsop_sx4_r4s"},
+      {"1010x11"_b, "_xmkysx"},
+      {"1011000"_b, "st2_asisdlsop_sx2_r2s"},
+      {"1011001"_b, "_xqhxql"},
+      {"1011010"_b, "st4_asisdlsop_sx4_r4s"},
+      {"1011011"_b, "_ykpqth"},
+      {"1011100"_b, "_lgyqpk"},
+      {"1011101"_b, "_tplghv"},
+      {"1011110"_b, "_lqknkn"},
+      {"1011111"_b, "_zprgxt"},
+      {"110xx00"_b, "ld2_asisdlsop_sx2_r2s"},
+      {"110xx01"_b, "_prjzxs"},
+      {"110xx10"_b, "ld4_asisdlsop_sx4_r4s"},
+      {"110xx11"_b, "_txsvzz"},
+      {"1110x00"_b, "ld2_asisdlsop_sx2_r2s"},
+      {"1110x01"_b, "_hljttg"},
+      {"1110x10"_b, "ld4_asisdlsop_sx4_r4s"},
+      {"1110x11"_b, "_rknxlg"},
+      {"1111000"_b, "ld2_asisdlsop_sx2_r2s"},
+      {"1111001"_b, "_szmnhg"},
+      {"1111010"_b, "ld4_asisdlsop_sx4_r4s"},
+      {"1111011"_b, "_tjrtxx"},
+      {"1111100"_b, "_ppvnly"},
+      {"1111101"_b, "_lltzjg"},
+      {"1111110"_b, "_ypsgqz"},
+      {"1111111"_b, "_vnrlsj"},
     },
   },
 
-  { "_tjltls",
+  { "_tjlthk",
+    {9, 8, 7, 6, 5, 1},
+    { {"111110"_b, "drps_64e_branch_reg"},
+    },
+  },
+
+  { "_tjnzjl",
     {18, 17},
-    { {"0x"_b, "st1_asisdlsep_r1_r1"},
-      {"10"_b, "st1_asisdlsep_r1_r1"},
-      {"11"_b, "st1_asisdlsep_i1_i1"},
+    { {"00"_b, "st1_asisdlso_s1_1s"},
     },
   },
 
-  { "_tjpjng",
-    {23, 22, 13, 12},
-    { {"0000"_b, "fmax_s_floatdp2"},
-      {"0001"_b, "fmin_s_floatdp2"},
-      {"0010"_b, "fmaxnm_s_floatdp2"},
-      {"0011"_b, "fminnm_s_floatdp2"},
-      {"0100"_b, "fmax_d_floatdp2"},
-      {"0101"_b, "fmin_d_floatdp2"},
-      {"0110"_b, "fmaxnm_d_floatdp2"},
-      {"0111"_b, "fminnm_d_floatdp2"},
-      {"1100"_b, "fmax_h_floatdp2"},
-      {"1101"_b, "fmin_h_floatdp2"},
-      {"1110"_b, "fmaxnm_h_floatdp2"},
-      {"1111"_b, "fminnm_h_floatdp2"},
+  { "_tjrtxx",
+    {12},
+    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
     },
   },
 
-  { "_tjtgjy",
-    {20, 19, 18, 17},
-    { {"0000"_b, "_gjsnly"},
+  { "_tjxhsy",
+    {10},
+    { {"0"_b, "braa_64p_branch_reg"},
+      {"1"_b, "brab_64p_branch_reg"},
+    },
+  },
+
+  { "_tjxyky",
+    {12},
+    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
     },
   },
 
@@ -5894,24 +6726,27 @@
     },
   },
 
-  { "_tkzqqp",
-    {4, 3, 2, 1, 0},
-    { {"11111"_b, "_ntkqhk"},
+  { "_tklxhy",
+    {18},
+    { {"0"_b, "st3_asisdlso_b3_3b"},
     },
   },
 
-  { "_tlstgz",
-    {30, 23, 22},
-    { {"000"_b, "stlxp_sp32_ldstexcl"},
-      {"001"_b, "ldaxp_lp32_ldstexcl"},
-      {"100"_b, "stlxp_sp64_ldstexcl"},
-      {"101"_b, "ldaxp_lp64_ldstexcl"},
+  { "_tknqxs",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxr_lr64_ldstexcl"},
     },
   },
 
-  { "_tlzlrj",
-    {17},
-    { {"0"_b, "st2_asisdlso_b2_2b"},
+  { "_tktgvg",
+    {18},
+    { {"0"_b, "ld4_asisdlso_b4_4b"},
+    },
+  },
+
+  { "_tlvmlq",
+    {18},
+    { {"0"_b, "ld2_asisdlso_h2_2h"},
     },
   },
 
@@ -5921,24 +6756,15 @@
     },
   },
 
-  { "_tmrnzq",
-    {17},
-    { {"0"_b, "st2_asisdlsep_r2_r"},
-      {"1"_b, "st2_asisdlsep_i2_i"},
+  { "_tmsjzg",
+    {2, 1},
+    { {"00"_b, "ret_64r_branch_reg"},
     },
   },
 
-  { "_tmshps",
-    {17},
-    { {"0"_b, "fmaxnmv_v_p_z"},
-      {"1"_b, "fmaxv_v_p_z"},
-    },
-  },
-
-  { "_tmthqm",
-    {22},
-    { {"0"_b, "str_32_ldst_regoff"},
-      {"1"_b, "ldr_32_ldst_regoff"},
+  { "_tmtgqm",
+    {4},
+    { {"0"_b, "ccmn_64_condcmp_imm"},
     },
   },
 
@@ -5948,35 +6774,29 @@
     },
   },
 
-  { "_tnhmpx",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"1011001"_b, "fcmge_asisdsamefp16_only"},
-      {"1011011"_b, "facge_asisdsamefp16_only"},
-      {"1110101"_b, "fabd_asisdsamefp16_only"},
-      {"1111001"_b, "fcmgt_asisdsamefp16_only"},
-      {"1111011"_b, "facgt_asisdsamefp16_only"},
+  { "_tnjhxp",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "pacdza_64z_dp_1src"},
     },
   },
 
-  { "_tnrrjk",
-    {30, 23, 22, 11, 10},
-    { {"01000"_b, "csel_32_condsel"},
-      {"01001"_b, "csinc_32_condsel"},
-      {"11000"_b, "csinv_32_condsel"},
-      {"11001"_b, "csneg_32_condsel"},
+  { "_tnngsg",
+    {23, 22, 13, 12, 11, 10},
+    { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
+      {"0x0001"_b, "ushr_asimdshf_r"},
+      {"0x0101"_b, "usra_asimdshf_r"},
+      {"0x1001"_b, "urshr_asimdshf_r"},
+      {"0x1101"_b, "ursra_asimdshf_r"},
+      {"10x1x0"_b, "fcmla_asimdelem_c_s"},
+      {"xx00x0"_b, "mla_asimdelem_r"},
+      {"xx10x0"_b, "umlal_asimdelem_l"},
     },
   },
 
-  { "_tnxlnl",
-    {13, 12},
-    { {"00"_b, "crc32x_64c_dp_2src"},
-      {"01"_b, "crc32cx_64c_dp_2src"},
-    },
-  },
-
-  { "_tnzytv",
-    {11, 10, 9, 8, 7, 6},
-    { {"000000"_b, "wfet_only_systeminstrswithreg"},
+  { "_tnpjts",
+    {30},
+    { {"0"_b, "and_64_log_shift"},
+      {"1"_b, "eor_64_log_shift"},
     },
   },
 
@@ -5993,52 +6813,106 @@
     },
   },
 
-  { "_tpkzxg",
-    {4},
-    { {"0"_b, "ccmp_64_condcmp_imm"},
-    },
-  },
-
-  { "_tpsylx",
-    {13},
-    { {"0"_b, "_gjylrt"},
-      {"1"_b, "_ygjslq"},
-    },
-  },
-
-  { "_trlhgn",
-    {30, 23, 22, 11, 10},
-    { {"00010"_b, "str_b_ldst_regoff"},
-      {"00110"_b, "ldr_b_ldst_regoff"},
-      {"01010"_b, "str_q_ldst_regoff"},
-      {"01110"_b, "ldr_q_ldst_regoff"},
-      {"10010"_b, "str_h_ldst_regoff"},
-      {"10110"_b, "ldr_h_ldst_regoff"},
-    },
-  },
-
-  { "_tsksxr",
-    {17},
-    { {"0"_b, "fminnmv_v_p_z"},
-      {"1"_b, "fminv_v_p_z"},
-    },
-  },
-
-  { "_tssqsr",
-    {30},
-    { {"1"_b, "_syzjtz"},
-    },
-  },
-
-  { "_tsvsgh",
-    {17},
-    { {"0"_b, "st1_asisdlso_b1_1b"},
-    },
-  },
-
-  { "_tszvvk",
+  { "_tplghv",
     {18, 17, 12},
-    { {"000"_b, "ld2_asisdlso_d2_2d"},
+    { {"0x0"_b, "st2_asisdlsop_dx2_r2d"},
+      {"100"_b, "st2_asisdlsop_dx2_r2d"},
+      {"110"_b, "st2_asisdlsop_d2_i2d"},
+    },
+  },
+
+  { "_tpmqyl",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_lszlkq"},
+    },
+  },
+
+  { "_tptqjs",
+    {12},
+    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
+    },
+  },
+
+  { "_tqlrzh",
+    {9, 8, 7, 6, 5, 2, 1},
+    { {"1111111"_b, "eretab_64e_branch_reg"},
+    },
+  },
+
+  { "_tqlsyy",
+    {30},
+    { {"0"_b, "add_32_addsub_shift"},
+      {"1"_b, "sub_32_addsub_shift"},
+    },
+  },
+
+  { "_trjmmn",
+    {13, 12, 11, 10},
+    { {"0001"_b, "sub_asisdsame_only"},
+      {"0010"_b, "_plyhhz"},
+      {"0011"_b, "cmeq_asisdsame_only"},
+      {"0110"_b, "_qkhrkh"},
+      {"1010"_b, "_kxhmlx"},
+      {"1101"_b, "sqrdmulh_asisdsame_only"},
+      {"1110"_b, "_ytrmvz"},
+    },
+  },
+
+  { "_tshjsk",
+    {18},
+    { {"0"_b, "st4_asisdlsep_r4_r"},
+      {"1"_b, "st4_asisdlsep_i4_i"},
+    },
+  },
+
+  { "_tsskys",
+    {23, 22, 18, 17, 16},
+    { {"01000"_b, "fadd_z_p_zz"},
+      {"01001"_b, "fsub_z_p_zz"},
+      {"01010"_b, "fmul_z_p_zz"},
+      {"01100"_b, "fmaxnm_z_p_zz"},
+      {"01101"_b, "fminnm_z_p_zz"},
+      {"01110"_b, "fmax_z_p_zz"},
+      {"01111"_b, "fmin_z_p_zz"},
+      {"1x000"_b, "fadd_z_p_zz"},
+      {"1x001"_b, "fsub_z_p_zz"},
+      {"1x010"_b, "fmul_z_p_zz"},
+      {"1x100"_b, "fmaxnm_z_p_zz"},
+      {"1x101"_b, "fminnm_z_p_zz"},
+      {"1x110"_b, "fmax_z_p_zz"},
+      {"1x111"_b, "fmin_z_p_zz"},
+      {"xx011"_b, "fsubr_z_p_zz"},
+    },
+  },
+
+  { "_tsypsz",
+    {23, 22, 13, 12},
+    { {"0000"_b, "fnmul_s_floatdp2"},
+      {"0100"_b, "fnmul_d_floatdp2"},
+      {"1100"_b, "fnmul_h_floatdp2"},
+    },
+  },
+
+  { "_ttmvpr",
+    {30, 23, 22, 20, 19},
+    { {"0xxxx"_b, "bl_only_branch_imm"},
+      {"10001"_b, "sys_cr_systeminstrs"},
+      {"1001x"_b, "msr_sr_systemmove"},
+      {"10101"_b, "sysp_cr_syspairinstrs"},
+      {"1011x"_b, "msrr_sr_systemmovepr"},
+    },
+  },
+
+  { "_ttmyrv",
+    {30, 11, 10},
+    { {"000"_b, "_nynrns"},
+      {"001"_b, "_rrkmyl"},
+      {"010"_b, "_rvvshx"},
+      {"011"_b, "_zlmyjt"},
+      {"101"_b, "_yrggjm"},
+      {"110"_b, "_kskqmz"},
+      {"111"_b, "_kzksnv"},
     },
   },
 
@@ -6051,74 +6925,57 @@
     },
   },
 
-  { "_ttstyt",
+  { "_ttsgkt",
     {12, 10},
-    { {"00"_b, "_rkqtvs"},
-      {"01"_b, "_mtlhnl"},
-      {"10"_b, "_zlmgyp"},
-      {"11"_b, "_kjghlk"},
+    { {"00"_b, "_smsytm"},
+      {"01"_b, "_mjrlkp"},
+      {"10"_b, "_vjkhhm"},
+      {"11"_b, "_ymxjjr"},
     },
   },
 
-  { "_tvgvvq",
+  { "_ttzlqn",
+    {18, 17, 12},
+    { {"000"_b, "ld1_asisdlso_d1_1d"},
+    },
+  },
+
+  { "_tvgklq",
+    {18},
+    { {"0"_b, "st4_asisdlsop_bx4_r4b"},
+      {"1"_b, "st4_asisdlsop_b4_i4b"},
+    },
+  },
+
+  { "_tvrlgz",
+    {18},
+    { {"0"_b, "st1_asisdlsop_bx1_r1b"},
+      {"1"_b, "st1_asisdlsop_b1_i1b"},
+    },
+  },
+
+  { "_tvtvkt",
+    {18, 17, 12},
+    { {"000"_b, "ldap1_asisdlso_d1"},
+    },
+  },
+
+  { "_tvyxlr",
     {30},
-    { {"0"_b, "cbnz_32_compbranch"},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_jlnjsy"},
     },
   },
 
-  { "_tvsszp",
-    {23, 22},
-    { {"00"_b, "fmadd_s_floatdp3"},
-      {"01"_b, "fmadd_d_floatdp3"},
-      {"11"_b, "fmadd_h_floatdp3"},
+  { "_txkmvh",
+    {18},
+    { {"0"_b, "ld2_asisdlse_r2"},
     },
   },
 
-  { "_txhzxq",
-    {30, 22, 11},
-    { {"000"_b, "_svnyyx"},
-      {"001"_b, "_qsxpyq"},
-      {"010"_b, "_pnqxjg"},
-      {"011"_b, "_myrshl"},
-      {"100"_b, "_smrtxq"},
-      {"110"_b, "_ryglvl"},
-      {"111"_b, "_qqsmlt"},
-    },
-  },
-
-  { "_txjyxr",
-    {18, 17},
-    { {"0x"_b, "ld1_asisdlsep_r1_r1"},
-      {"10"_b, "ld1_asisdlsep_r1_r1"},
-      {"11"_b, "ld1_asisdlsep_i1_i1"},
-    },
-  },
-
-  { "_txnqzy",
-    {30, 23, 22},
-    { {"000"_b, "smsubl_64wa_dp_3src"},
-      {"010"_b, "umsubl_64wa_dp_3src"},
-    },
-  },
-
-  { "_txsmts",
-    {13, 12, 11, 10},
-    { {"0000"_b, "smlal_asimddiff_l"},
-      {"0001"_b, "add_asimdsame_only"},
-      {"0010"_b, "_qhsplz"},
-      {"0011"_b, "cmtst_asimdsame_only"},
-      {"0100"_b, "sqdmlal_asimddiff_l"},
-      {"0101"_b, "mla_asimdsame_only"},
-      {"0110"_b, "_yvxgrr"},
-      {"0111"_b, "mul_asimdsame_only"},
-      {"1000"_b, "smlsl_asimddiff_l"},
-      {"1001"_b, "smaxp_asimdsame_only"},
-      {"1010"_b, "_mnxmst"},
-      {"1011"_b, "sminp_asimdsame_only"},
-      {"1100"_b, "sqdmlsl_asimddiff_l"},
-      {"1101"_b, "sqdmulh_asimdsame_only"},
-      {"1110"_b, "_klkgqk"},
-      {"1111"_b, "addp_asimdsame_only"},
+  { "_txsvzz",
+    {12},
+    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
     },
   },
 
@@ -6132,32 +6989,24 @@
     },
   },
 
-  { "_tyjqvt",
-    {18, 17},
-    { {"00"_b, "ld4_asisdlso_s4_4s"},
+  { "_tykvnx",
+    {30},
+    { {"0"_b, "ldapr_32l_ldapstl_writeback"},
+      {"1"_b, "ldapr_64l_ldapstl_writeback"},
     },
   },
 
-  { "_tylqpt",
-    {23, 22, 13},
-    { {"000"_b, "fmulx_asimdelem_rh_h"},
-      {"1x0"_b, "fmulx_asimdelem_r_sd"},
-    },
-  },
-
-  { "_typysz",
-    {23, 22, 20, 19, 13, 11, 10},
-    { {"00x1001"_b, "sqshrn_asisdshf_n"},
-      {"00x1011"_b, "sqrshrn_asisdshf_n"},
-      {"00xx0x0"_b, "fmul_asisdelem_rh_h"},
-      {"010x001"_b, "sqshrn_asisdshf_n"},
-      {"010x011"_b, "sqrshrn_asisdshf_n"},
-      {"0111001"_b, "sqshrn_asisdshf_n"},
-      {"0111011"_b, "sqrshrn_asisdshf_n"},
-      {"0x10001"_b, "sqshrn_asisdshf_n"},
-      {"0x10011"_b, "sqrshrn_asisdshf_n"},
-      {"1xxx0x0"_b, "fmul_asisdelem_r_sd"},
-      {"xxxx1x0"_b, "sqdmull_asisdelem_l"},
+  { "_tymryz",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "bic_asimdimm_l_sl"},
+      {"00x100"_b, "sli_asimdshf_r"},
+      {"00x110"_b, "uqshl_asimdshf_r"},
+      {"010x00"_b, "sli_asimdshf_r"},
+      {"010x10"_b, "uqshl_asimdshf_r"},
+      {"011100"_b, "sli_asimdshf_r"},
+      {"011110"_b, "uqshl_asimdshf_r"},
+      {"0x1000"_b, "sli_asimdshf_r"},
+      {"0x1010"_b, "uqshl_asimdshf_r"},
     },
   },
 
@@ -6174,10 +7023,53 @@
     },
   },
 
-  { "_tzzhsk",
+  { "_tytzpq",
+    {30},
+    { {"0"_b, "bic_32_log_shift"},
+      {"1"_b, "eon_32_log_shift"},
+    },
+  },
+
+  { "_tyzpxk",
+    {22, 13, 12},
+    { {"000"_b, "swpa_64_memop"},
+      {"100"_b, "swpal_64_memop"},
+    },
+  },
+
+  { "_tzgtvm",
     {13, 12},
-    { {"01"_b, "sqdmlal_asisddiff_only"},
-      {"11"_b, "sqdmlsl_asisddiff_only"},
+    { {"00"_b, "crc32x_64c_dp_2src"},
+      {"01"_b, "crc32cx_64c_dp_2src"},
+      {"10"_b, "umin_64_dp_2src"},
+    },
+  },
+
+  { "_tzjyhy",
+    {20, 19, 18, 17, 16},
+    { {"00010"_b, "scvtf_d32_float2fix"},
+      {"00011"_b, "ucvtf_d32_float2fix"},
+      {"11000"_b, "fcvtzs_32d_float2fix"},
+      {"11001"_b, "fcvtzu_32d_float2fix"},
+    },
+  },
+
+  { "_tzrgqq",
+    {23, 10},
+    { {"00"_b, "_gyrkkz"},
+    },
+  },
+
+  { "_tzsnmy",
+    {9, 8, 7, 6, 5, 2, 1},
+    { {"1111111"_b, "retab_64e_branch_reg"},
+    },
+  },
+
+  { "_tzsvyv",
+    {18},
+    { {"0"_b, "ld2_asisdlsop_bx2_r2b"},
+      {"1"_b, "ld2_asisdlsop_b2_i2b"},
     },
   },
 
@@ -6187,34 +7079,24 @@
     },
   },
 
-  { "_tzzzxz",
-    {30, 23, 22, 20, 19},
-    { {"0xxxx"_b, "bl_only_branch_imm"},
-      {"10001"_b, "sysl_rc_systeminstrs"},
-      {"1001x"_b, "mrs_rs_systemmove"},
+  { "_vghjnt",
+    {23, 22},
+    { {"00"_b, "fmadd_s_floatdp3"},
+      {"01"_b, "fmadd_d_floatdp3"},
+      {"11"_b, "fmadd_h_floatdp3"},
     },
   },
 
-  { "_vgrhsz",
-    {30, 23, 11, 10},
-    { {"0010"_b, "_hljrqn"},
-      {"0100"_b, "_htnmls"},
-      {"0110"_b, "_vxgzqy"},
-      {"1000"_b, "_lpsxhz"},
-      {"1001"_b, "ldraa_64_ldst_pac"},
-      {"1010"_b, "_jtqlhs"},
-      {"1011"_b, "ldraa_64w_ldst_pac"},
-      {"1100"_b, "_yrlzqp"},
-      {"1101"_b, "ldrab_64_ldst_pac"},
-      {"1110"_b, "_xyhxzt"},
-      {"1111"_b, "ldrab_64w_ldst_pac"},
-    },
-  },
-
-  { "_vgrtjz",
-    {12},
-    { {"0"_b, "sqdmulh_asimdelem_r"},
-      {"1"_b, "sqrdmulh_asimdelem_r"},
+  { "_vgqvys",
+    {30, 23, 22},
+    { {"000"_b, "stp_32_ldstpair_off"},
+      {"001"_b, "ldp_32_ldstpair_off"},
+      {"010"_b, "stp_32_ldstpair_pre"},
+      {"011"_b, "ldp_32_ldstpair_pre"},
+      {"100"_b, "stgp_64_ldstpair_off"},
+      {"101"_b, "ldpsw_64_ldstpair_off"},
+      {"110"_b, "stgp_64_ldstpair_pre"},
+      {"111"_b, "ldpsw_64_ldstpair_pre"},
     },
   },
 
@@ -6229,25 +7111,65 @@
     },
   },
 
-  { "_vhhktl",
+  { "_vgxtvy",
+    {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
+    { {"0011111001"_b, "_tjxhsy"},
+    },
+  },
+
+  { "_vhkjgh",
+    {30, 23, 22, 20, 19, 18},
+    { {"00xxxx"_b, "add_64_addsub_imm"},
+      {"011000"_b, "smax_64_minmax_imm"},
+      {"011001"_b, "umax_64u_minmax_imm"},
+      {"011010"_b, "smin_64_minmax_imm"},
+      {"011011"_b, "umin_64u_minmax_imm"},
+      {"10xxxx"_b, "sub_64_addsub_imm"},
+    },
+  },
+
+  { "_vhkpvn",
+    {20, 18, 17, 16},
+    { {"0000"_b, "_grktgm"},
+    },
+  },
+
+  { "_vhlqpr",
+    {30, 22, 11, 10},
+    { {"0000"_b, "csel_64_condsel"},
+      {"0001"_b, "csinc_64_condsel"},
+      {"0100"_b, "_xgqhjv"},
+      {"0101"_b, "_hspyhv"},
+      {"0110"_b, "_qkxmvp"},
+      {"0111"_b, "_tzgtvm"},
+      {"1000"_b, "csinv_64_condsel"},
+      {"1001"_b, "csneg_64_condsel"},
+      {"1100"_b, "_hlqvmm"},
+      {"1101"_b, "_ghrnmz"},
+    },
+  },
+
+  { "_vhrkvk",
     {18, 17},
-    { {"0x"_b, "st4_asisdlsop_sx4_r4s"},
-      {"10"_b, "st4_asisdlsop_sx4_r4s"},
-      {"11"_b, "st4_asisdlsop_s4_i4s"},
+    { {"00"_b, "ld4_asisdlso_s4_4s"},
     },
   },
 
-  { "_vhmsgj",
-    {18, 17, 12},
-    { {"000"_b, "ld1_asisdlso_d1_1d"},
+  { "_vjhrzl",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "frintx_asimdmiscfp16_r"},
+      {"0x00001"_b, "frintx_asimdmisc_r"},
+      {"1111001"_b, "frinti_asimdmiscfp16_r"},
+      {"1x00001"_b, "frinti_asimdmisc_r"},
+      {"xx00000"_b, "cmle_asimdmisc_z"},
     },
   },
 
-  { "_vjlnqj",
-    {23, 22, 13, 12},
-    { {"0000"_b, "fnmul_s_floatdp2"},
-      {"0100"_b, "fnmul_d_floatdp2"},
-      {"1100"_b, "fnmul_h_floatdp2"},
+  { "_vjkhhm",
+    {23, 22, 13},
+    { {"000"_b, "fmul_asimdelem_rh_h"},
+      {"1x0"_b, "fmul_asimdelem_r_sd"},
+      {"xx1"_b, "sqdmull_asimdelem_l"},
     },
   },
 
@@ -6258,56 +7180,9 @@
     },
   },
 
-  { "_vjqsqs",
-    {30},
-    { {"0"_b, "and_32_log_shift"},
-      {"1"_b, "eor_32_log_shift"},
-    },
-  },
-
-  { "_vjxqhp",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_jlrvpl"},
-      {"0000001"_b, "_pmkxlj"},
-      {"0100000"_b, "_qmgtyq"},
-      {"0100001"_b, "_qhxzxl"},
-      {"100xxx0"_b, "st2_asisdlsep_r2_r"},
-      {"100xxx1"_b, "st1_asisdlsep_r2_r2"},
-      {"1010xx0"_b, "st2_asisdlsep_r2_r"},
-      {"1010xx1"_b, "st1_asisdlsep_r2_r2"},
-      {"10110x0"_b, "st2_asisdlsep_r2_r"},
-      {"10110x1"_b, "st1_asisdlsep_r2_r2"},
-      {"1011100"_b, "st2_asisdlsep_r2_r"},
-      {"1011101"_b, "st1_asisdlsep_r2_r2"},
-      {"1011110"_b, "_tmrnzq"},
-      {"1011111"_b, "_thqvrp"},
-      {"110xxx0"_b, "ld2_asisdlsep_r2_r"},
-      {"110xxx1"_b, "ld1_asisdlsep_r2_r2"},
-      {"1110xx0"_b, "ld2_asisdlsep_r2_r"},
-      {"1110xx1"_b, "ld1_asisdlsep_r2_r2"},
-      {"11110x0"_b, "ld2_asisdlsep_r2_r"},
-      {"11110x1"_b, "ld1_asisdlsep_r2_r2"},
-      {"1111100"_b, "ld2_asisdlsep_r2_r"},
-      {"1111101"_b, "ld1_asisdlsep_r2_r2"},
-      {"1111110"_b, "_nszhhy"},
-      {"1111111"_b, "_qxrzgv"},
-    },
-  },
-
-  { "_vjymzn",
-    {23, 22},
-    { {"00"_b, "fcsel_s_floatsel"},
-      {"01"_b, "fcsel_d_floatsel"},
-      {"11"_b, "fcsel_h_floatsel"},
-    },
-  },
-
-  { "_vkhhkk",
-    {30, 23, 22, 11, 10, 4},
-    { {"001000"_b, "ccmn_64_condcmp_reg"},
-      {"001100"_b, "ccmn_64_condcmp_imm"},
-      {"101000"_b, "ccmp_64_condcmp_reg"},
-      {"101100"_b, "ccmp_64_condcmp_imm"},
+  { "_vjtgmx",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldlar_lr64_ldstexcl"},
     },
   },
 
@@ -6332,35 +7207,42 @@
     },
   },
 
-  { "_vkvgnm",
-    {30, 13},
-    { {"10"_b, "_vyygqs"},
-    },
-  },
-
-  { "_vkyngx",
-    {23, 22, 19, 18, 17, 16},
-    { {"0000x1"_b, "dup_asimdins_dv_v"},
-      {"000x10"_b, "dup_asimdins_dv_v"},
-      {"0010xx"_b, "dup_asimdins_dv_v"},
-      {"001110"_b, "dup_asimdins_dv_v"},
-      {"00x10x"_b, "dup_asimdins_dv_v"},
-      {"00x111"_b, "dup_asimdins_dv_v"},
-      {"01xxxx"_b, "fmaxnm_asimdsamefp16_only"},
-      {"11xxxx"_b, "fminnm_asimdsamefp16_only"},
-    },
-  },
-
-  { "_vllqmp",
+  { "_vkrskv",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"000xxxx"_b, "stxp_sp32_ldstexcl"},
-      {"001xxxx"_b, "ldxp_lp32_ldstexcl"},
-      {"0101111"_b, "cas_c32_ldstexcl"},
-      {"0111111"_b, "casa_c32_ldstexcl"},
-      {"100xxxx"_b, "stxp_sp64_ldstexcl"},
-      {"101xxxx"_b, "ldxp_lp64_ldstexcl"},
-      {"1101111"_b, "cas_c64_ldstexcl"},
-      {"1111111"_b, "casa_c64_ldstexcl"},
+    { {"000xx00"_b, "stlur_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapur_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"},
+      {"100xx00"_b, "stlur_64_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapur_64_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfprn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtrn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtrn_cpy_memcms"},
+      {"x001101"_b, "cpyfptrn_cpy_memcms"},
+      {"x010001"_b, "cpyfmrn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtrn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtrn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtrn_cpy_memcms"},
+      {"x100001"_b, "cpyfern_cpy_memcms"},
+      {"x100101"_b, "cpyfewtrn_cpy_memcms"},
+      {"x101001"_b, "cpyfertrn_cpy_memcms"},
+      {"x101101"_b, "cpyfetrn_cpy_memcms"},
+      {"x110001"_b, "sete_set_memcms"},
+      {"x110101"_b, "setet_set_memcms"},
+      {"x111001"_b, "seten_set_memcms"},
+      {"x111101"_b, "setetn_set_memcms"},
+    },
+  },
+
+  { "_vlhkgr",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "uaddlp_asimdmisc_p"},
+      {"00001"_b, "sqxtun_asimdmisc_n"},
+    },
+  },
+
+  { "_vllmnt",
+    {20, 19, 18, 17},
+    { {"0000"_b, "_gmtjvr"},
     },
   },
 
@@ -6385,116 +7267,164 @@
     },
   },
 
-  { "_vlrrtz",
+  { "_vlxrps",
+    {9, 8, 7, 6, 5},
+    { {"00000"_b, "fmov_d_floatimm"},
+    },
+  },
+
+  { "_vmgnhk",
+    {30, 23},
+    { {"00"_b, "add_64_addsub_imm"},
+      {"10"_b, "sub_64_addsub_imm"},
+    },
+  },
+
+  { "_vmsxgq",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx00"_b, "stlur_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapur_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"},
+      {"100xx00"_b, "stlur_64_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapur_64_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfpwn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtwn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtwn_cpy_memcms"},
+      {"x001101"_b, "cpyfptwn_cpy_memcms"},
+      {"x010001"_b, "cpyfmwn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtwn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtwn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtwn_cpy_memcms"},
+      {"x100001"_b, "cpyfewn_cpy_memcms"},
+      {"x100101"_b, "cpyfewtwn_cpy_memcms"},
+      {"x101001"_b, "cpyfertwn_cpy_memcms"},
+      {"x101101"_b, "cpyfetwn_cpy_memcms"},
+      {"x110001"_b, "setm_set_memcms"},
+      {"x110101"_b, "setmt_set_memcms"},
+      {"x111001"_b, "setmn_set_memcms"},
+      {"x111101"_b, "setmtn_set_memcms"},
+    },
+  },
+
+  { "_vmtkqp",
+    {30},
+    { {"0"_b, "stlur_32_ldapstl_unscaled"},
+      {"1"_b, "stlur_64_ldapstl_unscaled"},
+    },
+  },
+
+  { "_vmxzxt",
+    {23, 22, 13, 12, 11, 10},
+    { {"0001x0"_b, "fmulx_asimdelem_rh_h"},
+      {"0x0001"_b, "sqshrun_asimdshf_n"},
+      {"0x0011"_b, "sqrshrun_asimdshf_n"},
+      {"0x0101"_b, "uqshrn_asimdshf_n"},
+      {"0x0111"_b, "uqrshrn_asimdshf_n"},
+      {"0x1001"_b, "ushll_asimdshf_l"},
+      {"1000x0"_b, "fmlal2_asimdelem_lh"},
+      {"1x01x0"_b, "fmulx_asimdelem_r_sd"},
+      {"xx10x0"_b, "umull_asimdelem_l"},
+    },
+  },
+
+  { "_vmyztj",
     {30, 23, 22},
-    { {"001"_b, "bfm_64m_bitfield"},
+    { {"000"_b, "stp_64_ldstpair_off"},
+      {"001"_b, "ldp_64_ldstpair_off"},
+      {"010"_b, "stp_64_ldstpair_pre"},
+      {"011"_b, "ldp_64_ldstpair_pre"},
     },
   },
 
-  { "_vlsmsn",
-    {22, 20, 19, 18, 17, 16},
-    { {"111000"_b, "fcmle_asisdmiscfp16_fz"},
-      {"111001"_b, "frsqrte_asisdmiscfp16_r"},
-      {"x00000"_b, "fcmle_asisdmisc_fz"},
-      {"x00001"_b, "frsqrte_asisdmisc_r"},
+  { "_vnggzq",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx10"_b, "stlur_b_ldapstl_simd"},
+      {"001xx10"_b, "ldapur_b_ldapstl_simd"},
+      {"010xx10"_b, "stlur_q_ldapstl_simd"},
+      {"011xx10"_b, "ldapur_q_ldapstl_simd"},
+      {"100xx10"_b, "stlur_h_ldapstl_simd"},
+      {"101xx10"_b, "ldapur_h_ldapstl_simd"},
+      {"x000001"_b, "cpypn_cpy_memcms"},
+      {"x000101"_b, "cpypwtn_cpy_memcms"},
+      {"x001001"_b, "cpyprtn_cpy_memcms"},
+      {"x001101"_b, "cpyptn_cpy_memcms"},
+      {"x010001"_b, "cpymn_cpy_memcms"},
+      {"x010101"_b, "cpymwtn_cpy_memcms"},
+      {"x011001"_b, "cpymrtn_cpy_memcms"},
+      {"x011101"_b, "cpymtn_cpy_memcms"},
+      {"x100001"_b, "cpyen_cpy_memcms"},
+      {"x100101"_b, "cpyewtn_cpy_memcms"},
+      {"x101001"_b, "cpyertn_cpy_memcms"},
+      {"x101101"_b, "cpyetn_cpy_memcms"},
     },
   },
 
-  { "_vlzrlm",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "mvni_asimdimm_l_sl"},
-      {"00x100"_b, "sri_asimdshf_r"},
-      {"00x110"_b, "sqshlu_asimdshf_r"},
-      {"010x00"_b, "sri_asimdshf_r"},
-      {"010x10"_b, "sqshlu_asimdshf_r"},
-      {"011100"_b, "sri_asimdshf_r"},
-      {"011110"_b, "sqshlu_asimdshf_r"},
-      {"0x1000"_b, "sri_asimdshf_r"},
-      {"0x1010"_b, "sqshlu_asimdshf_r"},
+  { "_vnnjxg",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xxxx"_b, "fnmsub_s_floatdp3"},
+      {"001xxxx"_b, "fnmsub_d_floatdp3"},
+      {"011xxxx"_b, "fnmsub_h_floatdp3"},
+      {"10x1001"_b, "scvtf_asisdshf_c"},
+      {"10x1111"_b, "fcvtzs_asisdshf_c"},
+      {"1xx00x0"_b, "sqdmulh_asisdelem_r"},
+      {"1xx01x0"_b, "sqrdmulh_asisdelem_r"},
     },
   },
 
-  { "_vmjgmg",
+  { "_vnrlrk",
+    {30},
+    { {"0"_b, "orn_64_log_shift"},
+      {"1"_b, "bics_64_log_shift"},
+    },
+  },
+
+  { "_vnrlsj",
+    {18, 17, 12},
+    { {"0x0"_b, "ld4_asisdlsop_dx4_r4d"},
+      {"100"_b, "ld4_asisdlsop_dx4_r4d"},
+      {"110"_b, "ld4_asisdlsop_d4_i4d"},
+    },
+  },
+
+  { "_vnsqhn",
+    {30, 23, 11, 10},
+    { {"0010"_b, "_plytvr"},
+      {"0100"_b, "_zghtll"},
+      {"0110"_b, "_ptkgrz"},
+      {"1000"_b, "_xksqnh"},
+      {"1001"_b, "ldraa_64_ldst_pac"},
+      {"1010"_b, "_hyskth"},
+      {"1011"_b, "ldraa_64w_ldst_pac"},
+      {"1100"_b, "_kpgghm"},
+      {"1101"_b, "ldrab_64_ldst_pac"},
+      {"1110"_b, "_zxjkmj"},
+      {"1111"_b, "ldrab_64w_ldst_pac"},
+    },
+  },
+
+  { "_vnzkty",
+    {30},
+    { {"0"_b, "orr_64_log_shift"},
+      {"1"_b, "ands_64_log_shift"},
+    },
+  },
+
+  { "_vpgxgk",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"1111100"_b, "_rqzpzq"},
+    },
+  },
+
+  { "_vpjktn",
     {30, 23, 22},
-    { {"000"_b, "stxrb_sr32_ldstexcl"},
-      {"001"_b, "ldxrb_lr32_ldstexcl"},
-      {"010"_b, "stllrb_sl32_ldstexcl"},
-      {"011"_b, "ldlarb_lr32_ldstexcl"},
-      {"100"_b, "stxrh_sr32_ldstexcl"},
-      {"101"_b, "ldxrh_lr32_ldstexcl"},
-      {"110"_b, "stllrh_sl32_ldstexcl"},
-      {"111"_b, "ldlarh_lr32_ldstexcl"},
+    { {"000"_b, "madd_64a_dp_3src"},
     },
   },
 
-  { "_vmjtrx",
-    {23, 22, 12},
-    { {"001"_b, "sudot_asimdelem_d"},
-      {"011"_b, "bfdot_asimdelem_e"},
-      {"101"_b, "usdot_asimdelem_d"},
-      {"111"_b, "bfmlal_asimdelem_f"},
-      {"xx0"_b, "sdot_asimdelem_d"},
-    },
-  },
-
-  { "_vmjzyk",
-    {30, 23, 22},
-    { {"000"_b, "stp_32_ldstpair_off"},
-      {"001"_b, "ldp_32_ldstpair_off"},
-      {"010"_b, "stp_32_ldstpair_pre"},
-      {"011"_b, "ldp_32_ldstpair_pre"},
-      {"100"_b, "stgp_64_ldstpair_off"},
-      {"101"_b, "ldpsw_64_ldstpair_off"},
-      {"110"_b, "stgp_64_ldstpair_pre"},
-      {"111"_b, "ldpsw_64_ldstpair_pre"},
-    },
-  },
-
-  { "_vmplgv",
-    {12},
-    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
-    },
-  },
-
-  { "_vmpnlv",
-    {11, 10, 9, 8, 7, 6},
-    { {"000000"_b, "wfit_only_systeminstrswithreg"},
-    },
-  },
-
-  { "_vnpqrh",
-    {30, 23, 22},
-    { {"000"_b, "stp_s_ldstpair_off"},
-      {"001"_b, "ldp_s_ldstpair_off"},
-      {"010"_b, "stp_s_ldstpair_pre"},
-      {"011"_b, "ldp_s_ldstpair_pre"},
-      {"100"_b, "stp_d_ldstpair_off"},
-      {"101"_b, "ldp_d_ldstpair_off"},
-      {"110"_b, "stp_d_ldstpair_pre"},
-      {"111"_b, "ldp_d_ldstpair_pre"},
-    },
-  },
-
-  { "_vnrnmg",
-    {17},
-    { {"0"_b, "st4_asisdlse_r4"},
-    },
-  },
-
-  { "_vpkhvh",
-    {17},
-    { {"0"_b, "st2_asisdlso_h2_2h"},
-    },
-  },
-
-  { "_vpkptr",
-    {30, 23, 22},
-    { {"000"_b, "stnp_32_ldstnapair_offs"},
-      {"001"_b, "ldnp_32_ldstnapair_offs"},
-      {"010"_b, "stp_32_ldstpair_post"},
-      {"011"_b, "ldp_32_ldstpair_post"},
-      {"110"_b, "stgp_64_ldstpair_post"},
-      {"111"_b, "ldpsw_64_ldstpair_post"},
+  { "_vpknjg",
+    {13, 12},
+    { {"00"_b, "sdiv_32_dp_2src"},
+      {"10"_b, "rorv_32_dp_2src"},
     },
   },
 
@@ -6505,177 +7435,176 @@
     },
   },
 
-  { "_vppthj",
-    {30, 23},
-    { {"00"_b, "add_32_addsub_imm"},
-      {"10"_b, "sub_32_addsub_imm"},
+  { "_vpyvjr",
+    {9, 8, 7, 6, 5},
+    { {"11111"_b, "pacizb_64z_dp_1src"},
     },
   },
 
-  { "_vprkpq",
-    {13, 12, 11, 10},
-    { {"0000"_b, "saddwb_z_zz"},
-      {"0001"_b, "saddwt_z_zz"},
-      {"0010"_b, "uaddwb_z_zz"},
-      {"0011"_b, "uaddwt_z_zz"},
-      {"0100"_b, "ssubwb_z_zz"},
-      {"0101"_b, "ssubwt_z_zz"},
-      {"0110"_b, "usubwb_z_zz"},
-      {"0111"_b, "usubwt_z_zz"},
-      {"1000"_b, "sqdmullb_z_zz"},
-      {"1001"_b, "sqdmullt_z_zz"},
-      {"1010"_b, "pmullb_z_zz"},
-      {"1011"_b, "pmullt_z_zz"},
-      {"1100"_b, "smullb_z_zz"},
-      {"1101"_b, "smullt_z_zz"},
-      {"1110"_b, "umullb_z_zz"},
-      {"1111"_b, "umullt_z_zz"},
+  { "_vqrqjt",
+    {30, 23, 22, 11, 10},
+    { {"01000"_b, "csel_32_condsel"},
+      {"01001"_b, "csinc_32_condsel"},
+      {"11000"_b, "csinv_32_condsel"},
+      {"11001"_b, "csneg_32_condsel"},
     },
   },
 
-  { "_vpxvjs",
+  { "_vqzsgg",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "fcvtns_32s_float2int"},
-      {"00001"_b, "fcvtnu_32s_float2int"},
-      {"00010"_b, "scvtf_s32_float2int"},
-      {"00011"_b, "ucvtf_s32_float2int"},
-      {"00100"_b, "fcvtas_32s_float2int"},
-      {"00101"_b, "fcvtau_32s_float2int"},
-      {"00110"_b, "fmov_32s_float2int"},
-      {"00111"_b, "fmov_s32_float2int"},
-      {"01000"_b, "fcvtps_32s_float2int"},
-      {"01001"_b, "fcvtpu_32s_float2int"},
-      {"10000"_b, "fcvtms_32s_float2int"},
-      {"10001"_b, "fcvtmu_32s_float2int"},
-      {"11000"_b, "fcvtzs_32s_float2int"},
-      {"11001"_b, "fcvtzu_32s_float2int"},
+    { {"00010"_b, "scvtf_s32_float2fix"},
+      {"00011"_b, "ucvtf_s32_float2fix"},
+      {"11000"_b, "fcvtzs_32s_float2fix"},
+      {"11001"_b, "fcvtzu_32s_float2fix"},
     },
   },
 
-  { "_vpykkg",
-    {23, 22, 10},
-    { {"000"_b, "ext_asimdext_only"},
-      {"001"_b, "_jnmgrh"},
-      {"011"_b, "_vytgtz"},
-      {"111"_b, "_jrnlzs"},
-    },
-  },
-
-  { "_vqlytp",
+  { "_vrjhtm",
     {12},
-    { {"0"_b, "st3_asisdlsop_dx3_r3d"},
+    { {"0"_b, "sqdmulh_asimdelem_r"},
+      {"1"_b, "sqrdmulh_asimdelem_r"},
     },
   },
 
-  { "_vqqrjl",
-    {23, 22, 20, 19, 13, 11, 10},
-    { {"0001001"_b, "shl_asisdshf_r"},
-      {"0001101"_b, "sqshl_asisdshf_r"},
-      {"001x001"_b, "shl_asisdshf_r"},
-      {"001x101"_b, "sqshl_asisdshf_r"},
-      {"00xx0x0"_b, "fmls_asisdelem_rh_h"},
-      {"01xx001"_b, "shl_asisdshf_r"},
-      {"01xx101"_b, "sqshl_asisdshf_r"},
-      {"1xxx0x0"_b, "fmls_asisdelem_r_sd"},
-      {"xxxx1x0"_b, "sqdmlsl_asisdelem_l"},
+  { "_vrsgzg",
+    {30, 23, 22, 20, 19, 18},
+    { {"00xxxx"_b, "add_64_addsub_imm"},
+      {"010xxx"_b, "addg_64_addsub_immtags"},
+      {"011000"_b, "smax_64_minmax_imm"},
+      {"011001"_b, "umax_64u_minmax_imm"},
+      {"011010"_b, "smin_64_minmax_imm"},
+      {"011011"_b, "umin_64u_minmax_imm"},
+      {"10xxxx"_b, "sub_64_addsub_imm"},
+      {"110xxx"_b, "subg_64_addsub_immtags"},
     },
   },
 
-  { "_vqvqhp",
-    {30, 23, 22},
-    { {"000"_b, "str_32_ldst_pos"},
-      {"001"_b, "ldr_32_ldst_pos"},
-      {"010"_b, "ldrsw_64_ldst_pos"},
-      {"100"_b, "str_64_ldst_pos"},
-      {"101"_b, "ldr_64_ldst_pos"},
-      {"110"_b, "prfm_p_ldst_pos"},
+  { "_vrsjnp",
+    {13, 12, 11, 10},
+    { {"1111"_b, "cas_c32_ldstexcl"},
     },
   },
 
-  { "_vqzlzt",
-    {30, 23},
-    { {"00"_b, "and_64_log_imm"},
-      {"01"_b, "movn_64_movewide"},
-      {"10"_b, "eor_64_log_imm"},
-      {"11"_b, "movz_64_movewide"},
+  { "_vrxhss",
+    {20, 19, 18, 17, 16},
+    { {"00001"_b, "uqxtn_asisdmisc_n"},
     },
   },
 
-  { "_vsqlkr",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "frintx_asimdmiscfp16_r"},
-      {"0x00001"_b, "frintx_asimdmisc_r"},
-      {"1111001"_b, "frinti_asimdmiscfp16_r"},
-      {"1x00001"_b, "frinti_asimdmisc_r"},
-      {"xx00000"_b, "cmle_asimdmisc_z"},
+  { "_vryrnh",
+    {30, 22, 11},
+    { {"001"_b, "_zsgpsn"},
+      {"010"_b, "ccmn_32_condcmp_reg"},
+      {"011"_b, "ccmn_32_condcmp_imm"},
+      {"110"_b, "ccmp_32_condcmp_reg"},
+      {"111"_b, "ccmp_32_condcmp_imm"},
     },
   },
 
-  { "_vsqpzr",
+  { "_vrzksz",
+    {20, 19, 18, 17, 16, 13, 12},
+    { {"1111100"_b, "ldaprh_32l_memop"},
+    },
+  },
+
+  { "_vshynq",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "sturb_32_ldst_unscaled"},
+      {"00001"_b, "strb_32_ldst_immpost"},
+      {"00010"_b, "sttrb_32_ldst_unpriv"},
+      {"00011"_b, "strb_32_ldst_immpre"},
+      {"00100"_b, "ldurb_32_ldst_unscaled"},
+      {"00101"_b, "ldrb_32_ldst_immpost"},
+      {"00110"_b, "ldtrb_32_ldst_unpriv"},
+      {"00111"_b, "ldrb_32_ldst_immpre"},
+      {"01000"_b, "ldursb_64_ldst_unscaled"},
+      {"01001"_b, "ldrsb_64_ldst_immpost"},
+      {"01010"_b, "ldtrsb_64_ldst_unpriv"},
+      {"01011"_b, "ldrsb_64_ldst_immpre"},
+      {"01100"_b, "ldursb_32_ldst_unscaled"},
+      {"01101"_b, "ldrsb_32_ldst_immpost"},
+      {"01110"_b, "ldtrsb_32_ldst_unpriv"},
+      {"01111"_b, "ldrsb_32_ldst_immpre"},
+      {"10000"_b, "sturh_32_ldst_unscaled"},
+      {"10001"_b, "strh_32_ldst_immpost"},
+      {"10010"_b, "sttrh_32_ldst_unpriv"},
+      {"10011"_b, "strh_32_ldst_immpre"},
+      {"10100"_b, "ldurh_32_ldst_unscaled"},
+      {"10101"_b, "ldrh_32_ldst_immpost"},
+      {"10110"_b, "ldtrh_32_ldst_unpriv"},
+      {"10111"_b, "ldrh_32_ldst_immpre"},
+      {"11000"_b, "ldursh_64_ldst_unscaled"},
+      {"11001"_b, "ldrsh_64_ldst_immpost"},
+      {"11010"_b, "ldtrsh_64_ldst_unpriv"},
+      {"11011"_b, "ldrsh_64_ldst_immpre"},
+      {"11100"_b, "ldursh_32_ldst_unscaled"},
+      {"11101"_b, "ldrsh_32_ldst_immpost"},
+      {"11110"_b, "ldtrsh_32_ldst_unpriv"},
+      {"11111"_b, "ldrsh_32_ldst_immpre"},
+    },
+  },
+
+  { "_vsnnms",
+    {30, 13, 12, 11, 10},
+    { {"00000"_b, "_xzntxr"},
+    },
+  },
+
+  { "_vsslrs",
+    {8},
+    { {"0"_b, "tstart_br_systemresult"},
+      {"1"_b, "ttest_br_systemresult"},
+    },
+  },
+
+  { "_vsyjql",
+    {4},
+    { {"0"_b, "ccmn_32_condcmp_imm"},
+    },
+  },
+
+  { "_vtgnnl",
+    {30},
+    { {"0"_b, "_qgsrqq"},
+      {"1"_b, "_mgjhts"},
+    },
+  },
+
+  { "_vtllgt",
+    {10},
+    { {"0"_b, "_nhnhzp"},
+    },
+  },
+
+  { "_vtyqhh",
+    {30},
+    { {"0"_b, "and_32_log_shift"},
+      {"1"_b, "eor_32_log_shift"},
+    },
+  },
+
+  { "_vvgnhm",
     {23},
-    { {"0"_b, "faddp_asimdsame_only"},
-      {"1"_b, "fabd_asimdsame_only"},
+    { {"0"_b, "fmulx_asimdsame_only"},
     },
   },
 
-  { "_vsvrgt",
-    {17},
-    { {"0"_b, "fadda_v_p_z"},
-    },
-  },
-
-  { "_vsvtqz",
-    {30, 23, 22},
-    { {"00x"_b, "add_64_addsub_imm"},
-      {"010"_b, "addg_64_addsub_immtags"},
-      {"10x"_b, "sub_64_addsub_imm"},
-      {"110"_b, "subg_64_addsub_immtags"},
-    },
-  },
-
-  { "_vtxyxz",
-    {23, 22, 13, 12, 11, 10},
-    { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
-      {"0x0001"_b, "ushr_asimdshf_r"},
-      {"0x0101"_b, "usra_asimdshf_r"},
-      {"0x1001"_b, "urshr_asimdshf_r"},
-      {"0x1101"_b, "ursra_asimdshf_r"},
-      {"10x1x0"_b, "fcmla_asimdelem_c_s"},
-      {"xx00x0"_b, "mla_asimdelem_r"},
-      {"xx10x0"_b, "umlal_asimdelem_l"},
-    },
-  },
-
-  { "_vvhzhv",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0000000"_b, "swpb_32_memop"},
-      {"000xx10"_b, "strb_32b_ldst_regoff"},
-      {"0010000"_b, "swplb_32_memop"},
-      {"001xx10"_b, "ldrb_32b_ldst_regoff"},
-      {"0100000"_b, "swpab_32_memop"},
-      {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
-      {"0110000"_b, "swpalb_32_memop"},
-      {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
-      {"1000000"_b, "swph_32_memop"},
-      {"100xx10"_b, "strh_32_ldst_regoff"},
-      {"1010000"_b, "swplh_32_memop"},
-      {"101xx10"_b, "ldrh_32_ldst_regoff"},
-      {"1100000"_b, "swpah_32_memop"},
-      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
-      {"1110000"_b, "swpalh_32_memop"},
-      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
-    },
-  },
-
-  { "_vvprhx",
-    {0},
-    { {"0"_b, "blr_64_branch_reg"},
-    },
-  },
-
-  { "_vvrmvg",
-    {12},
-    { {"1"_b, "_typysz"},
+  { "_vvgpzq",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "fcvtns_32h_float2int"},
+      {"00001"_b, "fcvtnu_32h_float2int"},
+      {"00010"_b, "scvtf_h32_float2int"},
+      {"00011"_b, "ucvtf_h32_float2int"},
+      {"00100"_b, "fcvtas_32h_float2int"},
+      {"00101"_b, "fcvtau_32h_float2int"},
+      {"00110"_b, "fmov_32h_float2int"},
+      {"00111"_b, "fmov_h32_float2int"},
+      {"01000"_b, "fcvtps_32h_float2int"},
+      {"01001"_b, "fcvtpu_32h_float2int"},
+      {"10000"_b, "fcvtms_32h_float2int"},
+      {"10001"_b, "fcvtmu_32h_float2int"},
+      {"11000"_b, "fcvtzs_32h_float2int"},
+      {"11001"_b, "fcvtzu_32h_float2int"},
     },
   },
 
@@ -6695,9 +7624,25 @@
     },
   },
 
-  { "_vxgzqy",
-    {22},
-    { {"0"_b, "ldrsw_64_ldst_regoff"},
+  { "_vvyjmh",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "ssra_asisdshf_r"},
+      {"001x0"_b, "ssra_asisdshf_r"},
+      {"01xx0"_b, "ssra_asisdshf_r"},
+    },
+  },
+
+  { "_vvzsmg",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "bic_asimdimm_l_sl"},
+      {"00x100"_b, "usra_asimdshf_r"},
+      {"00x110"_b, "ursra_asimdshf_r"},
+      {"010x00"_b, "usra_asimdshf_r"},
+      {"010x10"_b, "ursra_asimdshf_r"},
+      {"011100"_b, "usra_asimdshf_r"},
+      {"011110"_b, "ursra_asimdshf_r"},
+      {"0x1000"_b, "usra_asimdshf_r"},
+      {"0x1010"_b, "ursra_asimdshf_r"},
     },
   },
 
@@ -6714,85 +7659,87 @@
     },
   },
 
-  { "_vxsjgg",
-    {30, 22, 11},
-    { {"001"_b, "_pxnnrz"},
-      {"010"_b, "ccmn_32_condcmp_reg"},
-      {"011"_b, "ccmn_32_condcmp_imm"},
-      {"110"_b, "ccmp_32_condcmp_reg"},
-      {"111"_b, "ccmp_32_condcmp_imm"},
+  { "_vxhjgg",
+    {20, 18, 17, 16},
+    { {"0000"_b, "_shgxyq"},
     },
   },
 
-  { "_vxsvhs",
-    {13, 12},
-    { {"00"_b, "adcs_64_addsub_carry"},
+  { "_vxlmxz",
+    {4, 3, 2, 1, 0},
+    { {"11111"_b, "_hpmvzr"},
     },
   },
 
-  { "_vxylhh",
-    {23, 22},
-    { {"01"_b, "fcmla_asimdelem_c_h"},
-      {"10"_b, "fcmla_asimdelem_c_s"},
+  { "_vxqtkl",
+    {18, 17},
+    { {"00"_b, "_zqmvqs"},
     },
   },
 
-  { "_vylhvl",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fabs_h_floatdp1"},
-      {"000010"_b, "fsqrt_h_floatdp1"},
-      {"000100"_b, "fcvt_dh_floatdp1"},
-      {"001000"_b, "frintp_h_floatdp1"},
-      {"001010"_b, "frintz_h_floatdp1"},
-      {"001110"_b, "frinti_h_floatdp1"},
+  { "_vxrnyh",
+    {18, 17},
+    { {"0x"_b, "st1_asisdlsep_r1_r1"},
+      {"10"_b, "st1_asisdlsep_r1_r1"},
+      {"11"_b, "st1_asisdlsep_i1_i1"},
     },
   },
 
-  { "_vytgtz",
-    {13, 12, 11},
-    { {"000"_b, "fmaxnmp_asimdsamefp16_only"},
-      {"010"_b, "faddp_asimdsamefp16_only"},
-      {"011"_b, "fmul_asimdsamefp16_only"},
-      {"100"_b, "fcmge_asimdsamefp16_only"},
-      {"101"_b, "facge_asimdsamefp16_only"},
-      {"110"_b, "fmaxp_asimdsamefp16_only"},
-      {"111"_b, "fdiv_asimdsamefp16_only"},
+  { "_vxvyyg",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"},
+      {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"},
+      {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"},
+      {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"},
+      {"100xx00"_b, "stlurh_32_ldapstl_unscaled"},
+      {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"},
+      {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"},
+      {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"},
+      {"x000001"_b, "cpyfpn_cpy_memcms"},
+      {"x000101"_b, "cpyfpwtn_cpy_memcms"},
+      {"x001001"_b, "cpyfprtn_cpy_memcms"},
+      {"x001101"_b, "cpyfptn_cpy_memcms"},
+      {"x010001"_b, "cpyfmn_cpy_memcms"},
+      {"x010101"_b, "cpyfmwtn_cpy_memcms"},
+      {"x011001"_b, "cpyfmrtn_cpy_memcms"},
+      {"x011101"_b, "cpyfmtn_cpy_memcms"},
+      {"x100001"_b, "cpyfen_cpy_memcms"},
+      {"x100101"_b, "cpyfewtn_cpy_memcms"},
+      {"x101001"_b, "cpyfertn_cpy_memcms"},
+      {"x101101"_b, "cpyfetn_cpy_memcms"},
     },
   },
 
-  { "_vytxll",
-    {18, 17, 12},
-    { {"000"_b, "st2_asisdlso_d2_2d"},
+  { "_vyjsst",
+    {30, 4},
+    { {"0x"_b, "b_only_branch_imm"},
+      {"10"_b, "b_only_condbranch"},
+      {"11"_b, "bc_only_condbranch"},
     },
   },
 
-  { "_vyygqs",
-    {23, 22, 20, 19, 12, 11, 10},
-    { {"00x1001"_b, "sqshrun_asisdshf_n"},
-      {"00x1011"_b, "sqrshrun_asisdshf_n"},
-      {"00x1101"_b, "uqshrn_asisdshf_n"},
-      {"00x1111"_b, "uqrshrn_asisdshf_n"},
-      {"00xx1x0"_b, "fmulx_asisdelem_rh_h"},
-      {"010x001"_b, "sqshrun_asisdshf_n"},
-      {"010x011"_b, "sqrshrun_asisdshf_n"},
-      {"010x101"_b, "uqshrn_asisdshf_n"},
-      {"010x111"_b, "uqrshrn_asisdshf_n"},
-      {"0111001"_b, "sqshrun_asisdshf_n"},
-      {"0111011"_b, "sqrshrun_asisdshf_n"},
-      {"0111101"_b, "uqshrn_asisdshf_n"},
-      {"0111111"_b, "uqrshrn_asisdshf_n"},
-      {"0x10001"_b, "sqshrun_asisdshf_n"},
-      {"0x10011"_b, "sqrshrun_asisdshf_n"},
-      {"0x10101"_b, "uqshrn_asisdshf_n"},
-      {"0x10111"_b, "uqrshrn_asisdshf_n"},
-      {"1xxx1x0"_b, "fmulx_asisdelem_r_sd"},
+  { "_vypgrt",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "rev16_asimdmisc_r"},
     },
   },
 
-  { "_vyztqx",
-    {8},
-    { {"0"_b, "tstart_br_systemresult"},
-      {"1"_b, "ttest_br_systemresult"},
+  { "_vypnss",
+    {30},
+    { {"0"_b, "orn_32_log_shift"},
+      {"1"_b, "bics_32_log_shift"},
+    },
+  },
+
+  { "_vyqxyz",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtau_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtau_asimdmisc_r"},
+      {"0x10000"_b, "fmaxnmv_asimdall_only_sd"},
+      {"1111000"_b, "fcmge_asimdmiscfp16_fz"},
+      {"1x00000"_b, "fcmge_asimdmisc_fz"},
+      {"1x00001"_b, "ursqrte_asimdmisc_r"},
+      {"1x10000"_b, "fminnmv_asimdall_only_sd"},
     },
   },
 
@@ -6804,52 +7751,67 @@
     },
   },
 
-  { "_vzzvlr",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_tlzlrj"},
-      {"0000001"_b, "_yhxvhy"},
-      {"0100000"_b, "_hqhzgj"},
-      {"0100001"_b, "_kzrklp"},
-      {"100xxx0"_b, "st2_asisdlsop_bx2_r2b"},
-      {"100xxx1"_b, "st4_asisdlsop_bx4_r4b"},
-      {"1010xx0"_b, "st2_asisdlsop_bx2_r2b"},
-      {"1010xx1"_b, "st4_asisdlsop_bx4_r4b"},
-      {"10110x0"_b, "st2_asisdlsop_bx2_r2b"},
-      {"10110x1"_b, "st4_asisdlsop_bx4_r4b"},
-      {"1011100"_b, "st2_asisdlsop_bx2_r2b"},
-      {"1011101"_b, "st4_asisdlsop_bx4_r4b"},
-      {"1011110"_b, "_mykjss"},
-      {"1011111"_b, "_xkkggt"},
-      {"110xxx0"_b, "ld2_asisdlsop_bx2_r2b"},
-      {"110xxx1"_b, "ld4_asisdlsop_bx4_r4b"},
-      {"1110xx0"_b, "ld2_asisdlsop_bx2_r2b"},
-      {"1110xx1"_b, "ld4_asisdlsop_bx4_r4b"},
-      {"11110x0"_b, "ld2_asisdlsop_bx2_r2b"},
-      {"11110x1"_b, "ld4_asisdlsop_bx4_r4b"},
-      {"1111100"_b, "ld2_asisdlsop_bx2_r2b"},
-      {"1111101"_b, "ld4_asisdlsop_bx4_r4b"},
-      {"1111110"_b, "_gvstrp"},
-      {"1111111"_b, "_qtgvhn"},
+  { "_vzvstm",
+    {23, 22, 20, 19, 12, 11},
+    { {"000000"_b, "movi_asimdimm_n_b"},
+      {"000010"_b, "fmov_asimdimm_s_s"},
+      {"000011"_b, "fmov_asimdimm_h_h"},
+      {"00x100"_b, "scvtf_asimdshf_c"},
+      {"00x111"_b, "fcvtzs_asimdshf_c"},
+      {"010x00"_b, "scvtf_asimdshf_c"},
+      {"010x11"_b, "fcvtzs_asimdshf_c"},
+      {"011100"_b, "scvtf_asimdshf_c"},
+      {"011111"_b, "fcvtzs_asimdshf_c"},
+      {"0x1000"_b, "scvtf_asimdshf_c"},
+      {"0x1011"_b, "fcvtzs_asimdshf_c"},
     },
   },
 
-  { "_xgvgmk",
-    {23, 22, 4},
-    { {"000"_b, "fccmp_s_floatccmp"},
-      {"001"_b, "fccmpe_s_floatccmp"},
-      {"010"_b, "fccmp_d_floatccmp"},
-      {"011"_b, "fccmpe_d_floatccmp"},
-      {"110"_b, "fccmp_h_floatccmp"},
-      {"111"_b, "fccmpe_h_floatccmp"},
+  { "_vzyklr",
+    {13, 12},
+    { {"00"_b, "setp_set_memcms"},
+      {"01"_b, "setpt_set_memcms"},
+      {"10"_b, "setpn_set_memcms"},
+      {"11"_b, "setptn_set_memcms"},
     },
   },
 
-  { "_xhkgqh",
-    {30, 23, 22},
-    { {"000"_b, "stp_64_ldstpair_off"},
-      {"001"_b, "ldp_64_ldstpair_off"},
-      {"010"_b, "stp_64_ldstpair_pre"},
-      {"011"_b, "ldp_64_ldstpair_pre"},
+  { "_vzzqhx",
+    {12, 10},
+    { {"00"_b, "_phrqqx"},
+      {"01"_b, "_snnlgr"},
+      {"10"_b, "_phsrlk"},
+      {"11"_b, "_nrmlqv"},
+    },
+  },
+
+  { "_xghrjn",
+    {20, 19, 18, 17, 16},
+    { {"00010"_b, "scvtf_h32_float2fix"},
+      {"00011"_b, "ucvtf_h32_float2fix"},
+      {"11000"_b, "fcvtzs_32h_float2fix"},
+      {"11001"_b, "fcvtzu_32h_float2fix"},
+    },
+  },
+
+  { "_xgqhjv",
+    {13, 12},
+    { {"10"_b, "smax_64_dp_2src"},
+    },
+  },
+
+  { "_xgxtlr",
+    {23},
+    { {"0"_b, "fdiv_asimdsame_only"},
+    },
+  },
+
+  { "_xhhqnx",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"1101001"_b, "ummla_asimdsame2_g"},
+      {"xxx0001"_b, "sqrdmlah_asimdsame2_only"},
+      {"xxx0011"_b, "sqrdmlsh_asimdsame2_only"},
+      {"xxx0101"_b, "udot_asimdsame2_d"},
     },
   },
 
@@ -6867,15 +7829,6 @@
     },
   },
 
-  { "_xhltxn",
-    {12, 10},
-    { {"00"_b, "_jqtltz"},
-      {"01"_b, "_rkvyqk"},
-      {"10"_b, "_zpnsrv"},
-      {"11"_b, "_lhvtrp"},
-    },
-  },
-
   { "_xhmpmy",
     {4},
     { {"0"_b, "and_p_p_pp_z"},
@@ -6883,77 +7836,91 @@
     },
   },
 
-  { "_xhvtjg",
-    {11},
-    { {"0"_b, "_mpyklp"},
+  { "_xjtzgm",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "stur_b_ldst_unscaled"},
+      {"00001"_b, "str_b_ldst_immpost"},
+      {"00011"_b, "str_b_ldst_immpre"},
+      {"00100"_b, "ldur_b_ldst_unscaled"},
+      {"00101"_b, "ldr_b_ldst_immpost"},
+      {"00111"_b, "ldr_b_ldst_immpre"},
+      {"01000"_b, "stur_q_ldst_unscaled"},
+      {"01001"_b, "str_q_ldst_immpost"},
+      {"01011"_b, "str_q_ldst_immpre"},
+      {"01100"_b, "ldur_q_ldst_unscaled"},
+      {"01101"_b, "ldr_q_ldst_immpost"},
+      {"01111"_b, "ldr_q_ldst_immpre"},
+      {"10000"_b, "stur_h_ldst_unscaled"},
+      {"10001"_b, "str_h_ldst_immpost"},
+      {"10011"_b, "str_h_ldst_immpre"},
+      {"10100"_b, "ldur_h_ldst_unscaled"},
+      {"10101"_b, "ldr_h_ldst_immpost"},
+      {"10111"_b, "ldr_h_ldst_immpre"},
     },
   },
 
-  { "_xhxrnt",
-    {30},
-    { {"0"_b, "_zxhhny"},
-      {"1"_b, "_lhpgsn"},
+  { "_xksqnh",
+    {22, 20, 19, 18, 17, 16, 13, 12},
+    { {"01111101"_b, "ld64b_64l_memop"},
     },
   },
 
-  { "_xjghst",
-    {13, 12, 11, 10},
-    { {"0000"_b, "_kvmrng"},
-      {"0001"_b, "_vkyngx"},
-      {"0011"_b, "_lxqynh"},
-      {"0100"_b, "_kjngjl"},
-      {"0101"_b, "_xmqgmz"},
-      {"0110"_b, "uzp1_asimdperm_only"},
-      {"0111"_b, "_shzysp"},
-      {"1000"_b, "_strkph"},
-      {"1001"_b, "_jpvljz"},
-      {"1010"_b, "trn1_asimdperm_only"},
-      {"1011"_b, "_jryylt"},
-      {"1100"_b, "_grxzzg"},
-      {"1101"_b, "_lnnyzt"},
-      {"1110"_b, "zip1_asimdperm_only"},
-      {"1111"_b, "_szttjy"},
+  { "_xkylhh",
+    {22, 13, 12},
+    { {"000"_b, "swpa_32_memop"},
+      {"100"_b, "swpal_32_memop"},
     },
   },
 
-  { "_xjxppp",
-    {1, 0},
-    { {"11"_b, "brabz_64_branch_reg"},
+  { "_xkznrh",
+    {18, 17},
+    { {"00"_b, "st3_asisdlse_r3"},
     },
   },
 
-  { "_xkkggt",
-    {17},
-    { {"0"_b, "st4_asisdlsop_bx4_r4b"},
-      {"1"_b, "st4_asisdlsop_b4_i4b"},
+  { "_xlgxhn",
+    {23, 22, 4},
+    { {"000"_b, "fccmp_s_floatccmp"},
+      {"001"_b, "fccmpe_s_floatccmp"},
+      {"010"_b, "fccmp_d_floatccmp"},
+      {"011"_b, "fccmpe_d_floatccmp"},
+      {"110"_b, "fccmp_h_floatccmp"},
+      {"111"_b, "fccmpe_h_floatccmp"},
     },
   },
 
-  { "_xlhjhx",
-    {30},
-    { {"0"_b, "bl_only_branch_imm"},
-      {"1"_b, "_zhrtts"},
+  { "_xlqmhl",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldar_lr64_ldstexcl"},
     },
   },
 
-  { "_xmqgmz",
-    {23, 22},
-    { {"01"_b, "fadd_asimdsamefp16_only"},
-      {"11"_b, "fsub_asimdsamefp16_only"},
+  { "_xlyjsz",
+    {23, 22, 13},
+    { {"100"_b, "fmlal2_asimdelem_lh"},
+      {"xx1"_b, "umull_asimdelem_l"},
     },
   },
 
-  { "_xmqvpl",
+  { "_xlyppq",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0010000"_b, "fmaxv_asimdall_only_h"},
+      {"0x00001"_b, "frint64z_asimdmisc_r"},
+      {"1010000"_b, "fminv_asimdall_only_h"},
+      {"1111000"_b, "fabs_asimdmiscfp16_r"},
+      {"1x00000"_b, "fabs_asimdmisc_r"},
+    },
+  },
+
+  { "_xmkysx",
     {12},
-    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
+    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
     },
   },
 
-  { "_xmtlmj",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "srshr_asisdshf_r"},
-      {"001x0"_b, "srshr_asisdshf_r"},
-      {"01xx0"_b, "srshr_asisdshf_r"},
+  { "_xmxhhg",
+    {13, 12, 4},
+    { {"000"_b, "rmif_only_rmif"},
     },
   },
 
@@ -6964,24 +7931,34 @@
     },
   },
 
-  { "_xnsrny",
-    {30, 23, 22},
-    { {"000"_b, "madd_64a_dp_3src"},
-      {"001"_b, "smulh_64_dp_3src"},
-      {"011"_b, "umulh_64_dp_3src"},
+  { "_xnhkpk",
+    {23, 22},
+    { {"00"_b, "fcsel_s_floatsel"},
+      {"01"_b, "fcsel_d_floatsel"},
+      {"11"_b, "fcsel_h_floatsel"},
     },
   },
 
-  { "_xpkkpn",
-    {17},
-    { {"1"_b, "frsqrte_z_z"},
+  { "_xnpyvy",
+    {13, 10},
+    { {"00"_b, "_sylkvm"},
+      {"01"_b, "_nvnjyp"},
+      {"10"_b, "_ltrntg"},
+      {"11"_b, "_qrtjvn"},
     },
   },
 
-  { "_xpmvjv",
-    {13, 12},
-    { {"00"_b, "sqshl_asisdsame_only"},
-      {"01"_b, "sqrshl_asisdsame_only"},
+  { "_xnrrsy",
+    {18},
+    { {"0"_b, "st1_asisdlsep_r4_r4"},
+      {"1"_b, "st1_asisdlsep_i4_i4"},
+    },
+  },
+
+  { "_xnrxym",
+    {18},
+    { {"0"_b, "ld2_asisdlsep_r2_r"},
+      {"1"_b, "ld2_asisdlsep_i2_i"},
     },
   },
 
@@ -6992,108 +7969,109 @@
     },
   },
 
-  { "_xprlgy",
-    {30, 23, 22, 11, 10},
-    { {"00010"_b, "str_s_ldst_regoff"},
-      {"00110"_b, "ldr_s_ldst_regoff"},
-      {"10010"_b, "str_d_ldst_regoff"},
-      {"10110"_b, "ldr_d_ldst_regoff"},
+  { "_xprqgs",
+    {23, 20, 19, 18, 17, 16},
+    { {"000001"_b, "fcvtxn_asisdmisc_n"},
     },
   },
 
-  { "_xpvpqq",
-    {23, 22, 11, 10, 4, 3, 2},
-    { {"0000000"_b, "_hngpxg"},
-      {"0010111"_b, "_gnytkh"},
-      {"0011111"_b, "_xjxppp"},
-      {"0100000"_b, "_nnhprs"},
-      {"0110111"_b, "_hmtxlh"},
-      {"0111111"_b, "_qtxypt"},
-      {"1000000"_b, "_rmltms"},
-      {"1010111"_b, "_qqpkkm"},
-      {"1011111"_b, "_klnhpj"},
-    },
-  },
-
-  { "_xqgxjp",
-    {18, 17, 16, 13, 12, 11, 10, 9, 7, 6, 5},
-    { {"01111000011"_b, "_vyztqx"},
-    },
-  },
-
-  { "_xqhgkk",
-    {30},
-    { {"0"_b, "b_only_branch_imm"},
-    },
-  },
-
-  { "_xqjrgk",
-    {12},
-    { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
-    },
-  },
-
-  { "_xrhhjz",
-    {11},
-    { {"0"_b, "_hzxjsp"},
-    },
-  },
-
-  { "_xrhmtg",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "stur_s_ldst_unscaled"},
-      {"00001"_b, "str_s_ldst_immpost"},
-      {"00011"_b, "str_s_ldst_immpre"},
-      {"00100"_b, "ldur_s_ldst_unscaled"},
-      {"00101"_b, "ldr_s_ldst_immpost"},
-      {"00111"_b, "ldr_s_ldst_immpre"},
-      {"10000"_b, "stur_d_ldst_unscaled"},
-      {"10001"_b, "str_d_ldst_immpost"},
-      {"10011"_b, "str_d_ldst_immpre"},
-      {"10100"_b, "ldur_d_ldst_unscaled"},
-      {"10101"_b, "ldr_d_ldst_immpost"},
-      {"10111"_b, "ldr_d_ldst_immpre"},
-    },
-  },
-
-  { "_xrpmzt",
-    {17},
-    { {"0"_b, "st4_asisdlsop_hx4_r4h"},
-      {"1"_b, "st4_asisdlsop_h4_i4h"},
-    },
-  },
-
-  { "_xrxvpr",
+  { "_xptsns",
     {23, 22},
-    { {"00"_b, "_spmkmm"},
+    { {"00"_b, "tbx_asimdtbl_l1_1"},
     },
   },
 
-  { "_xryzqs",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0001111"_b, "caspl_cp32_ldstexcl"},
-      {"0011111"_b, "caspal_cp32_ldstexcl"},
-      {"0101111"_b, "caslb_c32_ldstexcl"},
-      {"0111111"_b, "casalb_c32_ldstexcl"},
-      {"1001111"_b, "caspl_cp64_ldstexcl"},
-      {"1011111"_b, "caspal_cp64_ldstexcl"},
-      {"1101111"_b, "caslh_c32_ldstexcl"},
-      {"1111111"_b, "casalh_c32_ldstexcl"},
+  { "_xqhxql",
+    {12},
+    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
     },
   },
 
-  { "_xsgxyy",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "autizb_64z_dp_1src"},
+  { "_xqrgjj",
+    {4},
+    { {"0"_b, "ccmp_64_condcmp_imm"},
     },
   },
 
-  { "_xstkrn",
-    {20, 19},
-    { {"00"_b, "_hrllsn"},
-      {"01"_b, "_kqvljp"},
-      {"10"_b, "_lxhlkx"},
-      {"11"_b, "_rjysnh"},
+  { "_xqvzvl",
+    {18, 17},
+    { {"0x"_b, "st1_asisdlsep_r3_r3"},
+      {"10"_b, "st1_asisdlsep_r3_r3"},
+      {"11"_b, "st1_asisdlsep_i3_i3"},
+    },
+  },
+
+  { "_xrkzpn",
+    {12},
+    { {"0"_b, "_zjqssg"},
+    },
+  },
+
+  { "_xrnqyn",
+    {30},
+    { {"0"_b, "stlr_32s_ldapstl_writeback"},
+      {"1"_b, "stlr_64s_ldapstl_writeback"},
+    },
+  },
+
+  { "_xrskrk",
+    {22, 12},
+    { {"10"_b, "_kyhhqt"},
+    },
+  },
+
+  { "_xrzqtn",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_gyllxt"},
+    },
+  },
+
+  { "_xsgnlv",
+    {30, 23, 13, 12, 11, 10},
+    { {"100001"_b, "ushr_asisdshf_r"},
+      {"100101"_b, "usra_asisdshf_r"},
+      {"101001"_b, "urshr_asisdshf_r"},
+      {"101101"_b, "ursra_asisdshf_r"},
+    },
+  },
+
+  { "_xspjzn",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casl_c64_ldstexcl"},
+    },
+  },
+
+  { "_xsvpzx",
+    {18, 17, 12},
+    { {"000"_b, "ld4_asisdlso_d4_4d"},
+    },
+  },
+
+  { "_xszmjn",
+    {30, 13, 12},
+    { {"000"_b, "ldiapp_32le_ldiappstilp"},
+      {"001"_b, "ldiapp_32l_ldiappstilp"},
+      {"100"_b, "ldiapp_64ls_ldiappstilp"},
+      {"101"_b, "ldiapp_64l_ldiappstilp"},
+    },
+  },
+
+  { "_xszqrg",
+    {30, 23, 22},
+    { {"000"_b, "_glpxty"},
+      {"001"_b, "_rkpylh"},
+      {"011"_b, "_xghrjn"},
+      {"100"_b, "_nklqly"},
+    },
+  },
+
+  { "_xtgmvr",
+    {23, 11, 10, 4, 3, 2, 0},
+    { {"0000000"_b, "_mzkxzm"},
+      {"0101111"_b, "_qgvrqy"},
+      {"0111111"_b, "_lljxgp"},
+      {"1000000"_b, "_tjlthk"},
     },
   },
 
@@ -7103,14 +8081,6 @@
     },
   },
 
-  { "_xtqmyj",
-    {30, 23, 22},
-    { {"000"_b, "orr_32_log_imm"},
-      {"100"_b, "ands_32s_log_imm"},
-      {"110"_b, "movk_32_movewide"},
-    },
-  },
-
   { "_xtxyxj",
     {4},
     { {"0"_b, "orr_p_p_pp_z"},
@@ -7118,20 +8088,16 @@
     },
   },
 
-  { "_xtzlzy",
-    {12, 11, 10},
-    { {"000"_b, "fadd_z_zz"},
-      {"001"_b, "fsub_z_zz"},
-      {"010"_b, "fmul_z_zz"},
-      {"011"_b, "ftsmul_z_zz"},
-      {"110"_b, "frecps_z_zz"},
-      {"111"_b, "frsqrts_z_zz"},
+  { "_xtzykp",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldlarh_lr32_ldstexcl"},
     },
   },
 
-  { "_xvlnmy",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "autdza_64z_dp_1src"},
+  { "_xvmxrg",
+    {13},
+    { {"0"_b, "mla_asimdelem_r"},
+      {"1"_b, "umlal_asimdelem_l"},
     },
   },
 
@@ -7170,6 +8136,12 @@
     },
   },
 
+  { "_xvrvhv",
+    {4},
+    { {"0"_b, "ccmp_32_condcmp_reg"},
+    },
+  },
+
   { "_xxjrsy",
     {23, 22, 9},
     { {"000"_b, "rdffr_p_p_f"},
@@ -7177,55 +8149,36 @@
     },
   },
 
-  { "_xxkvsy",
-    {30, 22, 11, 10},
-    { {"0000"_b, "csel_64_condsel"},
-      {"0001"_b, "csinc_64_condsel"},
-      {"0111"_b, "_tnxlnl"},
-      {"1000"_b, "csinv_64_condsel"},
-      {"1001"_b, "csneg_64_condsel"},
-      {"1100"_b, "_qjyvln"},
-      {"1101"_b, "_nvthzh"},
+  { "_xxphlt",
+    {23},
+    { {"0"_b, "_qgshrr"},
     },
   },
 
-  { "_xxpqgg",
-    {30, 23, 22},
-    { {"001"_b, "sbfm_64m_bitfield"},
-      {"011"_b, "extr_64_extract"},
-      {"101"_b, "ubfm_64m_bitfield"},
+  { "_xxqzvy",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "fcvtns_32d_float2int"},
+      {"00001"_b, "fcvtnu_32d_float2int"},
+      {"00010"_b, "scvtf_d32_float2int"},
+      {"00011"_b, "ucvtf_d32_float2int"},
+      {"00100"_b, "fcvtas_32d_float2int"},
+      {"00101"_b, "fcvtau_32d_float2int"},
+      {"01000"_b, "fcvtps_32d_float2int"},
+      {"01001"_b, "fcvtpu_32d_float2int"},
+      {"10000"_b, "fcvtms_32d_float2int"},
+      {"10001"_b, "fcvtmu_32d_float2int"},
+      {"11000"_b, "fcvtzs_32d_float2int"},
+      {"11001"_b, "fcvtzu_32d_float2int"},
+      {"11110"_b, "fjcvtzs_32d_float2int"},
     },
   },
 
-  { "_xxpzrl",
-    {13},
-    { {"0"_b, "mls_asimdelem_r"},
-      {"1"_b, "umlsl_asimdelem_l"},
-    },
-  },
-
-  { "_xxxxlh",
-    {4},
-    { {"0"_b, "ccmn_64_condcmp_imm"},
-    },
-  },
-
-  { "_xxyklv",
-    {23, 22, 13, 12, 11, 10},
-    { {"000000"_b, "tbl_asimdtbl_l3_3"},
-      {"000100"_b, "tbx_asimdtbl_l3_3"},
-      {"001000"_b, "tbl_asimdtbl_l4_4"},
-      {"001100"_b, "tbx_asimdtbl_l4_4"},
-      {"xx0110"_b, "uzp2_asimdperm_only"},
-      {"xx1010"_b, "trn2_asimdperm_only"},
-      {"xx1110"_b, "zip2_asimdperm_only"},
-    },
-  },
-
-  { "_xygxsv",
-    {17},
-    { {"0"_b, "ld3_asisdlsop_hx3_r3h"},
-      {"1"_b, "ld3_asisdlsop_h3_i3h"},
+  { "_xygvjp",
+    {23, 22},
+    { {"00"_b, "and_asimdsame_only"},
+      {"01"_b, "bic_asimdsame_only"},
+      {"10"_b, "orr_asimdsame_only"},
+      {"11"_b, "orn_asimdsame_only"},
     },
   },
 
@@ -7246,47 +8199,47 @@
     },
   },
 
-  { "_xyhxzt",
-    {22},
-    { {"0"_b, "prfm_p_ldst_regoff"},
-    },
-  },
-
-  { "_xyljvp",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "_yjpstj"},
-      {"01000"_b, "csel_64_condsel"},
-      {"01001"_b, "csinc_64_condsel"},
-      {"01100"_b, "_qghmks"},
-      {"01101"_b, "_qzzlpv"},
-      {"01110"_b, "_syktsg"},
-      {"01111"_b, "_hjtvvm"},
-      {"10000"_b, "_pvrylp"},
-      {"11000"_b, "csinv_64_condsel"},
-      {"11001"_b, "csneg_64_condsel"},
-      {"11100"_b, "_kkgpjl"},
-      {"11101"_b, "_tjtgjy"},
-      {"11110"_b, "_qmzqsy"},
-      {"11111"_b, "_nmkqzt"},
-    },
-  },
-
-  { "_xylmmp",
-    {22, 12},
-    { {"10"_b, "_nkjgpq"},
-    },
-  },
-
-  { "_xyzpvp",
-    {23, 22, 13},
-    { {"100"_b, "fmlsl_asimdelem_lh"},
-      {"xx1"_b, "smlsl_asimdelem_l"},
-    },
-  },
-
-  { "_xzmjxk",
+  { "_xymnxy",
     {30},
-    { {"1"_b, "_sntzjg"},
+    { {"0"_b, "tbz_only_testbranch"},
+    },
+  },
+
+  { "_xynxhx",
+    {30, 23, 22, 11, 10},
+    { {"00010"_b, "str_b_ldst_regoff"},
+      {"00110"_b, "ldr_b_ldst_regoff"},
+      {"01010"_b, "str_q_ldst_regoff"},
+      {"01110"_b, "ldr_q_ldst_regoff"},
+      {"10010"_b, "str_h_ldst_regoff"},
+      {"10110"_b, "ldr_h_ldst_regoff"},
+    },
+  },
+
+  { "_xzjvkv",
+    {23, 22},
+    { {"00"_b, "tbl_asimdtbl_l1_1"},
+    },
+  },
+
+  { "_xzlxjh",
+    {30, 23, 22},
+    { {"001"_b, "sbfm_64m_bitfield"},
+      {"011"_b, "extr_64_extract"},
+      {"101"_b, "ubfm_64m_bitfield"},
+    },
+  },
+
+  { "_xzmrlg",
+    {30, 23, 22},
+    { {"000"_b, "stlxr_sr32_ldstexcl"},
+      {"001"_b, "_zzkgsk"},
+      {"010"_b, "_mnzzhk"},
+      {"011"_b, "_qlxlxk"},
+      {"100"_b, "stlxr_sr64_ldstexcl"},
+      {"101"_b, "_tknqxs"},
+      {"110"_b, "_mhpgjx"},
+      {"111"_b, "_xlqmhl"},
     },
   },
 
@@ -7299,73 +8252,77 @@
     },
   },
 
-  { "_xzyxnr",
-    {30, 23, 22, 11, 10},
-    { {"10001"_b, "stg_64spost_ldsttags"},
-      {"10010"_b, "stg_64soffset_ldsttags"},
-      {"10011"_b, "stg_64spre_ldsttags"},
-      {"10100"_b, "ldg_64loffset_ldsttags"},
-      {"10101"_b, "stzg_64spost_ldsttags"},
-      {"10110"_b, "stzg_64soffset_ldsttags"},
-      {"10111"_b, "stzg_64spre_ldsttags"},
-      {"11001"_b, "st2g_64spost_ldsttags"},
-      {"11010"_b, "st2g_64soffset_ldsttags"},
-      {"11011"_b, "st2g_64spre_ldsttags"},
-      {"11101"_b, "stz2g_64spost_ldsttags"},
-      {"11110"_b, "stz2g_64soffset_ldsttags"},
-      {"11111"_b, "stz2g_64spre_ldsttags"},
+  { "_xzntxr",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0000000"_b, "fcvtns_64s_float2int"},
+      {"0000001"_b, "fcvtnu_64s_float2int"},
+      {"0000010"_b, "scvtf_s64_float2int"},
+      {"0000011"_b, "ucvtf_s64_float2int"},
+      {"0000100"_b, "fcvtas_64s_float2int"},
+      {"0000101"_b, "fcvtau_64s_float2int"},
+      {"0001000"_b, "fcvtps_64s_float2int"},
+      {"0001001"_b, "fcvtpu_64s_float2int"},
+      {"0010000"_b, "fcvtms_64s_float2int"},
+      {"0010001"_b, "fcvtmu_64s_float2int"},
+      {"0011000"_b, "fcvtzs_64s_float2int"},
+      {"0011001"_b, "fcvtzu_64s_float2int"},
+      {"0100000"_b, "fcvtns_64d_float2int"},
+      {"0100001"_b, "fcvtnu_64d_float2int"},
+      {"0100010"_b, "scvtf_d64_float2int"},
+      {"0100011"_b, "ucvtf_d64_float2int"},
+      {"0100100"_b, "fcvtas_64d_float2int"},
+      {"0100101"_b, "fcvtau_64d_float2int"},
+      {"0100110"_b, "fmov_64d_float2int"},
+      {"0100111"_b, "fmov_d64_float2int"},
+      {"0101000"_b, "fcvtps_64d_float2int"},
+      {"0101001"_b, "fcvtpu_64d_float2int"},
+      {"0110000"_b, "fcvtms_64d_float2int"},
+      {"0110001"_b, "fcvtmu_64d_float2int"},
+      {"0111000"_b, "fcvtzs_64d_float2int"},
+      {"0111001"_b, "fcvtzu_64d_float2int"},
+      {"1001110"_b, "fmov_64vx_float2int"},
+      {"1001111"_b, "fmov_v64i_float2int"},
+      {"1100000"_b, "fcvtns_64h_float2int"},
+      {"1100001"_b, "fcvtnu_64h_float2int"},
+      {"1100010"_b, "scvtf_h64_float2int"},
+      {"1100011"_b, "ucvtf_h64_float2int"},
+      {"1100100"_b, "fcvtas_64h_float2int"},
+      {"1100101"_b, "fcvtau_64h_float2int"},
+      {"1100110"_b, "fmov_64h_float2int"},
+      {"1100111"_b, "fmov_h64_float2int"},
+      {"1101000"_b, "fcvtps_64h_float2int"},
+      {"1101001"_b, "fcvtpu_64h_float2int"},
+      {"1110000"_b, "fcvtms_64h_float2int"},
+      {"1110001"_b, "fcvtmu_64h_float2int"},
+      {"1111000"_b, "fcvtzs_64h_float2int"},
+      {"1111001"_b, "fcvtzu_64h_float2int"},
     },
   },
 
-  { "_xzyylk",
-    {20, 19, 18, 17, 16, 13},
-    { {"000000"_b, "fabs_s_floatdp1"},
-      {"000010"_b, "fsqrt_s_floatdp1"},
-      {"000100"_b, "fcvt_ds_floatdp1"},
-      {"000110"_b, "fcvt_hs_floatdp1"},
-      {"001000"_b, "frintp_s_floatdp1"},
-      {"001010"_b, "frintz_s_floatdp1"},
-      {"001110"_b, "frinti_s_floatdp1"},
-      {"010000"_b, "frint32x_s_floatdp1"},
-      {"010010"_b, "frint64x_s_floatdp1"},
-    },
-  },
-
-  { "_ygjslq",
-    {4, 3, 2, 1, 0},
-    { {"00000"_b, "fcmp_h_floatcmp"},
-      {"01000"_b, "fcmp_hz_floatcmp"},
-      {"10000"_b, "fcmpe_h_floatcmp"},
-      {"11000"_b, "fcmpe_hz_floatcmp"},
-    },
-  },
-
-  { "_ygnypk",
-    {22, 12},
-    { {"10"_b, "_nqlgtn"},
-    },
-  },
-
-  { "_ygpjrl",
+  { "_xzqmkv",
     {13, 12},
-    { {"00"_b, "adc_32_addsub_carry"},
+    { {"00"_b, "add_asisdsame_only"},
+      {"11"_b, "sqdmulh_asisdsame_only"},
     },
   },
 
-  { "_ygxhyg",
-    {23, 22, 4},
-    { {"000"_b, "fccmp_s_floatccmp"},
-      {"001"_b, "fccmpe_s_floatccmp"},
-      {"010"_b, "fccmp_d_floatccmp"},
-      {"011"_b, "fccmpe_d_floatccmp"},
-      {"110"_b, "fccmp_h_floatccmp"},
-      {"111"_b, "fccmpe_h_floatccmp"},
+  { "_ygghnn",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "suqadd_asimdmisc_r"},
+      {"10000"_b, "saddlv_asimdall_only"},
     },
   },
 
-  { "_ygyxvx",
-    {18, 17},
-    { {"00"_b, "ld2_asisdlso_s2_2s"},
+  { "_ygtpyl",
+    {22, 13, 12},
+    { {"000"_b, "swp_32_memop"},
+      {"100"_b, "swpl_32_memop"},
+    },
+  },
+
+  { "_yhhsns",
+    {20, 19, 18, 17},
+    { {"0000"_b, "_myrkmk"},
     },
   },
 
@@ -7382,21 +8339,17 @@
     },
   },
 
-  { "_yhqyzj",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_d_floatimm"},
+  { "_yhnqyy",
+    {13, 12},
+    { {"01"_b, "sqdmlal_asisddiff_only"},
+      {"11"_b, "sqdmlsl_asisddiff_only"},
     },
   },
 
-  { "_yhxvhy",
-    {17},
-    { {"0"_b, "st4_asisdlso_b4_4b"},
-    },
-  },
-
-  { "_yjjrgg",
+  { "_yjktml",
     {30},
-    { {"0"_b, "cbnz_64_compbranch"},
+    { {"0"_b, "ldr_32_loadlit"},
+      {"1"_b, "ldr_64_loadlit"},
     },
   },
 
@@ -7407,43 +8360,27 @@
     },
   },
 
-  { "_yjpstj",
-    {13, 12},
-    { {"00"_b, "adc_64_addsub_carry"},
+  { "_yjnkrn",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_grqsgp"},
     },
   },
 
-  { "_yjsjvt",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "_vxsvhs"},
-      {"00001"_b, "_rhzhyz"},
-      {"00100"_b, "_zjsgkm"},
-      {"00110"_b, "_xxxxlh"},
-      {"01100"_b, "_mtjrtt"},
-      {"10000"_b, "_yskkjs"},
-      {"10100"_b, "_mjxzks"},
-      {"10110"_b, "_tpkzxg"},
-    },
-  },
-
-  { "_yjxshz",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "stlurb_32_ldapstl_unscaled"},
-      {"00100"_b, "ldapurb_32_ldapstl_unscaled"},
-      {"01000"_b, "ldapursb_64_ldapstl_unscaled"},
-      {"01100"_b, "ldapursb_32_ldapstl_unscaled"},
-      {"10000"_b, "stlurh_32_ldapstl_unscaled"},
-      {"10100"_b, "ldapurh_32_ldapstl_unscaled"},
-      {"11000"_b, "ldapursh_64_ldapstl_unscaled"},
-      {"11100"_b, "ldapursh_32_ldapstl_unscaled"},
-    },
-  },
-
-  { "_yjxvkp",
-    {18, 17, 12},
-    { {"0x0"_b, "st4_asisdlsop_dx4_r4d"},
-      {"100"_b, "st4_asisdlsop_dx4_r4d"},
-      {"110"_b, "st4_asisdlsop_d4_i4d"},
+  { "_yjnmkg",
+    {30, 23, 11, 10},
+    { {"0000"_b, "_szysqh"},
+      {"0010"_b, "_ksrkkn"},
+      {"0100"_b, "_gljqng"},
+      {"0110"_b, "_qtghgs"},
+      {"1000"_b, "_gjprgr"},
+      {"1001"_b, "ldraa_64_ldst_pac"},
+      {"1010"_b, "_gnpgsg"},
+      {"1011"_b, "ldraa_64w_ldst_pac"},
+      {"1100"_b, "_lnmhqq"},
+      {"1101"_b, "ldrab_64_ldst_pac"},
+      {"1110"_b, "_gsvlph"},
+      {"1111"_b, "ldrab_64w_ldst_pac"},
     },
   },
 
@@ -7455,16 +8392,90 @@
     },
   },
 
-  { "_yjztsq",
-    {20, 19, 18, 17, 16},
-    { {"11111"_b, "st64b_64l_memop"},
+  { "_ykhhqq",
+    {18},
+    { {"0"_b, "ld2_asisdlsop_hx2_r2h"},
+      {"1"_b, "ld2_asisdlsop_h2_i2h"},
     },
   },
 
-  { "_ylhxlt",
-    {30},
-    { {"0"_b, "ldrsw_64_loadlit"},
-      {"1"_b, "prfm_p_loadlit"},
+  { "_ykjhgg",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000000"_b, "ldaddb_32_memop"},
+      {"0000100"_b, "ldclrb_32_memop"},
+      {"0001000"_b, "ldeorb_32_memop"},
+      {"0001100"_b, "ldsetb_32_memop"},
+      {"000xx10"_b, "strb_32b_ldst_regoff"},
+      {"0010000"_b, "ldaddlb_32_memop"},
+      {"0010100"_b, "ldclrlb_32_memop"},
+      {"0011000"_b, "ldeorlb_32_memop"},
+      {"0011100"_b, "ldsetlb_32_memop"},
+      {"001xx10"_b, "ldrb_32b_ldst_regoff"},
+      {"0100000"_b, "ldaddab_32_memop"},
+      {"0100100"_b, "ldclrab_32_memop"},
+      {"0101000"_b, "ldeorab_32_memop"},
+      {"0101100"_b, "ldsetab_32_memop"},
+      {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
+      {"0110000"_b, "ldaddalb_32_memop"},
+      {"0110100"_b, "ldclralb_32_memop"},
+      {"0111000"_b, "ldeoralb_32_memop"},
+      {"0111100"_b, "ldsetalb_32_memop"},
+      {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
+      {"1000000"_b, "ldaddh_32_memop"},
+      {"1000100"_b, "ldclrh_32_memop"},
+      {"1001000"_b, "ldeorh_32_memop"},
+      {"1001100"_b, "ldseth_32_memop"},
+      {"100xx10"_b, "strh_32_ldst_regoff"},
+      {"1010000"_b, "ldaddlh_32_memop"},
+      {"1010100"_b, "ldclrlh_32_memop"},
+      {"1011000"_b, "ldeorlh_32_memop"},
+      {"1011100"_b, "ldsetlh_32_memop"},
+      {"101xx10"_b, "ldrh_32_ldst_regoff"},
+      {"1100000"_b, "ldaddah_32_memop"},
+      {"1100100"_b, "ldclrah_32_memop"},
+      {"1101000"_b, "ldeorah_32_memop"},
+      {"1101100"_b, "ldsetah_32_memop"},
+      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+      {"1110000"_b, "ldaddalh_32_memop"},
+      {"1110100"_b, "ldclralh_32_memop"},
+      {"1111000"_b, "ldeoralh_32_memop"},
+      {"1111100"_b, "ldsetalh_32_memop"},
+      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+    },
+  },
+
+  { "_ykpgyh",
+    {13, 12, 5},
+    { {"010"_b, "_gknljg"},
+      {"011"_b, "_hjqryy"},
+      {"100"_b, "_lmmkzh"},
+      {"101"_b, "_vxlmxz"},
+      {"110"_b, "_phktvp"},
+      {"111"_b, "_qqvgql"},
+    },
+  },
+
+  { "_ykpqth",
+    {12},
+    { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+    },
+  },
+
+  { "_ykptgl",
+    {30, 23},
+    { {"00"_b, "adds_32s_addsub_imm"},
+      {"10"_b, "subs_32s_addsub_imm"},
+    },
+  },
+
+  { "_ylhgrh",
+    {13, 12, 11, 10},
+    { {"0011"_b, "uqadd_asisdsame_only"},
+      {"1010"_b, "_msvjxq"},
+      {"1011"_b, "uqsub_asisdsame_only"},
+      {"1101"_b, "cmhi_asisdsame_only"},
+      {"1110"_b, "_yzlnrs"},
+      {"1111"_b, "cmhs_asisdsame_only"},
     },
   },
 
@@ -7481,28 +8492,9 @@
     },
   },
 
-  { "_ylqnqt",
-    {18, 17, 12},
-    { {"000"_b, "ld4_asisdlso_d4_4d"},
-    },
-  },
-
-  { "_ylyskq",
-    {13, 12, 11, 10},
-    { {"0011"_b, "uqadd_asisdsame_only"},
-      {"1010"_b, "_yzqtyl"},
-      {"1011"_b, "uqsub_asisdsame_only"},
-      {"1101"_b, "cmhi_asisdsame_only"},
-      {"1110"_b, "_jxzrxm"},
-      {"1111"_b, "cmhs_asisdsame_only"},
-    },
-  },
-
-  { "_ymgrgx",
-    {22, 20, 19, 18, 17, 16},
-    { {"111001"_b, "ucvtf_asisdmiscfp16_r"},
-      {"x00001"_b, "ucvtf_asisdmisc_r"},
-      {"x10000"_b, "faddp_asisdpair_only_sd"},
+  { "_ymghnh",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "st64b_64l_memop"},
     },
   },
 
@@ -7542,61 +8534,96 @@
     },
   },
 
-  { "_ympyng",
-    {30, 23, 22, 13},
-    { {"0000"_b, "ld1sh_z_p_br_s64"},
-      {"0001"_b, "ldff1sh_z_p_br_s64"},
-      {"0010"_b, "ld1w_z_p_br_u32"},
-      {"0011"_b, "ldff1w_z_p_br_u32"},
-      {"0100"_b, "ld1sb_z_p_br_s64"},
-      {"0101"_b, "ldff1sb_z_p_br_s64"},
-      {"0110"_b, "ld1sb_z_p_br_s16"},
-      {"0111"_b, "ldff1sb_z_p_br_s16"},
-      {"1001"_b, "stnt1w_z_p_br_contiguous"},
-      {"1011"_b, "st3w_z_p_br_contiguous"},
-      {"10x0"_b, "st1w_z_p_br"},
-      {"1100"_b, "str_z_bi"},
-      {"1101"_b, "stnt1d_z_p_br_contiguous"},
-      {"1111"_b, "st3d_z_p_br_contiguous"},
+  { "_ymmhtq",
+    {23, 22, 20, 19, 11},
+    { {"00010"_b, "srsra_asisdshf_r"},
+      {"001x0"_b, "srsra_asisdshf_r"},
+      {"01xx0"_b, "srsra_asisdshf_r"},
     },
   },
 
-  { "_ymznlj",
-    {13, 10},
-    { {"00"_b, "_vgrtjz"},
-      {"01"_b, "_kxjgsz"},
-      {"10"_b, "_vmjtrx"},
-      {"11"_b, "_tgmljr"},
+  { "_ymszkr",
+    {30},
+    { {"0"_b, "ldr_q_loadlit"},
     },
   },
 
-  { "_ynnrny",
-    {18, 17},
-    { {"00"_b, "_jplmmr"},
+  { "_ymtzjg",
+    {12, 10},
+    { {"00"_b, "_gmsmls"},
+      {"01"_b, "_rnqmyp"},
+      {"10"_b, "_srttng"},
+      {"11"_b, "_tymryz"},
     },
   },
 
-  { "_ynqsgl",
-    {17},
-    { {"0"_b, "ld4_asisdlso_h4_4h"},
+  { "_ymvlzl",
+    {18},
+    { {"0"_b, "st4_asisdlse_r4"},
     },
   },
 
-  { "_ypjyqh",
-    {9, 8, 7, 6, 5, 0},
-    { {"111110"_b, "drps_64e_branch_reg"},
+  { "_ymvzyh",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxp_lp32_ldstexcl"},
     },
   },
 
-  { "_yplktv",
-    {13, 12, 11, 10},
-    { {"0001"_b, "sub_asisdsame_only"},
-      {"0010"_b, "_llxlqz"},
-      {"0011"_b, "cmeq_asisdsame_only"},
-      {"0110"_b, "_pxkqxn"},
-      {"1010"_b, "_rhvksm"},
-      {"1101"_b, "sqrdmulh_asisdsame_only"},
-      {"1110"_b, "_gkkpjz"},
+  { "_ymxjjr",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "orr_asimdimm_l_hl"},
+      {"00x100"_b, "sqshrn_asimdshf_n"},
+      {"00x101"_b, "sqrshrn_asimdshf_n"},
+      {"010x00"_b, "sqshrn_asimdshf_n"},
+      {"010x01"_b, "sqrshrn_asimdshf_n"},
+      {"011100"_b, "sqshrn_asimdshf_n"},
+      {"011101"_b, "sqrshrn_asimdshf_n"},
+      {"0x1000"_b, "sqshrn_asimdshf_n"},
+      {"0x1001"_b, "sqrshrn_asimdshf_n"},
+    },
+  },
+
+  { "_ynsytg",
+    {23, 22, 20, 19, 13, 11, 10},
+    { {"0001001"_b, "shl_asisdshf_r"},
+      {"0001101"_b, "sqshl_asisdshf_r"},
+      {"001x001"_b, "shl_asisdshf_r"},
+      {"001x101"_b, "sqshl_asisdshf_r"},
+      {"00xx0x0"_b, "fmls_asisdelem_rh_h"},
+      {"01xx001"_b, "shl_asisdshf_r"},
+      {"01xx101"_b, "sqshl_asisdshf_r"},
+      {"1xxx0x0"_b, "fmls_asisdelem_r_sd"},
+      {"xxxx1x0"_b, "sqdmlsl_asisdelem_l"},
+    },
+  },
+
+  { "_ynyqky",
+    {12},
+    { {"0"_b, "st2_asisdlsop_dx2_r2d"},
+    },
+  },
+
+  { "_ynznxv",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxrb_lr32_ldstexcl"},
+    },
+  },
+
+  { "_yppmkl",
+    {23, 22, 20, 19, 13, 11},
+    { {"0000x0"_b, "mvni_asimdimm_l_hl"},
+      {"00x100"_b, "sqshrun_asimdshf_n"},
+      {"00x101"_b, "sqrshrun_asimdshf_n"},
+      {"00x110"_b, "ushll_asimdshf_l"},
+      {"010x00"_b, "sqshrun_asimdshf_n"},
+      {"010x01"_b, "sqrshrun_asimdshf_n"},
+      {"010x10"_b, "ushll_asimdshf_l"},
+      {"011100"_b, "sqshrun_asimdshf_n"},
+      {"011101"_b, "sqrshrun_asimdshf_n"},
+      {"011110"_b, "ushll_asimdshf_l"},
+      {"0x1000"_b, "sqshrun_asimdshf_n"},
+      {"0x1001"_b, "sqrshrun_asimdshf_n"},
+      {"0x1010"_b, "ushll_asimdshf_l"},
     },
   },
 
@@ -7618,66 +8645,41 @@
     },
   },
 
-  { "_ypqgyp",
-    {22},
-    { {"0"_b, "ldrsw_64_ldst_regoff"},
+  { "_ypsgqz",
+    {18, 17},
+    { {"0x"_b, "ld4_asisdlsop_sx4_r4s"},
+      {"10"_b, "ld4_asisdlsop_sx4_r4s"},
+      {"11"_b, "ld4_asisdlsop_s4_i4s"},
     },
   },
 
-  { "_ypznsm",
-    {23},
-    { {"0"_b, "fmaxnm_asimdsame_only"},
-      {"1"_b, "fminnm_asimdsame_only"},
+  { "_yptgjg",
+    {4},
+    { {"0"_b, "ccmn_32_condcmp_reg"},
     },
   },
 
-  { "_yqmqzp",
-    {18, 17, 12},
-    { {"000"_b, "st1_asisdlso_d1_1d"},
+  { "_yptvyx",
+    {30, 23, 22},
+    { {"000"_b, "strb_32_ldst_pos"},
+      {"001"_b, "ldrb_32_ldst_pos"},
+      {"010"_b, "ldrsb_64_ldst_pos"},
+      {"011"_b, "ldrsb_32_ldst_pos"},
+      {"100"_b, "strh_32_ldst_pos"},
+      {"101"_b, "ldrh_32_ldst_pos"},
+      {"110"_b, "ldrsh_64_ldst_pos"},
+      {"111"_b, "ldrsh_32_ldst_pos"},
     },
   },
 
-  { "_yqmvxk",
-    {11, 10, 9, 8, 7, 6},
-    { {"000001"_b, "tcommit_only_barriers"},
-      {"xx1000"_b, "dsb_bon_barriers"},
-      {"xxxx10"_b, "dmb_bo_barriers"},
-      {"xxxx11"_b, "sb_only_barriers"},
-    },
-  },
-
-  { "_yqsgrt",
-    {23, 22, 20, 19, 16, 13, 12},
-    { {"0000000"_b, "_znmhps"},
-      {"0000010"_b, "_zssjpv"},
-      {"0000011"_b, "_smqvrs"},
-      {"0100000"_b, "_jrgzxt"},
-      {"0100010"_b, "_ppllxt"},
-      {"0100011"_b, "_hqlskj"},
-      {"100xx00"_b, "st3_asisdlsep_r3_r"},
-      {"100xx10"_b, "st1_asisdlsep_r3_r3"},
-      {"100xx11"_b, "st1_asisdlsep_r1_r1"},
-      {"1010x00"_b, "st3_asisdlsep_r3_r"},
-      {"1010x10"_b, "st1_asisdlsep_r3_r3"},
-      {"1010x11"_b, "st1_asisdlsep_r1_r1"},
-      {"1011000"_b, "st3_asisdlsep_r3_r"},
-      {"1011010"_b, "st1_asisdlsep_r3_r3"},
-      {"1011011"_b, "st1_asisdlsep_r1_r1"},
-      {"1011100"_b, "_ngxkmp"},
-      {"1011110"_b, "_qgryzh"},
-      {"1011111"_b, "_tjltls"},
-      {"110xx00"_b, "ld3_asisdlsep_r3_r"},
-      {"110xx10"_b, "ld1_asisdlsep_r3_r3"},
-      {"110xx11"_b, "ld1_asisdlsep_r1_r1"},
-      {"1110x00"_b, "ld3_asisdlsep_r3_r"},
-      {"1110x10"_b, "ld1_asisdlsep_r3_r3"},
-      {"1110x11"_b, "ld1_asisdlsep_r1_r1"},
-      {"1111000"_b, "ld3_asisdlsep_r3_r"},
-      {"1111010"_b, "ld1_asisdlsep_r3_r3"},
-      {"1111011"_b, "ld1_asisdlsep_r1_r1"},
-      {"1111100"_b, "_zzgrjz"},
-      {"1111110"_b, "_phtnny"},
-      {"1111111"_b, "_txjyxr"},
+  { "_ypzllm",
+    {23, 22, 4},
+    { {"000"_b, "fccmp_s_floatccmp"},
+      {"001"_b, "fccmpe_s_floatccmp"},
+      {"010"_b, "fccmp_d_floatccmp"},
+      {"011"_b, "fccmpe_d_floatccmp"},
+      {"110"_b, "fccmp_h_floatccmp"},
+      {"111"_b, "fccmpe_h_floatccmp"},
     },
   },
 
@@ -7697,16 +8699,51 @@
     },
   },
 
-  { "_yrgnqz",
+  { "_yqzxvr",
+    {18, 17, 12},
+    { {"000"_b, "ld3_asisdlso_d3_3d"},
+    },
+  },
+
+  { "_yrggjm",
     {13, 12},
     { {"00"_b, "sshl_asisdsame_only"},
       {"01"_b, "srshl_asisdsame_only"},
     },
   },
 
-  { "_yrlzqp",
-    {22, 13, 12},
-    { {"000"_b, "ldapr_64l_memop"},
+  { "_yrgzqr",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_ymvlzl"},
+      {"0000001"_b, "_nzvlzt"},
+      {"0100000"_b, "_zyhgnz"},
+      {"0100001"_b, "_mntnlr"},
+      {"100xxx0"_b, "st4_asisdlsep_r4_r"},
+      {"100xxx1"_b, "st1_asisdlsep_r4_r4"},
+      {"1010xx0"_b, "st4_asisdlsep_r4_r"},
+      {"1010xx1"_b, "st1_asisdlsep_r4_r4"},
+      {"10110x0"_b, "st4_asisdlsep_r4_r"},
+      {"10110x1"_b, "st1_asisdlsep_r4_r4"},
+      {"1011100"_b, "st4_asisdlsep_r4_r"},
+      {"1011101"_b, "st1_asisdlsep_r4_r4"},
+      {"1011110"_b, "_tshjsk"},
+      {"1011111"_b, "_xnrrsy"},
+      {"110xxx0"_b, "ld4_asisdlsep_r4_r"},
+      {"110xxx1"_b, "ld1_asisdlsep_r4_r4"},
+      {"1110xx0"_b, "ld4_asisdlsep_r4_r"},
+      {"1110xx1"_b, "ld1_asisdlsep_r4_r4"},
+      {"11110x0"_b, "ld4_asisdlsep_r4_r"},
+      {"11110x1"_b, "ld1_asisdlsep_r4_r4"},
+      {"1111100"_b, "ld4_asisdlsep_r4_r"},
+      {"1111101"_b, "ld1_asisdlsep_r4_r4"},
+      {"1111110"_b, "_hjvkkq"},
+      {"1111111"_b, "_mthlnv"},
+    },
+  },
+
+  { "_yrjqql",
+    {30},
+    { {"0"_b, "cbz_32_compbranch"},
     },
   },
 
@@ -7717,36 +8754,28 @@
     },
   },
 
-  { "_yrrppk",
+  { "_yrypnt",
+    {30, 23, 11, 10},
+    { {"1001"_b, "_khrsgv"},
+    },
+  },
+
+  { "_yryygq",
+    {12},
+    { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+    },
+  },
+
+  { "_yskyrg",
     {20, 19, 18, 17, 16},
-    { {"00000"_b, "fcvtns_32d_float2int"},
-      {"00001"_b, "fcvtnu_32d_float2int"},
-      {"00010"_b, "scvtf_d32_float2int"},
-      {"00011"_b, "ucvtf_d32_float2int"},
-      {"00100"_b, "fcvtas_32d_float2int"},
-      {"00101"_b, "fcvtau_32d_float2int"},
-      {"01000"_b, "fcvtps_32d_float2int"},
-      {"01001"_b, "fcvtpu_32d_float2int"},
-      {"10000"_b, "fcvtms_32d_float2int"},
-      {"10001"_b, "fcvtmu_32d_float2int"},
-      {"11000"_b, "fcvtzs_32d_float2int"},
-      {"11001"_b, "fcvtzu_32d_float2int"},
-      {"11110"_b, "fjcvtzs_32d_float2int"},
+    { {"00000"_b, "sqneg_asisdmisc_r"},
     },
   },
 
-  { "_ysjqhn",
-    {30, 23, 22},
-    { {"00x"_b, "adds_64_addsub_shift"},
-      {"010"_b, "adds_64_addsub_shift"},
-      {"10x"_b, "subs_64_addsub_shift"},
-      {"110"_b, "subs_64_addsub_shift"},
-    },
-  },
-
-  { "_yskkjs",
+  { "_ysspjx",
     {13, 12},
-    { {"00"_b, "sbcs_64_addsub_carry"},
+    { {"00"_b, "sdiv_64_dp_2src"},
+      {"10"_b, "rorv_64_dp_2src"},
     },
   },
 
@@ -7760,6 +8789,12 @@
     },
   },
 
+  { "_yszlqj",
+    {23, 22},
+    { {"00"_b, "tbl_asimdtbl_l2_2"},
+    },
+  },
+
   { "_ytkjxx",
     {30, 23, 22, 13, 4},
     { {"00x0x"_b, "ld1w_z_p_bz_s_x32_scaled"},
@@ -7774,9 +8809,13 @@
     },
   },
 
-  { "_ytsghm",
-    {30, 23, 22},
-    { {"000"_b, "msub_32a_dp_3src"},
+  { "_ytrmvz",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtmu_asisdmiscfp16_r"},
+      {"0x00001"_b, "fcvtmu_asisdmisc_r"},
+      {"1111001"_b, "fcvtzu_asisdmiscfp16_r"},
+      {"1x00001"_b, "fcvtzu_asisdmisc_r"},
+      {"xx00000"_b, "neg_asisdmisc_r"},
     },
   },
 
@@ -7800,56 +8839,6 @@
     },
   },
 
-  { "_ytvxsl",
-    {30, 23, 22},
-    { {"000"_b, "stlxrb_sr32_ldstexcl"},
-      {"001"_b, "ldaxrb_lr32_ldstexcl"},
-      {"010"_b, "stlrb_sl32_ldstexcl"},
-      {"011"_b, "ldarb_lr32_ldstexcl"},
-      {"100"_b, "stlxrh_sr32_ldstexcl"},
-      {"101"_b, "ldaxrh_lr32_ldstexcl"},
-      {"110"_b, "stlrh_sl32_ldstexcl"},
-      {"111"_b, "ldarh_lr32_ldstexcl"},
-    },
-  },
-
-  { "_yvgqjx",
-    {13, 12, 5},
-    { {"010"_b, "_tnzytv"},
-      {"011"_b, "_vmpnlv"},
-      {"100"_b, "_hhhqjk"},
-      {"101"_b, "_tkzqqp"},
-      {"110"_b, "_sphpkr"},
-      {"111"_b, "_spglxn"},
-    },
-  },
-
-  { "_yvhnlk",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0001111"_b, "casp_cp32_ldstexcl"},
-      {"0011111"_b, "caspa_cp32_ldstexcl"},
-      {"0101111"_b, "casb_c32_ldstexcl"},
-      {"0111111"_b, "casab_c32_ldstexcl"},
-      {"1001111"_b, "casp_cp64_ldstexcl"},
-      {"1011111"_b, "caspa_cp64_ldstexcl"},
-      {"1101111"_b, "cash_c32_ldstexcl"},
-      {"1111111"_b, "casah_c32_ldstexcl"},
-    },
-  },
-
-  { "_yvlhjg",
-    {23},
-    { {"0"_b, "frecps_asimdsame_only"},
-      {"1"_b, "frsqrts_asimdsame_only"},
-    },
-  },
-
-  { "_yvnjkr",
-    {9, 8, 7, 6, 5},
-    { {"11111"_b, "autdzb_64z_dp_1src"},
-    },
-  },
-
   { "_yvptvx",
     {23, 12, 11, 10},
     { {"0000"_b, "sqshrnb_z_zi"},
@@ -7863,20 +8852,15 @@
     },
   },
 
-  { "_yvxgrr",
-    {23, 22, 20, 19, 18, 17, 16},
-    { {"0111001"_b, "frintm_asimdmiscfp16_r"},
-      {"0x00001"_b, "frintm_asimdmisc_r"},
-      {"1111001"_b, "frintz_asimdmiscfp16_r"},
-      {"1x00001"_b, "frintz_asimdmisc_r"},
-      {"xx00000"_b, "cmeq_asimdmisc_z"},
+  { "_yvqnyq",
+    {23},
+    { {"1"_b, "_vhlqpr"},
     },
   },
 
-  { "_yvygml",
+  { "_yvxkhv",
     {30},
-    { {"0"_b, "_jkrlsg"},
-      {"1"_b, "_vvrmvg"},
+    { {"1"_b, "_ngvqhs"},
     },
   },
 
@@ -7888,23 +8872,10 @@
     },
   },
 
-  { "_yvyxkx",
-    {10},
-    { {"0"_b, "sha512su0_vv2_cryptosha512_2"},
-      {"1"_b, "sm4e_vv4_cryptosha512_2"},
-    },
-  },
-
-  { "_yxhrpk",
-    {23, 22},
-    { {"00"_b, "fmlal2_asimdsame_f"},
-      {"10"_b, "fmlsl2_asimdsame_f"},
-    },
-  },
-
-  { "_yxmkzr",
-    {12},
-    { {"0"_b, "st1_asisdlsop_dx1_r1d"},
+  { "_yxgmrs",
+    {23},
+    { {"0"_b, "fmaxnmp_asimdsame_only"},
+      {"1"_b, "fminnmp_asimdsame_only"},
     },
   },
 
@@ -7916,6 +8887,12 @@
     },
   },
 
+  { "_yxvttm",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+    },
+  },
+
   { "_yykhjv",
     {23, 22, 13, 12, 11, 10},
     { {"000110"_b, "smmla_z_zzz"},
@@ -7951,6 +8928,20 @@
     },
   },
 
+  { "_yysxts",
+    {23, 22, 13, 12, 11, 10},
+    { {"0001x0"_b, "fmla_asimdelem_rh_h"},
+      {"0x0001"_b, "sshr_asimdshf_r"},
+      {"0x0101"_b, "ssra_asimdshf_r"},
+      {"0x1001"_b, "srshr_asimdshf_r"},
+      {"0x1101"_b, "srsra_asimdshf_r"},
+      {"1000x0"_b, "fmlal_asimdelem_lh"},
+      {"1x01x0"_b, "fmla_asimdelem_r_sd"},
+      {"xx10x0"_b, "smlal_asimdelem_l"},
+      {"xx11x0"_b, "sqdmlal_asimdelem_l"},
+    },
+  },
+
   { "_yytvxh",
     {30, 23, 22, 13, 4},
     { {"00000"_b, "prfw_i_p_br_s"},
@@ -7972,6 +8963,22 @@
     },
   },
 
+  { "_yyvjqv",
+    {23},
+    { {"0"_b, "fmax_asimdsame_only"},
+      {"1"_b, "fmin_asimdsame_only"},
+    },
+  },
+
+  { "_yyvnrp",
+    {23, 22},
+    { {"00"_b, "eor_asimdsame_only"},
+      {"01"_b, "bsl_asimdsame_only"},
+      {"10"_b, "bit_asimdsame_only"},
+      {"11"_b, "bif_asimdsame_only"},
+    },
+  },
+
   { "_yyyshx",
     {30, 13, 4},
     { {"000"_b, "cmphs_p_p_zz"},
@@ -7982,78 +8989,114 @@
     },
   },
 
+  { "_yyyxhk",
+    {18},
+    { {"0"_b, "ld1_asisdlsep_r2_r2"},
+      {"1"_b, "ld1_asisdlsep_i2_i2"},
+    },
+  },
+
+  { "_yzgthp",
+    {18, 17},
+    { {"0x"_b, "ld1_asisdlsop_sx1_r1s"},
+      {"10"_b, "ld1_asisdlsop_sx1_r1s"},
+      {"11"_b, "ld1_asisdlsop_s1_i1s"},
+    },
+  },
+
+  { "_yzlnrs",
+    {20, 19, 18, 17, 16},
+    { {"00000"_b, "usqadd_asisdmisc_r"},
+    },
+  },
+
   { "_yzmjhn",
     {4},
     { {"0"_b, "eors_p_p_pp_z"},
     },
   },
 
-  { "_yzqtyl",
-    {20, 19, 18, 17, 16},
-    { {"00001"_b, "sqxtun_asisdmisc_n"},
+  { "_yzpszn",
+    {30},
+    { {"0"_b, "ldr_s_loadlit"},
+      {"1"_b, "ldr_d_loadlit"},
     },
   },
 
-  { "_yzzlxs",
-    {23, 4},
-    { {"00"_b, "_mpgrgp"},
+  { "_yzqhtj",
+    {30, 23, 22, 11, 10},
+    { {"00000"_b, "_rxsqhv"},
+      {"01000"_b, "csel_64_condsel"},
+      {"01001"_b, "csinc_64_condsel"},
+      {"01100"_b, "_zqxkxg"},
+      {"01101"_b, "_rvjkyp"},
+      {"01110"_b, "_jxgpgg"},
+      {"01111"_b, "_ysspjx"},
+      {"10000"_b, "_pjvkjz"},
+      {"11000"_b, "csinv_64_condsel"},
+      {"11001"_b, "csneg_64_condsel"},
+      {"11100"_b, "_rmyzpp"},
+      {"11101"_b, "_npjnlv"},
+      {"11110"_b, "_yhhsns"},
+      {"11111"_b, "_vllmnt"},
     },
   },
 
-  { "_zgjpym",
-    {23, 22, 20, 19, 11},
-    { {"00010"_b, "srsra_asisdshf_r"},
-      {"001x0"_b, "srsra_asisdshf_r"},
-      {"01xx0"_b, "srsra_asisdshf_r"},
-    },
-  },
-
-  { "_zglksl",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"1101001"_b, "ummla_asimdsame2_g"},
-      {"xxx0001"_b, "sqrdmlah_asimdsame2_only"},
-      {"xxx0011"_b, "sqrdmlsh_asimdsame2_only"},
-      {"xxx0101"_b, "udot_asimdsame2_d"},
-    },
-  },
-
-  { "_zgysvr",
-    {30, 13},
-    { {"00"_b, "_xpqglq"},
-      {"10"_b, "_xstkrn"},
-      {"11"_b, "_zjzmvh"},
-    },
-  },
-
-  { "_zgzlhq",
-    {17},
-    { {"0"_b, "ld1_asisdlso_b1_1b"},
-    },
-  },
-
-  { "_zhkjzg",
-    {23, 22, 13},
-    { {"000"_b, "fmls_asimdelem_rh_h"},
-      {"1x0"_b, "fmls_asimdelem_r_sd"},
-      {"xx1"_b, "sqdmlsl_asimdelem_l"},
-    },
-  },
-
-  { "_zhpxqz",
+  { "_yzxjnk",
     {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_h_floatimm"},
+    { {"11111"_b, "paciza_64z_dp_1src"},
     },
   },
 
-  { "_zhrtts",
-    {23, 22},
-    { {"00"_b, "_qlqhzg"},
+  { "_zghtll",
+    {22, 20, 19, 18, 17, 16, 13, 12},
+    { {"01111100"_b, "ldapr_32l_memop"},
     },
   },
 
-  { "_zjgvyp",
-    {30, 13, 12, 11, 10},
-    { {"00000"_b, "_ghnljt"},
+  { "_zgljvg",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000000"_b, "ldadd_32_memop"},
+      {"0000100"_b, "ldclr_32_memop"},
+      {"0001000"_b, "ldeor_32_memop"},
+      {"0001100"_b, "ldset_32_memop"},
+      {"000xx10"_b, "str_32_ldst_regoff"},
+      {"0010000"_b, "ldaddl_32_memop"},
+      {"0010100"_b, "ldclrl_32_memop"},
+      {"0011000"_b, "ldeorl_32_memop"},
+      {"0011100"_b, "ldsetl_32_memop"},
+      {"001xx10"_b, "ldr_32_ldst_regoff"},
+      {"0100000"_b, "ldadda_32_memop"},
+      {"0100100"_b, "ldclra_32_memop"},
+      {"0101000"_b, "ldeora_32_memop"},
+      {"0101100"_b, "ldseta_32_memop"},
+      {"010xx10"_b, "ldrsw_64_ldst_regoff"},
+      {"0110000"_b, "ldaddal_32_memop"},
+      {"0110100"_b, "ldclral_32_memop"},
+      {"0111000"_b, "ldeoral_32_memop"},
+      {"0111100"_b, "ldsetal_32_memop"},
+      {"1000000"_b, "ldadd_64_memop"},
+      {"1000100"_b, "ldclr_64_memop"},
+      {"1001000"_b, "ldeor_64_memop"},
+      {"1001100"_b, "ldset_64_memop"},
+      {"100xx10"_b, "str_64_ldst_regoff"},
+      {"1010000"_b, "ldaddl_64_memop"},
+      {"1010100"_b, "ldclrl_64_memop"},
+      {"1011000"_b, "ldeorl_64_memop"},
+      {"1011100"_b, "ldsetl_64_memop"},
+      {"101xx10"_b, "ldr_64_ldst_regoff"},
+      {"10xxx01"_b, "ldraa_64_ldst_pac"},
+      {"10xxx11"_b, "ldraa_64w_ldst_pac"},
+      {"1100000"_b, "ldadda_64_memop"},
+      {"1100100"_b, "ldclra_64_memop"},
+      {"1101000"_b, "ldeora_64_memop"},
+      {"1101100"_b, "ldseta_64_memop"},
+      {"1110000"_b, "ldaddal_64_memop"},
+      {"1110100"_b, "ldclral_64_memop"},
+      {"1111000"_b, "ldeoral_64_memop"},
+      {"1111100"_b, "ldsetal_64_memop"},
+      {"11xxx01"_b, "ldrab_64_ldst_pac"},
+      {"11xxx11"_b, "ldrab_64w_ldst_pac"},
     },
   },
 
@@ -8063,17 +9106,39 @@
     },
   },
 
-  { "_zjsgkm",
-    {4},
-    { {"0"_b, "ccmn_64_condcmp_reg"},
+  { "_zjqssg",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_jqsjtj"},
+      {"0000001"_b, "_rspmth"},
+      {"0100000"_b, "_txkmvh"},
+      {"0100001"_b, "_ngnxrx"},
+      {"100xxx0"_b, "st2_asisdlsep_r2_r"},
+      {"100xxx1"_b, "st1_asisdlsep_r2_r2"},
+      {"1010xx0"_b, "st2_asisdlsep_r2_r"},
+      {"1010xx1"_b, "st1_asisdlsep_r2_r2"},
+      {"10110x0"_b, "st2_asisdlsep_r2_r"},
+      {"10110x1"_b, "st1_asisdlsep_r2_r2"},
+      {"1011100"_b, "st2_asisdlsep_r2_r"},
+      {"1011101"_b, "st1_asisdlsep_r2_r2"},
+      {"1011110"_b, "_zyzsql"},
+      {"1011111"_b, "_kqsqly"},
+      {"110xxx0"_b, "ld2_asisdlsep_r2_r"},
+      {"110xxx1"_b, "ld1_asisdlsep_r2_r2"},
+      {"1110xx0"_b, "ld2_asisdlsep_r2_r"},
+      {"1110xx1"_b, "ld1_asisdlsep_r2_r2"},
+      {"11110x0"_b, "ld2_asisdlsep_r2_r"},
+      {"11110x1"_b, "ld1_asisdlsep_r2_r2"},
+      {"1111100"_b, "ld2_asisdlsep_r2_r"},
+      {"1111101"_b, "ld1_asisdlsep_r2_r2"},
+      {"1111110"_b, "_xnrxym"},
+      {"1111111"_b, "_yyyxhk"},
     },
   },
 
-  { "_zjslnr",
-    {30, 23, 22},
-    { {"000"_b, "sbfm_32m_bitfield"},
-      {"010"_b, "extr_32_extract"},
-      {"100"_b, "ubfm_32m_bitfield"},
+  { "_zjrsrx",
+    {30, 23},
+    { {"00"_b, "add_64_addsub_imm"},
+      {"10"_b, "sub_64_addsub_imm"},
     },
   },
 
@@ -8135,18 +9200,39 @@
     },
   },
 
-  { "_zkqtrj",
-    {30},
-    { {"0"_b, "b_only_branch_imm"},
+  { "_zlhlqy",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "frintm_asimdmiscfp16_r"},
+      {"0x00001"_b, "frintm_asimdmisc_r"},
+      {"1111001"_b, "frintz_asimdmiscfp16_r"},
+      {"1x00001"_b, "frintz_asimdmisc_r"},
+      {"xx00000"_b, "cmeq_asimdmisc_z"},
     },
   },
 
-  { "_zkttzl",
-    {23, 22, 20, 19, 18, 16, 13},
-    { {"0000000"_b, "_tsvsgh"},
-      {"0000001"_b, "_rkrltp"},
-      {"0100000"_b, "_zgzlhq"},
-      {"0100001"_b, "_nrssjz"},
+  { "_zlkygr",
+    {13, 12},
+    { {"00"_b, "cpyfe_cpy_memcms"},
+      {"01"_b, "cpyfewt_cpy_memcms"},
+      {"10"_b, "cpyfert_cpy_memcms"},
+      {"11"_b, "cpyfet_cpy_memcms"},
+    },
+  },
+
+  { "_zlmyjt",
+    {23, 22},
+    { {"00"_b, "fcsel_s_floatsel"},
+      {"01"_b, "fcsel_d_floatsel"},
+      {"11"_b, "fcsel_h_floatsel"},
+    },
+  },
+
+  { "_zlqnks",
+    {23, 22, 20, 19, 17, 16, 13},
+    { {"0000000"_b, "_kmqlmz"},
+      {"0000001"_b, "_tklxhy"},
+      {"0100000"_b, "_mtshvn"},
+      {"0100001"_b, "_gzzsgh"},
       {"100xxx0"_b, "st1_asisdlsop_bx1_r1b"},
       {"100xxx1"_b, "st3_asisdlsop_bx3_r3b"},
       {"1010xx0"_b, "st1_asisdlsop_bx1_r1b"},
@@ -8155,8 +9241,8 @@
       {"10110x1"_b, "st3_asisdlsop_bx3_r3b"},
       {"1011100"_b, "st1_asisdlsop_bx1_r1b"},
       {"1011101"_b, "st3_asisdlsop_bx3_r3b"},
-      {"1011110"_b, "_rnypvh"},
-      {"1011111"_b, "_nxjgmm"},
+      {"1011110"_b, "_tvrlgz"},
+      {"1011111"_b, "_nkmkvz"},
       {"110xxx0"_b, "ld1_asisdlsop_bx1_r1b"},
       {"110xxx1"_b, "ld3_asisdlsop_bx3_r3b"},
       {"1110xx0"_b, "ld1_asisdlsop_bx1_r1b"},
@@ -8165,16 +9251,27 @@
       {"11110x1"_b, "ld3_asisdlsop_bx3_r3b"},
       {"1111100"_b, "ld1_asisdlsop_bx1_r1b"},
       {"1111101"_b, "ld3_asisdlsop_bx3_r3b"},
-      {"1111110"_b, "_qqtpln"},
-      {"1111111"_b, "_glhxyj"},
+      {"1111110"_b, "_kkpxth"},
+      {"1111111"_b, "_rlylxh"},
     },
   },
 
-  { "_zlmgyp",
-    {23, 22, 13},
-    { {"000"_b, "fmla_asimdelem_rh_h"},
-      {"1x0"_b, "fmla_asimdelem_r_sd"},
-      {"xx1"_b, "sqdmlal_asimdelem_l"},
+  { "_zlvjrh",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldxr_lr32_ldstexcl"},
+    },
+  },
+
+  { "_zmhqmr",
+    {13, 12},
+    { {"10"_b, "lsrv_32_dp_2src"},
+    },
+  },
+
+  { "_zmkntq",
+    {18},
+    { {"0"_b, "ld1_asisdlsop_hx1_r1h"},
+      {"1"_b, "ld1_asisdlsop_h1_i1h"},
     },
   },
 
@@ -8187,17 +9284,10 @@
     },
   },
 
-  { "_zmpzkg",
-    {23, 22, 20, 19, 13, 11},
-    { {"0000x0"_b, "orr_asimdimm_l_sl"},
-      {"00x100"_b, "shl_asimdshf_r"},
-      {"00x110"_b, "sqshl_asimdshf_r"},
-      {"010x00"_b, "shl_asimdshf_r"},
-      {"010x10"_b, "sqshl_asimdshf_r"},
-      {"011100"_b, "shl_asimdshf_r"},
-      {"011110"_b, "sqshl_asimdshf_r"},
-      {"0x1000"_b, "shl_asimdshf_r"},
-      {"0x1010"_b, "sqshl_asimdshf_r"},
+  { "_zmrhxx",
+    {30, 23, 22},
+    { {"000"_b, "smov_asimdins_w_w"},
+      {"100"_b, "smov_asimdins_x_x"},
     },
   },
 
@@ -8207,105 +9297,47 @@
     },
   },
 
-  { "_zmzxjm",
-    {17},
-    { {"0"_b, "faddv_v_p_z"},
+  { "_zpjzst",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0111001"_b, "fcvtnu_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtnu_asimdmisc_r"},
+      {"1111001"_b, "fcvtpu_asimdmiscfp16_r"},
+      {"1x00001"_b, "fcvtpu_asimdmisc_r"},
+      {"xx10000"_b, "umaxv_asimdall_only"},
+      {"xx10001"_b, "uminv_asimdall_only"},
     },
   },
 
-  { "_znmhps",
-    {18, 17},
-    { {"00"_b, "st3_asisdlse_r3"},
+  { "_zprgxt",
+    {18, 17, 12},
+    { {"0x0"_b, "st4_asisdlsop_dx4_r4d"},
+      {"100"_b, "st4_asisdlsop_dx4_r4d"},
+      {"110"_b, "st4_asisdlsop_d4_i4d"},
     },
   },
 
-  { "_zpmkvt",
-    {12},
-    { {"1"_b, "_vqqrjl"},
-    },
-  },
-
-  { "_zpnsrv",
-    {23, 22, 13},
-    { {"000"_b, "fmul_asimdelem_rh_h"},
-      {"1x0"_b, "fmul_asimdelem_r_sd"},
-      {"xx1"_b, "sqdmull_asimdelem_l"},
-    },
-  },
-
-  { "_zppjvk",
-    {12},
-    { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
-    },
-  },
-
-  { "_zpsymj",
-    {22, 13, 12},
-    { {"000"_b, "swp_64_memop"},
-      {"001"_b, "_yjztsq"},
-      {"010"_b, "st64bv0_64_memop"},
-      {"011"_b, "st64bv_64_memop"},
-      {"100"_b, "swpl_64_memop"},
-    },
-  },
-
-  { "_zpzghs",
+  { "_zpxrnm",
     {30, 23, 22},
-    { {"000"_b, "stnp_q_ldstnapair_offs"},
-      {"001"_b, "ldnp_q_ldstnapair_offs"},
-      {"010"_b, "stp_q_ldstpair_post"},
-      {"011"_b, "ldp_q_ldstpair_post"},
+    { {"110"_b, "xar_vvv2_crypto3_imm6"},
     },
   },
 
-  { "_zqltpy",
-    {9, 8, 7, 6, 5},
-    { {"00000"_b, "fmov_s_floatimm"},
+  { "_zqhhlq",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "stllrh_sl32_ldstexcl"},
     },
   },
 
-  { "_zqmmsk",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"0000000"_b, "ldaddb_32_memop"},
-      {"0000100"_b, "ldclrb_32_memop"},
-      {"0001000"_b, "ldeorb_32_memop"},
-      {"0001100"_b, "ldsetb_32_memop"},
-      {"000xx10"_b, "strb_32b_ldst_regoff"},
-      {"0010000"_b, "ldaddlb_32_memop"},
-      {"0010100"_b, "ldclrlb_32_memop"},
-      {"0011000"_b, "ldeorlb_32_memop"},
-      {"0011100"_b, "ldsetlb_32_memop"},
-      {"001xx10"_b, "ldrb_32b_ldst_regoff"},
-      {"0100000"_b, "ldaddab_32_memop"},
-      {"0100100"_b, "ldclrab_32_memop"},
-      {"0101000"_b, "ldeorab_32_memop"},
-      {"0101100"_b, "ldsetab_32_memop"},
-      {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
-      {"0110000"_b, "ldaddalb_32_memop"},
-      {"0110100"_b, "ldclralb_32_memop"},
-      {"0111000"_b, "ldeoralb_32_memop"},
-      {"0111100"_b, "ldsetalb_32_memop"},
-      {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
-      {"1000000"_b, "ldaddh_32_memop"},
-      {"1000100"_b, "ldclrh_32_memop"},
-      {"1001000"_b, "ldeorh_32_memop"},
-      {"1001100"_b, "ldseth_32_memop"},
-      {"100xx10"_b, "strh_32_ldst_regoff"},
-      {"1010000"_b, "ldaddlh_32_memop"},
-      {"1010100"_b, "ldclrlh_32_memop"},
-      {"1011000"_b, "ldeorlh_32_memop"},
-      {"1011100"_b, "ldsetlh_32_memop"},
-      {"101xx10"_b, "ldrh_32_ldst_regoff"},
-      {"1100000"_b, "ldaddah_32_memop"},
-      {"1100100"_b, "ldclrah_32_memop"},
-      {"1101000"_b, "ldeorah_32_memop"},
-      {"1101100"_b, "ldsetah_32_memop"},
-      {"110xx10"_b, "ldrsh_64_ldst_regoff"},
-      {"1110000"_b, "ldaddalh_32_memop"},
-      {"1110100"_b, "ldclralh_32_memop"},
-      {"1111000"_b, "ldeoralh_32_memop"},
-      {"1111100"_b, "ldsetalh_32_memop"},
-      {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+  { "_zqjgzz",
+    {30, 23, 22},
+    { {"000"_b, "add_64_addsub_ext"},
+      {"100"_b, "sub_64_addsub_ext"},
+    },
+  },
+
+  { "_zqlzzp",
+    {2, 1},
+    { {"11"_b, "braaz_64_branch_reg"},
     },
   },
 
@@ -8315,6 +9347,33 @@
     },
   },
 
+  { "_zqmvqs",
+    {23, 22, 20, 19, 16, 13, 12},
+    { {"0111110"_b, "fcvtns_asisdmiscfp16_r"},
+      {"0111111"_b, "fcvtms_asisdmiscfp16_r"},
+      {"0x00110"_b, "fcvtns_asisdmisc_r"},
+      {"0x00111"_b, "fcvtms_asisdmisc_r"},
+      {"1111110"_b, "fcvtps_asisdmiscfp16_r"},
+      {"1111111"_b, "fcvtzs_asisdmiscfp16_r"},
+      {"1x00110"_b, "fcvtps_asisdmisc_r"},
+      {"1x00111"_b, "fcvtzs_asisdmisc_r"},
+      {"xx00000"_b, "cmgt_asisdmisc_z"},
+      {"xx00001"_b, "cmeq_asisdmisc_z"},
+      {"xx00010"_b, "cmlt_asisdmisc_z"},
+      {"xx00011"_b, "abs_asisdmisc_r"},
+      {"xx10111"_b, "addp_asisdpair_only"},
+    },
+  },
+
+  { "_zqxkxg",
+    {13, 12},
+    { {"00"_b, "subp_64s_dp_2src"},
+      {"01"_b, "irg_64i_dp_2src"},
+      {"10"_b, "lslv_64_dp_2src"},
+      {"11"_b, "pacga_64p_dp_2src"},
+    },
+  },
+
   { "_zrmgjx",
     {30, 23, 22, 13, 4},
     { {"01000"_b, "ldr_p_bi"},
@@ -8325,9 +9384,50 @@
     },
   },
 
-  { "_zrvlnx",
-    {13, 12},
-    { {"00"_b, "sbc_32_addsub_carry"},
+  { "_zrpzss",
+    {30, 23, 22, 13, 12, 11, 10},
+    { {"0000000"_b, "swpp_128_memop_128"},
+      {"0000100"_b, "rcwclrp_128_memop_128"},
+      {"0001000"_b, "rcwswpp_128_memop_128"},
+      {"0001100"_b, "rcwsetp_128_memop_128"},
+      {"0010000"_b, "swppl_128_memop_128"},
+      {"0010100"_b, "rcwclrpl_128_memop_128"},
+      {"0011000"_b, "rcwswppl_128_memop_128"},
+      {"0011100"_b, "rcwsetpl_128_memop_128"},
+      {"0100000"_b, "swppa_128_memop_128"},
+      {"0100100"_b, "rcwclrpa_128_memop_128"},
+      {"0101000"_b, "rcwswppa_128_memop_128"},
+      {"0101100"_b, "rcwsetpa_128_memop_128"},
+      {"0110000"_b, "swppal_128_memop_128"},
+      {"0110100"_b, "rcwclrpal_128_memop_128"},
+      {"0111000"_b, "rcwswppal_128_memop_128"},
+      {"0111100"_b, "rcwsetpal_128_memop_128"},
+      {"1000100"_b, "rcwsclrp_128_memop_128"},
+      {"1001000"_b, "rcwsswpp_128_memop_128"},
+      {"1001100"_b, "rcwssetp_128_memop_128"},
+      {"1010100"_b, "rcwsclrpl_128_memop_128"},
+      {"1011000"_b, "rcwsswppl_128_memop_128"},
+      {"1011100"_b, "rcwssetpl_128_memop_128"},
+      {"1100100"_b, "rcwsclrpa_128_memop_128"},
+      {"1101000"_b, "rcwsswppa_128_memop_128"},
+      {"1101100"_b, "rcwssetpa_128_memop_128"},
+      {"1110100"_b, "rcwsclrpal_128_memop_128"},
+      {"1111000"_b, "rcwsswppal_128_memop_128"},
+      {"1111100"_b, "rcwssetpal_128_memop_128"},
+    },
+  },
+
+  { "_zrqtgx",
+    {30},
+    { {"0"_b, "bl_only_branch_imm"},
+      {"1"_b, "_rxnnvv"},
+    },
+  },
+
+  { "_zrxhzq",
+    {19},
+    { {"0"_b, "_kjsrkm"},
+      {"1"_b, "sys_cr_systeminstrs"},
     },
   },
 
@@ -8337,11 +9437,9 @@
     },
   },
 
-  { "_zslsvj",
-    {23, 22, 20, 19, 11},
-    { {"00011"_b, "fcvtzu_asisdshf_c"},
-      {"001x1"_b, "fcvtzu_asisdshf_c"},
-      {"01xx1"_b, "fcvtzu_asisdshf_c"},
+  { "_zsgpsn",
+    {20, 19, 18, 17, 16, 13, 12, 3, 2, 1, 0},
+    { {"00000001101"_b, "setf16_only_setf"},
     },
   },
 
@@ -8358,15 +9456,39 @@
     },
   },
 
-  { "_zssjpv",
-    {18, 17},
-    { {"00"_b, "st1_asisdlse_r3_3v"},
+  { "_zspprz",
+    {20, 19, 17, 16, 12, 11, 10},
+    { {"0000xxx"_b, "_srnkng"},
+      {"0001xxx"_b, "_thkkgx"},
+      {"0010xxx"_b, "_grgrpt"},
+      {"0011xxx"_b, "_rkskkv"},
+      {"0110100"_b, "_rvsylx"},
+      {"0111100"_b, "_plymgg"},
+      {"1000xxx"_b, "_prytjs"},
+      {"1001xxx"_b, "_rrvltp"},
+      {"1010xxx"_b, "_syrmmr"},
+      {"1011xxx"_b, "_lnkrzt"},
+      {"1100xxx"_b, "_smmrpj"},
     },
   },
 
-  { "_zsyggq",
-    {23, 10},
-    { {"00"_b, "_txhzxq"},
+  { "_ztjjnh",
+    {30, 23, 22},
+    { {"100"_b, "eor3_vvv16_crypto4"},
+      {"101"_b, "sm3ss1_vvv4_crypto4"},
+      {"110"_b, "xar_vvv2_crypto3_imm6"},
+    },
+  },
+
+  { "_ztlysk",
+    {23, 22, 20, 19, 18, 17, 16},
+    { {"0010000"_b, "fmaxnmv_asimdall_only_h"},
+      {"0111001"_b, "fcvtas_asimdmiscfp16_r"},
+      {"0x00001"_b, "fcvtas_asimdmisc_r"},
+      {"1010000"_b, "fminnmv_asimdall_only_h"},
+      {"1111000"_b, "fcmgt_asimdmiscfp16_fz"},
+      {"1x00000"_b, "fcmgt_asimdmisc_fz"},
+      {"1x00001"_b, "urecpe_asimdmisc_r"},
     },
   },
 
@@ -8417,62 +9539,53 @@
     },
   },
 
-  { "_zvlxrl",
-    {23, 13, 12},
-    { {"010"_b, "fcmeq_asisdsame_only"},
+  { "_zvvvhr",
+    {13, 12, 11, 10},
+    { {"0000"_b, "smlal_asimddiff_l"},
+      {"0001"_b, "add_asimdsame_only"},
+      {"0010"_b, "_njnsqm"},
+      {"0011"_b, "cmtst_asimdsame_only"},
+      {"0100"_b, "sqdmlal_asimddiff_l"},
+      {"0101"_b, "mla_asimdsame_only"},
+      {"0110"_b, "_zlhlqy"},
+      {"0111"_b, "mul_asimdsame_only"},
+      {"1000"_b, "smlsl_asimddiff_l"},
+      {"1001"_b, "smaxp_asimdsame_only"},
+      {"1010"_b, "_nknntn"},
+      {"1011"_b, "sminp_asimdsame_only"},
+      {"1100"_b, "sqdmlsl_asimddiff_l"},
+      {"1101"_b, "sqdmulh_asimdsame_only"},
+      {"1110"_b, "_lyzhrq"},
+      {"1111"_b, "addp_asimdsame_only"},
     },
   },
 
-  { "_zvqghy",
+  { "_zvxxjk",
     {30, 23, 22, 13, 12, 11, 10},
-    { {"1000000"_b, "sha256h_qqv_cryptosha3"},
-      {"1000100"_b, "sha256h2_qqv_cryptosha3"},
-      {"1001000"_b, "sha256su1_vvv_cryptosha3"},
+    { {"000xxxx"_b, "madd_64a_dp_3src"},
+      {"0011111"_b, "smulh_64_dp_3src"},
+      {"0111111"_b, "umulh_64_dp_3src"},
     },
   },
 
-  { "_zxhhny",
-    {23, 22},
-    { {"00"_b, "fmsub_s_floatdp3"},
-      {"01"_b, "fmsub_d_floatdp3"},
-      {"11"_b, "fmsub_h_floatdp3"},
+  { "_zvynrg",
+    {19},
+    { {"0"_b, "_hnkyxy"},
+      {"1"_b, "sys_cr_systeminstrs"},
     },
   },
 
-  { "_zxspnk",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "sturb_32_ldst_unscaled"},
-      {"00001"_b, "strb_32_ldst_immpost"},
-      {"00010"_b, "sttrb_32_ldst_unpriv"},
-      {"00011"_b, "strb_32_ldst_immpre"},
-      {"00100"_b, "ldurb_32_ldst_unscaled"},
-      {"00101"_b, "ldrb_32_ldst_immpost"},
-      {"00110"_b, "ldtrb_32_ldst_unpriv"},
-      {"00111"_b, "ldrb_32_ldst_immpre"},
-      {"01000"_b, "ldursb_64_ldst_unscaled"},
-      {"01001"_b, "ldrsb_64_ldst_immpost"},
-      {"01010"_b, "ldtrsb_64_ldst_unpriv"},
-      {"01011"_b, "ldrsb_64_ldst_immpre"},
-      {"01100"_b, "ldursb_32_ldst_unscaled"},
-      {"01101"_b, "ldrsb_32_ldst_immpost"},
-      {"01110"_b, "ldtrsb_32_ldst_unpriv"},
-      {"01111"_b, "ldrsb_32_ldst_immpre"},
-      {"10000"_b, "sturh_32_ldst_unscaled"},
-      {"10001"_b, "strh_32_ldst_immpost"},
-      {"10010"_b, "sttrh_32_ldst_unpriv"},
-      {"10011"_b, "strh_32_ldst_immpre"},
-      {"10100"_b, "ldurh_32_ldst_unscaled"},
-      {"10101"_b, "ldrh_32_ldst_immpost"},
-      {"10110"_b, "ldtrh_32_ldst_unpriv"},
-      {"10111"_b, "ldrh_32_ldst_immpre"},
-      {"11000"_b, "ldursh_64_ldst_unscaled"},
-      {"11001"_b, "ldrsh_64_ldst_immpost"},
-      {"11010"_b, "ldtrsh_64_ldst_unpriv"},
-      {"11011"_b, "ldrsh_64_ldst_immpre"},
-      {"11100"_b, "ldursh_32_ldst_unscaled"},
-      {"11101"_b, "ldrsh_32_ldst_immpost"},
-      {"11110"_b, "ldtrsh_32_ldst_unpriv"},
-      {"11111"_b, "ldrsh_32_ldst_immpre"},
+  { "_zxjkmj",
+    {22, 4, 3},
+    { {"00x"_b, "prfm_p_ldst_regoff"},
+      {"010"_b, "prfm_p_ldst_regoff"},
+      {"011"_b, "rprfm_r_ldst_regoff"},
+    },
+  },
+
+  { "_zxklzp",
+    {12},
+    { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
     },
   },
 
@@ -8489,6 +9602,12 @@
     },
   },
 
+  { "_zyhgnz",
+    {18},
+    { {"0"_b, "ld4_asisdlse_r4"},
+    },
+  },
+
   { "_zyjjgs",
     {23, 22, 20, 19, 18},
     { {"00000"_b, "orr_z_zi"},
@@ -8499,61 +9618,39 @@
     },
   },
 
-  { "_zylnnn",
-    {30},
-    { {"0"_b, "cbz_64_compbranch"},
+  { "_zyxnpz",
+    {13, 12, 11, 10},
+    { {"1111"_b, "casa_c32_ldstexcl"},
     },
   },
 
-  { "_zytrsq",
-    {30},
-    { {"0"_b, "tbz_only_testbranch"},
+  { "_zyzsql",
+    {18},
+    { {"0"_b, "st2_asisdlsep_r2_r"},
+      {"1"_b, "st2_asisdlsep_i2_i"},
     },
   },
 
-  { "_zyzzhm",
-    {23, 20, 19, 18, 17, 16},
-    { {"000001"_b, "frint32x_asimdmisc_r"},
+  { "_zzhnxv",
+    {30, 23, 22, 20, 19},
+    { {"0xxxx"_b, "bl_only_branch_imm"},
+      {"10001"_b, "sysl_rc_systeminstrs"},
+      {"1001x"_b, "mrs_rs_systemmove"},
+      {"1011x"_b, "mrrs_rs_systemmovepr"},
     },
   },
 
-  { "_zzgrjz",
-    {18, 17},
-    { {"0x"_b, "ld3_asisdlsep_r3_r"},
-      {"10"_b, "ld3_asisdlsep_r3_r"},
-      {"11"_b, "ld3_asisdlsep_i3_i"},
+  { "_zzkgsk",
+    {20, 19, 18, 17, 16},
+    { {"11111"_b, "ldaxr_lr32_ldstexcl"},
     },
   },
 
-  { "_zzhgng",
-    {30, 23, 22, 13, 12, 11, 10},
-    { {"1000000"_b, "sha1c_qsv_cryptosha3"},
-      {"1000001"_b, "dup_asisdone_only"},
-      {"1000100"_b, "sha1p_qsv_cryptosha3"},
-      {"1001000"_b, "sha1m_qsv_cryptosha3"},
-      {"1001100"_b, "sha1su0_vvv_cryptosha3"},
-      {"1010111"_b, "fmulx_asisdsamefp16_only"},
-      {"1011001"_b, "fcmeq_asisdsamefp16_only"},
-      {"1011111"_b, "frecps_asisdsamefp16_only"},
-      {"1111111"_b, "frsqrts_asisdsamefp16_only"},
-    },
-  },
-
-  { "_zzrqlh",
-    {30, 23, 22, 11, 10},
-    { {"00000"_b, "_ygpjrl"},
-      {"01000"_b, "csel_32_condsel"},
-      {"01001"_b, "csinc_32_condsel"},
-      {"01100"_b, "_hggmnk"},
-      {"01101"_b, "_sllkpt"},
-      {"01110"_b, "_mgsvlj"},
-      {"01111"_b, "_kyyzks"},
-      {"10000"_b, "_zrvlnx"},
-      {"11000"_b, "csinv_32_condsel"},
-      {"11001"_b, "csneg_32_condsel"},
-      {"11100"_b, "_ghmzhr"},
-      {"11101"_b, "_gnqjhz"},
-      {"11110"_b, "_mmmjkx"},
+  { "_zztypv",
+    {6, 5},
+    { {"00"_b, "cfinv_m_pstate"},
+      {"01"_b, "xaflag_m_pstate"},
+      {"10"_b, "axflag_m_pstate"},
     },
   },
 
@@ -8568,7 +9665,7 @@
 
   { "Root",
     {31, 29, 28, 27, 26, 25, 24, 21, 15, 14},
-    { {"00000000xx"_b, "_qzjnpr"},
+    { {"00000000xx"_b, "_nqmnzp"},
       {"0000100000"_b, "_rzzxsn"},
       {"0000100001"_b, "_xvppmm"},
       {"0000100010"_b, "_ptsjnr"},
@@ -8578,78 +9675,88 @@
       {"0000100110"_b, "_ppnssm"},
       {"0000100111"_b, "_ztyqrj"},
       {"0000101000"_b, "_rnqtmt"},
-      {"0000101001"_b, "_njgxlz"},
+      {"0000101001"_b, "_rlpmrx"},
       {"0000101010"_b, "_mpvsng"},
       {"0000101011"_b, "_qlxksl"},
       {"0000101100"_b, "_mhrjvp"},
       {"0000101101"_b, "_pgjjsz"},
       {"0000101110"_b, "_yppyky"},
       {"0000101111"_b, "_yjmngt"},
-      {"000100000x"_b, "_vmjgmg"},
-      {"000100001x"_b, "_ytvxsl"},
-      {"0001000101"_b, "_yvhnlk"},
-      {"0001000111"_b, "_xryzqs"},
-      {"000101000x"_b, "_vjqsqs"},
-      {"000101010x"_b, "_phvnqh"},
-      {"000101100x"_b, "_pphhym"},
-      {"00010111xx"_b, "_qsygjs"},
-      {"0001100000"_b, "_jxrlyh"},
-      {"0001100001"_b, "_yqsgrt"},
-      {"0001100010"_b, "_kpyqyv"},
-      {"0001101000"_b, "_zkttzl"},
-      {"0001101001"_b, "_llqjlh"},
-      {"0001101010"_b, "_xhvtjg"},
-      {"0001101011"_b, "_xylmmp"},
-      {"0001101100"_b, "_vzzvlr"},
-      {"0001101101"_b, "_sjlrxn"},
-      {"0001101110"_b, "_xrhhjz"},
-      {"0001101111"_b, "_ygnypk"},
-      {"0001110000"_b, "_xjghst"},
-      {"0001110001"_b, "_xxyklv"},
-      {"0001110010"_b, "_rtgkkg"},
-      {"0001110100"_b, "_hqnxvt"},
-      {"0001110101"_b, "_hmxlny"},
-      {"0001110110"_b, "_txsmts"},
-      {"0001110111"_b, "_mtnpmr"},
-      {"0001111000"_b, "_ttstyt"},
-      {"0001111001"_b, "_krhrrr"},
-      {"0001111010"_b, "_xhltxn"},
-      {"0001111011"_b, "_ymznlj"},
-      {"0001111100"_b, "_kkgzst"},
-      {"0001111101"_b, "_gvjgyp"},
-      {"0001111110"_b, "_mjqvxq"},
-      {"0001111111"_b, "_spjjkg"},
-      {"0010001xxx"_b, "_vppthj"},
-      {"0010010xxx"_b, "_qzzlhq"},
-      {"001001100x"_b, "_zjslnr"},
-      {"001001110x"_b, "_jpxgqh"},
-      {"0010011x1x"_b, "_gkhhjm"},
-      {"0010100xxx"_b, "_jyxszq"},
-      {"0010110xxx"_b, "_xqhgkk"},
-      {"00101x1xxx"_b, "_zkqtrj"},
-      {"0011000xxx"_b, "_qkyjhg"},
-      {"00110010xx"_b, "_yjxshz"},
-      {"0011010000"_b, "_zzrqlh"},
-      {"0011010001"_b, "_qsrlql"},
-      {"001101001x"_b, "_tnrrjk"},
-      {"001101100x"_b, "_pnxgrg"},
-      {"001101101x"_b, "_ytsghm"},
-      {"0011100xxx"_b, "_srmhjk"},
-      {"0011110000"_b, "_zzhgng"},
-      {"0011110001"_b, "_zvqghy"},
-      {"001111001x"_b, "_hnzzkj"},
-      {"0011110100"_b, "_qntssm"},
-      {"0011110101"_b, "_mrqqlp"},
-      {"0011110110"_b, "_nxyhyv"},
-      {"0011110111"_b, "_qtknlp"},
-      {"0011111000"_b, "_gszlvl"},
-      {"0011111001"_b, "_mlnqrm"},
-      {"0011111010"_b, "_yvygml"},
-      {"0011111011"_b, "_xhxrnt"},
-      {"0011111100"_b, "_grqnlm"},
-      {"0011111101"_b, "_ktnjrx"},
-      {"0011111110"_b, "_gkpzhr"},
-      {"0011111111"_b, "_mpyhkm"},
+      {"0001000001"_b, "_thqgrq"},
+      {"0001000011"_b, "_hkgzsh"},
+      {"0001000101"_b, "_ktpxrr"},
+      {"0001000111"_b, "_stlgrr"},
+      {"00010100xx"_b, "_vtyqhh"},
+      {"00010101xx"_b, "_tytzpq"},
+      {"00010110xx"_b, "_tqlsyy"},
+      {"00010111xx"_b, "_htkpks"},
+      {"0001100000"_b, "_myvqtn"},
+      {"0001100001"_b, "_nmqskh"},
+      {"0001100010"_b, "_xrkzpn"},
+      {"0001101000"_b, "_zlqnks"},
+      {"0001101001"_b, "_vtllgt"},
+      {"0001101010"_b, "_ghqqzy"},
+      {"0001101011"_b, "_xrskrk"},
+      {"0001101100"_b, "_rzpqmm"},
+      {"0001101101"_b, "_pyvvqx"},
+      {"0001101110"_b, "_shgktt"},
+      {"0001101111"_b, "_szylpy"},
+      {"0001110000"_b, "_jgxqzr"},
+      {"0001110001"_b, "_jrqxvn"},
+      {"0001110010"_b, "_lplpkk"},
+      {"0001110100"_b, "_kgpsjz"},
+      {"0001110101"_b, "_hsrkqt"},
+      {"0001110110"_b, "_zvvvhr"},
+      {"0001110111"_b, "_kssltr"},
+      {"0001111000"_b, "_vzzqhx"},
+      {"0001111001"_b, "_ktngnm"},
+      {"0001111010"_b, "_ttsgkt"},
+      {"0001111011"_b, "_phtxqg"},
+      {"0001111100"_b, "_yysxts"},
+      {"0001111101"_b, "_msnshr"},
+      {"0001111110"_b, "_nmqrtr"},
+      {"0001111111"_b, "_gnxrlr"},
+      {"00100010xx"_b, "_hmjrmm"},
+      {"00100011xx"_b, "_nxlmhz"},
+      {"0010010xxx"_b, "_hqkljv"},
+      {"001001100x"_b, "_hvrjyt"},
+      {"001001110x"_b, "_kgygky"},
+      {"0010011x1x"_b, "_lkpprr"},
+      {"0010100xxx"_b, "_vyjsst"},
+      {"0010110xxx"_b, "_qvjmmq"},
+      {"00101x1xxx"_b, "_lxggmz"},
+      {"0011000xxx"_b, "_yjktml"},
+      {"0011001000"_b, "_mqtgvk"},
+      {"0011001001"_b, "_hvnhmh"},
+      {"0011001010"_b, "_gsnnnt"},
+      {"0011001011"_b, "_vxvyyg"},
+      {"0011001100"_b, "_jkvsxy"},
+      {"0011001110"_b, "_zrpzss"},
+      {"0011010000"_b, "_rsqxrs"},
+      {"0011010001"_b, "_rktqym"},
+      {"001101001x"_b, "_vqrqjt"},
+      {"001101100x"_b, "_rtlvxq"},
+      {"001101101x"_b, "_gtqnvr"},
+      {"0011100xxx"_b, "_yzpszn"},
+      {"0011101000"_b, "_hhxpyt"},
+      {"0011101001"_b, "_htrtzz"},
+      {"0011101010"_b, "_rkxlyj"},
+      {"0011101011"_b, "_vnggzq"},
+      {"0011110000"_b, "_mrlpxr"},
+      {"0011110001"_b, "_xszqrg"},
+      {"001111001x"_b, "_plyxlq"},
+      {"0011110100"_b, "_rqpjjs"},
+      {"0011110101"_b, "_ttmyrv"},
+      {"0011110110"_b, "_lvjtlg"},
+      {"0011110111"_b, "_lnntps"},
+      {"0011111000"_b, "_vtgnnl"},
+      {"0011111001"_b, "_mxnzst"},
+      {"0011111010"_b, "_lvryvp"},
+      {"0011111011"_b, "_mqssgy"},
+      {"0011111100"_b, "_pxzvjl"},
+      {"0011111101"_b, "_mnxgqm"},
+      {"0011111110"_b, "_qntrvk"},
+      {"0011111111"_b, "_vnnjxg"},
       {"0100100000"_b, "_yyyshx"},
       {"0100100001"_b, "_mylphg"},
       {"0100100010"_b, "_nsjhhg"},
@@ -8658,71 +9765,71 @@
       {"0100100101"_b, "_nvkthr"},
       {"0100100110"_b, "_phthqj"},
       {"0100100111"_b, "_kyjxrr"},
-      {"0100101000"_b, "_gtvhmp"},
+      {"0100101000"_b, "_gzvylr"},
       {"0100101001"_b, "_pppsmg"},
-      {"0100101010"_b, "_zgysvr"},
+      {"0100101010"_b, "_pyjnpz"},
       {"0100101011"_b, "_shqygv"},
-      {"0100101100"_b, "_lpsvyy"},
+      {"0100101100"_b, "_hzsxkp"},
       {"0100101101"_b, "_nqkhrv"},
       {"0100101110"_b, "_tkjtgp"},
       {"0100101111"_b, "_htqpks"},
-      {"0101000xxx"_b, "_vpkptr"},
-      {"0101001xxx"_b, "_vmjzyk"},
-      {"010101000x"_b, "_gmrxlp"},
-      {"010101010x"_b, "_jmgkrl"},
-      {"010101100x"_b, "_qhgtvk"},
-      {"01010111xx"_b, "_rxpspy"},
-      {"0101100xxx"_b, "_qhtqrj"},
-      {"0101101xxx"_b, "_vnpqrh"},
-      {"0101110000"_b, "_vpykkg"},
-      {"0101110001"_b, "_xrxvpr"},
-      {"0101110010"_b, "_zglksl"},
-      {"0101110011"_b, "_gtjskz"},
-      {"0101110100"_b, "_qntygx"},
-      {"0101110101"_b, "_kxprqm"},
-      {"0101110110"_b, "_qxtvzy"},
-      {"0101110111"_b, "_mstthg"},
-      {"0101111000"_b, "_qmqmpj"},
-      {"0101111001"_b, "_rhttgj"},
-      {"0101111010"_b, "_jqnhrj"},
-      {"0101111011"_b, "_nlqglq"},
-      {"0101111100"_b, "_vtxyxz"},
-      {"0101111101"_b, "_pqtjgx"},
-      {"0101111110"_b, "_snjpvy"},
-      {"0101111111"_b, "_spzgkt"},
-      {"0110001xxx"_b, "_plktrh"},
-      {"0110010xxx"_b, "_xtqmyj"},
-      {"0110011xxx"_b, "_lzpykk"},
-      {"0110100xxx"_b, "_mtzgpn"},
-      {"0110101xxx"_b, "_tvgvvq"},
-      {"01110000xx"_b, "_zxspnk"},
-      {"0111000100"_b, "_zqmmsk"},
-      {"0111000101"_b, "_nmzyvt"},
-      {"0111000110"_b, "_vvhzhv"},
-      {"0111000111"_b, "_sltqpy"},
-      {"0111001xxx"_b, "_qzsthq"},
-      {"0111010000"_b, "_zsyggq"},
-      {"0111010001"_b, "_hngpgx"},
-      {"011101001x"_b, "_njxtpv"},
-      {"01111000xx"_b, "_kpmvkn"},
-      {"0111100101"_b, "_jhytlg"},
-      {"0111100111"_b, "_rksxpn"},
-      {"01111001x0"_b, "_trlhgn"},
-      {"0111101xxx"_b, "_jxtgtx"},
-      {"0111110000"_b, "_tnhmpx"},
-      {"0111110010"_b, "_sqjpsl"},
-      {"0111110100"_b, "_sjnxky"},
-      {"0111110101"_b, "_kykymg"},
-      {"0111110110"_b, "_pxzkjy"},
-      {"0111110111"_b, "_tjktkm"},
-      {"0111111000"_b, "_hhkhkk"},
-      {"0111111001"_b, "_nxmjvy"},
-      {"0111111010"_b, "_vkvgnm"},
-      {"0111111011"_b, "_tssqsr"},
-      {"0111111100"_b, "_mthzvm"},
-      {"0111111101"_b, "_nlgqsk"},
-      {"0111111110"_b, "_gvykrp"},
-      {"0111111111"_b, "_sjzsvv"},
+      {"0101000xxx"_b, "_ssvpxz"},
+      {"0101001xxx"_b, "_vgqvys"},
+      {"01010100xx"_b, "_qkrnms"},
+      {"01010101xx"_b, "_vypnss"},
+      {"01010110xx"_b, "_glkvkr"},
+      {"01010111xx"_b, "_qgqgkx"},
+      {"0101100xxx"_b, "_mxplnn"},
+      {"0101101xxx"_b, "_pqmqrg"},
+      {"0101110000"_b, "_gshlgj"},
+      {"0101110001"_b, "_klsmsv"},
+      {"0101110010"_b, "_xhhqnx"},
+      {"0101110011"_b, "_rssrty"},
+      {"0101110100"_b, "_nzskzl"},
+      {"0101110101"_b, "_qlzvpg"},
+      {"0101110110"_b, "_hlxmpy"},
+      {"0101110111"_b, "_lplzxv"},
+      {"0101111000"_b, "_krtvhr"},
+      {"0101111001"_b, "_ymtzjg"},
+      {"0101111010"_b, "_szgqrr"},
+      {"0101111011"_b, "_xnpyvy"},
+      {"0101111100"_b, "_tnngsg"},
+      {"0101111101"_b, "_kshtnj"},
+      {"0101111110"_b, "_vmxzxt"},
+      {"0101111111"_b, "_gxqnph"},
+      {"0110001xxx"_b, "_ykptgl"},
+      {"0110010xxx"_b, "_slzvjh"},
+      {"0110011xxx"_b, "_nqlrmv"},
+      {"0110100xxx"_b, "_yrjqql"},
+      {"0110101xxx"_b, "_prgrzz"},
+      {"01110000xx"_b, "_vshynq"},
+      {"0111000100"_b, "_ykjhgg"},
+      {"0111000101"_b, "_jqtksx"},
+      {"0111000110"_b, "_gzpkvm"},
+      {"0111000111"_b, "_jhkkgv"},
+      {"0111001xxx"_b, "_yptvyx"},
+      {"0111010000"_b, "_tzrgqq"},
+      {"0111010001"_b, "_qlpnnn"},
+      {"011101001x"_b, "_grsslr"},
+      {"01111000xx"_b, "_xjtzgm"},
+      {"0111100101"_b, "_srsrtk"},
+      {"0111100111"_b, "_xynxhx"},
+      {"01111001x0"_b, "_gylmmr"},
+      {"0111101xxx"_b, "_mkzysy"},
+      {"0111110000"_b, "_nklvmv"},
+      {"0111110010"_b, "_pyttkp"},
+      {"0111110100"_b, "_lrqlrg"},
+      {"0111110101"_b, "_yvxkhv"},
+      {"0111110110"_b, "_ksgpqz"},
+      {"0111110111"_b, "_hkpjqm"},
+      {"0111111000"_b, "_lgzlyq"},
+      {"0111111001"_b, "_yrypnt"},
+      {"0111111010"_b, "_snvnjz"},
+      {"0111111011"_b, "_kkkltp"},
+      {"0111111100"_b, "_xsgnlv"},
+      {"0111111101"_b, "_lrptrn"},
+      {"0111111110"_b, "_pyhrrt"},
+      {"0111111111"_b, "_nkyrpv"},
       {"0x10000xxx"_b, "adr_only_pcreladdr"},
       {"1000100000"_b, "_lspzrv"},
       {"1000100001"_b, "_kxvvkq"},
@@ -8740,55 +9847,68 @@
       {"1000101101"_b, "_ytkjxx"},
       {"1000101110"_b, "_zxtzmv"},
       {"1000101111"_b, "_kgmqkh"},
-      {"100100000x"_b, "_jhqlkv"},
-      {"100100001x"_b, "_lxgltj"},
-      {"1001000100"_b, "_hxzlmm"},
-      {"1001000101"_b, "_vllqmp"},
-      {"1001000110"_b, "_tlstgz"},
-      {"1001000111"_b, "_mrmpgh"},
-      {"10010100xx"_b, "_rzkmny"},
-      {"10010101xx"_b, "_jggvph"},
-      {"10010110xx"_b, "_nhkstj"},
-      {"10010111xx"_b, "_jsygzs"},
-      {"100111000x"_b, "_gmsgqz"},
-      {"1001110010"_b, "_grrjlh"},
-      {"1001110011"_b, "_jhkglp"},
-      {"100111010x"_b, "_qytrjj"},
-      {"1001110110"_b, "_qsqqxg"},
-      {"1001110111"_b, "_kypqpy"},
-      {"1010001xxx"_b, "_vsvtqz"},
-      {"1010010xxx"_b, "_vqzlzt"},
-      {"10100110xx"_b, "_xxpqgg"},
-      {"10100111xx"_b, "_rgjqzs"},
-      {"10101000xx"_b, "_qmrgkn"},
-      {"10101001xx"_b, "_jkxlnq"},
-      {"1010101000"_b, "_ggvztl"},
-      {"1010101001"_b, "_xlhjhx"},
-      {"101010101x"_b, "_nqgqjh"},
-      {"1010101100"_b, "_qsrtzz"},
-      {"1010101110"_b, "_tzzzxz"},
-      {"10101011x1"_b, "_lhmlrj"},
-      {"1010110000"_b, "_kkmxxx"},
-      {"1010110100"_b, "_ltvrrg"},
-      {"1010111000"_b, "_mqkjxj"},
-      {"1010111100"_b, "_pmrngh"},
-      {"101011xx10"_b, "_hsjynv"},
-      {"101011xxx1"_b, "_kmhtqp"},
-      {"1011000xxx"_b, "_ylhxlt"},
-      {"10110010xx"_b, "_gkxgsn"},
-      {"1011001100"_b, "_xzmjxk"},
-      {"1011001110"_b, "_ppqkym"},
-      {"10110011x1"_b, "_xzyxnr"},
-      {"1011010000"_b, "_xyljvp"},
-      {"1011010001"_b, "_sxnkrh"},
-      {"101101001x"_b, "_klthpn"},
-      {"101101100x"_b, "_xnsrny"},
-      {"101101101x"_b, "_htppjj"},
-      {"101101110x"_b, "_rmmmjj"},
-      {"101101111x"_b, "_txnqzy"},
-      {"1011100xxx"_b, "_gmvtss"},
-      {"10111100xx"_b, "_gnxgxs"},
-      {"1011110100"_b, "_zjgvyp"},
+      {"1001000001"_b, "_ptyynt"},
+      {"1001000011"_b, "_skszgm"},
+      {"1001000100"_b, "_rlgtnn"},
+      {"1001000101"_b, "_rgxthl"},
+      {"1001000110"_b, "_xxphlt"},
+      {"1001000111"_b, "_njjlxy"},
+      {"10010100xx"_b, "_tnpjts"},
+      {"10010101xx"_b, "_hgjgpm"},
+      {"10010110xx"_b, "_hqnsvg"},
+      {"10010111xx"_b, "_zqjgzz"},
+      {"100111000x"_b, "_ztjjnh"},
+      {"1001110010"_b, "_lssjyz"},
+      {"1001110011"_b, "_zpxrnm"},
+      {"100111010x"_b, "_jkvvtp"},
+      {"1001110110"_b, "_sqhxzj"},
+      {"1001110111"_b, "_hrxtnj"},
+      {"1010001000"_b, "_vrsgzg"},
+      {"1010001010"_b, "_vhkjgh"},
+      {"10100010x1"_b, "_rxytqg"},
+      {"1010001100"_b, "_lrmgmq"},
+      {"1010001110"_b, "_zjrsrx"},
+      {"10100011x1"_b, "_vmgnhk"},
+      {"1010010xxx"_b, "_pjlnhh"},
+      {"10100110xx"_b, "_xzlxjh"},
+      {"10100111xx"_b, "_rjthsm"},
+      {"10101000xx"_b, "_yjnkrn"},
+      {"10101001xx"_b, "_zrqtgx"},
+      {"1010101000"_b, "_pxvjkp"},
+      {"1010101001"_b, "_xrzqtn"},
+      {"101010101x"_b, "_ttmvpr"},
+      {"1010101100"_b, "_grjzyl"},
+      {"1010101110"_b, "_kynxnz"},
+      {"10101011x1"_b, "_zzhnxv"},
+      {"1010110000"_b, "_lymhlk"},
+      {"1010110100"_b, "_tpmqyl"},
+      {"1010111000"_b, "_lkzyzv"},
+      {"1010111100"_b, "_tvyxlr"},
+      {"101011xx10"_b, "_yxvttm"},
+      {"101011xxx1"_b, "_qhzvvh"},
+      {"1011000xxx"_b, "_jgklkt"},
+      {"1011001000"_b, "_sxptnh"},
+      {"1011001001"_b, "_vmsxgq"},
+      {"1011001010"_b, "_vkrskv"},
+      {"1011001011"_b, "_nxrqmg"},
+      {"1011001100"_b, "_lsqgkk"},
+      {"1011001110"_b, "_kxpqhv"},
+      {"10110011x1"_b, "_jrxtzg"},
+      {"1011010000"_b, "_yzqhtj"},
+      {"1011010001"_b, "_yvqnyq"},
+      {"101101001x"_b, "_qpsryx"},
+      {"1011011000"_b, "_vpjktn"},
+      {"1011011001"_b, "_zvxxjk"},
+      {"101101101x"_b, "_sztkhs"},
+      {"101101110x"_b, "_hlypvy"},
+      {"101101111x"_b, "_rszgzl"},
+      {"1011100xxx"_b, "_ymszkr"},
+      {"1011101000"_b, "_pzzgts"},
+      {"1011101001"_b, "_pgvjgs"},
+      {"1011101010"_b, "_kppzvh"},
+      {"1011101011"_b, "_nlrjsj"},
+      {"10111100xx"_b, "_rxtklv"},
+      {"1011110100"_b, "_vsnnms"},
       {"1100100000"_b, "_sjtrhm"},
       {"1100100001"_b, "_hzkglv"},
       {"1100100010"_b, "_qrygny"},
@@ -8798,43 +9918,44 @@
       {"1100100110"_b, "_kqzmtr"},
       {"1100100111"_b, "_qpvgnh"},
       {"1100101000"_b, "_tpkslq"},
-      {"1100101001"_b, "_ympyng"},
+      {"1100101001"_b, "_kzpyzy"},
       {"1100101010"_b, "_ytvtqn"},
-      {"1100101011"_b, "_qvsypn"},
+      {"1100101011"_b, "_qkzjxm"},
       {"1100101100"_b, "_lqmksm"},
-      {"1100101101"_b, "_mkskxj"},
+      {"1100101101"_b, "_hxlznn"},
       {"1100101110"_b, "_knkjnz"},
-      {"1100101111"_b, "_hxnmsl"},
-      {"1101000xxx"_b, "_shrsxr"},
-      {"1101001xxx"_b, "_xhkgqh"},
-      {"11010100xx"_b, "_rmxjsn"},
-      {"11010101xx"_b, "_mvzvpk"},
-      {"11010110xx"_b, "_ysjqhn"},
-      {"11010111xx"_b, "_lpkqzl"},
-      {"1101100xxx"_b, "_zpzghs"},
-      {"1101101xxx"_b, "_gmrxqq"},
-      {"1110001xxx"_b, "_jlqjzr"},
-      {"1110010xxx"_b, "_qgmngg"},
-      {"1110011xxx"_b, "_vlrrtz"},
-      {"1110100xxx"_b, "_zylnnn"},
-      {"1110101xxx"_b, "_yjjrgg"},
-      {"11110000xx"_b, "_qhtrnn"},
-      {"1111000100"_b, "_lrqkvp"},
-      {"1111000101"_b, "_pvkmmv"},
-      {"1111000110"_b, "_lxmyjh"},
-      {"1111000111"_b, "_vgrhsz"},
-      {"1111001xxx"_b, "_vqvqhp"},
-      {"1111010000"_b, "_yjsjvt"},
-      {"1111010010"_b, "_yzzlxs"},
-      {"11110100x1"_b, "_vkhhkk"},
-      {"11111000xx"_b, "_xrhmtg"},
-      {"11111001xx"_b, "_xprlgy"},
-      {"1111101xxx"_b, "_hjgylh"},
+      {"1100101111"_b, "_rsjgyk"},
+      {"1101000xxx"_b, "_mtlxqp"},
+      {"1101001xxx"_b, "_vmyztj"},
+      {"11010100xx"_b, "_vnzkty"},
+      {"11010101xx"_b, "_vnrlrk"},
+      {"11010110xx"_b, "_rjmhxr"},
+      {"11010111xx"_b, "_rxgkjn"},
+      {"1101100xxx"_b, "_jvkxtj"},
+      {"1101101xxx"_b, "_srpqmk"},
+      {"1110001xxx"_b, "_pkskpp"},
+      {"1110010xxx"_b, "_mgspnm"},
+      {"1110011xxx"_b, "_snhzxr"},
+      {"1110100xxx"_b, "_jxgqqz"},
+      {"1110101xxx"_b, "_qnysqv"},
+      {"11110000xx"_b, "_rqghyv"},
+      {"1111000100"_b, "_zgljvg"},
+      {"1111000101"_b, "_yjnmkg"},
+      {"1111000110"_b, "_jqhvhn"},
+      {"1111000111"_b, "_vnsqhn"},
+      {"1111001xxx"_b, "_hvhrsq"},
+      {"1111010000"_b, "_gyjphh"},
+      {"1111010010"_b, "_mkrgxr"},
+      {"11110100x1"_b, "_mtzhrn"},
+      {"11111000xx"_b, "_hlljqz"},
+      {"11111001xx"_b, "_qzsyvx"},
+      {"1111101xxx"_b, "_jqlgts"},
       {"1x10000xxx"_b, "adrp_only_pcreladdr"},
-      {"x110110xxx"_b, "_zytrsq"},
-      {"x110111xxx"_b, "_kxsysq"},
+      {"x110110xxx"_b, "_xymnxy"},
+      {"x110111xxx"_b, "_htjmmx"},
     },
   },
+
 };
 // clang-format on
 
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
index 49c27b2..bda71ce 100644
--- a/src/aarch64/decoder-visitor-map-aarch64.h
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -1464,8 +1464,6 @@
       {"crc32h_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},     \
       {"crc32w_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},     \
       {"crc32x_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},     \
-      {"gmi_64g_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},        \
-      {"irg_64i_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},        \
       {"lslv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},        \
       {"lslv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},        \
       {"lsrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source},        \
@@ -2076,7 +2074,6 @@
       {"scvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16},       \
       {"ucvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16},       \
       {"addhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different},             \
-      {"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different},             \
       {"raddhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different},            \
       {"rsubhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different},            \
       {"sabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different},             \
@@ -2594,8 +2591,8 @@
       {"dmb_bo_barriers"_h, &VISITORCLASS::VisitSystem},                       \
       {"dsb_bo_barriers"_h, &VISITORCLASS::VisitSystem},                       \
       {"hint_hm_hints"_h, &VISITORCLASS::VisitSystem},                         \
+      {"chkfeat_hf_hints"_h, &VISITORCLASS::VisitSystem},                      \
       {"mrs_rs_systemmove"_h, &VISITORCLASS::VisitSystem},                     \
-      {"msr_si_pstate"_h, &VISITORCLASS::VisitSystem},                         \
       {"msr_sr_systemmove"_h, &VISITORCLASS::VisitSystem},                     \
       {"psb_hc_hints"_h, &VISITORCLASS::VisitSystem},                          \
       {"sb_only_barriers"_h, &VISITORCLASS::VisitSystem},                      \
@@ -2641,19 +2638,15 @@
        &VISITORCLASS::VisitUnconditionalBranchToRegister},                     \
       {"ret_64r_branch_reg"_h,                                                 \
        &VISITORCLASS::VisitUnconditionalBranchToRegister},                     \
-      {"addg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented},         \
-      {"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented},             \
       {"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented},              \
       {"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented},             \
       {"bfmlal_asimdelem_f"_h, &VISITORCLASS::VisitUnimplemented},             \
-      {"bfmlal_asimdsame2_f_"_h, &VISITORCLASS::VisitUnimplemented},           \
+      {"bfmlal_asimdsame2_f"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented},               \
-      {"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented},             \
       {"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented},                \
       {"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"ldg_64loffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},         \
       {"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},           \
       {"ldtrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},           \
       {"ldtrsb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},          \
@@ -2663,62 +2656,38 @@
       {"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},          \
       {"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},            \
-      {"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},       \
-      {"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},    \
-      {"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},     \
-      {"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented},   \
-      {"sha512su1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},  \
-      {"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},  \
-      {"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},  \
-      {"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitUnimplemented},            \
-      {"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented},      \
-      {"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented},      \
-      {"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented},      \
-      {"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented},       \
-      {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented},    \
-      {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented},        \
-      {"st2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},        \
-      {"st2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},          \
-      {"st2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},           \
+      {"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM3},      \
+      {"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM3},      \
+      {"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitCryptoSM3},                \
+      {"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3},          \
+      {"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3},          \
+      {"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3},          \
+      {"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3},           \
+      {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM4},        \
+      {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitCryptoSM4},            \
       {"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented},                \
       {"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented},                \
       {"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented},               \
       {"stgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"stgp_64_ldstpair_off"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"stgp_64_ldstpair_post"_h, &VISITORCLASS::VisitUnimplemented},          \
-      {"stgp_64_ldstpair_pre"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"stg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},         \
-      {"stg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"stg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"sttrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},           \
       {"sttrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},           \
       {"sttr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},            \
       {"sttr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented},            \
-      {"stz2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},       \
-      {"stz2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},         \
-      {"stz2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},          \
       {"stzgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},          \
-      {"stzg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},        \
-      {"stzg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},          \
-      {"stzg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented},           \
-      {"subg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented},         \
-      {"subps_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented},              \
-      {"subp_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented},               \
       {"tcancel_ex_exception"_h, &VISITORCLASS::VisitUnimplemented},           \
       {"tstart_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented},         \
       {"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented},          \
       {"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented},  \
       {"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented},  \
-      {"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented},          \
       {"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented},               \
       {"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented},             \
-      {"bfdot_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented},                   \
-      {"bfdot_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented},                  \
-      {"bfmlalb_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented},                 \
-      {"bfmlalb_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented},                \
-      {"bfmlalt_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented},                 \
-      {"bfmlalt_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented},                \
-      {"bfmmla_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, {                \
+      {"bfdot_z_zzz"_h, &VISITORCLASS::VisitUnimplemented},                    \
+      {"bfdot_z_zzzi"_h, &VISITORCLASS::VisitUnimplemented},                   \
+      {"bfmlalb_z_zzz"_h, &VISITORCLASS::VisitUnimplemented},                  \
+      {"bfmlalb_z_zzzi"_h, &VISITORCLASS::VisitUnimplemented},                 \
+      {"bfmlalt_z_zzz"_h, &VISITORCLASS::VisitUnimplemented},                  \
+      {"bfmlalt_z_zzzi"_h, &VISITORCLASS::VisitUnimplemented},                 \
+      {"bfmmla_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, {                 \
     "unallocated"_h, &VISITORCLASS::VisitUnallocated                           \
   }
 
@@ -2742,7 +2711,6 @@
       {"pacib1716_hi_hints"_h, &VISITORCLASS::VisitSystem},                    \
       {"pacibsp_hi_hints"_h, &VISITORCLASS::VisitSystem},                      \
       {"pacibz_hi_hints"_h, &VISITORCLASS::VisitSystem},                       \
-      {"pssbb_only_barriers"_h, &VISITORCLASS::VisitSystem},                   \
       {"sev_hi_hints"_h, &VISITORCLASS::VisitSystem},                          \
       {"sevl_hi_hints"_h, &VISITORCLASS::VisitSystem},                         \
       {"ssbb_only_barriers"_h, &VISITORCLASS::VisitSystem},                    \
@@ -2851,6 +2819,7 @@
       {"fmlal_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same},                  \
       {"fmlsl2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same},                 \
       {"fmlsl_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same},                  \
+      {"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different},             \
       {"ushll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate},          \
       {"sshll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate},          \
       {"shrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate},           \
@@ -2880,22 +2849,6 @@
        &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
       {"sqdmull_asisdelem_l"_h,                                                \
        &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmla_asisdelem_rh_h"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmla_asisdelem_r_sd"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmls_asisdelem_rh_h"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmls_asisdelem_r_sd"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmulx_asisdelem_rh_h"_h,                                               \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmulx_asisdelem_r_sd"_h,                                               \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmul_asisdelem_rh_h"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
-      {"fmul_asisdelem_r_sd"_h,                                                \
-       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
       {"fabd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same},          \
       {"facge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same},         \
       {"facgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same},         \
@@ -2968,6 +2921,22 @@
       {"frecpe_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc},        \
       {"frecpx_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc},        \
       {"frsqrte_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc},       \
-      {"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, {       \
-    "ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc              \
+      {"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc},         \
+      {"ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc},         \
+      {"fmla_asisdelem_rh_h"_h,                                                \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmla_asisdelem_r_sd"_h,                                                \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmls_asisdelem_rh_h"_h,                                                \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmls_asisdelem_r_sd"_h,                                                \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmulx_asisdelem_rh_h"_h,                                               \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmulx_asisdelem_r_sd"_h,                                               \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+      {"fmul_asisdelem_rh_h"_h,                                                \
+       &VISITORCLASS::VisitNEONScalarByIndexedElement},                        \
+  {                                                                            \
+    "fmul_asisdelem_r_sd"_h, &VISITORCLASS::VisitNEONScalarByIndexedElement    \
   }
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 3d3e5fd..930dfd6 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -24,16 +24,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "disasm-aarch64.h"
+
 #include <bitset>
 #include <cstdlib>
 #include <sstream>
 
-#include "disasm-aarch64.h"
-
 namespace vixl {
 namespace aarch64 {
 
-
 const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() {
   static const FormToVisitorFnMap form_to_visitor = {
       DEFAULT_FORM_TO_VISITOR_MAP(Disassembler),
@@ -48,7 +47,6 @@
       {"csdb_hi_hints"_h, &Disassembler::DisassembleNoArgs},
       {"dgh_hi_hints"_h, &Disassembler::DisassembleNoArgs},
       {"ssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs},
-      {"pssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs},
       {"esb_hi_hints"_h, &Disassembler::DisassembleNoArgs},
       {"isb_bi_barriers"_h, &Disassembler::DisassembleNoArgs},
       {"nop_hi_hints"_h, &Disassembler::DisassembleNoArgs},
@@ -332,6 +330,7 @@
       {"frsqrte_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
       {"scvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
       {"ucvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+      {"pmull_asimddiff_l"_h, &Disassembler::DisassembleNEONPolynomialMul},
       {"adclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
       {"adclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
       {"addhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
@@ -419,8 +418,8 @@
       {"nbsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
       {"nmatch_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT},
       {"pmul_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB},
-      {"pmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
-      {"pmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+      {"pmullb_z_zz"_h, &Disassembler::DisassembleSVEPmull},
+      {"pmullt_z_zz"_h, &Disassembler::DisassembleSVEPmull},
       {"raddhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
       {"raddhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
       {"rax1_z_zz"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD},
@@ -672,6 +671,98 @@
       {"usdot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex},
       {"sudot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex},
       {"usdot_asimdsame2_d"_h, &Disassembler::VisitNEON3SameExtra},
+      {"addg_64_addsub_immtags"_h,
+       &Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4},
+      {"gmi_64g_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_Xm},
+      {"irg_64i_dp_2src"_h, &Disassembler::Disassemble_XdSP_XnSP_Xm},
+      {"ldg_64loffset_ldsttags"_h, &Disassembler::DisassembleMTELoadTag},
+      {"st2g_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"st2g_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"st2g_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stgp_64_ldstpair_off"_h, &Disassembler::DisassembleMTEStoreTagPair},
+      {"stgp_64_ldstpair_post"_h, &Disassembler::DisassembleMTEStoreTagPair},
+      {"stgp_64_ldstpair_pre"_h, &Disassembler::DisassembleMTEStoreTagPair},
+      {"stg_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stg_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stg_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stz2g_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stz2g_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stz2g_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stzg_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stzg_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"stzg_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag},
+      {"subg_64_addsub_immtags"_h,
+       &Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4},
+      {"subps_64s_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_XmSP},
+      {"subp_64s_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_XmSP},
+      {"cpyen_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyern_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyewn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpye_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfen_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfern_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfewn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfe_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfmn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfmrn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfmwn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfm_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfpn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfprn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfpwn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyfp_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpymn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpymrn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpymwn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpym_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpypn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyprn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpypwn_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"cpyp_cpy_memcms"_h, &Disassembler::DisassembleCpy},
+      {"seten_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"sete_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setgen_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setge_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setgmn_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setgm_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setgpn_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setgp_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setmn_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setm_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setpn_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"setp_set_memcms"_h, &Disassembler::DisassembleSet},
+      {"abs_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"abs_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"cnt_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"cnt_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"ctz_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"ctz_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+      {"smax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"smax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"smin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"smin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"umax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"umax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"umin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"umin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+      {"smax_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"smax_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"smin_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"smin_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"umax_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"umax_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"umin_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"umin_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+      {"bcax_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
+      {"eor3_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
+      {"xar_vvv2_crypto3_imm6"_h, &Disassembler::DisassembleNEONXar},
+      {"rax1_vvv2_cryptosha512_3"_h, &Disassembler::DisassembleNEONRax1},
+      {"sha512h2_qqv_cryptosha512_3"_h, &Disassembler::DisassembleSHA512},
+      {"sha512h_qqv_cryptosha512_3"_h, &Disassembler::DisassembleSHA512},
+      {"sha512su0_vv2_cryptosha512_2"_h, &Disassembler::DisassembleSHA512},
+      {"sha512su1_vvv2_cryptosha512_3"_h, &Disassembler::DisassembleSHA512},
+      {"pmullb_z_zz_q"_h, &Disassembler::DisassembleSVEPmull128},
+      {"pmullt_z_zz_q"_h, &Disassembler::DisassembleSVEPmull128},
   };
   return &form_to_visitor;
 }  // NOLINT(readability/fn_size)
@@ -745,6 +836,12 @@
   const char *form_cmp = "'Rn, 'Rm'NDP";
   const char *form_neg = "'Rd, 'Rm'NDP";
 
+  if (instr->GetShiftDP() == ROR) {
+    // Add/sub/adds/subs don't allow ROR as a shift mode.
+    VisitUnallocated(instr);
+    return;
+  }
+
   switch (form_hash_) {
     case "adds_32_addsub_shift"_h:
     case "adds_64_addsub_shift"_h:
@@ -1352,6 +1449,10 @@
   Format(instr, mnemonic, form);
 }
 
+void Disassembler::DisassembleMinMaxImm(const Instruction *instr) {  // Formats the smax/smin/umax/umin *_minmax_imm forms.
+  const char *suffix = (instr->ExtractBit(18) == 0) ? "'s1710" : "'u1710";  // Bit 18 picks the 's or 'u immediate placeholder (presumably signed/unsigned - confirm).
+  FormatWithDecodedMnemonic(instr, "'Rd, 'Rn, #", suffix);  // The suffix supplies the immediate printed after '#'.
+}
 
 void Disassembler::VisitCompareBranch(const Instruction *instr) {
   FormatWithDecodedMnemonic(instr, "'Rt, 'TImmCmpa");
@@ -1927,7 +2028,7 @@
 
 void Disassembler::VisitSystem(const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
-  const char *form = "(System)";
+  const char *form = "";
   const char *suffix = NULL;
 
   switch (form_hash_) {
@@ -1937,7 +2038,6 @@
     case "mrs_rs_systemmove"_h:
       form = "'Xt, 'IY";
       break;
-    case "msr_si_pstate"_h:
     case "msr_sr_systemmove"_h:
       form = "'IY, 'Xt";
       break;
@@ -1957,47 +2057,82 @@
           break;
       }
       break;
+    case "chkfeat_hf_hints"_h:
+      mnemonic = "chkfeat";
+      form = "x16";
+      break;
     case "hint_hm_hints"_h:
       form = "'IH";
       break;
-    case "dmb_bo_barriers"_h:
-    case "dsb_bo_barriers"_h:
+    case Hash("dmb_bo_barriers"):
       form = "'M";
       break;
-    case "sys_cr_systeminstrs"_h:
-      mnemonic = "dc";
-      suffix = ", 'Xt";
-      switch (instr->GetSysOp()) {
-        case IVAU:
-          mnemonic = "ic";
-          form = "ivau";
-          break;
-        case CVAC:
-          form = "cvac";
-          break;
-        case CVAU:
-          form = "cvau";
-          break;
-        case CVAP:
-          form = "cvap";
-          break;
-        case CVADP:
-          form = "cvadp";
-          break;
-        case CIVAC:
-          form = "civac";
-          break;
-        case ZVA:
-          form = "zva";
-          break;
-        default:
-          mnemonic = "sys";
-          form = "'G1, 'Kn, 'Km, 'G2";
-          if (instr->GetRt() == 31) {
-            suffix = NULL;
-          }
-          break;
+    case Hash("dsb_bo_barriers"): {
+      int crm = instr->GetCRm();
+      if (crm == 0) {
+        mnemonic = "ssbb";
+        form = "";
+      } else if (crm == 4) {
+        mnemonic = "pssbb";
+        form = "";
+      } else {
+        form = "'M";
       }
+      break;
+    }
+    case Hash("sys_cr_systeminstrs"): {
+      const std::map<uint32_t, const char *> dcop = {
+          {IVAU, "ivau"},
+          {CVAC, "cvac"},
+          {CVAU, "cvau"},
+          {CVAP, "cvap"},
+          {CVADP, "cvadp"},
+          {CIVAC, "civac"},
+          {ZVA, "zva"},
+          {GVA, "gva"},
+          {GZVA, "gzva"},
+          {CGVAC, "cgvac"},
+          {CGDVAC, "cgdvac"},
+          {CGVAP, "cgvap"},
+          {CGDVAP, "cgdvap"},
+          {CIGVAC, "cigvac"},
+          {CIGDVAC, "cigdvac"},
+      };
+
+      uint32_t sysop = instr->GetSysOp();
+      if (dcop.count(sysop)) {
+        if (sysop == IVAU) {
+          mnemonic = "ic";
+        } else {
+          mnemonic = "dc";
+        }
+        form = dcop.at(sysop);
+        suffix = ", 'Xt";
+      } else if (sysop == GCSSS1) {
+        mnemonic = "gcsss1";
+        form = "'Xt";
+      } else if (sysop == GCSPUSHM) {
+        mnemonic = "gcspushm";
+        form = "'Xt";
+      } else {
+        mnemonic = "sys";
+        form = "'G1, 'Kn, 'Km, 'G2";
+        if (instr->GetRt() < 31) {
+          suffix = ", 'Xt";
+        }
+      }
+      break;
+    }
+    case "sysl_rc_systeminstrs"_h:
+      uint32_t sysop = instr->GetSysOp();
+      if (sysop == GCSPOPM) {
+        mnemonic = "gcspopm";
+        form = (instr->GetRt() == 31) ? "" : "'Xt";
+      } else if (sysop == GCSSS2) {
+        mnemonic = "gcsss2";
+        form = "'Xt";
+      }
+      break;
   }
   Format(instr, mnemonic, form, suffix);
 }
@@ -2043,17 +2178,74 @@
 
 
 void Disassembler::VisitCrypto2RegSHA(const Instruction *instr) {
-  VisitUnimplemented(instr);
+  const char *form = "'Vd.4s, 'Vn.4s";  // Default: two 4s vector operands.
+  if (form_hash_ == "sha1h_ss_cryptosha2"_h) {
+    form = "'Sd, 'Sn";  // sha1h is printed with the 'Sd/'Sn register placeholders instead.
+  }
+  FormatWithDecodedMnemonic(instr, form);
 }
 
 
 void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {
-  VisitUnimplemented(instr);
+  const char *form = "'Qd, 'Sn, 'Vm.4s";  // Default for every form not listed in the switch below.
+  switch (form_hash_) {
+    case "sha1su0_vvv_cryptosha3"_h:
+    case "sha256su1_vvv_cryptosha3"_h:
+      form = "'Vd.4s, 'Vn.4s, 'Vm.4s";  // All three operands printed as 4s vectors.
+      break;
+    case "sha256h_qqv_cryptosha3"_h:
+    case "sha256h2_qqv_cryptosha3"_h:
+      form = "'Qd, 'Qn, 'Vm.4s";  // 'Qd and 'Qn placeholders, then a 4s vector.
+      break;
+  }
+  FormatWithDecodedMnemonic(instr, form);
 }
 
 
 void Disassembler::VisitCryptoAES(const Instruction *instr) {
-  VisitUnimplemented(instr);
+  FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b");  // Every instruction routed here prints two 16b vector operands.
+}
+
+void Disassembler::VisitCryptoSM3(const Instruction *instr) {
+  const char *form = "'Vd.4s, 'Vn.4s, 'Vm.";  // Ends at "'Vm." so the suffix supplies the lane text.
+  const char *suffix = "4s";  // Default: plain 4s third operand.
+
+  switch (form_hash_) {
+    case "sm3ss1_vvv4_crypto4"_h:
+      suffix = "4s, 'Va.4s";  // Adds a fourth vector operand via 'Va.
+      break;
+    case "sm3tt1a_vvv4_crypto3_imm2"_h:
+    case "sm3tt1b_vvv4_crypto3_imm2"_h:
+    case "sm3tt2a_vvv4_crypto3_imm2"_h:
+    case "sm3tt2b_vvv_crypto3_imm2"_h:
+      suffix = "s['u1312]";  // Indexed s lane; the index comes from the 'u1312 placeholder.
+      break;
+  }
+
+  FormatWithDecodedMnemonic(instr, form, suffix);
+}
+
+void Disassembler::VisitCryptoSM4(const Instruction *instr) {
+  VIXL_ASSERT((form_hash_ == "sm4ekey_vvv4_cryptosha512_3"_h) ||
+              (form_hash_ == "sm4e_vv4_cryptosha512_2"_h));  // Only these two forms are expected here.
+  const char *form = "'Vd.4s, 'Vn.4s";  // Operands common to both forms.
+  const char *suffix =
+      (form_hash_ == "sm4e_vv4_cryptosha512_2"_h) ? NULL : ", 'Vm.4s";  // sm4e has no third operand; sm4ekey appends 'Vm.4s.
+
+  FormatWithDecodedMnemonic(instr, form, suffix);
+}
+
+void Disassembler::DisassembleSHA512(const Instruction *instr) {
+  const char *form = "'Qd, 'Qn, 'Vm.2d";  // Kept as-is for forms not matched below.
+  const char *suffix = NULL;
+  switch (form_hash_) {
+    case "sha512su1_vvv2_cryptosha512_3"_h:
+      suffix = ", 'Vm.2d";  // su1 appends a third 2d vector operand.
+      VIXL_FALLTHROUGH();  // su1 shares the two-vector base form set for su0.
+    case "sha512su0_vv2_cryptosha512_2"_h:
+      form = "'Vd.2d, 'Vn.2d";  // Last case: no break needed before the end of the switch.
+  }
+  FormatWithDecodedMnemonic(instr, form, suffix);
 }
 
 void Disassembler::DisassembleNEON2RegAddlp(const Instruction *instr) {
@@ -2269,13 +2461,19 @@
 }
 
 void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
-  static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
+  static const NEONFormatMap map_dot =
+      {{23, 22, 30}, {NF_UNDEF, NF_UNDEF, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S}};
+  static const NEONFormatMap map_fc =
+      {{23, 22, 30},
+       {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
+  static const NEONFormatMap map_rdm =
+      {{23, 22, 30}, {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S}};
 
   const char *mnemonic = mnemonic_.c_str();
   const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
   const char *suffix = NULL;
 
-  NEONFormatDecoder nfd(instr);
+  NEONFormatDecoder nfd(instr, &map_fc);
 
   switch (form_hash_) {
     case "fcmla_asimdsame2_c"_h:
@@ -2288,17 +2486,28 @@
     case "sdot_asimdsame2_d"_h:
     case "udot_asimdsame2_d"_h:
     case "usdot_asimdsame2_d"_h:
-      nfd.SetFormatMap(1, &map_usdot);
-      nfd.SetFormatMap(2, &map_usdot);
+      nfd.SetFormatMaps(nfd.LogicalFormatMap());
+      nfd.SetFormatMap(0, &map_dot);
       break;
     default:
-      // sqrdml[as]h - nothing to do.
+      nfd.SetFormatMaps(&map_rdm);
       break;
   }
 
   Format(instr, mnemonic, nfd.Substitute(form), suffix);
 }
 
+void Disassembler::DisassembleNEON4Same(const Instruction *instr) {  // Formats instructions with four 16b vector operands.
+  FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b, 'Vm.16b, 'Va.16b");  // Operand order printed: Vd, Vn, Vm, Va.
+}
+
+void Disassembler::DisassembleNEONXar(const Instruction *instr) {  // Formats three 2d vector operands plus an immediate.
+  FormatWithDecodedMnemonic(instr, "'Vd.2d, 'Vn.2d, 'Vm.2d, #'u1510");  // The trailing immediate comes from the 'u1510 placeholder.
+}
+
+void Disassembler::DisassembleNEONRax1(const Instruction *instr) {  // Formats three 2d vector operands.
+  FormatWithDecodedMnemonic(instr, "'Vd.2d, 'Vn.2d, 'Vm.2d");  // Fixed form; no immediate or per-form variation.
+}
 
 void Disassembler::VisitNEON3Different(const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
@@ -2321,11 +2530,6 @@
       nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
       nfd.SetFormatMap(0, nfd.IntegerFormatMap());
       break;
-    case "pmull_asimddiff_l"_h:
-      if (nfd.GetVectorFormat(0) != kFormat8H) {
-        mnemonic = NULL;
-      }
-      break;
     case "sqdmlal_asimddiff_l"_h:
     case "sqdmlsl_asimddiff_l"_h:
     case "sqdmull_asimddiff_l"_h:
@@ -2337,6 +2541,22 @@
   Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
 }
 
+void Disassembler::DisassembleNEONPolynomialMul(const Instruction *instr) {
+  const char *mnemonic = instr->ExtractBit(30) ? "pmull2" : "pmull";  // Bit 30 set selects the "2" mnemonic.
+  const char *form = NULL;
+  int size = instr->ExtractBits(23, 22);  // Two-bit field; only values 0 and 3 get a form below.
+  if (size == 0) {
+    // Bits 30:27 of the instruction are x001, where x is the Q bit. Map
+    // this to "8" and "16" by adding 7.
+    form = "'Vd.8h, 'Vn.'u3127+7b, 'Vm.'u3127+7b";
+  } else if (size == 3) {
+    form = "'Vd.1q, 'Vn.'?30:21d, 'Vm.'?30:21d";  // '?30:21 presumably prints "2" or "1" from bit 30 - confirm against the formatter.
+  } else {
+    mnemonic = NULL;  // Sizes 1 and 2: Format is called with NULL mnemonic and NULL form.
+  }
+  Format(instr, mnemonic, form);
+}
+
 void Disassembler::DisassembleNEONFPAcrossLanes(const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
   const char *form = "'Sd, 'Vn.4s";
@@ -2377,7 +2597,7 @@
 }
 
 void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) {
-  const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+  const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]";
   static const NEONFormatMap map_v =
       {{23, 22, 30},
        {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}};
@@ -2388,7 +2608,7 @@
 }
 
 void Disassembler::DisassembleNEONMulByElementLong(const Instruction *instr) {
-  const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+  const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]";
   // TODO: Disallow undefined element types for this instruction.
   static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}};
   NEONFormatDecoder nfd(instr,
@@ -2405,7 +2625,7 @@
 }
 
 void Disassembler::DisassembleNEONFPMulByElement(const Instruction *instr) {
-  const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+  const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]";
   NEONFormatDecoder nfd(instr,
                         NEONFormatDecoder::FPFormatMap(),
                         NEONFormatDecoder::FPFormatMap(),
@@ -2427,7 +2647,7 @@
 
 void Disassembler::DisassembleNEONComplexMulByElement(
     const Instruction *instr) {
-  const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndexRot], #'u1413*90";
+  const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s['IVByElemIndexRot], #'u1413*90";
   // TODO: Disallow undefined element types for this instruction.
   static const NEONFormatMap map_cn =
       {{23, 22, 30},
@@ -2513,10 +2733,10 @@
 void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction *instr) {
   const char *mnemonic = NULL;
   const char *form = NULL;
-  const char *form_1v = "{'Vt.%1$s}, ['Xns]";
-  const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]";
-  const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]";
-  const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+  const char *form_1v = "{'Vt.%s}, ['Xns]";
+  const char *form_2v = "{'Vt.%s, 'Vt2.%s}, ['Xns]";
+  const char *form_3v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]";
+  const char *form_4v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns]";
   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
 
   switch (instr->Mask(NEONLoadStoreMultiStructMask)) {
@@ -2611,11 +2831,10 @@
     const Instruction *instr) {
   const char *mnemonic = NULL;
   const char *form = NULL;
-  const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1";
-  const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2";
-  const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3";
-  const char *form_4v =
-      "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4";
+  const char *form_1v = "{'Vt.%s}, ['Xns], 'Xmr1";
+  const char *form_2v = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmr2";
+  const char *form_3v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmr3";
+  const char *form_4v = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns], 'Xmr4";
   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
 
   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
@@ -2818,7 +3037,7 @@
       break;
     case NEON_LD4R:
       mnemonic = "ld4r";
-      form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+      form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns]";
       break;
     default:
       break;
@@ -2978,7 +3197,7 @@
       break;
     case NEON_LD4R_post:
       mnemonic = "ld4r";
-      form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4";
+      form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s, 'Vt4.%s}, ['Xns], 'Xmz4";
       break;
     default:
       break;
@@ -3194,6 +3413,8 @@
       break;
     case "sqdmulh_asisdsame_only"_h:
     case "sqrdmulh_asisdsame_only"_h:
+    case "sqrdmlah_asisdsame2_only"_h:
+    case "sqrdmlsh_asisdsame2_only"_h:
       if ((vform == kFormatB) || (vform == kFormatD)) {
         mnemonic = NULL;
       }
@@ -3214,7 +3435,7 @@
 void Disassembler::DisassembleNEONScalarSatMulLongIndex(
     const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
-  const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+  const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]";
   NEONFormatDecoder nfd(instr,
                         NEONFormatDecoder::LongScalarFormatMap(),
                         NEONFormatDecoder::ScalarFormatMap());
@@ -3228,7 +3449,7 @@
 
 void Disassembler::DisassembleNEONFPScalarMulIndex(const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
-  const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+  const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]";
   static const NEONFormatMap map = {{23, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}};
   NEONFormatDecoder nfd(instr, &map);
   Format(instr,
@@ -3238,7 +3459,7 @@
 
 void Disassembler::VisitNEONScalarByIndexedElement(const Instruction *instr) {
   const char *mnemonic = mnemonic_.c_str();
-  const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+  const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]";
   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   VectorFormat vform_dst = nfd.GetVectorFormat(0);
   if ((vform_dst == kFormatB) || (vform_dst == kFormatD)) {
@@ -3812,8 +4033,7 @@
     }
 
     // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00.
-    uint64_t rotvalue = RotateRight(value, 32, 64);
-    if (value == rotvalue) {
+    if (AllWordsMatch(value)) {
       generic_value &= 0xffffffff;
       if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) {
         return false;
@@ -3821,8 +4041,7 @@
     }
 
     // Check 0xpq00pq00_pq00pq00.
-    rotvalue = RotateRight(value, 16, 64);
-    if (value == rotvalue) {
+    if (AllHalfwordsMatch(value)) {
       return false;
     }
   } else {
@@ -3836,8 +4055,7 @@
     }
 
     // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq.
-    uint64_t rotvalue = RotateRight(value, 32, 64);
-    if (value == rotvalue) {
+    if (AllWordsMatch(value)) {
       generic_value &= 0xffffffff;
       if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) {
         return false;
@@ -3845,8 +4063,7 @@
     }
 
     // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq.
-    rotvalue = RotateRight(value, 16, 64);
-    if (value == rotvalue) {
+    if (AllHalfwordsMatch(value)) {
       generic_value &= 0xffff;
       if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) {
         return false;
@@ -3854,8 +4071,7 @@
     }
 
     // Check 0xpqpqpqpq_pqpqpqpq.
-    rotvalue = RotateRight(value, 8, 64);
-    if (value == rotvalue) {
+    if (AllBytesMatch(value)) {
       return false;
     }
   }
@@ -5637,15 +5853,26 @@
   }
 }
 
+void Disassembler::DisassembleSVEPmull(const Instruction *instr) {
+  if (instr->GetSVEVectorFormat() == kFormatVnS) {
+    VisitUnallocated(instr);  // The S-lane vector format is treated as unallocated here.
+  } else {
+    Disassemble_ZdT_ZnTb_ZmTb(instr);  // Every other format reuses the generic ZdT/ZnTb/ZmTb formatter.
+  }
+}
+
+void Disassembler::DisassembleSVEPmull128(const Instruction *instr) {  // Formats a q-lane destination with d-lane sources.
+  FormatWithDecodedMnemonic(instr, "'Zd.q, 'Zn.d, 'Zm.d");  // Lane sizes are fixed in the form string, not decoded.
+}
+
 void Disassembler::Disassemble_ZdT_ZnTb_ZmTb(const Instruction *instr) {
-  const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th";
   if (instr->GetSVEVectorFormat() == kFormatVnB) {
     // TODO: This is correct for saddlbt, ssublbt, subltb, which don't have
-    // b-lane sized form, and for pmull[b|t] as feature `SVEPmull128` isn't
-    // supported, but may need changes for other instructions reaching here.
+    // b-lane sized form, but may need changes for other instructions reaching
+    // here.
     Format(instr, "unimplemented", "(ZdT_ZnTb_ZmTb)");
   } else {
-    Format(instr, mnemonic_.c_str(), form);
+    FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'th, 'Zm.'th");  // Non-B formats: destination uses 't, both sources use 'th.
   }
 }
 
@@ -5866,6 +6093,137 @@
   Format(instr, mnemonic_.c_str(), form, suffix);
 }
 
+void Disassembler::Disassemble_XdSP_XnSP_Xm(const Instruction *instr) {
+  const char *form = "'Xds, 'Xns";  // First two operands are always printed.
+  const char *suffix = instr->GetRm() == 31 ? "" : ", 'Xm";  // The third operand is omitted when Rm is 31.
+  Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction *instr) {
+  VIXL_STATIC_ASSERT(kMTETagGranuleInBytes == 16);  // Guards the literal "*16" scale used in the form below.
+  const char *form = "'Xds, 'Xns, #'u2116*16, #'u1310";  // First immediate is printed scaled by 16; the second is unscaled.
+  Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_Xd_XnSP_Xm(const Instruction *instr) {
+  const char *form = "'Rd, 'Xns, 'Rm";  // Only the middle operand uses the 'Xns placeholder; the others use 'Rd and 'Rm.
+  Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_Xd_XnSP_XmSP(const Instruction *instr) {
+  if ((form_hash_ == Hash("subps_64s_dp_2src")) && (instr->GetRd() == 31)) {  // subps with Rd == 31 is printed as "cmpp".
+    Format(instr, "cmpp", "'Xns, 'Xms");  // The destination operand is dropped for this spelling.
+  } else {
+    const char *form = "'Xd, 'Xns, 'Xms";
+    Format(instr, mnemonic_.c_str(), form);
+  }
+}
+
+void Disassembler::DisassembleMTEStoreTagPair(const Instruction *instr) {
+  const char *form = "'Xt, 'Xt2, ['Xns";  // Leaves '[' open; the suffix chosen below closes it.
+  const char *suffix = NULL;
+  switch (form_hash_) {
+    case Hash("stgp_64_ldstpair_off"):
+      suffix = ", #'s2115*16]";  // Immediate printed inside the brackets, scaled by 16.
+      break;
+    case Hash("stgp_64_ldstpair_post"):
+      suffix = "], #'s2115*16";  // Immediate printed after the closing bracket.
+      break;
+    case Hash("stgp_64_ldstpair_pre"):
+      suffix = ", #'s2115*16]!";  // Immediate inside the brackets, followed by '!'.
+      break;
+    default:
+      mnemonic_ = "unimplemented";  // Overwrites the member mnemonic_ for any other form.
+      break;
+  }
+
+  if (instr->GetImmLSPair() == 0) {
+    suffix = "]";  // Zero immediate: only "]" is printed for every form, so the '!' and post immediate are dropped too.
+  }
+
+  Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::DisassembleMTEStoreTag(const Instruction *instr) {
+  const char *form = "'Xds, ['Xns";  // Leaves '[' open; the suffix chosen below closes it.
+  const char *suffix = NULL;
+  switch (form_hash_) {
+    case Hash("st2g_64soffset_ldsttags"):
+    case Hash("stg_64soffset_ldsttags"):
+    case Hash("stz2g_64soffset_ldsttags"):
+    case Hash("stzg_64soffset_ldsttags"):
+      suffix = ", #'s2012*16]";  // Immediate printed inside the brackets, scaled by 16.
+      break;
+    case Hash("st2g_64spost_ldsttags"):
+    case Hash("stg_64spost_ldsttags"):
+    case Hash("stz2g_64spost_ldsttags"):
+    case Hash("stzg_64spost_ldsttags"):
+      suffix = "], #'s2012*16";  // Immediate printed after the closing bracket.
+      break;
+    case Hash("st2g_64spre_ldsttags"):
+    case Hash("stg_64spre_ldsttags"):
+    case Hash("stz2g_64spre_ldsttags"):
+    case Hash("stzg_64spre_ldsttags"):
+      suffix = ", #'s2012*16]!";  // Immediate inside the brackets, followed by '!'.
+      break;
+    default:
+      mnemonic_ = "unimplemented";  // Overwrites the member mnemonic_ for any other form.
+      break;
+  }
+
+  if (instr->GetImmLS() == 0) {
+    suffix = "]";  // Zero immediate: only "]" is printed for every form, so the '!' and post immediate are dropped too.
+  }
+
+  Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::DisassembleMTELoadTag(const Instruction *instr) {
+  const char *form =
+      (instr->GetImmLS() == 0) ? "'Xt, ['Xns]" : "'Xt, ['Xns, #'s2012*16]";  // The immediate (scaled by 16) is printed only when non-zero.
+  Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleCpy(const Instruction *instr) {
+  const char *form = "['Xd]!, ['Xs]!, 'Xn!";  // All three operands carry the '!' marker.
+
+  int d = instr->GetRd();
+  int n = instr->GetRn();
+  int s = instr->GetRs();
+
+  // Aliased registers and sp/zr are disallowed.
+  if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31) || (s == 31)) {
+    form = NULL;  // Rejected encodings are passed to Format with a NULL form.
+  }
+
+  // Bits 31 and 30 must be zero.
+  if (instr->ExtractBits(31, 30)) {
+    form = NULL;  // Same rejection path as the register check above.
+  }
+
+  Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSet(const Instruction *instr) {
+  const char *form = "['Xd]!, 'Xn!, 'Xs";  // Only Xd and Xn carry the '!' marker.
+
+  int d = instr->GetRd();
+  int n = instr->GetRn();
+  int s = instr->GetRs();
+
+  // Aliased registers are disallowed. Only Xs may be xzr.
+  if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) {  // No (s == 31) test here, which is what lets Xs be register 31.
+    form = NULL;  // Rejected encodings are passed to Format with a NULL form.
+  }
+
+  // Bits 31 and 30 must be zero.
+  if (instr->ExtractBits(31, 30)) {
+    form = NULL;  // Same rejection path as the register check above.
+  }
+
+  Format(instr, mnemonic_.c_str(), form);
+}
+
 void Disassembler::ProcessOutput(const Instruction * /*instr*/) {
   // The base disasm does nothing more than disassembling into a buffer.
 }
@@ -5918,7 +6276,7 @@
   USE(instr);
   if (offset < 0) {
     // Cast to uint64_t so that INT64_MIN is handled in a well-defined way.
-    uint64_t abs_offset = -static_cast<uint64_t>(offset);
+    uint64_t abs_offset = UnsignedNegate(static_cast<uint64_t>(offset));
     AppendToOutput("#-0x%" PRIx64, abs_offset);
   } else {
     AppendToOutput("#+0x%" PRIx64, offset);
@@ -6099,6 +6457,12 @@
       // by-element instructions.
       reg_num = instr->GetRmLow16();
       break;
+    case 'f':
+      // This is register Rm, but using an element size dependent number of bits
+      // in the register specifier.
+      reg_num =
+          (instr->GetNEONSize() < 2) ? instr->GetRmLow16() : instr->GetRm();
+      break;
     case 'a':
       reg_num = instr->GetRa();
       break;
@@ -6156,7 +6520,7 @@
   const char *reg_field = &format[1];
 
   if (reg_prefix == 'R') {
-    bool is_x = instr->GetSixtyFourBits();
+    bool is_x = instr->GetSixtyFourBits() == 1;
     if (strspn(reg_field, "0123456789") == 2) {  // r20d, r31n, etc.
       // Core W or X registers where the type is determined by a specified bit
       // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31
@@ -6187,7 +6551,7 @@
         field_len = 3;
         char *eimm;
         int imm = static_cast<int>(strtol(&reg_field[2], &eimm, 10));
-        field_len += eimm - &reg_field[2];
+        field_len += static_cast<unsigned>(eimm - &reg_field[2]);
         if (reg_num == 31) {
           switch (reg_field[1]) {
             case 'z':
@@ -6381,12 +6745,12 @@
     }
     case 'F': {  // IFP, IFPNeon, IFPSve or IFPFBits.
       int imm8 = 0;
-      int len = strlen("IFP");
+      size_t len = strlen("IFP");
       switch (format[3]) {
         case 'F':
           VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0);
           AppendToOutput("#%" PRId32, 64 - instr->GetFPScale());
-          return strlen("IFPFBits");
+          return static_cast<int>(strlen("IFPFBits"));
         case 'N':
           VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0);
           imm8 = instr->GetImmNEONabcdefgh();
@@ -6405,7 +6769,7 @@
       AppendToOutput("#0x%" PRIx32 " (%.4f)",
                      imm8,
                      Instruction::Imm8ToFP32(imm8));
-      return len;
+      return static_cast<int>(len);
     }
     case 'H': {  // IH - ImmHint
       AppendToOutput("#%" PRId32, instr->GetImmHint());
@@ -6532,7 +6896,7 @@
           return 9;
         }
         case 'B': {  // IVByElemIndex.
-          int ret = strlen("IVByElemIndex");
+          int ret = static_cast<int>(strlen("IVByElemIndex"));
           uint32_t vm_index = instr->GetNEONH() << 2;
           vm_index |= instr->GetNEONL() << 1;
           vm_index |= instr->GetNEONM();
@@ -6571,12 +6935,12 @@
               rn_index = imm4 >> tz;
               if (strncmp(format, "IVInsIndex1", strlen("IVInsIndex1")) == 0) {
                 AppendToOutput("%d", rd_index);
-                return strlen("IVInsIndex1");
+                return static_cast<int>(strlen("IVInsIndex1"));
               } else if (strncmp(format,
                                  "IVInsIndex2",
                                  strlen("IVInsIndex2")) == 0) {
                 AppendToOutput("%d", rn_index);
-                return strlen("IVInsIndex2");
+                return static_cast<int>(strlen("IVInsIndex2"));
               }
             }
             return 0;
@@ -6586,7 +6950,7 @@
             std::pair<int, int> index_and_lane_size =
                 instr->GetSVEPermuteIndexAndLaneSizeLog2();
             AppendToOutput("%d", index_and_lane_size.first);
-            return strlen("IVInsSVEIndex");
+            return static_cast<int>(strlen("IVInsSVEIndex"));
           }
           VIXL_FALLTHROUGH();
         }
@@ -6598,31 +6962,31 @@
           if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) {
             uint64_t imm8 = instr->GetImmNEONabcdefgh();
             AppendToOutput("#0x%" PRIx64, imm8);
-            return strlen("IVMIImm8");
+            return static_cast<int>(strlen("IVMIImm8"));
           } else if (strncmp(format, "IVMIImm", strlen("IVMIImm")) == 0) {
             uint64_t imm8 = instr->GetImmNEONabcdefgh();
             uint64_t imm = 0;
             for (int i = 0; i < 8; ++i) {
-              if (imm8 & (1 << i)) {
+              if (imm8 & (UINT64_C(1) << i)) {
                 imm |= (UINT64_C(0xff) << (8 * i));
               }
             }
             AppendToOutput("#0x%" PRIx64, imm);
-            return strlen("IVMIImm");
+            return static_cast<int>(strlen("IVMIImm"));
           } else if (strncmp(format,
                              "IVMIShiftAmt1",
                              strlen("IVMIShiftAmt1")) == 0) {
             int cmode = instr->GetNEONCmode();
             int shift_amount = 8 * ((cmode >> 1) & 3);
             AppendToOutput("#%d", shift_amount);
-            return strlen("IVMIShiftAmt1");
+            return static_cast<int>(strlen("IVMIShiftAmt1"));
           } else if (strncmp(format,
                              "IVMIShiftAmt2",
                              strlen("IVMIShiftAmt2")) == 0) {
             int cmode = instr->GetNEONCmode();
             int shift_amount = 8 << (cmode & 1);
             AppendToOutput("#%d", shift_amount);
-            return strlen("IVMIShiftAmt2");
+            return static_cast<int>(strlen("IVMIShiftAmt2"));
           } else {
             VIXL_UNIMPLEMENTED();
             return 0;
@@ -6652,6 +7016,9 @@
         case RNDRRS:
           AppendToOutput("rndrrs");
           break;
+        case DCZID_EL0:
+          AppendToOutput("dczid_el0");
+          break;
         default:
           AppendToOutput("S%d_%d_c%d_c%d_%d",
                          instr->GetSysOp0(),
@@ -7129,7 +7496,7 @@
     uint64_t value = strtoul(c + 1, &new_c, 10);
     c = new_c;
     VIXL_ASSERT(IsInt32(value));
-    bits += value;
+    bits = static_cast<int32_t>(bits + value);
   } else if (*c == '*') {
     // Similarly, a "*n" trailing the format specifier indicates the extracted
     // value should be multiplied by n. This is for cases where the encoded
@@ -7138,7 +7505,7 @@
     uint64_t value = strtoul(c + 1, &new_c, 10);
     c = new_c;
     VIXL_ASSERT(IsInt32(value));
-    bits *= value;
+    bits = static_cast<int32_t>(bits * value);
   }
 
   AppendToOutput("%d", bits);
@@ -7290,7 +7657,7 @@
   if (signed_addresses_) {
     if (address < 0) {
       sign = "-";
-      abs_address = -static_cast<uint64_t>(address);
+      abs_address = UnsignedNegate(static_cast<uint64_t>(address));
     } else {
       // Leave a leading space, to maintain alignment.
       sign = " ";
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index aa5348d..9470565 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -184,6 +184,10 @@
   void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr);
   void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr);
 
+  void DisassembleCpy(const Instruction* instr);
+  void DisassembleSet(const Instruction* instr);
+  void DisassembleMinMaxImm(const Instruction* instr);
+
   void DisassembleSVEShiftLeftImm(const Instruction* instr);
   void DisassembleSVEShiftRightImm(const Instruction* instr);
   void DisassembleSVEAddSubCarry(const Instruction* instr);
@@ -192,6 +196,8 @@
   void DisassembleSVEBitwiseTernary(const Instruction* instr);
   void DisassembleSVEFlogb(const Instruction* instr);
   void DisassembleSVEFPPair(const Instruction* instr);
+  void DisassembleSVEPmull(const Instruction* instr);
+  void DisassembleSVEPmull128(const Instruction* instr);
 
   void DisassembleNoArgs(const Instruction* instr);
 
@@ -224,6 +230,23 @@
   void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
   void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
   void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
+  void DisassembleNEONPolynomialMul(const Instruction* instr);
+  void DisassembleNEON4Same(const Instruction* instr);
+  void DisassembleNEONXar(const Instruction* instr);
+  void DisassembleNEONRax1(const Instruction* instr);
+  void DisassembleSHA512(const Instruction* instr);
+
+  void DisassembleMTELoadTag(const Instruction* instr);
+  void DisassembleMTEStoreTag(const Instruction* instr);
+  void DisassembleMTEStoreTagPair(const Instruction* instr);
+
+  void Disassemble_XdSP_XnSP_Xm(const Instruction* instr);
+  void Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction* instr);
+  void Disassemble_Xd_XnSP_Xm(const Instruction* instr);
+  void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);
+
+  void VisitCryptoSM3(const Instruction* instr);
+  void VisitCryptoSM4(const Instruction* instr);
 
   void Format(const Instruction* instr,
               const char* mnemonic,
diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc
index 4718e2d..adef87f 100644
--- a/src/aarch64/instructions-aarch64.cc
+++ b/src/aarch64/instructions-aarch64.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "instructions-aarch64.h"
+
 #include "assembler-aarch64.h"
 
 namespace vixl {
@@ -198,6 +199,7 @@
     case "decd_z_zs"_h:
     case "dech_z_zs"_h:
     case "decw_z_zs"_h:
+    case "ext_z_zi_des"_h:
     case "faddp_z_p_zz"_h:
     case "fmaxnmp_z_p_zz"_h:
     case "fmaxp_z_p_zz"_h:
@@ -210,7 +212,6 @@
     case "insr_z_v"_h:
     case "smaxp_z_p_zz"_h:
     case "sminp_z_p_zz"_h:
-    case "splice_z_p_zz_con"_h:
     case "splice_z_p_zz_des"_h:
     case "sqcadd_z_zz"_h:
     case "sqdecd_z_zs"_h:
@@ -602,6 +603,28 @@
   return std::make_pair(reg_code, index);
 }
 
+// Get the register and index for NEON indexed multiplies.
+std::pair<int, int> Instruction::GetNEONMulRmAndIndex() const {
+  int reg_code = GetRm();
+  int index = (GetNEONH() << 2) | (GetNEONL() << 1) | GetNEONM();
+  switch (GetNEONSize()) {
+    case 0:  // FP H-sized elements.
+    case 1:  // Integer H-sized elements.
+      // 4-bit Rm, 3-bit index.
+      reg_code &= 0xf;
+      break;
+    case 2:  // S-sized elements.
+      // 5-bit Rm, 2-bit index.
+      index >>= 1;
+      break;
+    case 3:  // FP D-sized elements.
+      // 5-bit Rm, 1-bit index.
+      index >>= 2;
+      break;
+  }
+  return std::make_pair(reg_code, index);
+}
+
 // Logical immediates can't encode zero, so a return value of zero is used to
 // indicate a failure case. Specifically, where the constraints on imm_s are
 // not met.
@@ -1010,6 +1033,8 @@
       return kFormat4H;
     case kFormat2D:
       return kFormat2S;
+    case kFormat1Q:
+      return kFormat1D;
     case kFormatH:
       return kFormatB;
     case kFormatS:
@@ -1022,6 +1047,8 @@
       return kFormatVnH;
     case kFormatVnD:
       return kFormatVnS;
+    case kFormatVnQ:
+      return kFormatVnD;
     default:
       VIXL_UNREACHABLE();
       return kFormatUndefined;
@@ -1094,6 +1121,8 @@
       return kFormat2S;
     case kFormat2D:
       return kFormat4S;
+    case kFormat1Q:
+      return kFormat2D;
     case kFormatVnH:
       return kFormatVnB;
     case kFormatVnS:
@@ -1245,6 +1274,7 @@
     case kFormat8H:
     case kFormat4S:
     case kFormat2D:
+    case kFormat1Q:
       return kQRegSize;
     default:
       VIXL_UNREACHABLE();
@@ -1282,6 +1312,7 @@
     case kFormat2D:
     case kFormatVnD:
       return 64;
+    case kFormat1Q:
     case kFormatVnQ:
       return 128;
     case kFormatVnO:
@@ -1347,6 +1378,7 @@
     case kFormat2D:
       return 2;
     case kFormat1D:
+    case kFormat1Q:
     case kFormatB:
     case kFormatH:
     case kFormatS:
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index d92e6ee..00aeb3c 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -119,7 +119,7 @@
 
 // We can't define a static kZRegSize because the size depends on the
 // implementation. However, it is sometimes useful to know the minimum and
-// maxmimum possible sizes.
+// maximum possible sizes.
 const unsigned kZRegMinSize = 128;
 const unsigned kZRegMinSizeLog2 = 7;
 const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
@@ -141,21 +141,25 @@
 const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8;
 const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3;
 
+const unsigned kMTETagGranuleInBytes = 16;
+const unsigned kMTETagGranuleInBytesLog2 = 4;
+const unsigned kMTETagWidth = 4;
+
 // Make these moved float constants backwards compatible
 // with explicit vixl::aarch64:: namespace references.
-using vixl::kDoubleMantissaBits;
 using vixl::kDoubleExponentBits;
-using vixl::kFloatMantissaBits;
-using vixl::kFloatExponentBits;
-using vixl::kFloat16MantissaBits;
+using vixl::kDoubleMantissaBits;
 using vixl::kFloat16ExponentBits;
+using vixl::kFloat16MantissaBits;
+using vixl::kFloatExponentBits;
+using vixl::kFloatMantissaBits;
 
-using vixl::kFP16PositiveInfinity;
 using vixl::kFP16NegativeInfinity;
-using vixl::kFP32PositiveInfinity;
+using vixl::kFP16PositiveInfinity;
 using vixl::kFP32NegativeInfinity;
-using vixl::kFP64PositiveInfinity;
+using vixl::kFP32PositiveInfinity;
 using vixl::kFP64NegativeInfinity;
+using vixl::kFP64PositiveInfinity;
 
 using vixl::kFP16DefaultNaN;
 using vixl::kFP32DefaultNaN;
@@ -213,9 +217,10 @@
   kFormatVnQ = kFormatSVEQ | kFormatSVE,
   kFormatVnO = kFormatSVEO | kFormatSVE,
 
-  // An artificial value, used by simulator trace tests and a few oddball
+  // Artificial values, used by simulator trace tests and a few oddball
   // instructions (such as FMLAL).
-  kFormat2H = 0xfffffffe
+  kFormat2H = 0xfffffffe,
+  kFormat1Q = 0xfffffffd
 };
 
 // Instructions. ---------------------------------------------------------------
@@ -368,6 +373,7 @@
 
   std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
 
+  std::pair<int, int> GetNEONMulRmAndIndex() const;
   std::pair<int, int> GetSVEMulZmAndIndex() const;
   std::pair<int, int> GetSVEMulLongZmAndIndex() const;
 
@@ -512,6 +518,65 @@
     return false;
   }
 
+  bool IsMOPSPrologueOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() == instr->Mask(~(0x3U << op_lsb));
+  }
+
+  bool IsMOPSMainOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() ==
+           (instr->Mask(~(0x3U << op_lsb)) | (0x1 << op_lsb));
+  }
+
+  bool IsMOPSEpilogueOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() ==
+           (instr->Mask(~(0x3U << op_lsb)) | (0x2 << op_lsb));
+  }
+
+  template <uint32_t mops_type>
+  bool IsConsistentMOPSTriplet() const {
+    VIXL_STATIC_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                       (mops_type == "cpy"_h));
+
+    int64_t isize = static_cast<int64_t>(kInstructionSize);
+    const Instruction* prev2 = GetInstructionAtOffset(-2 * isize);
+    const Instruction* prev1 = GetInstructionAtOffset(-1 * isize);
+    const Instruction* next1 = GetInstructionAtOffset(1 * isize);
+    const Instruction* next2 = GetInstructionAtOffset(2 * isize);
+
+    // Use the encoding of the current instruction to determine the expected
+    // adjacent instructions. NB. this doesn't check if the nearby instructions
+    // are MOPS-type, but checks that they form a consistent triplet if they
+    // are. For example, 'mov x0, #0; mov x0, #512; mov x0, #1024' is a
+    // consistent triplet, but they are not MOPS instructions.
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    const uint32_t kMOPSOpfield = 0x3 << op_lsb;
+    const uint32_t kMOPSPrologue = 0;
+    const uint32_t kMOPSMain = 0x1 << op_lsb;
+    const uint32_t kMOPSEpilogue = 0x2 << op_lsb;
+    switch (Mask(kMOPSOpfield)) {
+      case kMOPSPrologue:
+        return next1->IsMOPSMainOf(this, mops_type) &&
+               next2->IsMOPSEpilogueOf(this, mops_type);
+      case kMOPSMain:
+        return prev1->IsMOPSPrologueOf(this, mops_type) &&
+               next1->IsMOPSEpilogueOf(this, mops_type);
+      case kMOPSEpilogue:
+        return prev2->IsMOPSPrologueOf(this, mops_type) &&
+               prev1->IsMOPSMainOf(this, mops_type);
+      default:
+        VIXL_ABORT_WITH_MSG("Undefined MOPS operation\n");
+    }
+  }
+
   static int GetImmBranchRangeBitwidth(ImmBranchType branch_type);
   VIXL_DEPRECATED(
       "GetImmBranchRangeBitwidth",
@@ -764,7 +829,7 @@
   enum SubstitutionMode { kPlaceholder, kFormat };
 
   // Construct a format decoder with increasingly specific format maps for each
-  // subsitution. If no format map is specified, the default is the integer
+  // substitution. If no format map is specified, the default is the integer
   // format map.
   explicit NEONFormatDecoder(const Instruction* instr) {
     instrbits_ = instr->GetInstructionBits();
@@ -791,11 +856,13 @@
   // Set the format mapping for all or individual substitutions.
   void SetFormatMaps(const NEONFormatMap* format0,
                      const NEONFormatMap* format1 = NULL,
-                     const NEONFormatMap* format2 = NULL) {
+                     const NEONFormatMap* format2 = NULL,
+                     const NEONFormatMap* format3 = NULL) {
     VIXL_ASSERT(format0 != NULL);
     formats_[0] = format0;
     formats_[1] = (format1 == NULL) ? formats_[0] : format1;
     formats_[2] = (format2 == NULL) ? formats_[1] : format2;
+    formats_[3] = (format3 == NULL) ? formats_[2] : format3;
   }
   void SetFormatMap(unsigned index, const NEONFormatMap* format) {
     VIXL_ASSERT(index <= ArrayLength(formats_));
@@ -814,12 +881,15 @@
   const char* Substitute(const char* string,
                          SubstitutionMode mode0 = kFormat,
                          SubstitutionMode mode1 = kFormat,
-                         SubstitutionMode mode2 = kFormat) {
+                         SubstitutionMode mode2 = kFormat,
+                         SubstitutionMode mode3 = kFormat) {
     const char* subst0 = GetSubstitute(0, mode0);
     const char* subst1 = GetSubstitute(1, mode1);
     const char* subst2 = GetSubstitute(2, mode2);
+    const char* subst3 = GetSubstitute(3, mode3);
 
-    if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) {
+    if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL) ||
+        (subst3 == NULL)) {
       return NULL;
     }
 
@@ -828,7 +898,8 @@
              string,
              subst0,
              subst1,
-             subst2);
+             subst2,
+             subst3);
     return form_buffer_;
   }
 
@@ -1066,7 +1137,7 @@
   }
 
   Instr instrbits_;
-  const NEONFormatMap* formats_[3];
+  const NEONFormatMap* formats_[4];
   char form_buffer_[64];
   char mne_buffer_[16];
 };
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index a77e7f2..2d923cd 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -36,33 +36,33 @@
 using vixl::internal::SimFloat16;
 
 template <typename T>
-bool IsFloat64() {
+constexpr bool IsFloat64() {
   return false;
 }
 template <>
-bool IsFloat64<double>() {
+constexpr bool IsFloat64<double>() {
   return true;
 }
 
 template <typename T>
-bool IsFloat32() {
+constexpr bool IsFloat32() {
   return false;
 }
 template <>
-bool IsFloat32<float>() {
+constexpr bool IsFloat32<float>() {
   return true;
 }
 
 template <typename T>
-bool IsFloat16() {
+constexpr bool IsFloat16() {
   return false;
 }
 template <>
-bool IsFloat16<Float16>() {
+constexpr bool IsFloat16<Float16>() {
   return true;
 }
 template <>
-bool IsFloat16<SimFloat16>() {
+constexpr bool IsFloat16<SimFloat16>() {
   return true;
 }
 
@@ -167,24 +167,44 @@
 }
 
 
-void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
-  dst.ClearForWrite(vform);
-  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst, vform, i, addr);
-    addr += LaneSizeInBytesFromFormat(vform);
+uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
+  // Generate a 4-bit integer from a 48-bit random number.
+  uint64_t rtag = rand_gen_() >> 44;
+  VIXL_ASSERT(IsUint4(rtag));
+
+  if (exclude == 0) {
+    exclude = static_cast<uint16_t>(rand_gen_() >> 44);
   }
+
+  // TODO: implement this to better match the specification, which calls for a
+  // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
+  // PRNG.
+  return ChooseNonExcludedTag(rtag, 0, exclude);
 }
 
 
-void Simulator::ld1(VectorFormat vform,
+bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
+  dst.ClearForWrite(vform);
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    if (!LoadLane(dst, vform, i, addr)) {
+      return false;
+    }
+    addr += LaneSizeInBytesFromFormat(vform);
+  }
+  return true;
+}
+
+
+bool Simulator::ld1(VectorFormat vform,
                     LogicVRegister dst,
                     int index,
                     uint64_t addr) {
-  LoadLane(dst, vform, index, addr);
+  dst.ClearForWrite(vform);
+  return LoadLane(dst, vform, index, addr);
 }
 
 
-void Simulator::ld1r(VectorFormat vform,
+bool Simulator::ld1r(VectorFormat vform,
                      VectorFormat unpack_vform,
                      LogicVRegister dst,
                      uint64_t addr,
@@ -193,20 +213,25 @@
   dst.ClearForWrite(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
     if (is_signed) {
-      LoadIntToLane(dst, vform, unpack_size, i, addr);
+      if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
+        return false;
+      }
     } else {
-      LoadUintToLane(dst, vform, unpack_size, i, addr);
+      if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
+        return false;
+      }
     }
   }
+  return true;
 }
 
 
-void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
-  ld1r(vform, vform, dst, addr);
+bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
+  return ld1r(vform, vform, dst, addr);
 }
 
 
-void Simulator::ld2(VectorFormat vform,
+bool Simulator::ld2(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     uint64_t addr1) {
@@ -215,15 +240,17 @@
   int esize = LaneSizeInBytesFromFormat(vform);
   uint64_t addr2 = addr1 + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr1);
-    LoadLane(dst2, vform, i, addr2);
+    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
+      return false;
+    }
     addr1 += 2 * esize;
     addr2 += 2 * esize;
   }
+  return true;
 }
 
 
-void Simulator::ld2(VectorFormat vform,
+bool Simulator::ld2(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     int index,
@@ -231,12 +258,12 @@
   dst1.ClearForWrite(vform);
   dst2.ClearForWrite(vform);
   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
-  LoadLane(dst1, vform, index, addr1);
-  LoadLane(dst2, vform, index, addr2);
+  return (LoadLane(dst1, vform, index, addr1) &&
+          LoadLane(dst2, vform, index, addr2));
 }
 
 
-void Simulator::ld2r(VectorFormat vform,
+bool Simulator::ld2r(VectorFormat vform,
                      LogicVRegister dst1,
                      LogicVRegister dst2,
                      uint64_t addr) {
@@ -244,13 +271,15 @@
   dst2.ClearForWrite(vform);
   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr);
-    LoadLane(dst2, vform, i, addr2);
+    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
+      return false;
+    }
   }
+  return true;
 }
 
 
-void Simulator::ld3(VectorFormat vform,
+bool Simulator::ld3(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
@@ -262,17 +291,19 @@
   uint64_t addr2 = addr1 + esize;
   uint64_t addr3 = addr2 + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr1);
-    LoadLane(dst2, vform, i, addr2);
-    LoadLane(dst3, vform, i, addr3);
+    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
+        !LoadLane(dst3, vform, i, addr3)) {
+      return false;
+    }
     addr1 += 3 * esize;
     addr2 += 3 * esize;
     addr3 += 3 * esize;
   }
+  return true;
 }
 
 
-void Simulator::ld3(VectorFormat vform,
+bool Simulator::ld3(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
@@ -283,13 +314,13 @@
   dst3.ClearForWrite(vform);
   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
-  LoadLane(dst1, vform, index, addr1);
-  LoadLane(dst2, vform, index, addr2);
-  LoadLane(dst3, vform, index, addr3);
+  return (LoadLane(dst1, vform, index, addr1) &&
+          LoadLane(dst2, vform, index, addr2) &&
+          LoadLane(dst3, vform, index, addr3));
 }
 
 
-void Simulator::ld3r(VectorFormat vform,
+bool Simulator::ld3r(VectorFormat vform,
                      LogicVRegister dst1,
                      LogicVRegister dst2,
                      LogicVRegister dst3,
@@ -300,14 +331,16 @@
   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr);
-    LoadLane(dst2, vform, i, addr2);
-    LoadLane(dst3, vform, i, addr3);
+    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
+        !LoadLane(dst3, vform, i, addr3)) {
+      return false;
+    }
   }
+  return true;
 }
 
 
-void Simulator::ld4(VectorFormat vform,
+bool Simulator::ld4(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
@@ -322,19 +355,20 @@
   uint64_t addr3 = addr2 + esize;
   uint64_t addr4 = addr3 + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr1);
-    LoadLane(dst2, vform, i, addr2);
-    LoadLane(dst3, vform, i, addr3);
-    LoadLane(dst4, vform, i, addr4);
+    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
+        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
+      return false;
+    }
     addr1 += 4 * esize;
     addr2 += 4 * esize;
     addr3 += 4 * esize;
     addr4 += 4 * esize;
   }
+  return true;
 }
 
 
-void Simulator::ld4(VectorFormat vform,
+bool Simulator::ld4(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
@@ -348,14 +382,14 @@
   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
-  LoadLane(dst1, vform, index, addr1);
-  LoadLane(dst2, vform, index, addr2);
-  LoadLane(dst3, vform, index, addr3);
-  LoadLane(dst4, vform, index, addr4);
+  return (LoadLane(dst1, vform, index, addr1) &&
+          LoadLane(dst2, vform, index, addr2) &&
+          LoadLane(dst3, vform, index, addr3) &&
+          LoadLane(dst4, vform, index, addr4));
 }
 
 
-void Simulator::ld4r(VectorFormat vform,
+bool Simulator::ld4r(VectorFormat vform,
                      LogicVRegister dst1,
                      LogicVRegister dst2,
                      LogicVRegister dst3,
@@ -369,57 +403,61 @@
   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    LoadLane(dst1, vform, i, addr);
-    LoadLane(dst2, vform, i, addr2);
-    LoadLane(dst3, vform, i, addr3);
-    LoadLane(dst4, vform, i, addr4);
+    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
+        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
+      return false;
+    }
   }
+  return true;
 }
 
 
-void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
+bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    StoreLane(src, vform, i, addr);
+    if (!StoreLane(src, vform, i, addr)) return false;
     addr += LaneSizeInBytesFromFormat(vform);
   }
+  return true;
 }
 
 
-void Simulator::st1(VectorFormat vform,
+bool Simulator::st1(VectorFormat vform,
                     LogicVRegister src,
                     int index,
                     uint64_t addr) {
-  StoreLane(src, vform, index, addr);
+  return StoreLane(src, vform, index, addr);
 }
 
 
-void Simulator::st2(VectorFormat vform,
+bool Simulator::st2(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     uint64_t addr) {
   int esize = LaneSizeInBytesFromFormat(vform);
   uint64_t addr2 = addr + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    StoreLane(src, vform, i, addr);
-    StoreLane(src2, vform, i, addr2);
+    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
+      return false;
+    }
     addr += 2 * esize;
     addr2 += 2 * esize;
   }
+  return true;
 }
 
 
-void Simulator::st2(VectorFormat vform,
+bool Simulator::st2(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     int index,
                     uint64_t addr) {
   int esize = LaneSizeInBytesFromFormat(vform);
-  StoreLane(src, vform, index, addr);
-  StoreLane(src2, vform, index, addr + 1 * esize);
+  return (StoreLane(src, vform, index, addr) &&
+          StoreLane(src2, vform, index, addr + 1 * esize));
 }
 
 
-void Simulator::st3(VectorFormat vform,
+bool Simulator::st3(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     LogicVRegister src3,
@@ -428,30 +466,32 @@
   uint64_t addr2 = addr + esize;
   uint64_t addr3 = addr2 + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    StoreLane(src, vform, i, addr);
-    StoreLane(src2, vform, i, addr2);
-    StoreLane(src3, vform, i, addr3);
+    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
+        !StoreLane(src3, vform, i, addr3)) {
+      return false;
+    }
     addr += 3 * esize;
     addr2 += 3 * esize;
     addr3 += 3 * esize;
   }
+  return true;
 }
 
 
-void Simulator::st3(VectorFormat vform,
+bool Simulator::st3(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     LogicVRegister src3,
                     int index,
                     uint64_t addr) {
   int esize = LaneSizeInBytesFromFormat(vform);
-  StoreLane(src, vform, index, addr);
-  StoreLane(src2, vform, index, addr + 1 * esize);
-  StoreLane(src3, vform, index, addr + 2 * esize);
+  return (StoreLane(src, vform, index, addr) &&
+          StoreLane(src2, vform, index, addr + 1 * esize) &&
+          StoreLane(src3, vform, index, addr + 2 * esize));
 }
 
 
-void Simulator::st4(VectorFormat vform,
+bool Simulator::st4(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     LogicVRegister src3,
@@ -462,19 +502,21 @@
   uint64_t addr3 = addr2 + esize;
   uint64_t addr4 = addr3 + esize;
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    StoreLane(src, vform, i, addr);
-    StoreLane(src2, vform, i, addr2);
-    StoreLane(src3, vform, i, addr3);
-    StoreLane(src4, vform, i, addr4);
+    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
+        !StoreLane(src3, vform, i, addr3) ||
+        !StoreLane(src4, vform, i, addr4)) {
+      return false;
+    }
     addr += 4 * esize;
     addr2 += 4 * esize;
     addr3 += 4 * esize;
     addr4 += 4 * esize;
   }
+  return true;
 }
 
 
-void Simulator::st4(VectorFormat vform,
+bool Simulator::st4(VectorFormat vform,
                     LogicVRegister src,
                     LogicVRegister src2,
                     LogicVRegister src3,
@@ -482,10 +524,10 @@
                     int index,
                     uint64_t addr) {
   int esize = LaneSizeInBytesFromFormat(vform);
-  StoreLane(src, vform, index, addr);
-  StoreLane(src2, vform, index, addr + 1 * esize);
-  StoreLane(src3, vform, index, addr + 2 * esize);
-  StoreLane(src4, vform, index, addr + 3 * esize);
+  return (StoreLane(src, vform, index, addr) &&
+          StoreLane(src2, vform, index, addr + 1 * esize) &&
+          StoreLane(src3, vform, index, addr + 2 * esize) &&
+          StoreLane(src4, vform, index, addr + 3 * esize));
 }
 
 
@@ -880,23 +922,12 @@
   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
 }
 
-
 uint64_t Simulator::PolynomialMult(uint64_t op1,
                                    uint64_t op2,
                                    int lane_size_in_bits) const {
-  VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
-  VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
-  VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
-  uint64_t result = 0;
-  for (int i = 0; i < lane_size_in_bits; ++i) {
-    if ((op1 >> i) & 1) {
-      result = result ^ (op2 << i);
-    }
-  }
-  return result;
+  return PolynomialMult128(op1, op2, lane_size_in_bits).second;
 }
 
-
 LogicVRegister Simulator::pmul(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
@@ -918,14 +949,16 @@
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
   dst.ClearForWrite(vform);
-
   VectorFormat vform_src = VectorFormatHalfWidth(vform);
-  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+
+  // Process the elements in reverse to avoid problems when the destination
+  // register is the same as a source.
+  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
     dst.SetUint(vform,
                 i,
-                PolynomialMult(src1.Uint(vform_src, i),
-                               src2.Uint(vform_src, i),
-                               LaneSizeInBitsFromFormat(vform_src)));
+                PolynomialMult128(src1.Uint(vform_src, i),
+                                  src2.Uint(vform_src, i),
+                                  LaneSizeInBitsFromFormat(vform_src)));
   }
 
   return dst;
@@ -936,16 +969,18 @@
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
-  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
   dst.ClearForWrite(vform);
+  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
+
   int lane_count = LaneCountFromFormat(vform);
   for (int i = 0; i < lane_count; i++) {
     dst.SetUint(vform,
                 i,
-                PolynomialMult(src1.Uint(vform_src, lane_count + i),
-                               src2.Uint(vform_src, lane_count + i),
-                               LaneSizeInBitsFromFormat(vform_src)));
+                PolynomialMult128(src1.Uint(vform_src, lane_count + i),
+                                  src2.Uint(vform_src, lane_count + i),
+                                  LaneSizeInBitsFromFormat(vform_src)));
   }
+
   return dst;
 }
 
@@ -2202,7 +2237,6 @@
     offset = LaneCountFromFormat(dstform) / 2;
   } else {
     offset = 0;
-    dst.ClearForWrite(dstform);
   }
 
   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
@@ -2242,6 +2276,13 @@
       dst.SetUint(dstform, offset + i, result);
     }
   }
+
+  if (upperhalf) {
+    // Clear any bits beyond a Q register.
+    dst.ClearForWrite(kFormat16B);
+  } else {
+    dst.ClearForWrite(dstform);
+  }
   return dst;
 }
 
@@ -2284,7 +2325,7 @@
     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
     // Always calculate the answer using unsigned arithmetic, to avoid
-    // implemenation-defined signed overflow.
+    // implementation-defined signed overflow.
     if (src1_gt_src2) {
       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
     } else {
@@ -2473,6 +2514,7 @@
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int rotation) {
+  dst.ClearForWrite(vform);
   int width = LaneSizeInBitsFromFormat(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
     uint64_t value = src.Uint(vform, i);
@@ -2481,6 +2523,14 @@
   return dst;
 }
 
+// Rotate each lane left by `rotation`, expressed as the equivalent ror().
+LogicVRegister Simulator::rol(VectorFormat vform,
+                              LogicVRegister dst,
+                              const LogicVRegister& src,
+                              int rotation) {
+  return ror(vform, dst, src, LaneSizeInBitsFromFormat(vform) - rotation);
+}
+
 LogicVRegister Simulator::ext(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
@@ -2489,10 +2539,10 @@
   uint8_t result[kZRegMaxSizeInBytes] = {};
   int lane_count = LaneCountFromFormat(vform);
   for (int i = 0; i < lane_count - index; ++i) {
-    result[i] = src1.Uint(vform, i + index);
+    result[i] = static_cast<uint8_t>(src1.Uint(vform, i + index));
   }
   for (int i = 0; i < index; ++i) {
-    result[lane_count - index + i] = src2.Uint(vform, i);
+    result[lane_count - index + i] = static_cast<uint8_t>(src2.Uint(vform, i));
   }
   dst.ClearForWrite(vform);
   for (int i = 0; i < lane_count; ++i) {
@@ -2689,7 +2739,7 @@
                                 int index,
                                 int rot) {
   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
-    VIXL_UNIMPLEMENTED();
+    fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
   } else {
@@ -4135,7 +4185,7 @@
 
     // Arithmetic shift the whole value right by `esize - 1` bits.
     accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
-    accum.first = -(accum.first >> (esize - 1));
+    accum.first = UnsignedNegate(accum.first >> (esize - 1));
 
     // Perform saturation.
     bool is_pos = (accum.first == 0) ? true : false;
@@ -4513,7 +4563,7 @@
   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
     // inf * 0.0 returns +/-2.0.
     T two = 2.0;
-    return copysign(1.0, op1) * copysign(1.0, op2) * two;
+    return copysign(T(1.0), op1) * copysign(T(1.0), op2) * two;
   }
   return FPMul(op1, op2);
 }
@@ -4523,8 +4573,8 @@
 T Simulator::FPMulAdd(T a, T op1, T op2) {
   T result = FPProcessNaNs3(a, op1, op2);
 
-  T sign_a = copysign(1.0, a);
-  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
+  T sign_a = copysign(T(1.0), a);
+  T sign_prod = copysign(T(1.0), op1) * copysign(T(1.0), op2);
   bool isinf_prod = IsInf(op1) || IsInf(op2);
   bool operation_generates_nan =
       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
@@ -4550,7 +4600,7 @@
   // Work around broken fma implementations for exact zero results: The sign of
   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
-    return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
+    return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? T(-0.0) : T(0.0);
   }
 
   result = FusedMultiplyAdd(op1, op2, a);
@@ -4559,7 +4609,7 @@
   // Work around broken fma implementations for rounded zero results: If a is
   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   if ((a == 0.0) && (result == 0.0)) {
-    return copysign(0.0, sign_prod);
+    return copysign(T(0.0), sign_prod);
   }
 
   return result;
@@ -4621,9 +4671,9 @@
 template <typename T>
 T Simulator::FPMaxNM(T a, T b) {
   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
-    a = kFP64NegativeInfinity;
+    a = T(kFP64NegativeInfinity);
   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
-    b = kFP64NegativeInfinity;
+    b = T(kFP64NegativeInfinity);
   }
 
   T result = FPProcessNaNs(a, b);
@@ -4648,9 +4698,9 @@
 template <typename T>
 T Simulator::FPMinNM(T a, T b) {
   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
-    a = kFP64PositiveInfinity;
+    a = T(kFP64PositiveInfinity);
   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
-    b = kFP64PositiveInfinity;
+    b = T(kFP64PositiveInfinity);
   }
 
   T result = FPProcessNaNs(a, b);
@@ -4665,8 +4715,8 @@
     return two;
   } else if (IsInf(op1) || IsInf(op2)) {
     // Return +inf if signs match, otherwise -inf.
-    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
-                                          : kFP64NegativeInfinity;
+    return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
+                                          : T(kFP64NegativeInfinity);
   } else {
     return FusedMultiplyAdd(op1, op2, two);
   }
@@ -4695,8 +4745,8 @@
     return one_point_five;
   } else if (IsInf(op1) || IsInf(op2)) {
     // Return +inf if signs match, otherwise -inf.
-    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
-                                          : kFP64NegativeInfinity;
+    return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
+                                          : T(kFP64NegativeInfinity);
   } else {
     // The multiply-add-halve operation must be fully fused, so avoid interim
     // rounding by checking which operand can be losslessly divided by two
@@ -4725,7 +4775,7 @@
       (value == kFP64NegativeInfinity)) {
     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
     // unset the Z-flag.
-    result = 0.0;
+    result = 0;
     if ((value != 0.0) || std::signbit(value)) {
       Z = 0;
     }
@@ -5510,38 +5560,40 @@
 }
 
 
-#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
-  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
-                                LogicVRegister dst,                            \
-                                const LogicVRegister& src1,                    \
-                                const LogicVRegister& src2) {                  \
-    SimVRegister temp1, temp2;                                                 \
-    uzp1(vform, temp1, src1, src2);                                            \
-    uzp2(vform, temp2, src1, src2);                                            \
-    FN(vform, dst, temp1, temp2);                                              \
-    if (IsSVEFormat(vform)) {                                                  \
-      interleave_top_bottom(vform, dst, dst);                                  \
-    }                                                                          \
-    return dst;                                                                \
-  }                                                                            \
-                                                                               \
-  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
-                                LogicVRegister dst,                            \
-                                const LogicVRegister& src) {                   \
-    if (vform == kFormatH) {                                                   \
-      SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),   \
-                           SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
-      dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
-    } else if (vform == kFormatS) {                                            \
-      float result = OP(src.Float<float>(0), src.Float<float>(1));             \
-      dst.SetFloat(0, result);                                                 \
-    } else {                                                                   \
-      VIXL_ASSERT(vform == kFormatD);                                          \
-      double result = OP(src.Float<double>(0), src.Float<double>(1));          \
-      dst.SetFloat(0, result);                                                 \
-    }                                                                          \
-    dst.ClearForWrite(vform);                                                  \
-    return dst;                                                                \
+#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                   \
+  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
+                                LogicVRegister dst,                           \
+                                const LogicVRegister& src1,                   \
+                                const LogicVRegister& src2) {                 \
+    SimVRegister temp1, temp2;                                                \
+    uzp1(vform, temp1, src1, src2);                                           \
+    uzp2(vform, temp2, src1, src2);                                           \
+    FN(vform, dst, temp1, temp2);                                             \
+    if (IsSVEFormat(vform)) {                                                 \
+      interleave_top_bottom(vform, dst, dst);                                 \
+    }                                                                         \
+    return dst;                                                               \
+  }                                                                           \
+                                                                              \
+  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
+                                LogicVRegister dst,                           \
+                                const LogicVRegister& src) {                  \
+    if (vform == kFormatH) {                                                  \
+      SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(                       \
+                               static_cast<uint16_t>(src.Uint(vform, 0)))),   \
+                           SimFloat16(RawbitsToFloat16(                       \
+                               static_cast<uint16_t>(src.Uint(vform, 1)))))); \
+      dst.SetUint(vform, 0, Float16ToRawbits(result));                        \
+    } else if (vform == kFormatS) {                                           \
+      float result = OP(src.Float<float>(0), src.Float<float>(1));            \
+      dst.SetFloat(0, result);                                                \
+    } else {                                                                  \
+      VIXL_ASSERT(vform == kFormatD);                                         \
+      double result = OP(src.Float<double>(0), src.Float<double>(1));         \
+      dst.SetFloat(0, result);                                                \
+    }                                                                         \
+    dst.ClearForWrite(vform);                                                 \
+    return dst;                                                               \
   }
 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
 #undef DEFINE_NEON_FP_PAIR_OP
@@ -5783,7 +5835,8 @@
   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
       float input = src.Float<float>(i);
-      float rounded = FPRoundInt(input, rounding_mode, frint_mode);
+      float rounded =
+          static_cast<float>(FPRoundInt(input, rounding_mode, frint_mode));
 
       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
         FPProcessException();
@@ -5945,6 +5998,7 @@
 LogicVRegister Simulator::fcvtl(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
+  dst.ClearForWrite(vform);
   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
       // TODO: Full support for SimFloat16 in SimRegister(s).
@@ -5965,6 +6019,7 @@
 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
+  dst.ClearForWrite(vform);
   int lane_count = LaneCountFromFormat(vform);
   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
     for (int i = 0; i < lane_count; i++) {
@@ -6010,6 +6065,7 @@
 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
+  dst.ClearForWrite(vform);
   int lane_count = LaneCountFromFormat(vform) / 2;
   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
     for (int i = lane_count - 1; i >= 0; i--) {
@@ -6053,6 +6109,7 @@
                                   LogicVRegister dst,
                                   const LogicVRegister& src) {
   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+  dst.ClearForWrite(vform);
   int lane_count = LaneCountFromFormat(vform) / 2;
   for (int i = lane_count - 1; i >= 0; i--) {
     dst.SetFloat(i + lane_count,
@@ -6089,9 +6146,9 @@
     return FPProcessNaN(op);
   } else if (op == 0.0) {
     if (copysign(1.0, op) < 0.0) {
-      return kFP64NegativeInfinity;
+      return T(kFP64NegativeInfinity);
     } else {
-      return kFP64PositiveInfinity;
+      return T(kFP64PositiveInfinity);
     }
   } else if (copysign(1.0, op) < 0.0) {
     FPProcessException();
@@ -6102,11 +6159,11 @@
     uint64_t fraction;
     int exp, result_exp;
 
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       exp = Float16Exp(op);
       fraction = Float16Mantissa(op);
       fraction <<= 42;
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       exp = FloatExp(op);
       fraction = FloatMantissa(op);
       fraction <<= 29;
@@ -6131,9 +6188,9 @@
       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
     }
 
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       result_exp = (44 - exp) / 2;
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       result_exp = (380 - exp) / 2;
     } else {
       VIXL_ASSERT(IsFloat64<T>());
@@ -6142,11 +6199,11 @@
 
     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
 
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
       return Float16Pack(0, exp_bits, est_bits);
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
       return FloatPack(0, exp_bits, est_bits);
@@ -6186,9 +6243,9 @@
 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   uint32_t sign;
 
-  if (IsFloat16<T>()) {
+  if constexpr (IsFloat16<T>()) {
     sign = Float16Sign(op);
-  } else if (IsFloat32<T>()) {
+  } else if constexpr (IsFloat32<T>()) {
     sign = FloatSign(op);
   } else {
     VIXL_ASSERT(IsFloat64<T>());
@@ -6198,10 +6255,10 @@
   if (IsNaN(op)) {
     return FPProcessNaN(op);
   } else if (IsInf(op)) {
-    return (sign == 1) ? -0.0 : 0.0;
+    return (sign == 1) ? T(-0.0) : T(0.0);
   } else if (op == 0.0) {
     FPProcessException();  // FPExc_DivideByZero exception.
-    return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+    return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
@@ -6224,12 +6281,12 @@
     }
     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
     if (overflow_to_inf) {
-      return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+      return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
     } else {
       // Return FPMaxNormal(sign).
-      if (IsFloat16<T>()) {
+      if constexpr (IsFloat16<T>()) {
         return Float16Pack(sign, 0x1f, 0x3ff);
-      } else if (IsFloat32<T>()) {
+      } else if constexpr (IsFloat32<T>()) {
         return FloatPack(sign, 0xfe, 0x07fffff);
       } else {
         VIXL_ASSERT(IsFloat64<T>());
@@ -6240,12 +6297,12 @@
     uint64_t fraction;
     int exp, result_exp;
 
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       sign = Float16Sign(op);
       exp = Float16Exp(op);
       fraction = Float16Mantissa(op);
       fraction <<= 42;
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       sign = FloatSign(op);
       exp = FloatExp(op);
       fraction = FloatMantissa(op);
@@ -6268,9 +6325,9 @@
 
     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
 
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
     } else {
       VIXL_ASSERT(IsFloat64<T>());
@@ -6286,11 +6343,11 @@
       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
       result_exp = 0;
     }
-    if (IsFloat16<T>()) {
+    if constexpr (IsFloat16<T>()) {
       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
       return Float16Pack(sign, exp_bits, frac_bits);
-    } else if (IsFloat32<T>()) {
+    } else if constexpr (IsFloat32<T>()) {
       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
       return FloatPack(sign, exp_bits, frac_bits);
@@ -6436,12 +6493,12 @@
     } else {
       int exp;
       uint32_t sign;
-      if (IsFloat16<T>()) {
+      if constexpr (IsFloat16<T>()) {
         sign = Float16Sign(op);
         exp = Float16Exp(op);
         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
         result = Float16Pack(sign, exp, 0);
-      } else if (IsFloat32<T>()) {
+      } else if constexpr (IsFloat32<T>()) {
         sign = FloatSign(op);
         exp = FloatExp(op);
         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
@@ -6745,18 +6802,21 @@
 
   if (lane_size == kHRegSize) {
     index_highbit = 4;
-    VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
+    VIXL_ASSERT(ArrayLength(fexpa_coeff16) ==
+                (uint64_t{1} << (index_highbit + 1)));
     fexpa_coeff = fexpa_coeff16;
     op_highbit = 9;
     op_shift = 10;
   } else if (lane_size == kSRegSize) {
-    VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
+    VIXL_ASSERT(ArrayLength(fexpa_coeff32) ==
+                (uint64_t{1} << (index_highbit + 1)));
     fexpa_coeff = fexpa_coeff32;
     op_highbit = 13;
     op_shift = 23;
   } else {
     VIXL_ASSERT(lane_size == kDRegSize);
-    VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
+    VIXL_ASSERT(ArrayLength(fexpa_coeff64) ==
+                (uint64_t{1} << (index_highbit + 1)));
     fexpa_coeff = fexpa_coeff64;
     op_highbit = 16;
     op_shift = 52;
@@ -7253,7 +7313,9 @@
 
     for (int r = 0; r < reg_count; r++) {
       uint64_t element_address = addr.GetElementAddress(i, r);
-      StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
+      if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
+        return;
+      }
     }
   }
 
@@ -7277,7 +7339,7 @@
   }
 }
 
-void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
+bool Simulator::SVEStructuredLoadHelper(VectorFormat vform,
                                         const LogicPRegister& pg,
                                         unsigned zt_code,
                                         const LogicSVEAddressVector& addr,
@@ -7312,9 +7374,13 @@
       }
 
       if (is_signed) {
-        LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
+        if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
+          return false;
+        }
       } else {
-        LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
+        if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
+          return false;
+        }
       }
     }
   }
@@ -7333,6 +7399,7 @@
                        "<-",
                        addr);
   }
+  return true;
 }
 
 LogicPRegister Simulator::brka(LogicPRegister pd,
@@ -7427,7 +7494,7 @@
 
   // Non-faulting loads are allowed to fail arbitrarily. To stress user
   // code, fail a random element in roughly one in eight full-vector loads.
-  uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
+  uint32_t rnd = static_cast<uint32_t>(rand_gen_());
   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
 
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
@@ -7440,7 +7507,9 @@
         // First-faulting loads always load the first active element, regardless
         // of FFR. The result will be discarded if its FFR lane is inactive, but
         // it could still generate a fault.
-        value = MemReadUint(msize_in_bytes, element_address);
+        VIXL_DEFINE_OR_RETURN(mem_result,
+                              MemReadUint(msize_in_bytes, element_address));
+        value = mem_result;
         // All subsequent elements have non-fault semantics.
         type = kSVENonFaultLoad;
 
@@ -7452,7 +7521,9 @@
         bool can_read = (i < fake_fault_at_lane) &&
                         CanReadMemory(element_address, msize_in_bytes);
         if (can_read) {
-          value = MemReadUint(msize_in_bytes, element_address);
+          VIXL_DEFINE_OR_RETURN(mem_result,
+                                MemReadUint(msize_in_bytes, element_address));
+          value = mem_result;
         } else {
           // Propagate the fault to the end of FFR.
           for (int j = i; j < LaneCountFromFormat(vform); j++) {
@@ -7501,7 +7572,7 @@
   // Note that these instructions don't use the Dtype encoding.
   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
-  uint64_t base = ReadXRegister(instr->GetRn());
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   LogicSVEAddressVector addr(base,
                              &ReadVRegister(instr->GetRm()),
                              vform,
@@ -7810,7 +7881,7 @@
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
     // Elements outside a multiple of 4T are set to zero. This happens only
     // for double precision operations, when the VL is a multiple of 128 bits,
-    // but not a mutiple of 256 bits.
+    // but not a multiple of 256 bits.
     T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
     srcdst.SetFloat<T>(vform, i, value);
   }
@@ -7830,6 +7901,653 @@
   return dst;
 }
 
+template <>  // Bitwise select: each result bit is y's where x is set, else z's.
+uint64_t CryptoOp<"choose"_h>(uint64_t x, uint64_t y, uint64_t z) {
+  return (x & y) | (~x & z);
+}
+
+template <>  // Bitwise majority: a result bit is set when >= 2 inputs have it.
+uint64_t CryptoOp<"majority"_h>(uint64_t x, uint64_t y, uint64_t z) {
+  return (x & y) | (x & z) | (y & z);
+}
+
+template <>  // Bitwise parity: XOR of the three inputs.
+uint64_t CryptoOp<"parity"_h>(uint64_t x, uint64_t y, uint64_t z) {
+  return (x ^ y) ^ z;
+}
+
+template <typename T, unsigned A, unsigned B, unsigned C>
+static uint64_t SHASigma(uint64_t x) {  // XOR of three T-wide right-rotations.
+  const unsigned bits = sizeof(T) * kBitsPerByte;  // Rotation width.
+  uint64_t mix = RotateRight(x, A, bits) ^ RotateRight(x, B, bits);
+  return static_cast<T>(mix ^ RotateRight(x, C, bits));
+}
+
+LogicVRegister Simulator::sha2h(LogicVRegister srcdst,
+                                const LogicVRegister& src1,
+                                const LogicVRegister& src2,
+                                bool part1) {  // Result is written to srcdst.
+  uint64_t x[4] = {};
+  uint64_t y[4] = {};
+  if (part1) {
+    // part1: x comes from srcdst and y from src1; otherwise they are swapped.
+    srcdst.UintArray(kFormat4S, x);
+    src1.UintArray(kFormat4S, y);
+  } else {
+    src1.UintArray(kFormat4S, x);
+    srcdst.UintArray(kFormat4S, y);
+  }
+
+  for (unsigned i = 0; i < ArrayLength(x); i++) {  // One round per src2 lane.
+    uint64_t chs = CryptoOp<"choose"_h>(y[0], y[1], y[2]);
+    uint64_t maj = CryptoOp<"majority"_h>(x[0], x[1], x[2]);
+
+    uint64_t w = src2.Uint(kFormat4S, i);
+    uint64_t t = y[3] + SHASigma<uint32_t, 6, 11, 25>(y[0]) + chs + w;
+
+    x[3] += t;
+    y[3] = t + SHASigma<uint32_t, 2, 13, 22>(x[0]) + maj;
+
+    // y:x = ROL(y:x, 32), treating y:x as one eight-element value.
+    SHARotateEltsLeftOne(x);
+    SHARotateEltsLeftOne(y);
+    std::swap(x[0], y[0]);
+  }
+
+  srcdst.SetUintArray(kFormat4S, part1 ? x : y);  // part1 keeps x, else y.
+  return srcdst;
+}
+
+template <typename T, unsigned A, unsigned B, unsigned C>
+static uint64_t SHASURotate(uint64_t x) {  // ROR(x, A) ^ ROR(x, B) ^ LSR(x, C).
+  const unsigned bits = sizeof(T) * kBitsPerByte;
+  const uint64_t shifted = (x & ~static_cast<T>(0)) >> C;  // T-wide LSR.
+  return RotateRight(x, A, bits) ^ RotateRight(x, B, bits) ^ shifted;
+}
+
+LogicVRegister Simulator::sha2su0(LogicVRegister srcdst,
+                                  const LogicVRegister& src1) {
+  // Each lane of srcdst is incremented by SHASURotate<7, 18, 3> of the lane
+  // above it; the top lane uses lane 0 of src1 as its neighbour.
+  uint64_t w[4] = {};
+  srcdst.UintArray(kFormat4S, w);
+  const uint64_t next[4] = {w[1], w[2], w[3], src1.Uint(kFormat4S, 0)};
+
+  uint64_t sum[4] = {};
+  for (int lane = 0; lane < 4; lane++) {
+    sum[lane] = SHASURotate<uint32_t, 7, 18, 3>(next[lane]) + w[lane];
+  }
+  srcdst.SetUintArray(kFormat4S, sum);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sha2su1(LogicVRegister srcdst,
+                                  const LogicVRegister& src1,
+                                  const LogicVRegister& src2) {
+  uint64_t acc[4] = {};
+  uint64_t a[4] = {};
+  uint64_t b[4] = {};
+  srcdst.UintArray(kFormat4S, acc);
+  src1.UintArray(kFormat4S, a);
+  src2.UintArray(kFormat4S, b);
+  // Lanes 0-1 are seeded by src2 lanes 2-3, lanes 2-3 by the new lanes 0-1.
+  const uint64_t addend[4] = {a[1], a[2], a[3], b[0]};
+  uint64_t out[4] = {};
+  for (int i = 0; i < 4; i++) {
+    uint64_t seed = (i < 2) ? b[i + 2] : out[i - 2];
+    out[i] = SHASURotate<uint32_t, 17, 19, 10>(seed) + acc[i] + addend[i];
+  }
+  srcdst.SetUintArray(kFormat4S, out);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sha512h(LogicVRegister srcdst,
+                                  const LogicVRegister& src1,
+                                  const LogicVRegister& src2) {
+  uint64_t acc[2] = {};
+  uint64_t a[2] = {};
+  uint64_t b[2] = {};
+  srcdst.UintArray(kFormat2D, acc);
+  src1.UintArray(kFormat2D, a);
+  src2.UintArray(kFormat2D, b);
+
+  // The high lane is computed first because the low lane depends on it.
+  uint64_t hi = CryptoOp<"choose"_h>(b[1], a[0], a[1]) +
+                SHASigma<uint64_t, 14, 18, 41>(b[1]) + acc[1];
+
+  uint64_t mixed = hi + b[0];
+  uint64_t lo = CryptoOp<"choose"_h>(mixed, b[1], a[0]) +
+                SHASigma<uint64_t, 14, 18, 41>(mixed) + acc[0];
+
+  uint64_t out[2] = {lo, hi};
+  srcdst.SetUintArray(kFormat2D, out);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sha512h2(LogicVRegister srcdst,
+                                   const LogicVRegister& src1,
+                                   const LogicVRegister& src2) {
+  uint64_t acc[2] = {};
+  uint64_t a[2] = {};
+  uint64_t b[2] = {};
+  srcdst.UintArray(kFormat2D, acc);
+  src1.UintArray(kFormat2D, a);
+  src2.UintArray(kFormat2D, b);
+
+  // The high lane is computed first because the low lane depends on it.
+  uint64_t hi = CryptoOp<"majority"_h>(a[0], b[1], b[0]) +
+                SHASigma<uint64_t, 28, 34, 39>(b[0]) + acc[1];
+  uint64_t lo = CryptoOp<"majority"_h>(hi, b[0], b[1]) +
+                SHASigma<uint64_t, 28, 34, 39>(hi) + acc[0];
+
+  uint64_t out[2] = {lo, hi};
+  srcdst.SetUintArray(kFormat2D, out);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sha512su0(LogicVRegister srcdst,
+                                    const LogicVRegister& src1) {
+  // Each lane adds SHASURotate<1, 8, 7> of the lane above it; lane 0 of src1
+  // acts as the lane above the top lane of srcdst.
+  uint64_t acc[2] = {};
+  uint64_t upper[2] = {};
+  srcdst.UintArray(kFormat2D, acc);
+  src1.UintArray(kFormat2D, upper);
+
+  uint64_t out[2] = {SHASURotate<uint64_t, 1, 8, 7>(acc[1]) + acc[0],
+                     SHASURotate<uint64_t, 1, 8, 7>(upper[0]) + acc[1]};
+  srcdst.SetUintArray(kFormat2D, out);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sha512su1(LogicVRegister srcdst,
+                                    const LogicVRegister& src1,
+                                    const LogicVRegister& src2) {
+  uint64_t acc[2] = {};
+  uint64_t a[2] = {};
+  uint64_t b[2] = {};
+  srcdst.UintArray(kFormat2D, acc);
+  src1.UintArray(kFormat2D, a);
+  src2.UintArray(kFormat2D, b);
+  // Lanes are independent: srcdst + SHASURotate<19, 61, 6>(src1) + src2.
+  uint64_t out[2] = {};
+  for (int i = 0; i < 2; i++) {
+    out[i] = acc[i] + SHASURotate<uint64_t, 19, 61, 6>(a[i]) + b[i];
+  }
+  srcdst.SetUintArray(kFormat2D, out);
+  return srcdst;
+}
+
+static uint8_t GalMul(int table, uint64_t x) {
+  // Galois multiplication lookup tables; `table` selects one by multiplier.
+  static const uint8_t ffmul02[256] = {
+      0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16,
+      0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+      0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46,
+      0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+      0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76,
+      0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e,
+      0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6,
+      0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+      0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6,
+      0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee,
+      0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d,
+      0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+      0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d,
+      0x23, 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55,
+      0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, 0x7b, 0x79, 0x7f, 0x7d,
+      0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+      0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d,
+      0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5,
+      0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd,
+      0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+      0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed,
+      0xe3, 0xe1, 0xe7, 0xe5,
+  };
+
+  static const uint8_t ffmul03[256] = {
+      0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d,
+      0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39,
+      0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65,
+      0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+      0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d,
+      0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
+      0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5,
+      0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+      0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd,
+      0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99,
+      0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e,
+      0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+      0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6,
+      0xbf, 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2,
+      0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, 0xcb, 0xc8, 0xcd, 0xce,
+      0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+      0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46,
+      0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62,
+      0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e,
+      0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+      0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16,
+      0x1f, 0x1c, 0x19, 0x1a,
+  };
+
+  static const uint8_t ffmul09[256] = {
+      0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53,
+      0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf,
+      0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3b, 0x32, 0x29, 0x20,
+      0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+      0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8,
+      0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49,
+      0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd,
+      0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+      0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e,
+      0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2,
+      0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7,
+      0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+      0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f,
+      0x10, 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8,
+      0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0x47, 0x4e, 0x55, 0x5c,
+      0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+      0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9,
+      0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35,
+      0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba,
+      0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+      0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62,
+      0x5d, 0x54, 0x4f, 0x46,
+  };
+
+  static const uint8_t ffmul0b[256] = {
+      0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45,
+      0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
+      0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7b, 0x70, 0x6d, 0x66,
+      0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+      0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e,
+      0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7,
+      0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b,
+      0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+      0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8,
+      0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c,
+      0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea,
+      0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+      0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02,
+      0x33, 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd,
+      0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, 0x3c, 0x37, 0x2a, 0x21,
+      0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+      0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44,
+      0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
+      0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67,
+      0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+      0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f,
+      0xbe, 0xb5, 0xa8, 0xa3,
+  };
+
+  static const uint8_t ffmul0d[256] = {
+      0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f,
+      0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3,
+      0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbb, 0xb6, 0xa1, 0xac,
+      0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+      0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14,
+      0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e,
+      0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa,
+      0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+      0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9,
+      0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25,
+      0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd,
+      0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+      0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75,
+      0x56, 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42,
+      0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, 0xb1, 0xbc, 0xab, 0xa6,
+      0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+      0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8,
+      0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
+      0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b,
+      0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+      0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3,
+      0x80, 0x8d, 0x9a, 0x97,
+  };
+
+  static const uint8_t ffmul0e[256] = {
+      0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62,
+      0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca,
+      0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdb, 0xd5, 0xc7, 0xc9,
+      0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+      0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59,
+      0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87,
+      0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f,
+      0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+      0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14,
+      0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc,
+      0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53,
+      0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+      0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3,
+      0xe9, 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0,
+      0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x7a, 0x74, 0x66, 0x68,
+      0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+      0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e,
+      0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26,
+      0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25,
+      0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+      0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5,
+      0x9f, 0x91, 0x83, 0x8d,
+  };
+
+  x &= 255;  // Only the low byte of x is used as the table index.
+  switch (table) {
+    case 0x2:
+      return ffmul02[x];
+    case 0x3:
+      return ffmul03[x];
+    case 0x9:
+      return ffmul09[x];
+    case 0xb:
+      return ffmul0b[x];
+    case 0xd:
+      return ffmul0d[x];
+    case 0xe:
+      return ffmul0e[x];
+    case 0:
+      // Case 0 indicates no table lookup, used for some forward mix stages.
+      return static_cast<uint8_t>(x);
+    default:
+      VIXL_UNREACHABLE();  // No table exists for any other selector.
+      return static_cast<uint8_t>(x);
+  }
+}
+
+
+static uint8_t AESMixInner(uint64_t* x, int stage, bool inverse) {
+  VIXL_ASSERT(IsUint2(stage));
+
+  // GalMul table selectors for one matrix row; only entries 0-3 are indexed.
+  static const int imc_gm[4] = {0xb, 0xd, 0x9, 0xe};
+  static const int mc_gm[4] = {0x3, 0x0, 0x0, 0x2};
+  const int* gm = inverse ? imc_gm : mc_gm;
+  int index = 3 - stage;  // Each stage starts one position earlier in the row.
+
+  uint8_t result = 0;  // XOR of the four selected products of x[0..3].
+  for (int i = 0; i < 4; i++) {
+    result ^= GalMul(gm[(index + i) % 4], x[i]);
+  }
+  return result;
+}
+
+
+LogicVRegister Simulator::aesmix(LogicVRegister dst,
+                                 const LogicVRegister& src,
+                                 bool inverse) {
+  // The source bytes are copied out before dst is cleared and rewritten.
+  uint64_t bytes[16] = {};
+  src.UintArray(kFormat16B, bytes);
+  dst.ClearForWrite(kFormat16B);
+  for (int column = 0; column < 16; column += 4) {
+    // Each aligned group of four bytes is mixed on its own.
+    for (int row = 0; row < 4; row++) {
+      uint8_t mixed = AESMixInner(&bytes[column], row, inverse);
+      dst.SetUint(kFormat16B, column + row, mixed);
+    }
+  }
+  return dst;
+}
+
+LogicVRegister Simulator::aes(LogicVRegister dst,
+                              const LogicVRegister& src,
+                              bool decrypt) {
+  // (Inverse) shift rows: output byte i is taken from input byte shift[i].
+  static const uint8_t shift[16] =
+      {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
+  static const uint8_t shift_inv[16] =
+      {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3};
+  // (Inverse) substitute bytes.
+  static const uint8_t gf2[256] = {
+      0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
+      0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+      0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26,
+      0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+      0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
+      0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+      0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed,
+      0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+      0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
+      0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+      0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec,
+      0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+      0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
+      0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+      0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
+      0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+      0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
+      0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+      0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11,
+      0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+      0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
+      0xb0, 0x54, 0xbb, 0x16,
+  };
+  static const uint8_t gf2_inv[256] = {
+      0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e,
+      0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+      0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32,
+      0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+      0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49,
+      0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+      0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50,
+      0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+      0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05,
+      0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+      0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41,
+      0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+      0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
+      0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+      0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b,
+      0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+      0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59,
+      0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+      0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d,
+      0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+      0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63,
+      0x55, 0x21, 0x0c, 0x7d,
+  };
+  // Snapshot src before touching dst so that the result is correct even when
+  // dst and src name the same register (aesmix takes the same precaution).
+  uint64_t in[16] = {};
+  src.UintArray(kFormat16B, in);
+  dst.ClearForWrite(kFormat16B);
+  const uint8_t* order = decrypt ? shift_inv : shift;
+  const uint8_t* sbox = decrypt ? gf2_inv : gf2;
+  for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) {
+    dst.SetUint(kFormat16B, i, sbox[in[order[i]]]);
+  }
+  return dst;
+}
+
+LogicVRegister Simulator::sm3partw1(LogicVRegister srcdst,
+                                    const LogicVRegister& src1,
+                                    const LogicVRegister& src2) {
+  using namespace std::placeholders;
+  auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);  // ROL(value, amount).
+
+  SimVRegister temp;  // NOTE(review): read by ext() below; presumably zeroed.
+
+  ext(kFormat16B, temp, src2, temp, 4);
+  rol(kFormat4S, temp, temp, 15);
+  eor(kFormat4S, temp, temp, src1);
+  LogicVRegister r = eor(kFormat4S, temp, temp, srcdst);
+
+  uint64_t result[4] = {};
+  r.UintArray(kFormat4S, result);
+  for (int i = 0; i < 4; i++) {
+    if (i == 3) {
+      // result[3] already contains srcdst[3] ^ src1[3] from the operations
+      // above; result[0] has already been finalised by iteration 0.
+      result[i] ^= ROL(result[0], 15);
+    }
+    result[i] ^= ROL(result[i], 15) ^ ROL(result[i], 23);
+  }
+  srcdst.SetUintArray(kFormat4S, result);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sm3partw2(LogicVRegister srcdst,
+                                    const LogicVRegister& src1,
+                                    const LogicVRegister& src2) {
+  using namespace std::placeholders;
+  auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);  // ROL(value, amount).
+
+  SimVRegister temp;
+  VectorFormat vf = kFormat4S;
+
+  rol(vf, temp, src2, 7);
+  LogicVRegister r = eor(vf, temp, temp, src1);
+  eor(vf, srcdst, temp, srcdst);  // eor of temp and srcdst, kept in srcdst.
+
+  uint64_t tmp2 = ROL(r.Uint(vf, 0), 15);  // Derived from lane 0 of r only.
+  tmp2 ^= ROL(tmp2, 15) ^ ROL(tmp2, 23);
+  srcdst.SetUint(vf, 3, srcdst.Uint(vf, 3) ^ tmp2);  // Only lane 3 gets tmp2.
+  return srcdst;
+}
+
+LogicVRegister Simulator::sm3ss1(LogicVRegister dst,
+                                 const LogicVRegister& src1,
+                                 const LogicVRegister& src2,
+                                 const LogicVRegister& src3) {
+  // Only lane 3 of each source is read, and all reads happen before dst is
+  // cleared; the rotated sum is then stored in lane 3 of dst.
+  const VectorFormat vf = kFormat4S;
+  uint64_t sum = RotateLeft(src1.Uint(vf, 3), 12, kSRegSize);
+  sum += src2.Uint(vf, 3);
+  sum += src3.Uint(vf, 3);
+  dst.Clear();
+  dst.SetUint(vf, 3, RotateLeft(sum, 7, kSRegSize));
+  return dst;
+}
+
+LogicVRegister Simulator::sm3tt1(LogicVRegister srcdst,
+                                 const LogicVRegister& src1,
+                                 const LogicVRegister& src2,
+                                 int index,
+                                 bool is_a) {  // is_a: parity; else majority.
+  VectorFormat vf = kFormat4S;
+  using namespace std::placeholders;
+  auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
+  auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);  // sd(i): lane i.
+
+  VIXL_ASSERT(IsUint2(index));
+
+  uint64_t wjprime = src2.Uint(vf, index);
+  uint64_t ss2 = src1.Uint(vf, 3) ^ ROL(sd(3), 12);
+
+  uint64_t tt1;
+  if (is_a) {
+    tt1 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
+  } else {
+    tt1 = CryptoOp<"majority"_h>(sd(1), sd(2), sd(3));
+  }
+  tt1 += sd(0) + ss2 + wjprime;
+  // NOTE(review): sd presumably observes the ext() below; confirm aliasing.
+  ext(kFormat16B, srcdst, srcdst, srcdst, 4);
+  srcdst.SetUint(vf, 1, ROL(sd(1), 9));  // sd(1) is read after the ext above.
+  srcdst.SetUint(vf, 3, tt1);
+  return srcdst;
+}
+
+LogicVRegister Simulator::sm3tt2(LogicVRegister srcdst,
+                                 const LogicVRegister& src1,
+                                 const LogicVRegister& src2,
+                                 int index,
+                                 bool is_a) {  // is_a: parity; else choose.
+  VectorFormat vf = kFormat4S;
+  using namespace std::placeholders;
+  auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
+  auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);  // sd(i): lane i.
+
+  VIXL_ASSERT(IsUint2(index));
+
+  uint64_t wj = src2.Uint(vf, index);
+
+  uint64_t tt2;
+  if (is_a) {
+    tt2 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
+  } else {
+    tt2 = CryptoOp<"choose"_h>(sd(3), sd(2), sd(1));
+  }
+  tt2 += sd(0) + src1.Uint(vf, 3) + wj;
+  // NOTE(review): sd presumably observes the ext() below; confirm aliasing.
+  ext(kFormat16B, srcdst, srcdst, srcdst, 4);
+  srcdst.SetUint(vf, 1, ROL(sd(1), 19));  // sd(1) is read after the ext above.
+  tt2 ^= ROL(tt2, 9) ^ ROL(tt2, 17);
+  srcdst.SetUint(vf, 3, tt2);
+  return srcdst;
+}
+
+static uint64_t SM4SBox(uint64_t x) {  // Substitutes bits 31:0 byte by byte.
+  static const uint8_t sbox[256] = {  // Looked up at index (255 - byte).
+      0x48, 0x39, 0xcb, 0xd7, 0x3e, 0x5f, 0xee, 0x79, 0x20, 0x4d, 0xdc, 0x3a,
+      0xec, 0x7d, 0xf0, 0x18, 0x84, 0xc6, 0x6e, 0xc5, 0x09, 0xf1, 0xb9, 0x65,
+      0x7e, 0x77, 0x96, 0x0c, 0x4a, 0x97, 0x69, 0x89, 0xb0, 0xb4, 0xe5, 0xb8,
+      0x12, 0xd0, 0x74, 0x2d, 0xbd, 0x7b, 0xcd, 0xa5, 0x88, 0x31, 0xc1, 0x0a,
+      0xd8, 0x5a, 0x10, 0x1f, 0x41, 0x5c, 0xd9, 0x11, 0x7f, 0xbc, 0xdd, 0xbb,
+      0x92, 0xaf, 0x1b, 0x8d, 0x51, 0x5b, 0x6c, 0x6d, 0x72, 0x6a, 0xff, 0x03,
+      0x2f, 0x8e, 0xfd, 0xde, 0x45, 0x37, 0xdb, 0xd5, 0x6f, 0x4e, 0x53, 0x0d,
+      0xab, 0x23, 0x29, 0xc0, 0x60, 0xca, 0x66, 0x82, 0x2e, 0xe2, 0xf6, 0x1d,
+      0xe3, 0xb1, 0x8c, 0xf5, 0x30, 0x32, 0x93, 0xad, 0x55, 0x1a, 0x34, 0x9b,
+      0xa4, 0x5d, 0xae, 0xe0, 0xa1, 0x15, 0x61, 0xf9, 0xce, 0xf2, 0xf7, 0xa3,
+      0xb5, 0x38, 0xc7, 0x40, 0xd2, 0x8a, 0xbf, 0xea, 0x9e, 0xc8, 0xc4, 0xa0,
+      0xe7, 0x02, 0x36, 0x4c, 0x52, 0x27, 0xd3, 0x9f, 0x57, 0x46, 0x00, 0xd4,
+      0x87, 0x78, 0x21, 0x01, 0x3b, 0x7c, 0x22, 0x25, 0xa2, 0xd1, 0x58, 0x63,
+      0x5e, 0x0e, 0x24, 0x1e, 0x35, 0x9d, 0x56, 0x70, 0x4b, 0x0f, 0xeb, 0xf8,
+      0x8b, 0xda, 0x64, 0x71, 0xb2, 0x81, 0x6b, 0x68, 0xa8, 0x4f, 0x85, 0xe6,
+      0x19, 0x3c, 0x59, 0x83, 0xba, 0x17, 0x73, 0xf3, 0xfc, 0xa7, 0x07, 0x47,
+      0xa6, 0x3f, 0x8f, 0x75, 0xfa, 0x94, 0xdf, 0x80, 0x95, 0xe8, 0x08, 0xc9,
+      0xa9, 0x1c, 0xb3, 0xe4, 0x62, 0xac, 0xcf, 0xed, 0x43, 0x0b, 0x54, 0x33,
+      0x7a, 0x98, 0xef, 0x91, 0xf4, 0x50, 0x42, 0x9c, 0x99, 0x06, 0x86, 0x49,
+      0x26, 0x13, 0x44, 0xaa, 0xc3, 0x04, 0xbe, 0x2a, 0x76, 0x9a, 0x67, 0x2b,
+      0x05, 0x2c, 0xfb, 0x28, 0xc2, 0x14, 0xb6, 0x16, 0xb7, 0x3d, 0xe1, 0xcc,
+      0xfe, 0xe9, 0x90, 0xd6,
+  };
+  uint64_t substituted = 0;
+  for (int shift = 24; shift >= 0; shift -= 8) {  // Bits 31:24 first.
+    uint8_t index = static_cast<uint8_t>(~(x >> shift));  // == 255 - byte.
+    substituted = (substituted << 8) | sbox[index];
+  }
+  return substituted;
+}
+
+LogicVRegister Simulator::sm4(LogicVRegister srcdst,
+                              const LogicVRegister& src1,
+                              const LogicVRegister& src2,
+                              bool is_key) {
+  // The four-word state starts as src1 (key mode) or srcdst; the per-round
+  // input word comes from src2 (key mode) or src1.
+  const VectorFormat vf = kFormat4S;
+  const LogicVRegister& round_src = is_key ? src2 : src1;
+  uint64_t state[4] = {};
+  if (is_key) {
+    src1.UintArray(vf, state);
+  } else {
+    srcdst.UintArray(vf, state);
+  }
+
+  for (int round = 0; round < 4; round++) {
+    uint64_t t = state[3] ^ state[2] ^ state[1] ^ round_src.Uint(vf, round);
+    t = SM4SBox(t);
+
+    // Linear mixing; the rotation amounts differ between the two modes.
+    if (is_key) {
+      t ^= RotateLeft(t, 13, kSRegSize) ^ RotateLeft(t, 23, kSRegSize);
+    } else {
+      t ^= RotateLeft(t, 2, kSRegSize) ^ RotateLeft(t, 10, kSRegSize) ^
+           RotateLeft(t, 18, kSRegSize) ^ RotateLeft(t, 24, kSRegSize);
+    }
+    const uint64_t next = t ^ state[0];
+
+    // Slide the window down one word and append the new word on top.
+    state[0] = state[1];
+    state[1] = state[2];
+    state[2] = state[3];
+    state[3] = next;
+  }
+  srcdst.SetUintArray(vf, state);
+  return srcdst;
+}
+
 }  // namespace aarch64
 }  // namespace vixl
 
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index e18f846..af90a42 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -24,10 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <cctype>
-
 #include "macro-assembler-aarch64.h"
 
+#include <cctype>
+
 namespace vixl {
 namespace aarch64 {
 
@@ -194,9 +194,8 @@
 
 void VeneerPool::Release() {
   if (--monitor_ == 0) {
-    VIXL_ASSERT(IsEmpty() ||
-                masm_->GetCursorOffset() <
-                    unresolved_branches_.GetFirstLimit());
+    VIXL_ASSERT(IsEmpty() || masm_->GetCursorOffset() <
+                                 unresolved_branches_.GetFirstLimit());
   }
 }
 
@@ -1061,7 +1060,7 @@
     Register temp = temps.AcquireX();
     Mov(temp, imm);
     if (vd.Is1D()) {
-      mov(vd.D(), 0, temp);
+      fmov(vd.D(), temp);
     } else {
       dup(vd.V2D(), temp);
     }
@@ -1097,8 +1096,14 @@
 void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
   // TODO: Move 128-bit values in a more efficient way.
   VIXL_ASSERT(vd.Is128Bits());
-  Movi(vd.V2D(), lo);
-  if (hi != lo) {
+  if (hi == lo) {
+    Movi(vd.V2D(), lo);
+    return;
+  }
+
+  Movi(vd.V1D(), lo);
+
+  if (hi != 0) {
     UseScratchRegisterScope temps(this);
     // TODO: Figure out if using a temporary V register to materialise the
     // immediate is better.
@@ -1144,11 +1149,14 @@
                           StatusFlags nzcv,
                           Condition cond) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
-    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
-  } else {
-    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && CanBeNegated(imm)) {
+      ConditionalCompareMacro(rn, -imm, nzcv, cond, CCMN);
+      return;
+    }
   }
+  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
 }
 
 
@@ -1157,11 +1165,14 @@
                           StatusFlags nzcv,
                           Condition cond) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
-    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
-  } else {
-    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && CanBeNegated(imm)) {
+      ConditionalCompareMacro(rn, -imm, nzcv, cond, CCMP);
+      return;
+    }
   }
+  ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
 }
 
 
@@ -1395,8 +1406,7 @@
   VIXL_ASSERT(allow_macro_instructions_);
   if (operand.IsImmediate()) {
     int64_t imm = operand.GetImmediate();
-    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
-        IsImmAddSub(-imm)) {
+    if ((imm < 0) && CanBeNegated(imm) && IsImmAddSub(-imm)) {
       AddSubMacro(rd, rn, -imm, S, SUB);
       return;
     }
@@ -1411,6 +1421,70 @@
   Add(rd, rn, operand, SetFlags);
 }
 
+#define MINMAX(V)        \
+  V(Smax, smax, IsInt8)  \
+  V(Smin, smin, IsInt8)  \
+  V(Umax, umax, IsUint8) \
+  V(Umin, umin, IsUint8)
+
+#define VIXL_DEFINE_MASM_FUNC(MASM, ASM, RANGE)      \
+  void MacroAssembler::MASM(const Register& rd,      \
+                            const Register& rn,      \
+                            const Operand& op) {     \
+    VIXL_ASSERT(allow_macro_instructions_);          \
+    if (op.IsImmediate()) {                          \
+      int64_t imm = op.GetImmediate();               \
+      if (!RANGE(imm)) {                             \
+        UseScratchRegisterScope temps(this);         \
+        Register temp = temps.AcquireSameSizeAs(rd); \
+        Mov(temp, imm);                              \
+        MASM(rd, rn, temp);                          \
+        return;                                      \
+      }                                              \
+    }                                                \
+    SingleEmissionCheckScope guard(this);            \
+    ASM(rd, rn, op);                                 \
+  }
+MINMAX(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
+void MacroAssembler::St2g(const Register& rt, const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  st2g(rt, addr);
+}
+
+void MacroAssembler::Stg(const Register& rt, const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  stg(rt, addr);
+}
+
+void MacroAssembler::Stgp(const Register& rt1,
+                          const Register& rt2,
+                          const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  stgp(rt1, rt2, addr);
+}
+
+void MacroAssembler::Stz2g(const Register& rt, const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  stz2g(rt, addr);
+}
+
+void MacroAssembler::Stzg(const Register& rt, const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  stzg(rt, addr);
+}
+
+void MacroAssembler::Ldg(const Register& rt, const MemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SingleEmissionCheckScope guard(this);
+  ldg(rt, addr);
+}
 
 void MacroAssembler::Sub(const Register& rd,
                          const Register& rn,
@@ -1419,8 +1493,7 @@
   VIXL_ASSERT(allow_macro_instructions_);
   if (operand.IsImmediate()) {
     int64_t imm = operand.GetImmediate();
-    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
-        IsImmAddSub(-imm)) {
+    if ((imm < 0) && CanBeNegated(imm) && IsImmAddSub(-imm)) {
       AddSubMacro(rd, rn, -imm, S, ADD);
       return;
     }
@@ -1474,6 +1547,12 @@
   VIXL_ASSERT(allow_macro_instructions_);
   // Floating point immediates are loaded through the literal pool.
   MacroEmissionCheckScope guard(this);
+  uint64_t rawbits = DoubleToRawbits(imm);
+
+  if (rawbits == 0) {
+    fmov(vd.D(), xzr);
+    return;
+  }
 
   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
     Fmov(vd, Float16(imm));
@@ -1486,23 +1565,16 @@
   }
 
   VIXL_ASSERT(vd.Is1D() || vd.Is2D());
-  if (IsImmFP64(imm)) {
+  if (IsImmFP64(rawbits)) {
     fmov(vd, imm);
+  } else if (vd.IsScalar()) {
+    ldr(vd,
+        new Literal<double>(imm,
+                            &literal_pool_,
+                            RawLiteral::kDeletedOnPlacementByPool));
   } else {
-    uint64_t rawbits = DoubleToRawbits(imm);
-    if (vd.IsScalar()) {
-      if (rawbits == 0) {
-        fmov(vd, xzr);
-      } else {
-        ldr(vd,
-            new Literal<double>(imm,
-                                &literal_pool_,
-                                RawLiteral::kDeletedOnPlacementByPool));
-      }
-    } else {
-      // TODO: consider NEON support for load literal.
-      Movi(vd, rawbits);
-    }
+    // TODO: consider NEON support for load literal.
+    Movi(vd, rawbits);
   }
 }
 
@@ -1511,6 +1583,12 @@
   VIXL_ASSERT(allow_macro_instructions_);
   // Floating point immediates are loaded through the literal pool.
   MacroEmissionCheckScope guard(this);
+  uint32_t rawbits = FloatToRawbits(imm);
+
+  if (rawbits == 0) {
+    fmov(vd.S(), wzr);
+    return;
+  }
 
   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
     Fmov(vd, Float16(imm));
@@ -1523,23 +1601,16 @@
   }
 
   VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
-  if (IsImmFP32(imm)) {
+  if (IsImmFP32(rawbits)) {
     fmov(vd, imm);
+  } else if (vd.IsScalar()) {
+    ldr(vd,
+        new Literal<float>(imm,
+                           &literal_pool_,
+                           RawLiteral::kDeletedOnPlacementByPool));
   } else {
-    uint32_t rawbits = FloatToRawbits(imm);
-    if (vd.IsScalar()) {
-      if (rawbits == 0) {
-        fmov(vd, wzr);
-      } else {
-        ldr(vd,
-            new Literal<float>(imm,
-                               &literal_pool_,
-                               RawLiteral::kDeletedOnPlacementByPool));
-      }
-    } else {
-      // TODO: consider NEON support for load literal.
-      Movi(vd, rawbits);
-    }
+    // TODO: consider NEON support for load literal.
+    Movi(vd, rawbits);
   }
 }
 
@@ -1583,7 +1654,7 @@
 
 void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate()) {
+  if (operand.IsImmediate() && CanBeNegated(operand.GetImmediate())) {
     Mov(rd, -operand.GetImmediate());
   } else {
     Sub(rd, AppropriateZeroRegFor(rd), operand);
@@ -1899,6 +1970,22 @@
   setf16(wn);
 }
 
+void MacroAssembler::Chkfeat(const Register& xdn) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  MacroEmissionCheckScope guard(this);
+  if (xdn.Is(x16)) {
+    chkfeat(xdn);
+  } else {
+    UseScratchRegisterScope temps(this);
+    if (temps.TryAcquire(x16)) {
+      Mov(x16, xdn);
+      chkfeat(x16);
+      Mov(xdn, x16);
+    } else {
+      VIXL_ABORT();
+    }
+  }
+}
 
 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
   void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
@@ -3086,7 +3173,6 @@
       return masm_->GetScratchVRegisterList();
     case CPURegister::kPRegisterBank:
       return masm_->GetScratchPRegisterList();
-      return NULL;
   }
   VIXL_UNREACHABLE();
   return NULL;
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index b1e9ec5..1763f49 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -1754,7 +1754,7 @@
   V(casah,  Casah)                            \
   V(caslh,  Caslh)                            \
   V(casalh, Casalh)
-// clang-format on
+  // clang-format on
 
 #define DEFINE_MACRO_ASM_FUNC(ASM, MASM)                                     \
   void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
@@ -1772,7 +1772,7 @@
   V(caspa,  Caspa)                          \
   V(caspl,  Caspl)                          \
   V(caspal, Caspal)
-// clang-format on
+  // clang-format on
 
 #define DEFINE_MACRO_ASM_FUNC(ASM, MASM)    \
   void MASM(const Register& rs,             \
@@ -1817,7 +1817,7 @@
   V(MASM##alb, ASM##alb)                             \
   V(MASM##ah,  ASM##ah)                              \
   V(MASM##alh, ASM##alh)
-// clang-format on
+  // clang-format on
 
 #define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM)                                \
   void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
@@ -2680,6 +2680,65 @@
     uxtw(rd, rn);
   }
 
+  void Addg(const Register& xd,
+            const Register& xn,
+            int offset,
+            int tag_offset) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    addg(xd, xn, offset, tag_offset);
+  }
+  void Gmi(const Register& xd, const Register& xn, const Register& xm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    gmi(xd, xn, xm);
+  }
+  void Irg(const Register& xd, const Register& xn, const Register& xm = xzr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    irg(xd, xn, xm);
+  }
+  void Subg(const Register& xd,
+            const Register& xn,
+            int offset,
+            int tag_offset) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    subg(xd, xn, offset, tag_offset);
+  }
+  void Subp(const Register& xd, const Register& xn, const Register& xm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    subp(xd, xn, xm);
+  }
+  void Subps(const Register& xd, const Register& xn, const Register& xm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    subps(xd, xn, xm);
+  }
+  void Cmpp(const Register& xn, const Register& xm) { Subps(xzr, xn, xm); }
+  void Chkfeat(const Register& xdn);
+  void Gcspushm(const Register& rt) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    gcspushm(rt);
+  }
+  void Gcspopm(const Register& rt = xzr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    gcspopm(rt);
+  }
+  void Gcsss1(const Register& rt) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    gcsss1(rt);
+  }
+  void Gcsss2(const Register& rt) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    gcsss2(rt);
+  }
+
 // NEON 3 vector register instructions.
 #define NEON_3VREG_MACRO_LIST(V) \
   V(add, Add)                    \
@@ -2728,6 +2787,7 @@
   V(pmull2, Pmull2)              \
   V(raddhn, Raddhn)              \
   V(raddhn2, Raddhn2)            \
+  V(rax1, Rax1)                  \
   V(rsubhn, Rsubhn)              \
   V(rsubhn2, Rsubhn2)            \
   V(saba, Saba)                  \
@@ -2740,8 +2800,21 @@
   V(saddl2, Saddl2)              \
   V(saddw, Saddw)                \
   V(saddw2, Saddw2)              \
+  V(sha1c, Sha1c)                \
+  V(sha1m, Sha1m)                \
+  V(sha1p, Sha1p)                \
+  V(sha1su0, Sha1su0)            \
+  V(sha256h, Sha256h)            \
+  V(sha256h2, Sha256h2)          \
+  V(sha256su1, Sha256su1)        \
+  V(sha512h, Sha512h)            \
+  V(sha512h2, Sha512h2)          \
+  V(sha512su1, Sha512su1)        \
   V(shadd, Shadd)                \
   V(shsub, Shsub)                \
+  V(sm3partw1, Sm3partw1)        \
+  V(sm3partw2, Sm3partw2)        \
+  V(sm4ekey, Sm4ekey)            \
   V(smax, Smax)                  \
   V(smaxp, Smaxp)                \
   V(smin, Smin)                  \
@@ -2836,6 +2909,10 @@
   V(abs, Abs)                    \
   V(addp, Addp)                  \
   V(addv, Addv)                  \
+  V(aesd, Aesd)                  \
+  V(aese, Aese)                  \
+  V(aesimc, Aesimc)              \
+  V(aesmc, Aesmc)                \
   V(cls, Cls)                    \
   V(clz, Clz)                    \
   V(cnt, Cnt)                    \
@@ -2884,6 +2961,11 @@
   V(sadalp, Sadalp)              \
   V(saddlp, Saddlp)              \
   V(saddlv, Saddlv)              \
+  V(sha1h, Sha1h)                \
+  V(sha1su1, Sha1su1)            \
+  V(sha256su0, Sha256su0)        \
+  V(sha512su0, Sha512su0)        \
+  V(sm4e, Sm4e)                  \
   V(smaxv, Smaxv)                \
   V(sminv, Sminv)                \
   V(sqabs, Sqabs)                \
@@ -2974,7 +3056,11 @@
   V(umlsl, Umlsl)                    \
   V(umlsl2, Umlsl2)                  \
   V(sudot, Sudot)                    \
-  V(usdot, Usdot)
+  V(usdot, Usdot)                    \
+  V(sm3tt1a, Sm3tt1a)                \
+  V(sm3tt1b, Sm3tt1b)                \
+  V(sm3tt2a, Sm3tt2a)                \
+  V(sm3tt2b, Sm3tt2b)
 
 
 #define DEFINE_MACRO_ASM_FUNC(ASM, MASM)    \
@@ -3055,7 +3141,6 @@
 #define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \
   V(add, Add)                               \
   V(and_, And)                              \
-  V(bic, Bic)                               \
   V(eor, Eor)                               \
   V(mul, Mul)                               \
   V(orr, Orr)                               \
@@ -3094,6 +3179,14 @@
   SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
 #undef DEFINE_MACRO_ASM_FUNC
 
+  void Bcax(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    bcax(vd, vn, vm, va);
+  }
   void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
@@ -3134,6 +3227,14 @@
     SingleEmissionCheckScope guard(this);
     dup(vd, rn);
   }
+  void Eor3(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    eor3(vd, vn, vm, va);
+  }
   void Ext(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
@@ -3430,6 +3531,14 @@
     SingleEmissionCheckScope guard(this);
     st4(vt, vt2, vt3, vt4, lane, dst);
   }
+  void Sm3ss1(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm,
+              const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sm3ss1(vd, vn, vm, va);
+  }
   void Smov(const Register& rd, const VRegister& vn, int vn_index) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
@@ -3440,6 +3549,14 @@
     SingleEmissionCheckScope guard(this);
     umov(rd, vn, vn_index);
   }
+  void Xar(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm,
+           int rotate) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    xar(vd, vn, vm, rotate);
+  }
   void Crc32b(const Register& rd, const Register& rn, const Register& rm) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
@@ -3571,6 +3688,10 @@
     MovprfxHelperScope guard(this, zd, pg, zn);
     asrd(zd, pg, zd, shift);
   }
+  void Bic(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
   void Bic(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
@@ -7486,6 +7607,279 @@
              const ZRegister& zm,
              int index);
 
+  // MTE
+  void St2g(const Register& rt, const MemOperand& addr);
+  void Stg(const Register& rt, const MemOperand& addr);
+  void Stgp(const Register& rt1, const Register& rt2, const MemOperand& addr);
+  void Stz2g(const Register& rt, const MemOperand& addr);
+  void Stzg(const Register& rt, const MemOperand& addr);
+  void Ldg(const Register& rt, const MemOperand& addr);
+
+  void Cpye(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpye(rd, rs, rn);
+  }
+
+  void Cpyen(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyen(rd, rs, rn);
+  }
+
+  void Cpyern(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyern(rd, rs, rn);
+  }
+
+  void Cpyewn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyewn(rd, rs, rn);
+  }
+
+  void Cpyfe(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfe(rd, rs, rn);
+  }
+
+  void Cpyfen(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfen(rd, rs, rn);
+  }
+
+  void Cpyfern(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfern(rd, rs, rn);
+  }
+
+  void Cpyfewn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfewn(rd, rs, rn);
+  }
+
+  void Cpyfm(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfm(rd, rs, rn);
+  }
+
+  void Cpyfmn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfmn(rd, rs, rn);
+  }
+
+  void Cpyfmrn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfmrn(rd, rs, rn);
+  }
+
+  void Cpyfmwn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfmwn(rd, rs, rn);
+  }
+
+  void Cpyfp(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfp(rd, rs, rn);
+  }
+
+  void Cpyfpn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfpn(rd, rs, rn);
+  }
+
+  void Cpyfprn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfprn(rd, rs, rn);
+  }
+
+  void Cpyfpwn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyfpwn(rd, rs, rn);
+  }
+
+  void Cpym(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpym(rd, rs, rn);
+  }
+
+  void Cpymn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpymn(rd, rs, rn);
+  }
+
+  void Cpymrn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpymrn(rd, rs, rn);
+  }
+
+  void Cpymwn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpymwn(rd, rs, rn);
+  }
+
+  void Cpyp(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyp(rd, rs, rn);
+  }
+
+  void Cpypn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpypn(rd, rs, rn);
+  }
+
+  void Cpyprn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpyprn(rd, rs, rn);
+  }
+
+  void Cpypwn(const Register& rd, const Register& rs, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cpypwn(rd, rs, rn);
+  }
+
+  void Sete(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sete(rd, rn, rs);
+  }
+
+  void Seten(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    seten(rd, rn, rs);
+  }
+
+  void Setge(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setge(rd, rn, rs);
+  }
+
+  void Setgen(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setgen(rd, rn, rs);
+  }
+
+  void Setgm(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setgm(rd, rn, rs);
+  }
+
+  void Setgmn(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setgmn(rd, rn, rs);
+  }
+
+  void Setgp(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setgp(rd, rn, rs);
+  }
+
+  void Setgpn(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setgpn(rd, rn, rs);
+  }
+
+  void Setm(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setm(rd, rn, rs);
+  }
+
+  void Setmn(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setmn(rd, rn, rs);
+  }
+
+  void Setp(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setp(rd, rn, rs);
+  }
+
+  void Setpn(const Register& rd, const Register& rn, const Register& rs) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setpn(rd, rn, rs);
+  }
+
+// Macro assembler wrappers that package the MOPS instructions into a single
+// call.
+#define MOPS_LIST(V)  \
+  V(Set, set, )       \
+  V(Setn, set, n)     \
+  V(Setg, setg, )     \
+  V(Setgn, setg, n)   \
+  V(Cpy, cpy, )       \
+  V(Cpyn, cpy, n)     \
+  V(Cpyrn, cpy, rn)   \
+  V(Cpywn, cpy, wn)   \
+  V(Cpyf, cpyf, )     \
+  V(Cpyfn, cpyf, n)   \
+  V(Cpyfrn, cpyf, rn) \
+  V(Cpyfwn, cpyf, wn)
+
+#define DEFINE_MACRO_ASM_FUNC(MASM, ASMPREFIX, ASMSUFFIX)                 \
+  void MASM(const Register& ra, const Register& rb, const Register& rc) { \
+    ExactAssemblyScope scope(this, 3 * kInstructionSize);                 \
+    ASMPREFIX##p##ASMSUFFIX(ra, rb, rc);                                  \
+    ASMPREFIX##m##ASMSUFFIX(ra, rb, rc);                                  \
+    ASMPREFIX##e##ASMSUFFIX(ra, rb, rc);                                  \
+  }
+  MOPS_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+  void Abs(const Register& rd, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    abs(rd, rn);
+  }
+
+  void Cnt(const Register& rd, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    cnt(rd, rn);
+  }
+
+  void Ctz(const Register& rd, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ctz(rd, rn);
+  }
+
+  void Smax(const Register& rd, const Register& rn, const Operand& op);
+  void Smin(const Register& rd, const Register& rn, const Operand& op);
+  void Umax(const Register& rd, const Register& rn, const Operand& op);
+  void Umin(const Register& rd, const Register& rn, const Operand& op);
+
   template <typename T>
   Literal<T>* CreateLiteralDestroyedWithPool(T value) {
     return new Literal<T>(value,
@@ -7857,9 +8251,10 @@
       UseScratchRegisterScope* scratch_scope);
 
   bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
+    int64_t offset = label->GetLocation() - GetCursorOffset();
+    VIXL_ASSERT(IsMultiple(offset, kInstructionSize));
     return !Instruction::IsValidImmPCOffset(branch_type,
-                                            label->GetLocation() -
-                                                GetCursorOffset());
+                                            offset / kInstructionSize);
   }
 
   void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features,
@@ -8270,6 +8665,16 @@
     return AcquireFrom(available, kGoverningPRegisterMask).P();
   }
 
+  // TODO: extend to other scratch register lists.
+  bool TryAcquire(const Register& required_reg) {
+    CPURegList* list = masm_->GetScratchRegisterList();
+    if (list->IncludesAliasOf(required_reg)) {
+      list->Remove(required_reg);
+      return true;
+    }
+    return false;
+  }
+
   Register AcquireRegisterOfSize(int size_in_bits);
   Register AcquireSameSizeAs(const Register& reg) {
     return AcquireRegisterOfSize(reg.GetSizeInBits());
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
index 6bf5607..56a504c 100644
--- a/src/aarch64/macro-assembler-sve-aarch64.cc
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -627,6 +627,7 @@
 // non-commutative and no reversed form is provided.
 #define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
   V(Addp, addp)                             \
+  V(Bic, bic)                               \
   V(Faddp, faddp)                           \
   V(Fmaxnmp, fmaxnmp)                       \
   V(Fminnmp, fminnmp)                       \
@@ -831,11 +832,12 @@
       Fdup(zd, static_cast<float>(imm));
       break;
     case kDRegSize:
-      if (IsImmFP64(imm)) {
+      uint64_t bits = DoubleToRawbits(imm);
+      if (IsImmFP64(bits)) {
         SingleEmissionCheckScope guard(this);
         fdup(zd, imm);
       } else {
-        Dup(zd, DoubleToRawbits(imm));
+        Dup(zd, bits);
       }
       break;
   }
diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc
index 8db129c..d1bd81c 100644
--- a/src/aarch64/operands-aarch64.cc
+++ b/src/aarch64/operands-aarch64.cc
@@ -34,7 +34,7 @@
   RegList list = list_ & mask;
   if (list == 0) return NoCPUReg;
   int index = CountTrailingZeros(list);
-  VIXL_ASSERT(((1 << index) & list) != 0);
+  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
   Remove(index);
   return CPURegister(index, size_, type_);
 }
@@ -45,7 +45,7 @@
   if (list == 0) return NoCPUReg;
   int index = CountLeadingZeros(list);
   index = kRegListSizeInBits - 1 - index;
-  VIXL_ASSERT(((1 << index) & list) != 0);
+  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
   Remove(index);
   return CPURegister(index, size_, type_);
 }
@@ -465,5 +465,5 @@
   }
   return false;
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h
index 08ee4a6..5469542 100644
--- a/src/aarch64/operands-aarch64.h
+++ b/src/aarch64/operands-aarch64.h
@@ -735,7 +735,7 @@
 class IntegerOperand {
  public:
 #define VIXL_INT_TYPES(V) \
-  V(char) V(short) V(int) V(long) V(long long)  // NOLINT(runtime/int)
+  V(char) V(short) V(int) V(long) V(long long)  // NOLINT(google-runtime-int)
 #define VIXL_DECL_INT_OVERLOADS(T)                                        \
   /* These are allowed to be implicit constructors because this is a */   \
   /* wrapper class that doesn't normally perform any type conversion. */  \
@@ -873,7 +873,7 @@
     return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm);
   }
 
-  // As above, but for unsigned fields. This is usuaully a simple operation, but
+  // As above, but for unsigned fields. This is usually a simple operation, but
   // is provided for symmetry.
   template <unsigned N, unsigned kShift, typename T>
   bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const {
@@ -909,7 +909,7 @@
   bool IsPositiveOrZero() const { return !is_negative_; }
 
   uint64_t GetMagnitude() const {
-    return is_negative_ ? -raw_bits_ : raw_bits_;
+    return is_negative_ ? UnsignedNegate(raw_bits_) : raw_bits_;
   }
 
  private:
@@ -993,7 +993,7 @@
   // We only support sizes up to X/D register sizes.
   size_t mem_op_size_;
 };
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_AARCH64_OPERANDS_AARCH64_H_
diff --git a/src/aarch64/pointer-auth-aarch64.cc b/src/aarch64/pointer-auth-aarch64.cc
index 55cf4ca..6bc3751 100644
--- a/src/aarch64/pointer-auth-aarch64.cc
+++ b/src/aarch64/pointer-auth-aarch64.cc
@@ -26,10 +26,10 @@
 
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
-#include "simulator-aarch64.h"
-
 #include "utils-vixl.h"
 
+#include "simulator-aarch64.h"
+
 namespace vixl {
 namespace aarch64 {
 
@@ -151,7 +151,7 @@
 
   uint64_t pac = ComputePAC(original_ptr, context, key);
 
-  uint64_t error_code = 1 << key.number;
+  uint64_t error_code = uint64_t{1} << key.number;
   if ((pac & pac_mask) == (ptr & pac_mask)) {
     return original_ptr;
   } else {
diff --git a/src/aarch64/registers-aarch64.cc b/src/aarch64/registers-aarch64.cc
index 735f43c..3df7831 100644
--- a/src/aarch64/registers-aarch64.cc
+++ b/src/aarch64/registers-aarch64.cc
@@ -24,11 +24,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "registers-aarch64.h"
+
 #include <sstream>
 #include <string>
 
-#include "registers-aarch64.h"
-
 namespace vixl {
 namespace aarch64 {
 
@@ -122,7 +122,7 @@
          IsValidPRegister();
 }
 
-// Most coersions simply invoke the necessary constructor.
+// Most coercions simply invoke the necessary constructor.
 #define VIXL_CPUREG_COERCION_LIST(U) \
   U(Register, W, R)                  \
   U(Register, X, R)                  \
@@ -143,7 +143,7 @@
 #undef VIXL_CPUREG_COERCION_LIST
 #undef VIXL_DEFINE_CPUREG_COERCION
 
-// NEON lane-format coersions always return VRegisters.
+// NEON lane-format coercions always return VRegisters.
 #define VIXL_CPUREG_NEON_COERCION_LIST(V) \
   V(8, B)                                 \
   V(16, B)                                \
@@ -153,7 +153,8 @@
   V(2, S)                                 \
   V(4, S)                                 \
   V(1, D)                                 \
-  V(2, D)
+  V(2, D)                                 \
+  V(1, Q)
 #define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
   VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
     VIXL_ASSERT(IsVRegister());                                        \
@@ -163,7 +164,7 @@
 #undef VIXL_CPUREG_NEON_COERCION_LIST
 #undef VIXL_DEFINE_CPUREG_NEON_COERCION
 
-// Semantic type coersion for sdot and udot.
+// Semantic type coercion for sdot and udot.
 // TODO: Use the qualifiers_ field to distinguish this from ::S().
 VRegister VRegister::S4B() const {
   VIXL_ASSERT(IsVRegister());
@@ -317,5 +318,5 @@
       !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
   return match;
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/src/aarch64/registers-aarch64.h b/src/aarch64/registers-aarch64.h
index 911974a..53bbe13 100644
--- a/src/aarch64/registers-aarch64.h
+++ b/src/aarch64/registers-aarch64.h
@@ -67,7 +67,8 @@
 // specialised register types can avoid run-time checks, and should therefore be
 // preferred where run-time polymorphism isn't required.
 //
-// Type-specific modifers are typically implemented only on the derived classes.
+// Type-specific modifiers are typically implemented only on the derived
+// classes.
 //
 // The encoding is such that CPURegister objects are cheap to pass by value.
 class CPURegister {
@@ -574,6 +575,7 @@
   VRegister V4S() const;
   VRegister V1D() const;
   VRegister V2D() const;
+  VRegister V1Q() const;
   VRegister S4B() const;
 
   bool IsValid() const { return IsValidVRegister(); }
@@ -894,7 +896,7 @@
                      const CPURegister& reg2,
                      const CPURegister& reg3 = NoCPUReg,
                      const CPURegister& reg4 = NoCPUReg);
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_AARCH64_REGISTERS_AARCH64_H_
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index d183dc3..f93bf34 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -26,14 +26,29 @@
 
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
-#include <errno.h>
-#include <unistd.h>
+#include "simulator-aarch64.h"
 
 #include <cmath>
 #include <cstring>
+#include <errno.h>
 #include <limits>
 
-#include "simulator-aarch64.h"
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX
+#include <Windows.h>
+#undef MultiplyHigh
+#include <Memoryapi.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+#ifdef _MSC_VER
+#define VIXL_SYNC() MemoryBarrier()
+#else
+#define VIXL_SYNC() __sync_synchronize()
+#endif
 
 namespace vixl {
 namespace aarch64 {
@@ -42,6 +57,27 @@
 
 const Instruction* Simulator::kEndOfSimAddress = NULL;
 
+MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size) {
+  // With implicit checks, probe each byte of [address, address + access_size)
+  // and stop at the first failure; without them, always report success.
+#ifndef VIXL_ENABLE_IMPLICIT_CHECKS
+  USE(address);
+  USE(access_size);
+#else
+  for (uintptr_t offset = 0; offset != access_size; ++offset) {
+    const auto probe = _vixl_internal_ReadMemory(address, offset);
+    if (probe != MemoryAccessResult::Failure) continue;
+    return MemoryAccessResult::Failure;
+  }
+
+  // Either no probe raised a signal, or the signal handler did not correctly
+  // return MemoryAccessResult::Failure.
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
+  return MemoryAccessResult::Success;
+}
+
+bool MetaDataDepot::MetaDataMTE::is_active = false;
+
 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
   int width = msb - lsb + 1;
   VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
@@ -66,7 +102,6 @@
   }
 }
 
-
 const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
   static const FormToVisitorFnMap form_to_visitor = {
       DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
@@ -406,20 +441,141 @@
       {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
       {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
       {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
+      {"addg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
+      {"gmi_64g_dp_2src"_h, &Simulator::SimulateMTETagMaskInsert},
+      {"irg_64i_dp_2src"_h, &Simulator::Simulate_XdSP_XnSP_Xm},
+      {"ldg_64loffset_ldsttags"_h, &Simulator::SimulateMTELoadTag},
+      {"st2g_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"st2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"st2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stgp_64_ldstpair_off"_h, &Simulator::SimulateMTEStoreTagPair},
+      {"stgp_64_ldstpair_post"_h, &Simulator::SimulateMTEStoreTagPair},
+      {"stgp_64_ldstpair_pre"_h, &Simulator::SimulateMTEStoreTagPair},
+      {"stg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stz2g_64soffset_ldsttags"_h,
+       &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stz2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stz2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stzg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stzg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"stzg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
+      {"subg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
+      {"subps_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
+      {"subp_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
+      {"cpyen_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyern_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpye_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyfen_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyfern_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyfewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyfe_cpy_memcms"_h, &Simulator::SimulateCpyE},
+      {"cpyfmn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpyfmrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpyfmwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpyfm_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpyfpn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
+      {"cpyfprn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
+      {"cpyfpwn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
+      {"cpyfp_cpy_memcms"_h, &Simulator::SimulateCpyFP},
+      {"cpymn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpymrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpymwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpym_cpy_memcms"_h, &Simulator::SimulateCpyM},
+      {"cpypn_cpy_memcms"_h, &Simulator::SimulateCpyP},
+      {"cpyprn_cpy_memcms"_h, &Simulator::SimulateCpyP},
+      {"cpypwn_cpy_memcms"_h, &Simulator::SimulateCpyP},
+      {"cpyp_cpy_memcms"_h, &Simulator::SimulateCpyP},
+      {"setp_set_memcms"_h, &Simulator::SimulateSetP},
+      {"setpn_set_memcms"_h, &Simulator::SimulateSetP},
+      {"setgp_set_memcms"_h, &Simulator::SimulateSetGP},
+      {"setgpn_set_memcms"_h, &Simulator::SimulateSetGP},
+      {"setm_set_memcms"_h, &Simulator::SimulateSetM},
+      {"setmn_set_memcms"_h, &Simulator::SimulateSetM},
+      {"setgm_set_memcms"_h, &Simulator::SimulateSetGM},
+      {"setgmn_set_memcms"_h, &Simulator::SimulateSetGM},
+      {"sete_set_memcms"_h, &Simulator::SimulateSetE},
+      {"seten_set_memcms"_h, &Simulator::SimulateSetE},
+      {"setge_set_memcms"_h, &Simulator::SimulateSetE},
+      {"setgen_set_memcms"_h, &Simulator::SimulateSetE},
+      {"abs_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"abs_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"cnt_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"cnt_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"ctz_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"ctz_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+      {"smax_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+      {"smax_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+      {"smin_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+      {"smin_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+      {"smax_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+      {"smax_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+      {"smin_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+      {"smin_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+      {"umax_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umax_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umin_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umin_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umax_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+      {"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+      {"bcax_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
+      {"eor3_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
+      {"rax1_vvv2_cryptosha512_3"_h, &Simulator::SimulateNEONSHA3},
+      {"xar_vvv2_crypto3_imm6"_h, &Simulator::SimulateNEONSHA3},
+      {"sha512h_qqv_cryptosha512_3"_h, &Simulator::SimulateSHA512},
+      {"sha512h2_qqv_cryptosha512_3"_h, &Simulator::SimulateSHA512},
+      {"sha512su0_vv2_cryptosha512_2"_h, &Simulator::SimulateSHA512},
+      {"sha512su1_vvv2_cryptosha512_3"_h, &Simulator::SimulateSHA512},
+      {"pmullb_z_zz_q"_h, &Simulator::SimulateSVEPmull128},
+      {"pmullt_z_zz_q"_h, &Simulator::SimulateSVEPmull128},
   };
   return &form_to_visitor;
 }
 
+// Try to access the piece of memory given by the address passed in RDI and the
+// offset passed in RSI, using testb. If a signal is raised then the signal
+// handler should set RIP to _vixl_internal_AccessMemory_continue and RAX to
+// MemoryAccessResult::Failure. If no signal is raised then zero RAX before
+// returning.
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+#ifdef __x86_64__  // x86-64 host: probe one byte, then clear RAX.
+asm(R"(
+  .globl _vixl_internal_ReadMemory
+  _vixl_internal_ReadMemory:
+    testb (%rdi, %rsi), %al
+    xorq %rax, %rax
+    ret
+  .globl _vixl_internal_AccessMemory_continue
+  _vixl_internal_AccessMemory_continue:
+    ret
+)");
+#else  // Other hosts: the stub returns immediately without touching memory.
+asm(R"(
+  .globl _vixl_internal_ReadMemory
+  _vixl_internal_ReadMemory:
+    ret
+)");
+#endif  // __x86_64__
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
+
 Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
     : memory_(std::move(stack)),
       last_instr_(NULL),
-      cpu_features_auditor_(decoder, CPUFeatures::All()) {
+      cpu_features_auditor_(decoder, CPUFeatures::All()),
+      gcs_(kGCSNoStack),
+      gcs_enabled_(false) {
   // Ensure that shift operations act as the simulator expects.
   VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
   VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
 
   // Set up a placeholder pipe for CanReadMemory.
+#ifndef _WIN32
   VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);
+#endif
 
   // Set up the decoder.
   decoder_ = decoder;
@@ -428,6 +584,9 @@
   stream_ = stream;
 
   print_disasm_ = new PrintDisassembler(stream_);
+
+  memory_.AppendMetaData(&meta_data_);
+
   // The Simulator and Disassembler share the same available list, held by the
   // auditor. The Disassembler only annotates instructions with features that
   // are _not_ available, so registering the auditor should have no effect
@@ -454,13 +613,16 @@
   guard_pages_ = false;
 
   // Initialize the common state of RNDR and RNDRRS.
-  uint16_t seed[3] = {11, 22, 33};
-  VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
-  memcpy(rand_state_, seed, sizeof(rand_state_));
+  uint64_t seed = (11 + (22 << 16) + (static_cast<uint64_t>(33) << 32));
+  rand_gen_.seed(seed);
 
   // Initialize all bits of pseudo predicate register to true.
   LogicPRegister ones(pregister_all_true_);
   ones.SetAllBits();
+
+  // Initialize the debugger but disable it by default.
+  SetDebuggerEnabled(false);
+  debugger_ = std::make_unique<Debugger>(this);
 }
 
 void Simulator::ResetSystemRegisters() {
@@ -526,6 +688,8 @@
   ResetPRegisters();
 
   WriteSp(memory_.GetStack().GetBase());
+  ResetGCSState();
+  EnableGCSCheck();
 
   pc_ = NULL;
   pc_modified_ = false;
@@ -533,6 +697,8 @@
   // BTI state.
   btype_ = DefaultBType;
   next_btype_ = DefaultBType;
+
+  meta_data_.ResetState();
 }
 
 void Simulator::SetVectorLengthInBits(unsigned vector_length) {
@@ -559,8 +725,13 @@
   // The decoder may outlive the simulator.
   decoder_->RemoveVisitor(print_disasm_);
   delete print_disasm_;
+#ifndef _WIN32
   close(placeholder_pipe_fd_[0]);
   close(placeholder_pipe_fd_[1]);
+#endif
+  if (IsAllocatedGCS(gcs_)) {
+    GetGCSManager().FreeStack(gcs_);
+  }
 }
 
 
@@ -569,8 +740,21 @@
   // manually-set registers are logged _before_ the first instruction.
   LogAllWrittenRegisters();
 
-  while (pc_ != kEndOfSimAddress) {
-    ExecuteInstruction();
+  if (debugger_enabled_) {
+    // Slow path to check for breakpoints only if the debugger is enabled.
+    Debugger* debugger = GetDebugger();
+    while (!IsSimulationFinished()) {
+      if (debugger->IsAtBreakpoint()) {
+        fprintf(stream_, "Debugger hit breakpoint, breaking...\n");
+        debugger->Debug();
+      } else {
+        ExecuteInstruction();
+      }
+    }
+  } else {
+    while (!IsSimulationFinished()) {
+      ExecuteInstruction();
+    }
   }
 }
 
@@ -848,6 +1032,19 @@
   return std::make_pair(sum_hi.first, sum_lo.first);
 }
 
+vixl_uint128_t Simulator::Lsl128(vixl_uint128_t x, unsigned shift) const {
+  // 0 and 64 are special-cased; shifting a uint64_t by 64 bits is undefined.
+  VIXL_ASSERT(shift <= 64);
+  if (shift == 64) return std::make_pair(x.second, 0);
+  if (shift == 0) return x;
+  uint64_t carry = x.second >> (64 - shift);  // Bits moving into the top half.
+  return std::make_pair((x.first << shift) | carry, x.second << shift);
+}
+
+vixl_uint128_t Simulator::Eor128(vixl_uint128_t x, vixl_uint128_t y) const {
+  return std::make_pair(x.first ^ y.first, x.second ^ y.second);  // Per half.
+}
+
 vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
   // Negate the integer value. Throw an assertion when the input is INT128_MIN.
   VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
@@ -859,11 +1056,11 @@
 vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
   bool neg_result = false;
   if ((x >> 63) == 1) {
-    x = -x;
+    x = UnsignedNegate(x);
     neg_result = !neg_result;
   }
   if ((y >> 63) == 1) {
-    y = -y;
+    y = UnsignedNegate(y);
     neg_result = !neg_result;
   }
 
@@ -882,10 +1079,25 @@
   vixl_uint128_t result = Add128(a, b);
   result = Add128(result, c);
   result = Add128(result, d);
-  return neg_result ? std::make_pair(-result.first - 1, -result.second)
+  return neg_result ? std::make_pair(UnsignedNegate(result.first) - 1,
+                                     UnsignedNegate(result.second))
                     : result;
 }
 
+vixl_uint128_t Simulator::PolynomialMult128(uint64_t op1,
+                                            uint64_t op2,
+                                            int lane_size_in_bits) const {
+  // Carry-less multiply: XOR together one shifted copy of op2 per set op1 bit.
+  VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kDRegSize);
+  const vixl_uint128_t multiplicand = std::make_pair(0, op2);
+  vixl_uint128_t product = std::make_pair(0, 0);
+  for (int bit = 0; bit < lane_size_in_bits; bit++) {
+    if (((op1 >> bit) & 1) == 0) continue;
+    product = Eor128(product, Lsl128(multiplicand, bit));
+  }
+  return product;
+}
+
 int64_t Simulator::ShiftOperand(unsigned reg_size,
                                 uint64_t uvalue,
                                 Shift shift_type,
@@ -897,7 +1109,7 @@
     bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
     // The behavior is undefined in c++ if the shift amount greater than or
     // equal to the register lane size. Work out the shifted result based on
-    // architectural behavior before performing the c++ type shfit operations.
+    // architectural behavior before performing the c++ type shift operations.
     switch (shift_type) {
       case LSL:
         if (amount >= reg_size) {
@@ -1621,6 +1833,18 @@
   }
 }
 
+void Simulator::PrintGCS(bool is_push, uint64_t addr, size_t entry) {
+  // The arrow between entry and addr is "<-" for a push and "->" otherwise.
+  fprintf(stream_,
+          "# %sgcs0x%04" PRIx64 "[%" PRIxPTR "]: %s %s 0x%016" PRIx64 "\n",
+          clr_flag_name,
+          gcs_,
+          entry,
+          clr_normal,
+          is_push ? "<-" : "->",
+          addr);
+}
+
 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
                                        uint16_t future_access_mask,
                                        int struct_element_count,
@@ -1668,8 +1892,9 @@
   const char* sep = "";
   for (int i = struct_element_count - 1; i >= 0; i--) {
     int offset = lane_size_in_bytes * i;
-    uint64_t nibble = MemReadUint(lane_size_in_bytes, address + offset);
-    fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble);
+    auto nibble = MemReadUint(lane_size_in_bytes, address + offset);
+    VIXL_ASSERT(nibble);
+    fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, *nibble);
     sep = "'";
   }
   fprintf(stream_,
@@ -1688,7 +1913,9 @@
   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   if ((format & kPrintRegPartial) == 0) {
-    registers_[code].NotifyRegisterLogged();
+    if (code != kZeroRegCode) {
+      registers_[code].NotifyRegisterLogged();
+    }
   }
   // Scalar-format accesses use a simple format:
   //   "# {reg}: 0x{value} -> {address}"
@@ -1898,11 +2125,31 @@
   }
 }
 
+void Simulator::PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
+  // Logs one transferred byte as "<dst address> <- <value> <- <src address>";
+  // the whole trace line is written with a single formatted print.
+  fprintf(stream_,
+          "#               %s: %s0x%016" PRIxPTR " %s<- %s0x%02x%s"
+          " <- %s0x%016" PRIxPTR "%s\n",
+          clr_reg_name,
+          clr_memory_address,
+          dst,
+          clr_normal,
+          clr_reg_value,
+          value,
+          clr_normal,
+          clr_memory_address,
+          src,
+          clr_normal);
+}
+
 void Simulator::PrintRead(int rt_code,
                           PrintRegisterFormat format,
                           uintptr_t address) {
   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
-  registers_[rt_code].NotifyRegisterLogged();
+  if (rt_code != kZeroRegCode) {
+    registers_[rt_code].NotifyRegisterLogged();
+  }
   PrintAccess(rt_code, format, "<-", address);
 }
 
@@ -1921,7 +2168,9 @@
   // For sign- and zero-extension, make it clear that the resulting register
   // value is different from what is loaded from memory.
   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
-  registers_[rt_code].NotifyRegisterLogged();
+  if (rt_code != kZeroRegCode) {
+    registers_[rt_code].NotifyRegisterLogged();
+  }
   PrintRegister(rt_code, format);
   PrintPartialAccess(1,
                      0,
@@ -1947,7 +2196,9 @@
   // value, only print the relevant part of the value.
   format = GetPrintRegPartial(format);
   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
-  registers_[rt_code].NotifyRegisterLogged();
+  if (rt_code != kZeroRegCode) {
+    registers_[rt_code].NotifyRegisterLogged();
+  }
   PrintAccess(rt_code, format, "->", address);
 }
 
@@ -2660,6 +2911,23 @@
   }
 }
 
+void Simulator::SimulateSVEPmull128(const Instruction* instr) {
+  // pmullb packs the even D elements of each source, pmullt the odd ones.
+  SimVRegister& dst = ReadVRegister(instr->GetRd());
+  SimVRegister& rhs = ReadVRegister(instr->GetRm());
+  SimVRegister& lhs = ReadVRegister(instr->GetRn());
+  SimVRegister lhs_lanes, rhs_lanes;
+  if (form_hash_ != "pmullb_z_zz_q"_h) {
+    VIXL_ASSERT(form_hash_ == "pmullt_z_zz_q"_h);
+    pack_odd_elements(kFormatVnD, lhs_lanes, lhs);
+    pack_odd_elements(kFormatVnD, rhs_lanes, rhs);
+  } else {
+    pack_even_elements(kFormatVnD, lhs_lanes, lhs);
+    pack_even_elements(kFormatVnD, rhs_lanes, rhs);
+  }
+  pmull(kFormatVnQ, dst, lhs_lanes, rhs_lanes);
+}
+
 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
   VectorFormat vform = instr->GetSVEVectorFormat();
   SimVRegister& zd = ReadVRegister(instr->GetRd());
@@ -2674,15 +2942,15 @@
 
   switch (form_hash_) {
     case "pmullb_z_zz"_h:
-      // '00' is reserved for Q-sized lane.
-      if (vform == kFormatVnB) {
+      // Size '10' is undefined.
+      if (vform == kFormatVnS) {
         VIXL_UNIMPLEMENTED();
       }
       pmull(vform, zd, zn_b, zm_b);
       break;
     case "pmullt_z_zz"_h:
-      // '00' is reserved for Q-sized lane.
-      if (vform == kFormatVnB) {
+      // Size '10' is undefined.
+      if (vform == kFormatVnS) {
         VIXL_UNIMPLEMENTED();
       }
       pmull(vform, zd, zn_t, zm_t);
@@ -3571,6 +3839,7 @@
   switch (instr->Mask(UnconditionalBranchMask)) {
     case BL:
       WriteLr(instr->GetNextInstruction());
+      GCSPush(reinterpret_cast<uint64_t>(instr->GetNextInstruction()));
       VIXL_FALLTHROUGH();
     case B:
       WritePc(instr->GetImmPCOffsetTarget());
@@ -3613,6 +3882,8 @@
 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
   bool authenticate = false;
   bool link = false;
+  bool ret = false;
+  bool compare_gcs = false;
   uint64_t addr = ReadXRegister(instr->GetRn());
   uint64_t context = 0;
 
@@ -3621,7 +3892,6 @@
       link = true;
       VIXL_FALLTHROUGH();
     case BR:
-    case RET:
       break;
 
     case BLRAAZ:
@@ -3648,15 +3918,15 @@
       authenticate = true;
       addr = ReadXRegister(kLinkRegCode);
       context = ReadXRegister(31, Reg31IsStackPointer);
+      VIXL_FALLTHROUGH();
+    case RET:
+      compare_gcs = true;
+      ret = true;
       break;
     default:
       VIXL_UNREACHABLE();
   }
 
-  if (link) {
-    WriteLr(instr->GetNextInstruction());
-  }
-
   if (authenticate) {
     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
     addr = AuthPAC(addr, context, key, kInstructionPointer);
@@ -3667,8 +3937,52 @@
     }
   }
 
-  WritePc(Instruction::Cast(addr));
+  if (compare_gcs) {
+    uint64_t expected_lr = GCSPeek();
+    char msg[128];
+    if (expected_lr != 0) {
+      if ((expected_lr & 0x3) != 0) {
+        snprintf(msg,
+                 sizeof(msg),
+                 "GCS contains misaligned return address: 0x%016" PRIx64 "\n",
+                 expected_lr);
+        ReportGCSFailure(msg);
+      } else if ((addr != 0) && (addr != expected_lr)) {
+        snprintf(msg,
+                 sizeof(msg),
+                 "GCS mismatch: lr = 0x%016" PRIx64 ", gcs = 0x%016" PRIx64
+                 "\n",
+                 addr,
+                 expected_lr);
+        ReportGCSFailure(msg);
+      }
+      GCSPop();
+    }
+  }
+
+  if (link) {
+    WriteLr(instr->GetNextInstruction());
+    GCSPush(reinterpret_cast<uint64_t>(instr->GetNextInstruction()));
+  }
+
+  if (!ret) {
+    // Check for interceptions to the target address; if one is found, call it.
+    MetaDataDepot::BranchInterceptionAbstract* interception =
+        meta_data_.FindBranchInterception(addr);
+
+    if (interception != nullptr) {
+      // Instead of writing the address of the function to the PC, call the
+      // function's interception directly. We change the address that will be
+      // branched to so that afterwards we continue execution from
+      // the address in the LR. Note: the interception may modify the LR so
+      // store it before calling the interception.
+      addr = ReadRegister<uint64_t>(kLinkRegCode);
+      (*interception)(this);
+    }
+  }
+
   WriteNextBType(GetBTypeFromInstruction(instr));
+  WritePc(Instruction::Cast(addr));
 }
 
 
@@ -3759,6 +4073,9 @@
 
 
 void Simulator::VisitAddSubShifted(const Instruction* instr) {
+  // Add/sub/adds/subs don't allow ROR as a shift mode.
+  VIXL_ASSERT(instr->GetShiftDP() != ROR);
+
   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   int64_t op2 = ShiftOperand(reg_size,
                              ReadRegister(reg_size, instr->GetRm()),
@@ -3969,10 +4286,12 @@
     VIXL_ALIGNMENT_EXCEPTION();
   }
 
-  WriteRegister<T1>(rt, static_cast<T1>(MemRead<T2>(address)));
+  VIXL_DEFINE_OR_RETURN(value, MemRead<T2>(address));
+
+  WriteRegister<T1>(rt, static_cast<T1>(value));
 
   // Approximate load-acquire by issuing a full barrier after the load.
-  __sync_synchronize();
+  VIXL_SYNC();
 
   LogRead(rt, GetPrintRegisterFormat(element_size), address);
 }
@@ -3997,9 +4316,9 @@
   }
 
   // Approximate store-release by issuing a full barrier after the load.
-  __sync_synchronize();
+  VIXL_SYNC();
 
-  MemWrite<T>(address, ReadRegister<T>(rt));
+  if (!MemWrite<T>(address, ReadRegister<T>(rt))) return;
 
   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
 }
@@ -4086,7 +4405,9 @@
   // Verify that the calculated address is available to the host.
   VIXL_ASSERT(address == addr_ptr);
 
-  WriteXRegister(dst, MemRead<uint64_t>(addr_ptr), NoRegLog);
+  VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(addr_ptr));
+
+  WriteXRegister(dst, value, NoRegLog);
   unsigned access_size = 1 << 3;
   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
 }
@@ -4113,93 +4434,121 @@
   int extend_to_size = 0;
   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
   switch (op) {
-    case LDRB_w:
-      WriteWRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
+    case LDRB_w: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
+      WriteWRegister(srcdst, value, NoRegLog);
       extend_to_size = kWRegSizeInBytes;
       break;
-    case LDRH_w:
-      WriteWRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
+    }
+    case LDRH_w: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
+      WriteWRegister(srcdst, value, NoRegLog);
       extend_to_size = kWRegSizeInBytes;
       break;
-    case LDR_w:
-      WriteWRegister(srcdst, MemRead<uint32_t>(address), NoRegLog);
+    }
+    case LDR_w: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
+      WriteWRegister(srcdst, value, NoRegLog);
       extend_to_size = kWRegSizeInBytes;
       break;
-    case LDR_x:
-      WriteXRegister(srcdst, MemRead<uint64_t>(address), NoRegLog);
+    }
+    case LDR_x: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
+      WriteXRegister(srcdst, value, NoRegLog);
       extend_to_size = kXRegSizeInBytes;
       break;
-    case LDRSB_w:
-      WriteWRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
+    }
+    case LDRSB_w: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int8_t>(address));
+      WriteWRegister(srcdst, value, NoRegLog);
       extend_to_size = kWRegSizeInBytes;
       break;
-    case LDRSH_w:
-      WriteWRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
+    }
+    case LDRSH_w: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int16_t>(address));
+      WriteWRegister(srcdst, value, NoRegLog);
       extend_to_size = kWRegSizeInBytes;
       break;
-    case LDRSB_x:
-      WriteXRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
+    }
+    case LDRSB_x: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int8_t>(address));
+      WriteXRegister(srcdst, value, NoRegLog);
       extend_to_size = kXRegSizeInBytes;
       break;
-    case LDRSH_x:
-      WriteXRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
+    }
+    case LDRSH_x: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int16_t>(address));
+      WriteXRegister(srcdst, value, NoRegLog);
       extend_to_size = kXRegSizeInBytes;
       break;
-    case LDRSW_x:
-      WriteXRegister(srcdst, MemRead<int32_t>(address), NoRegLog);
+    }
+    case LDRSW_x: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
+      WriteXRegister(srcdst, value, NoRegLog);
       extend_to_size = kXRegSizeInBytes;
       break;
-    case LDR_b:
-      WriteBRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
+    }
+    case LDR_b: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
+      WriteBRegister(srcdst, value, NoRegLog);
       rt_is_vreg = true;
       break;
-    case LDR_h:
-      WriteHRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
+    }
+    case LDR_h: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
+      WriteHRegister(srcdst, value, NoRegLog);
       rt_is_vreg = true;
       break;
-    case LDR_s:
-      WriteSRegister(srcdst, MemRead<float>(address), NoRegLog);
+    }
+    case LDR_s: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
+      WriteSRegister(srcdst, value, NoRegLog);
       rt_is_vreg = true;
       break;
-    case LDR_d:
-      WriteDRegister(srcdst, MemRead<double>(address), NoRegLog);
+    }
+    case LDR_d: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
+      WriteDRegister(srcdst, value, NoRegLog);
       rt_is_vreg = true;
       break;
-    case LDR_q:
-      WriteQRegister(srcdst, MemRead<qreg_t>(address), NoRegLog);
+    }
+    case LDR_q: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
+      WriteQRegister(srcdst, value, NoRegLog);
       rt_is_vreg = true;
       break;
+    }
 
     case STRB_w:
-      MemWrite<uint8_t>(address, ReadWRegister(srcdst));
+      if (!MemWrite<uint8_t>(address, ReadWRegister(srcdst))) return;
       break;
     case STRH_w:
-      MemWrite<uint16_t>(address, ReadWRegister(srcdst));
+      if (!MemWrite<uint16_t>(address, ReadWRegister(srcdst))) return;
       break;
     case STR_w:
-      MemWrite<uint32_t>(address, ReadWRegister(srcdst));
+      if (!MemWrite<uint32_t>(address, ReadWRegister(srcdst))) return;
       break;
     case STR_x:
-      MemWrite<uint64_t>(address, ReadXRegister(srcdst));
+      if (!MemWrite<uint64_t>(address, ReadXRegister(srcdst))) return;
       break;
     case STR_b:
-      MemWrite<uint8_t>(address, ReadBRegister(srcdst));
+      if (!MemWrite<uint8_t>(address, ReadBRegister(srcdst))) return;
       rt_is_vreg = true;
       break;
     case STR_h:
-      MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
+      if (!MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst))) return;
       rt_is_vreg = true;
       break;
     case STR_s:
-      MemWrite<float>(address, ReadSRegister(srcdst));
+      if (!MemWrite<float>(address, ReadSRegister(srcdst))) return;
       rt_is_vreg = true;
       break;
     case STR_d:
-      MemWrite<double>(address, ReadDRegister(srcdst));
+      if (!MemWrite<double>(address, ReadDRegister(srcdst))) return;
       rt_is_vreg = true;
       break;
     case STR_q:
-      MemWrite<qreg_t>(address, ReadQRegister(srcdst));
+      if (!MemWrite<qreg_t>(address, ReadQRegister(srcdst))) return;
       rt_is_vreg = true;
       break;
 
@@ -4280,64 +4629,76 @@
     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
     // will print a more detailed log.
     case LDP_w: {
-      WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
-      WriteWRegister(rt2, MemRead<uint32_t>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<uint32_t>(address2));
+      WriteWRegister(rt, value, NoRegLog);
+      WriteWRegister(rt2, value2, NoRegLog);
       break;
     }
     case LDP_s: {
-      WriteSRegister(rt, MemRead<float>(address), NoRegLog);
-      WriteSRegister(rt2, MemRead<float>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<float>(address2));
+      WriteSRegister(rt, value, NoRegLog);
+      WriteSRegister(rt2, value2, NoRegLog);
       rt_is_vreg = true;
       break;
     }
     case LDP_x: {
-      WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
-      WriteXRegister(rt2, MemRead<uint64_t>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<uint64_t>(address2));
+      WriteXRegister(rt, value, NoRegLog);
+      WriteXRegister(rt2, value2, NoRegLog);
       break;
     }
     case LDP_d: {
-      WriteDRegister(rt, MemRead<double>(address), NoRegLog);
-      WriteDRegister(rt2, MemRead<double>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<double>(address2));
+      WriteDRegister(rt, value, NoRegLog);
+      WriteDRegister(rt2, value2, NoRegLog);
       rt_is_vreg = true;
       break;
     }
     case LDP_q: {
-      WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
-      WriteQRegister(rt2, MemRead<qreg_t>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<qreg_t>(address2));
+      WriteQRegister(rt, value, NoRegLog);
+      WriteQRegister(rt2, value2, NoRegLog);
       rt_is_vreg = true;
       break;
     }
     case LDPSW_x: {
-      WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
-      WriteXRegister(rt2, MemRead<int32_t>(address2), NoRegLog);
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
+      VIXL_DEFINE_OR_RETURN(value2, MemRead<int32_t>(address2));
+      WriteXRegister(rt, value, NoRegLog);
+      WriteXRegister(rt2, value2, NoRegLog);
       sign_extend = true;
       break;
     }
     case STP_w: {
-      MemWrite<uint32_t>(address, ReadWRegister(rt));
-      MemWrite<uint32_t>(address2, ReadWRegister(rt2));
+      if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
+      if (!MemWrite<uint32_t>(address2, ReadWRegister(rt2))) return;
       break;
     }
     case STP_s: {
-      MemWrite<float>(address, ReadSRegister(rt));
-      MemWrite<float>(address2, ReadSRegister(rt2));
+      if (!MemWrite<float>(address, ReadSRegister(rt))) return;
+      if (!MemWrite<float>(address2, ReadSRegister(rt2))) return;
       rt_is_vreg = true;
       break;
     }
     case STP_x: {
-      MemWrite<uint64_t>(address, ReadXRegister(rt));
-      MemWrite<uint64_t>(address2, ReadXRegister(rt2));
+      if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
+      if (!MemWrite<uint64_t>(address2, ReadXRegister(rt2))) return;
       break;
     }
     case STP_d: {
-      MemWrite<double>(address, ReadDRegister(rt));
-      MemWrite<double>(address2, ReadDRegister(rt2));
+      if (!MemWrite<double>(address, ReadDRegister(rt))) return;
+      if (!MemWrite<double>(address2, ReadDRegister(rt2))) return;
       rt_is_vreg = true;
       break;
     }
     case STP_q: {
-      MemWrite<qreg_t>(address, ReadQRegister(rt));
-      MemWrite<qreg_t>(address2, ReadQRegister(rt2));
+      if (!MemWrite<qreg_t>(address, ReadQRegister(rt))) return;
+      if (!MemWrite<qreg_t>(address2, ReadQRegister(rt2))) return;
       rt_is_vreg = true;
       break;
     }
@@ -4397,18 +4758,19 @@
   // associated with that location, even if the compare subsequently fails.
   local_monitor_.Clear();
 
-  T data = MemRead<T>(address);
+  VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
+
   if (is_acquire) {
     // Approximate load-acquire by issuing a full barrier after the load.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
 
   if (data == comparevalue) {
     if (is_release) {
       // Approximate store-release by issuing a full barrier before the store.
-      __sync_synchronize();
+      VIXL_SYNC();
     }
-    MemWrite<T>(address, newvalue);
+    if (!MemWrite<T>(address, newvalue)) return;
     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
   }
   WriteRegister<T>(rs, data, NoRegLog);
@@ -4444,12 +4806,12 @@
   // associated with that location, even if the compare subsequently fails.
   local_monitor_.Clear();
 
-  T data_low = MemRead<T>(address);
-  T data_high = MemRead<T>(address2);
+  VIXL_DEFINE_OR_RETURN(data_low, MemRead<T>(address));
+  VIXL_DEFINE_OR_RETURN(data_high, MemRead<T>(address2));
 
   if (is_acquire) {
     // Approximate load-acquire by issuing a full barrier after the load.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
 
   bool same =
@@ -4457,11 +4819,11 @@
   if (same) {
     if (is_release) {
       // Approximate store-release by issuing a full barrier before the store.
-      __sync_synchronize();
+      VIXL_SYNC();
     }
 
-    MemWrite<T>(address, newvalue_low);
-    MemWrite<T>(address2, newvalue_high);
+    if (!MemWrite<T>(address, newvalue_low)) return;
+    if (!MemWrite<T>(address2, newvalue_high)) return;
   }
 
   WriteRegister<T>(rs + 1, data_high, NoRegLog);
@@ -4478,6 +4840,7 @@
 }
 
 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
+#ifndef _WIN32
   // To simulate fault-tolerant loads, we need to know what host addresses we
   // can access without generating a real fault. One way to do that is to
   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
@@ -4535,6 +4898,44 @@
   }
 
   return can_read;
+#else
+  // To simulate fault-tolerant loads, we need to know what host addresses we
+  // can access without generating a real fault. The pipe-based code above is
+  // almost, but not fully, compatible with Windows, so query the page state
+  // with the platform-specific VirtualQuery() API instead[2].
+  //
+  // [2]: https://stackoverflow.com/a/18395247/9109981
+
+  bool can_read = true;
+  MEMORY_BASIC_INFORMATION pageInfo;
+
+  size_t checked = 0;
+  while (can_read && (checked < size)) {
+    size_t result = VirtualQuery(reinterpret_cast<void*>(address + checked),
+                                 &pageInfo,
+                                 sizeof(pageInfo));
+
+    if (result == 0) {  // VirtualQuery() returns zero on failure.
+      can_read = false;
+      break;
+    }
+
+    if (pageInfo.State != MEM_COMMIT) {  // Only committed pages are usable.
+      can_read = false;
+      break;
+    }
+
+    if (pageInfo.Protect == PAGE_NOACCESS || pageInfo.Protect == PAGE_EXECUTE) {
+      can_read = false;
+      break;
+    }
+    checked += pageInfo.RegionSize -
+               ((address + checked) -
+                reinterpret_cast<uintptr_t>(pageInfo.BaseAddress));
+  }
+
+  return can_read;
+#endif
 }
 
 void Simulator::PrintExclusiveAccessWarning() {
@@ -4628,54 +5029,66 @@
           case LDXRB_w:
           case LDAXRB_w:
           case LDARB_w:
-          case LDLARB:
-            WriteWRegister(rt, MemRead<uint8_t>(address), NoRegLog);
+          case LDLARB: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
+            WriteWRegister(rt, value, NoRegLog);
             reg_size = kWRegSizeInBytes;
             break;
+          }
           case LDXRH_w:
           case LDAXRH_w:
           case LDARH_w:
-          case LDLARH:
-            WriteWRegister(rt, MemRead<uint16_t>(address), NoRegLog);
+          case LDLARH: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
+            WriteWRegister(rt, value, NoRegLog);
             reg_size = kWRegSizeInBytes;
             break;
+          }
           case LDXR_w:
           case LDAXR_w:
           case LDAR_w:
-          case LDLAR_w:
-            WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
+          case LDLAR_w: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
+            WriteWRegister(rt, value, NoRegLog);
             reg_size = kWRegSizeInBytes;
             break;
+          }
           case LDXR_x:
           case LDAXR_x:
           case LDAR_x:
-          case LDLAR_x:
-            WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
+          case LDLAR_x: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
+            WriteXRegister(rt, value, NoRegLog);
             reg_size = kXRegSizeInBytes;
             break;
+          }
           case LDXP_w:
-          case LDAXP_w:
-            WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
-            WriteWRegister(rt2,
-                           MemRead<uint32_t>(address + element_size),
-                           NoRegLog);
+          case LDAXP_w: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
+            VIXL_DEFINE_OR_RETURN(value2,
+                                  MemRead<uint32_t>(address + element_size));
+            WriteWRegister(rt, value, NoRegLog);
+            WriteWRegister(rt2, value2, NoRegLog);
             reg_size = kWRegSizeInBytes;
             break;
+          }
           case LDXP_x:
-          case LDAXP_x:
-            WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
-            WriteXRegister(rt2,
-                           MemRead<uint64_t>(address + element_size),
-                           NoRegLog);
+          case LDAXP_x: {
+            VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
+            VIXL_DEFINE_OR_RETURN(value2,
+                                  MemRead<uint64_t>(address + element_size));
+            WriteXRegister(rt, value, NoRegLog);
+            WriteXRegister(rt2, value2, NoRegLog);
             reg_size = kXRegSizeInBytes;
             break;
+          }
           default:
             VIXL_UNREACHABLE();
         }
 
         if (is_acquire_release) {
           // Approximate load-acquire by issuing a full barrier after the load.
-          __sync_synchronize();
+          VIXL_SYNC();
         }
 
         PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
@@ -4687,7 +5100,7 @@
         if (is_acquire_release) {
           // Approximate store-release by issuing a full barrier before the
           // store.
-          __sync_synchronize();
+          VIXL_SYNC();
         }
 
         bool do_store = true;
@@ -4709,35 +5122,41 @@
             case STLXRB_w:
             case STLRB_w:
             case STLLRB:
-              MemWrite<uint8_t>(address, ReadWRegister(rt));
+              if (!MemWrite<uint8_t>(address, ReadWRegister(rt))) return;
               break;
             case STXRH_w:
             case STLXRH_w:
             case STLRH_w:
             case STLLRH:
-              MemWrite<uint16_t>(address, ReadWRegister(rt));
+              if (!MemWrite<uint16_t>(address, ReadWRegister(rt))) return;
               break;
             case STXR_w:
             case STLXR_w:
             case STLR_w:
             case STLLR_w:
-              MemWrite<uint32_t>(address, ReadWRegister(rt));
+              if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
               break;
             case STXR_x:
             case STLXR_x:
             case STLR_x:
             case STLLR_x:
-              MemWrite<uint64_t>(address, ReadXRegister(rt));
+              if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
               break;
             case STXP_w:
             case STLXP_w:
-              MemWrite<uint32_t>(address, ReadWRegister(rt));
-              MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
+              if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
+              if (!MemWrite<uint32_t>(address + element_size,
+                                      ReadWRegister(rt2))) {
+                return;
+              }
               break;
             case STXP_x:
             case STLXP_x:
-              MemWrite<uint64_t>(address, ReadXRegister(rt));
-              MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
+              if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
+              if (!MemWrite<uint64_t>(address + element_size,
+                                      ReadXRegister(rt2))) {
+                return;
+              }
               break;
             default:
               VIXL_UNREACHABLE();
@@ -4770,11 +5189,11 @@
 
   T value = ReadRegister<T>(rs);
 
-  T data = MemRead<T>(address);
+  VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
 
   if (is_acquire) {
     // Approximate load-acquire by issuing a full barrier after the load.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
 
   T result = 0;
@@ -4808,7 +5227,7 @@
 
   if (is_release) {
     // Approximate store-release by issuing a full barrier before the store.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
 
   WriteRegister<T>(rt, data, NoRegLog);
@@ -4820,7 +5239,7 @@
   PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
   LogExtendingRead(rt, format, element_size, address);
 
-  MemWrite<T>(address, result);
+  if (!MemWrite<T>(address, result)) return;
   format = GetPrintRegisterFormatForSize(element_size);
   LogWrite(rs, format, address);
 }
@@ -4839,17 +5258,18 @@
 
   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
 
-  T data = MemRead<T>(address);
+  VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
+
   if (is_acquire) {
     // Approximate load-acquire by issuing a full barrier after the load.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
 
   if (is_release) {
     // Approximate store-release by issuing a full barrier before the store.
-    __sync_synchronize();
+    VIXL_SYNC();
   }
-  MemWrite<T>(address, ReadRegister<T>(rs));
+  if (!MemWrite<T>(address, ReadRegister<T>(rs))) return;
 
   WriteRegister<T>(rt, data);
 
@@ -4868,10 +5288,12 @@
 
   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
 
-  WriteRegister<T>(rt, MemRead<T>(address));
+  VIXL_DEFINE_OR_RETURN(value, MemRead<T>(address));
+
+  WriteRegister<T>(rt, value);
 
   // Approximate load-acquire by issuing a full barrier after the load.
-  __sync_synchronize();
+  VIXL_SYNC();
 
   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
 }
@@ -4936,7 +5358,7 @@
     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
       AtomicMemorySimpleHelper<int64_t>(instr);
       break;
-    // clang-format on
+      // clang-format on
 
     case SWPB:
     case SWPAB:
@@ -4988,30 +5410,42 @@
   switch (instr->Mask(LoadLiteralMask)) {
     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
     // print a more detailed log.
-    case LDR_w_lit:
-      WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
+    case LDR_w_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
+      WriteWRegister(rt, value, NoRegLog);
       LogRead(rt, kPrintWReg, address);
       break;
-    case LDR_x_lit:
-      WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
+    }
+    case LDR_x_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
+      WriteXRegister(rt, value, NoRegLog);
       LogRead(rt, kPrintXReg, address);
       break;
-    case LDR_s_lit:
-      WriteSRegister(rt, MemRead<float>(address), NoRegLog);
+    }
+    case LDR_s_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
+      WriteSRegister(rt, value, NoRegLog);
       LogVRead(rt, kPrintSRegFP, address);
       break;
-    case LDR_d_lit:
-      WriteDRegister(rt, MemRead<double>(address), NoRegLog);
+    }
+    case LDR_d_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
+      WriteDRegister(rt, value, NoRegLog);
       LogVRead(rt, kPrintDRegFP, address);
       break;
-    case LDR_q_lit:
-      WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
+    }
+    case LDR_q_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
+      WriteQRegister(rt, value, NoRegLog);
       LogVRead(rt, kPrintReg1Q, address);
       break;
-    case LDRSW_x_lit:
-      WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
+    }
+    case LDRSW_x_lit: {
+      VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
+      WriteXRegister(rt, value, NoRegLog);
       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
       break;
+    }
 
     // Ignore prfm hint instructions.
     case PRFM_lit:
@@ -5120,7 +5554,7 @@
         break;
       case CSNEG_w:
       case CSNEG_x:
-        new_val = -new_val;
+        new_val = UnsignedNegate(new_val);
         break;
       default:
         VIXL_UNIMPLEMENTED();
@@ -5131,112 +5565,102 @@
 }
 
 
-#define PAUTH_MODES_REGISTER_CONTEXT(V) \
-  V(IA, kPACKeyIA, kInstructionPointer) \
-  V(IB, kPACKeyIB, kInstructionPointer) \
-  V(DA, kPACKeyDA, kDataPointer)        \
-  V(DB, kPACKeyDB, kDataPointer)
-
-#define PAUTH_MODES_ZERO_CONTEXT(V)      \
-  V(IZA, kPACKeyIA, kInstructionPointer) \
-  V(IZB, kPACKeyIB, kInstructionPointer) \
-  V(DZA, kPACKeyDA, kDataPointer)        \
-  V(DZB, kPACKeyDB, kDataPointer)
+#define PAUTH_MODES_REGISTER_CONTEXT(V)   \
+  V(i, a, kPACKeyIA, kInstructionPointer) \
+  V(i, b, kPACKeyIB, kInstructionPointer) \
+  V(d, a, kPACKeyDA, kDataPointer)        \
+  V(d, b, kPACKeyDB, kDataPointer)
 
 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
   unsigned dst = instr->GetRd();
   unsigned src = instr->GetRn();
+  Reg31Mode r31_pac = Reg31IsStackPointer;  // Overridden by the "z" forms.
 
-  switch (instr->Mask(DataProcessing1SourceMask)) {
-#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
-  case PAC##SUFFIX: {                               \
-    uint64_t mod = ReadXRegister(src);              \
+  switch (form_hash_) {
+#define DEFINE_PAUTH_FUNCS(SUF0, SUF1, KEY, D)      \
+  case "pac" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
+    VIXL_ASSERT(src == kZeroRegCode);               \
+    r31_pac = Reg31IsZeroRegister;                  \
+    VIXL_FALLTHROUGH();                             \
+  case "pac" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
+    uint64_t mod = ReadXRegister(src, r31_pac);     \
     uint64_t ptr = ReadXRegister(dst);              \
     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
     break;                                          \
   }                                                 \
-  case AUT##SUFFIX: {                               \
-    uint64_t mod = ReadXRegister(src);              \
+  case "aut" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
+    VIXL_ASSERT(src == kZeroRegCode);               \
+    r31_pac = Reg31IsZeroRegister;                  \
+    VIXL_FALLTHROUGH();                             \
+  case "aut" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
+    uint64_t mod = ReadXRegister(src, r31_pac);     \
     uint64_t ptr = ReadXRegister(dst);              \
     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
     break;                                          \
   }
-
     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
 #undef DEFINE_PAUTH_FUNCS
 
-#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
-  case PAC##SUFFIX: {                               \
-    if (src != kZeroRegCode) {                      \
-      VIXL_UNIMPLEMENTED();                         \
-    }                                               \
-    uint64_t ptr = ReadXRegister(dst);              \
-    WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D));  \
-    break;                                          \
-  }                                                 \
-  case AUT##SUFFIX: {                               \
-    if (src != kZeroRegCode) {                      \
-      VIXL_UNIMPLEMENTED();                         \
-    }                                               \
-    uint64_t ptr = ReadXRegister(dst);              \
-    WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \
-    break;                                          \
-  }
-
-    PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS)
-#undef DEFINE_PAUTH_FUNCS
-
-    case XPACI:
-      if (src != kZeroRegCode) {
-        VIXL_UNIMPLEMENTED();
-      }
+    case "xpaci_64z_dp_1src"_h:
       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
       break;
-    case XPACD:
-      if (src != kZeroRegCode) {
-        VIXL_UNIMPLEMENTED();
-      }
+    case "xpacd_64z_dp_1src"_h:
       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
       break;
-    case RBIT_w:
+    case "rbit_32_dp_1src"_h:
       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
       break;
-    case RBIT_x:
+    case "rbit_64_dp_1src"_h:
       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
       break;
-    case REV16_w:
+    case "rev16_32_dp_1src"_h:
       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
       break;
-    case REV16_x:
+    case "rev16_64_dp_1src"_h:
       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
       break;
-    case REV_w:
+    case "rev_32_dp_1src"_h:
       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
       break;
-    case REV32_x:
+    case "rev32_64_dp_1src"_h:
       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
       break;
-    case REV_x:
+    case "rev_64_dp_1src"_h:
       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
       break;
-    case CLZ_w:
+    case "clz_32_dp_1src"_h:
       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
       break;
-    case CLZ_x:
+    case "clz_64_dp_1src"_h:
       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
       break;
-    case CLS_w:
+    case "cls_32_dp_1src"_h:
       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
       break;
-    case CLS_x:
+    case "cls_64_dp_1src"_h:
       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
       break;
-    default:
-      VIXL_UNIMPLEMENTED();
+    case "abs_32_dp_1src"_h:
+      WriteWRegister(dst, Abs(ReadWRegister(src)));
+      break;
+    case "abs_64_dp_1src"_h:
+      WriteXRegister(dst, Abs(ReadXRegister(src)));
+      break;
+    case "cnt_32_dp_1src"_h:
+      WriteWRegister(dst, CountSetBits(ReadWRegister(src)));
+      break;
+    case "cnt_64_dp_1src"_h:
+      WriteXRegister(dst, CountSetBits(ReadXRegister(src)));
+      break;
+    case "ctz_32_dp_1src"_h:
+      WriteWRegister(dst, CountTrailingZeros(ReadWRegister(src)));
+      break;
+    case "ctz_64_dp_1src"_h:
+      WriteXRegister(dst, CountTrailingZeros(ReadXRegister(src)));
+      break;
   }
 }
 
-
 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
   VIXL_ASSERT((n > 32) && (n <= 64));
   for (unsigned i = (n - 1); i >= 32; i--) {
@@ -5413,6 +5837,81 @@
   WriteRegister(reg_size, instr->GetRd(), result);
 }
 
+void Simulator::SimulateSignedMinMax(const Instruction* instr) {  // SMAX/SMIN
+  int32_t wn = ReadWRegister(instr->GetRn());
+  int32_t wm = ReadWRegister(instr->GetRm());
+  int64_t xn = ReadXRegister(instr->GetRn());
+  int64_t xm = ReadXRegister(instr->GetRm());
+  int32_t imm = instr->ExtractSignedBits(17, 10);  // Signed bits [17:10].
+  int dst = instr->GetRd();
+  // Immediate forms take the second operand from imm rather than from Rm.
+  switch (form_hash_) {
+    case "smax_64_minmax_imm"_h:
+    case "smin_64_minmax_imm"_h:
+      xm = imm;
+      break;
+    case "smax_32_minmax_imm"_h:
+    case "smin_32_minmax_imm"_h:
+      wm = imm;
+      break;
+  }
+  // Pick the operand width and the min/max operation from the form.
+  switch (form_hash_) {
+    case "smax_32_minmax_imm"_h:
+    case "smax_32_dp_2src"_h:
+      WriteWRegister(dst, std::max(wn, wm));
+      break;
+    case "smax_64_minmax_imm"_h:
+    case "smax_64_dp_2src"_h:
+      WriteXRegister(dst, std::max(xn, xm));
+      break;
+    case "smin_32_minmax_imm"_h:
+    case "smin_32_dp_2src"_h:
+      WriteWRegister(dst, std::min(wn, wm));
+      break;
+    case "smin_64_minmax_imm"_h:
+    case "smin_64_dp_2src"_h:
+      WriteXRegister(dst, std::min(xn, xm));
+      break;
+  }
+}
+
+void Simulator::SimulateUnsignedMinMax(const Instruction* instr) {
+  uint64_t xn = ReadXRegister(instr->GetRn());
+  uint64_t xm = ReadXRegister(instr->GetRm());
+  uint32_t imm = instr->ExtractBits(17, 10);  // Unsigned bits [17:10].
+  int dst = instr->GetRd();
+  // Immediate forms take the second operand from imm rather than from Rm.
+  switch (form_hash_) {
+    case "umax_64u_minmax_imm"_h:
+    case "umax_32u_minmax_imm"_h:
+    case "umin_64u_minmax_imm"_h:
+    case "umin_32u_minmax_imm"_h:
+      xm = imm;
+      break;
+  }
+  // 32-bit forms mask both operands to 32 bits, then share the 64-bit path.
+  switch (form_hash_) {
+    case "umax_32u_minmax_imm"_h:
+    case "umax_32_dp_2src"_h:
+      xn &= 0xffff'ffff;
+      xm &= 0xffff'ffff;
+      VIXL_FALLTHROUGH();
+    case "umax_64u_minmax_imm"_h:
+    case "umax_64_dp_2src"_h:
+      WriteXRegister(dst, std::max(xn, xm));
+      break;
+    case "umin_32u_minmax_imm"_h:
+    case "umin_32_dp_2src"_h:
+      xn &= 0xffff'ffff;
+      xm &= 0xffff'ffff;
+      VIXL_FALLTHROUGH();
+    case "umin_64u_minmax_imm"_h:
+    case "umin_64_dp_2src"_h:
+      WriteXRegister(dst, std::min(xn, xm));
+      break;
+  }
+}
 
 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
@@ -5775,6 +6274,8 @@
       WriteDRegisterBits(dst, ReadXRegister(src));
       break;
     case FMOV_d1_x:
+      // Zero bits beyond the MSB of a Q register.
+      mov(kFormat16B, ReadVRegister(dst), ReadVRegister(dst));
       LogicVRegister(ReadVRegister(dst))
           .SetUint(kFormatD, 1, ReadXRegister(src));
       break;
@@ -6425,73 +6926,188 @@
 }
 
 
-void Simulator::SysOp_W(int op, int64_t val) {
+bool Simulator::SysOp_W(int op, int64_t val) {  // false: op not handled.
   switch (op) {
     case IVAU:
     case CVAC:
     case CVAU:
     case CVAP:
     case CVADP:
-    case CIVAC: {
+    case CIVAC:
+    case CGVAC:
+    case CGDVAC:
+    case CGVAP:
+    case CGDVAP:
+    case CIGVAC:
+    case CIGDVAC: {
       // Perform a placeholder memory access to ensure that we have read access
-      // to the specified address.
-      volatile uint8_t y = MemRead<uint8_t>(val);
+      // to the specified address. The read access does not require a tag match,
+      // so temporarily disable MTE.
+      bool mte_enabled = MetaDataDepot::MetaDataMTE::IsActive();
+      MetaDataDepot::MetaDataMTE::SetActive(false);
+      volatile uint8_t y = *MemRead<uint8_t>(val);
+      MetaDataDepot::MetaDataMTE::SetActive(mte_enabled);
       USE(y);
-      // TODO: Implement "case ZVA:".
       break;
     }
+    case ZVA: {
+      if ((dczid_ & 0x10) != 0) return false;  // Check dc zva is enabled.
+      int blocksize = (1 << (dczid_ & 0xf)) * kWRegSizeInBytes;
+      VIXL_ASSERT(IsMultiple(blocksize, sizeof(uint64_t)));
+      uintptr_t addr = AlignDown(val, blocksize);
+      for (int i = 0; i < blocksize; i += sizeof(uint64_t)) {
+        // A failed write abandons the instruction, as for other stores. Still
+        // return true: the operation itself is allocated.
+        if (!MemWrite<uint64_t>(addr + i, 0)) return true;
+        LogWriteU64(0, addr + i);
+      }
+      break;
+    }
+    // TODO: Implement GVA, GZVA.
     default:
       VIXL_UNIMPLEMENTED();
+      return false;
   }
+  return true;
 }
 
+void Simulator::PACHelper(int dst,
+                          int src,  // -1 selects a zero modifier.
+                          PACKey key,
+                          decltype(&Simulator::AddPAC) pac_fn) {
+  VIXL_ASSERT((dst == 17) || (dst == 30));
+  VIXL_ASSERT((src == -1) || (src == 16) || (src == 31));
 
-// clang-format off
-#define PAUTH_SYSTEM_MODES(V)                                     \
-  V(A1716, 17, ReadXRegister(16),                      kPACKeyIA) \
-  V(B1716, 17, ReadXRegister(16),                      kPACKeyIB) \
-  V(AZ,    30, 0x00000000,                             kPACKeyIA) \
-  V(BZ,    30, 0x00000000,                             kPACKeyIB) \
-  V(ASP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIA) \
-  V(BSP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIB)
-// clang-format on
-
+  uint64_t modifier = (src == -1) ? 0 : ReadXRegister(src, Reg31IsStackPointer);
+  uint64_t result =
+      (this->*pac_fn)(ReadXRegister(dst), modifier, key, kInstructionPointer);
+  WriteXRegister(dst, result);  // Result overwrites the input pointer.
+}
 
 void Simulator::VisitSystem(const Instruction* instr) {
-  // Some system instructions hijack their Op and Cp fields to represent a
-  // range of immediates instead of indicating a different instruction. This
-  // makes the decoding tricky.
-  if (instr->GetInstructionBits() == XPACLRI) {
-    WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
-  } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
-    switch (instr->Mask(SystemPStateMask)) {
-      case CFINV:
-        ReadNzcv().SetC(!ReadC());
-        break;
-      case AXFLAG:
-        ReadNzcv().SetN(0);
-        ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
-        ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
-        ReadNzcv().SetV(0);
-        break;
-      case XAFLAG: {
-        // Can't set the flags in place due to the logical dependencies.
-        uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
-        uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
-        uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
-        uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
-        ReadNzcv().SetN(n);
-        ReadNzcv().SetZ(z);
-        ReadNzcv().SetC(c);
-        ReadNzcv().SetV(v);
-        break;
-      }
+  PACKey pac_key = kPACKeyIA;  // Default key for PAC/AUTH handling.
+
+  switch (form_hash_) {
+    case "cfinv_m_pstate"_h:
+      ReadNzcv().SetC(!ReadC());
+      break;
+    case "axflag_m_pstate"_h:
+      ReadNzcv().SetN(0);
+      ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
+      ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
+      ReadNzcv().SetV(0);
+      break;
+    case "xaflag_m_pstate"_h: {
+      // Can't set the flags in place due to the logical dependencies.
+      uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
+      uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
+      uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
+      uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
+      ReadNzcv().SetN(n);
+      ReadNzcv().SetZ(z);
+      ReadNzcv().SetC(c);
+      ReadNzcv().SetV(v);
+      break;
     }
-  } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
-    // Check BType allows PACI[AB]SP instructions.
-    if (PcIsInGuardedPage()) {
-      Instr i = instr->Mask(SystemPAuthMask);
-      if ((i == PACIASP) || (i == PACIBSP)) {
+    case "xpaclri_hi_hints"_h:
+      WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
+      break;
+    case "clrex_bn_barriers"_h:
+      PrintExclusiveAccessWarning();
+      ClearLocalMonitor();
+      break;
+    case "msr_sr_systemmove"_h:
+      switch (instr->GetImmSystemRegister()) {
+        case NZCV:
+          ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
+          LogSystemRegister(NZCV);
+          break;
+        case FPCR:
+          ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
+          LogSystemRegister(FPCR);
+          break;
+        default:
+          VIXL_UNIMPLEMENTED();
+      }
+      break;
+    case "mrs_rs_systemmove"_h:
+      switch (instr->GetImmSystemRegister()) {
+        case NZCV:
+          WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
+          break;
+        case FPCR:
+          WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
+          break;
+        case RNDR:
+        case RNDRRS: {
+          uint64_t high = rand_gen_();
+          uint64_t low = rand_gen_();
+          uint64_t rand_num = (high << 32) | (low & 0xffffffff);
+          WriteXRegister(instr->GetRt(), rand_num);
+          // Simulate successful random number generation.
+          // TODO: Return failure occasionally as a random number cannot be
+          // returned in a period of time.
+          ReadNzcv().SetRawValue(NoFlag);
+          LogSystemRegister(NZCV);
+          break;
+        }
+        case DCZID_EL0:
+          WriteXRegister(instr->GetRt(), dczid_);
+          break;
+        default:
+          VIXL_UNIMPLEMENTED();
+      }
+      break;
+    case "chkfeat_hf_hints"_h: {
+      uint64_t feat_select = ReadXRegister(16);
+      uint64_t gcs_enabled = IsGCSCheckEnabled() ? 1 : 0;
+      feat_select &= ~gcs_enabled;
+      WriteXRegister(16, feat_select);
+      break;
+    }
+    case "hint_hm_hints"_h:
+    case "nop_hi_hints"_h:
+    case "esb_hi_hints"_h:
+    case "csdb_hi_hints"_h:
+      break;
+    case "bti_hb_hints"_h:
+      switch (instr->GetImmHint()) {
+        case BTI_jc:
+          break;
+        case BTI:
+          if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
+            VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
+          }
+          break;
+        case BTI_c:
+          if (PcIsInGuardedPage() &&
+              (ReadBType() == BranchFromGuardedNotToIP)) {
+            VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
+          }
+          break;
+        case BTI_j:
+          if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
+            VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
+          }
+          break;
+        default:
+          VIXL_UNREACHABLE();
+      }
+      return;
+    case "pacib1716_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "pacia1716_hi_hints"_h:
+      PACHelper(17, 16, pac_key, &Simulator::AddPAC);
+      break;
+    case "pacibsp_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "paciasp_hi_hints"_h:
+      PACHelper(30, 31, pac_key, &Simulator::AddPAC);
+
+      // Check BType allows PACI[AB]SP instructions.
+      if (PcIsInGuardedPage()) {
         switch (ReadBType()) {
           case BranchFromGuardedNotToIP:
           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
@@ -6503,117 +7119,98 @@
             break;
         }
       }
+      break;
+    case "pacibz_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "paciaz_hi_hints"_h:
+      PACHelper(30, -1, pac_key, &Simulator::AddPAC);
+      break;
+    case "autib1716_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "autia1716_hi_hints"_h:
+      PACHelper(17, 16, pac_key, &Simulator::AuthPAC);
+      break;
+    case "autibsp_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "autiasp_hi_hints"_h:
+      PACHelper(30, 31, pac_key, &Simulator::AuthPAC);
+      break;
+    case "autibz_hi_hints"_h:
+      pac_key = kPACKeyIB;
+      VIXL_FALLTHROUGH();
+    case "autiaz_hi_hints"_h:
+      PACHelper(30, -1, pac_key, &Simulator::AuthPAC);
+      break;
+    case "dsb_bo_barriers"_h:
+    case "dmb_bo_barriers"_h:
+    case "isb_bi_barriers"_h:
+      VIXL_SYNC();
+      break;
+    case "sys_cr_systeminstrs"_h: {
+      uint64_t rt = ReadXRegister(instr->GetRt());
+      uint32_t sysop = instr->GetSysOp();
+      if (sysop == GCSSS1) {
+        uint64_t incoming_size = rt >> 32;
+        // Drop upper 32 bits to get GCS index.
+        uint64_t incoming_gcs = rt & 0xffffffff;
+        uint64_t outgoing_gcs = ActivateGCS(incoming_gcs);
+        uint64_t incoming_seal = GCSPop();
+        if (((incoming_seal ^ rt) != 1) ||
+            (GetActiveGCSPtr()->size() != incoming_size)) {
+          char msg[128];
+          snprintf(msg,
+                   sizeof(msg),
+                   "GCS: invalid incoming stack: 0x%016" PRIx64 "\n",
+                   incoming_seal);
+          ReportGCSFailure(msg);
+        }
+        GCSPush(outgoing_gcs + 5);
+      } else if (sysop == GCSPUSHM) {
+        GCSPush(ReadXRegister(instr->GetRt()));
+      } else {
+        if (!SysOp_W(sysop, rt)) {
+          VisitUnallocated(instr);
+        }
+      }
+      break;
     }
+    case "sysl_rc_systeminstrs"_h: {
+      uint32_t sysop = instr->GetSysOp();
+      if (sysop == GCSPOPM) {
+        uint64_t addr = GCSPop();
+        WriteXRegister(instr->GetRt(), addr);
+      } else if (sysop == GCSSS2) {
+        uint64_t outgoing_gcs = GCSPop();
+        // Check for token inserted by gcsss1.
+        if ((outgoing_gcs & 7) != 5) {
+          char msg[128];
+          snprintf(msg,
+                   sizeof(msg),
+                   "GCS: outgoing stack has no token: 0x%016" PRIx64 "\n",
+                   outgoing_gcs);
+          ReportGCSFailure(msg);
+        }
+        uint64_t incoming_gcs = ActivateGCS(outgoing_gcs);
+        outgoing_gcs &= ~UINT64_C(0x3ff);
 
-    switch (instr->Mask(SystemPAuthMask)) {
-#define DEFINE_PAUTH_FUNCS(SUFFIX, DST, MOD, KEY)                              \
-  case PACI##SUFFIX:                                                           \
-    WriteXRegister(DST,                                                        \
-                   AddPAC(ReadXRegister(DST), MOD, KEY, kInstructionPointer)); \
-    break;                                                                     \
-  case AUTI##SUFFIX:                                                           \
-    WriteXRegister(DST,                                                        \
-                   AuthPAC(ReadXRegister(DST),                                 \
-                           MOD,                                                \
-                           KEY,                                                \
-                           kInstructionPointer));                              \
-    break;
-
-      PAUTH_SYSTEM_MODES(DEFINE_PAUTH_FUNCS)
-#undef DEFINE_PAUTH_FUNCS
-    }
-  } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
-             SystemExclusiveMonitorFixed) {
-    VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
-    switch (instr->Mask(SystemExclusiveMonitorMask)) {
-      case CLREX: {
-        PrintExclusiveAccessWarning();
-        ClearLocalMonitor();
-        break;
-      }
-    }
-  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
-    switch (instr->Mask(SystemSysRegMask)) {
-      case MRS: {
-        switch (instr->GetImmSystemRegister()) {
-          case NZCV:
-            WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
-            break;
-          case FPCR:
-            WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
-            break;
-          case RNDR:
-          case RNDRRS: {
-            uint64_t high = jrand48(rand_state_);
-            uint64_t low = jrand48(rand_state_);
-            uint64_t rand_num = (high << 32) | (low & 0xffffffff);
-            WriteXRegister(instr->GetRt(), rand_num);
-            // Simulate successful random number generation.
-            // TODO: Return failure occasionally as a random number cannot be
-            // returned in a period of time.
-            ReadNzcv().SetRawValue(NoFlag);
-            LogSystemRegister(NZCV);
-            break;
-          }
-          default:
-            VIXL_UNIMPLEMENTED();
-        }
-        break;
-      }
-      case MSR: {
-        switch (instr->GetImmSystemRegister()) {
-          case NZCV:
-            ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
-            LogSystemRegister(NZCV);
-            break;
-          case FPCR:
-            ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
-            LogSystemRegister(FPCR);
-            break;
-          default:
-            VIXL_UNIMPLEMENTED();
-        }
-        break;
-      }
-    }
-  } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
-    VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
-    switch (instr->GetImmHint()) {
-      case NOP:
-      case ESB:
-      case CSDB:
-      case BTI_jc:
-        break;
-      case BTI:
-        if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
-          VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
-        }
-        break;
-      case BTI_c:
-        if (PcIsInGuardedPage() && (ReadBType() == BranchFromGuardedNotToIP)) {
-          VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
-        }
-        break;
-      case BTI_j:
-        if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
-          VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
-        }
-        break;
-      default:
+        // Encode the size into the outgoing stack seal, to check later.
+        uint64_t size = GetActiveGCSPtr()->size();
+        VIXL_ASSERT(IsUint32(size));
+        VIXL_ASSERT(IsUint32(outgoing_gcs + 1));
+        uint64_t outgoing_seal = (size << 32) | (outgoing_gcs + 1);
+        GCSPush(outgoing_seal);
+        ActivateGCS(incoming_gcs);
+        WriteXRegister(instr->GetRt(), outgoing_seal - 1);
+      } else {
         VIXL_UNIMPLEMENTED();
+      }
+      break;
     }
-  } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
-    __sync_synchronize();
-  } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
-    switch (instr->Mask(SystemSysMask)) {
-      case SYS:
-        SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
-        break;
-      default:
-        VIXL_UNIMPLEMENTED();
-    }
-  } else {
-    VIXL_UNIMPLEMENTED();
+    default:
+      VIXL_UNIMPLEMENTED();
   }
 }
 
@@ -6648,12 +7245,26 @@
         case kRestoreCPUFeaturesOpcode:
           DoRestoreCPUFeatures(instr);
           return;
+        case kMTEActive:
+          MetaDataDepot::MetaDataMTE::SetActive(true);
+          return;
+        case kMTEInactive:
+          MetaDataDepot::MetaDataMTE::SetActive(false);
+          return;
         default:
           HostBreakpoint();
           return;
       }
     case BRK:
-      HostBreakpoint();
+      if (debugger_enabled_) {
+        uint64_t next_instr =
+            reinterpret_cast<uint64_t>(pc_->GetNextInstruction());
+        if (!debugger_->IsBreakpoint(next_instr)) {
+          debugger_->RegisterBreakpoint(next_instr);
+        }
+      } else {
+        HostBreakpoint();
+      }
       return;
     default:
       VIXL_UNIMPLEMENTED();
@@ -6662,19 +7273,161 @@
 
 
 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
-  VisitUnimplemented(instr);
+  SimVRegister& rd = ReadVRegister(instr->GetRd());  // Also read by sha1su1.
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "sha1h_ss_cryptosha2"_h:
+      ror(kFormatS, rd, rn, 2);
+      break;
+    case "sha1su1_vv_cryptosha2"_h: {
+      SimVRegister temp;
+
+      // temp = srcdst ^ (src >> 32);
+      ext(kFormat16B, temp, rn, temp, 4);  // NOTE(review): temp assumed 0.
+      eor(kFormat16B, temp, rd, temp);
+
+      // srcdst = ROL(temp, 1) ^ (ROL(temp, 2) << 96)
+      rol(kFormat4S, rd, temp, 1);
+      rol(kFormatS, temp, temp, 2);  // kFormatS will zero bits <127:32>
+      ext(kFormat16B, temp, temp, temp, 4);
+      eor(kFormat16B, rd, rd, temp);
+      break;
+    }
+    case "sha256su0_vv_cryptosha2"_h:
+      sha2su0(rd, rn);
+      break;
+  }
 }
 
 
 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
-  VisitUnimplemented(instr);
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "sha1c_qsv_cryptosha3"_h:
+      sha1<"choose"_h>(rd, rn, rm);  // Variant picked by template hash.
+      break;
+    case "sha1m_qsv_cryptosha3"_h:
+      sha1<"majority"_h>(rd, rn, rm);
+      break;
+    case "sha1p_qsv_cryptosha3"_h:
+      sha1<"parity"_h>(rd, rn, rm);
+      break;
+    case "sha1su0_vvv_cryptosha3"_h: {  // rd = ext(rd, rn, 8) ^ rd ^ rm
+      SimVRegister temp;
+      ext(kFormat16B, temp, rd, rn, 8);
+      eor(kFormat16B, temp, temp, rd);
+      eor(kFormat16B, rd, temp, rm);
+      break;
+    }
+    case "sha256h_qqv_cryptosha3"_h:
+      sha2h(rd, rn, rm, /* part1 = */ true);
+      break;
+    case "sha256h2_qqv_cryptosha3"_h:
+      sha2h(rd, rn, rm, /* part1 = */ false);  // Same helper, second part.
+      break;
+    case "sha256su1_vvv_cryptosha3"_h:
+      sha2su1(rd, rn, rm);
+      break;
+  }
 }
 
 
 void Simulator::VisitCryptoAES(const Instruction* instr) {
-  VisitUnimplemented(instr);
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister temp;  // Scratch; only the aesd/aese cases use it.
+
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "aesd_b_cryptoaes"_h:
+      eor(kFormat16B, temp, rd, rn);  // temp = rd ^ rn
+      aes(rd, temp, /* decrypt = */ true);
+      break;
+    case "aese_b_cryptoaes"_h:
+      eor(kFormat16B, temp, rd, rn);  // temp = rd ^ rn
+      aes(rd, temp, /* decrypt = */ false);
+      break;
+    case "aesimc_b_cryptoaes"_h:
+      aesmix(rd, rn, /* inverse = */ true);
+      break;
+    case "aesmc_b_cryptoaes"_h:
+      aesmix(rd, rn, /* inverse = */ false);
+      break;
+  }
 }
 
+void Simulator::VisitCryptoSM3(const Instruction* instr) {
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister& rm = ReadVRegister(instr->GetRm());
+  SimVRegister& ra = ReadVRegister(instr->GetRa());  // Only used by sm3ss1.
+  int index = instr->ExtractBits(13, 12);  // Two-bit field; sm3tt* only.
+
+  bool is_a = false;  // Set by the "a" forms before they fall through.
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "sm3partw1_vvv4_cryptosha512_3"_h:
+      sm3partw1(rd, rn, rm);
+      break;
+    case "sm3partw2_vvv4_cryptosha512_3"_h:
+      sm3partw2(rd, rn, rm);
+      break;
+    case "sm3ss1_vvv4_crypto4"_h:
+      sm3ss1(rd, rn, rm, ra);
+      break;
+    case "sm3tt1a_vvv4_crypto3_imm2"_h:
+      is_a = true;
+      VIXL_FALLTHROUGH();
+    case "sm3tt1b_vvv4_crypto3_imm2"_h:
+      sm3tt1(rd, rn, rm, index, is_a);  // Shared by sm3tt1a and sm3tt1b.
+      break;
+    case "sm3tt2a_vvv4_crypto3_imm2"_h:
+      is_a = true;
+      VIXL_FALLTHROUGH();
+    case "sm3tt2b_vvv_crypto3_imm2"_h:
+      sm3tt2(rd, rn, rm, index, is_a);  // Shared by sm3tt2a and sm3tt2b.
+      break;
+  }
+}
+
+void Simulator::VisitCryptoSM4(const Instruction* instr) {
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+  bool is_key = false;  // True only when the form is sm4ekey.
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "sm4ekey_vvv4_cryptosha512_3"_h:
+      is_key = true;
+      VIXL_FALLTHROUGH();
+    case "sm4e_vv4_cryptosha512_2"_h:
+      sm4(rd, rn, rm, is_key);  // One helper serves both forms.
+      break;
+  }
+}
+
+void Simulator::SimulateSHA512(const Instruction* instr) {
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister& rm = ReadVRegister(instr->GetRm());  // Unused by sha512su0.
+
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "sha512h_qqv_cryptosha512_3"_h:
+      sha512h(rd, rn, rm);
+      break;
+    case "sha512h2_qqv_cryptosha512_3"_h:
+      sha512h2(rd, rn, rm);
+      break;
+    case "sha512su0_vv2_cryptosha512_2"_h:
+      sha512su0(rd, rn);  // Two-operand form: rm is not passed.
+      break;
+    case "sha512su1_vvv2_cryptosha512_3"_h:
+      sha512su1(rd, rn, rm);
+      break;
+  }
+}
 
 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
   NEONFormatDecoder nfd(instr);
@@ -7438,13 +8191,24 @@
   SimVRegister& rd = ReadVRegister(instr->GetRd());
   SimVRegister& rn = ReadVRegister(instr->GetRn());
   SimVRegister& rm = ReadVRegister(instr->GetRm());
+  int size = instr->GetNEONSize();
 
   switch (instr->Mask(NEON3DifferentMask)) {
     case NEON_PMULL:
-      pmull(vf_l, rd, rn, rm);
+      if ((size == 1) || (size == 2)) {  // S/D reserved.
+        VisitUnallocated(instr);
+      } else {
+        if (size == 3) vf_l = kFormat1Q;
+        pmull(vf_l, rd, rn, rm);
+      }
       break;
     case NEON_PMULL2:
-      pmull2(vf_l, rd, rn, rm);
+      if ((size == 1) || (size == 2)) {  // S/D reserved.
+        VisitUnallocated(instr);
+      } else {
+        if (size == 3) vf_l = kFormat1Q;
+        pmull2(vf_l, rd, rn, rm);
+      }
       break;
     case NEON_UADDL:
       uaddl(vf_l, rd, rn, rm);
@@ -7682,22 +8446,14 @@
 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
   NEONFormatDecoder nfd(instr);
   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
-
   SimVRegister& rd = ReadVRegister(instr->GetRd());
   SimVRegister& rn = ReadVRegister(instr->GetRn());
 
-  int rm_reg = instr->GetRm();
-  int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
-  if (instr->GetNEONSize() == 1) {
-    rm_reg = instr->GetRmLow16();
-    index = (index << 1) | instr->GetNEONM();
-  }
-  SimVRegister& rm = ReadVRegister(rm_reg);
-
+  std::pair<int, int> rm_and_index = instr->GetNEONMulRmAndIndex();
   SimVRegister temp;
   VectorFormat indexform =
       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
-  dup_element(indexform, temp, rm, index);
+  dup_elements_to_segments(indexform, temp, rm_and_index);
 
   bool is_2 = instr->Mask(NEON_Q) ? true : false;
 
@@ -7771,21 +8527,9 @@
   SimVRegister& rd = ReadVRegister(instr->GetRd());
   SimVRegister& rn = ReadVRegister(instr->GetRn());
 
-  int rm_reg = instr->GetRm();
-  int index =
-      (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
-
-  if ((vform == kFormat4H) || (vform == kFormat8H)) {
-    rm_reg &= 0xf;
-  } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
-    index >>= 1;
-  } else {
-    VIXL_ASSERT(vform == kFormat2D);
-    VIXL_ASSERT(instr->GetNEONL() == 0);
-    index >>= 2;
-  }
-
-  SimVRegister& rm = ReadVRegister(rm_reg);
+  std::pair<int, int> rm_and_index = instr->GetNEONMulRmAndIndex();
+  SimVRegister& rm = ReadVRegister(rm_and_index.first);
+  int index = rm_and_index.second;
 
   switch (form_hash_) {
     case "fmul_asimdelem_rh_h"_h:
@@ -7865,15 +8609,9 @@
   SimVRegister& rd = ReadVRegister(instr->GetRd());
   SimVRegister& rn = ReadVRegister(instr->GetRn());
 
-  int rm_reg = instr->GetRm();
-  int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
-
-  if ((vform == kFormat4H) || (vform == kFormat8H)) {
-    rm_reg &= 0xf;
-    index = (index << 1) | instr->GetNEONM();
-  }
-
-  SimVRegister& rm = ReadVRegister(rm_reg);
+  std::pair<int, int> rm_and_index = instr->GetNEONMulRmAndIndex();
+  SimVRegister& rm = ReadVRegister(rm_and_index.first);
+  int index = rm_and_index.second;
 
   switch (form_hash_) {
     case "mul_asimdelem_r"_h:
@@ -7914,8 +8652,10 @@
   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
     int imm4 = instr->GetImmNEON4();
     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
+    mov(kFormat16B, rd, rd);  // Zero bits beyond the MSB of a Q register.
     ins_element(vf, rd, reg_index, rn, rn_index);
   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+    mov(kFormat16B, rd, rd);  // Zero bits beyond the MSB of a Q register.
     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
@@ -7983,97 +8723,117 @@
   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
     case NEON_LD1_4v:
     case NEON_LD1_4v_post:
-      ld1(vf, ReadVRegister(reg[3]), addr[3]);
+      if (!ld1(vf, ReadVRegister(reg[3]), addr[3])) {
+        return;
+      }
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_LD1_3v:
     case NEON_LD1_3v_post:
-      ld1(vf, ReadVRegister(reg[2]), addr[2]);
+      if (!ld1(vf, ReadVRegister(reg[2]), addr[2])) {
+        return;
+      }
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_LD1_2v:
     case NEON_LD1_2v_post:
-      ld1(vf, ReadVRegister(reg[1]), addr[1]);
+      if (!ld1(vf, ReadVRegister(reg[1]), addr[1])) {
+        return;
+      }
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_LD1_1v:
     case NEON_LD1_1v_post:
-      ld1(vf, ReadVRegister(reg[0]), addr[0]);
+      if (!ld1(vf, ReadVRegister(reg[0]), addr[0])) {
+        return;
+      }
       break;
     case NEON_ST1_4v:
     case NEON_ST1_4v_post:
-      st1(vf, ReadVRegister(reg[3]), addr[3]);
+      if (!st1(vf, ReadVRegister(reg[3]), addr[3])) return;
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_ST1_3v:
     case NEON_ST1_3v_post:
-      st1(vf, ReadVRegister(reg[2]), addr[2]);
+      if (!st1(vf, ReadVRegister(reg[2]), addr[2])) return;
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_ST1_2v:
     case NEON_ST1_2v_post:
-      st1(vf, ReadVRegister(reg[1]), addr[1]);
+      if (!st1(vf, ReadVRegister(reg[1]), addr[1])) return;
       reg_count++;
       VIXL_FALLTHROUGH();
     case NEON_ST1_1v:
     case NEON_ST1_1v_post:
-      st1(vf, ReadVRegister(reg[0]), addr[0]);
+      if (!st1(vf, ReadVRegister(reg[0]), addr[0])) return;
       log_read = false;
       break;
     case NEON_LD2_post:
     case NEON_LD2:
-      ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
+      if (!ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0])) {
+        return;
+      }
       struct_parts = 2;
       reg_count = 2;
       break;
     case NEON_ST2:
     case NEON_ST2_post:
-      st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
+      if (!st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0])) {
+        return;
+      }
       struct_parts = 2;
       reg_count = 2;
       log_read = false;
       break;
     case NEON_LD3_post:
     case NEON_LD3:
-      ld3(vf,
-          ReadVRegister(reg[0]),
-          ReadVRegister(reg[1]),
-          ReadVRegister(reg[2]),
-          addr[0]);
+      if (!ld3(vf,
+               ReadVRegister(reg[0]),
+               ReadVRegister(reg[1]),
+               ReadVRegister(reg[2]),
+               addr[0])) {
+        return;
+      }
       struct_parts = 3;
       reg_count = 3;
       break;
     case NEON_ST3:
     case NEON_ST3_post:
-      st3(vf,
-          ReadVRegister(reg[0]),
-          ReadVRegister(reg[1]),
-          ReadVRegister(reg[2]),
-          addr[0]);
+      if (!st3(vf,
+               ReadVRegister(reg[0]),
+               ReadVRegister(reg[1]),
+               ReadVRegister(reg[2]),
+               addr[0])) {
+        return;
+      }
       struct_parts = 3;
       reg_count = 3;
       log_read = false;
       break;
     case NEON_ST4:
     case NEON_ST4_post:
-      st4(vf,
-          ReadVRegister(reg[0]),
-          ReadVRegister(reg[1]),
-          ReadVRegister(reg[2]),
-          ReadVRegister(reg[3]),
-          addr[0]);
+      if (!st4(vf,
+               ReadVRegister(reg[0]),
+               ReadVRegister(reg[1]),
+               ReadVRegister(reg[2]),
+               ReadVRegister(reg[3]),
+               addr[0])) {
+        return;
+      }
       struct_parts = 4;
       reg_count = 4;
       log_read = false;
       break;
     case NEON_LD4_post:
     case NEON_LD4:
-      ld4(vf,
-          ReadVRegister(reg[0]),
-          ReadVRegister(reg[1]),
-          ReadVRegister(reg[2]),
-          ReadVRegister(reg[3]),
-          addr[0]);
+      if (!ld4(vf,
+               ReadVRegister(reg[0]),
+               ReadVRegister(reg[1]),
+               ReadVRegister(reg[2]),
+               ReadVRegister(reg[3]),
+               addr[0])) {
+        return;
+      }
       struct_parts = 4;
       reg_count = 4;
       break;
@@ -8108,7 +8868,10 @@
     // The immediate is implied by the number of vector registers used.
     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
                             : ReadXRegister(rm);
-    WriteXRegister(instr->GetRn(), addr_base);
+    WriteXRegister(instr->GetRn(),
+                   addr_base,
+                   LogRegWrites,
+                   Reg31IsStackPointer);
   } else {
     VIXL_ASSERT(addr_mode == Offset);
   }
@@ -8245,75 +9008,95 @@
       reg_count = 1;
       if (replicating) {
         VIXL_ASSERT(do_load);
-        ld1r(vf, ReadVRegister(rt), addr);
+        if (!ld1r(vf, ReadVRegister(rt), addr)) {
+          return;
+        }
       } else if (do_load) {
-        ld1(vf, ReadVRegister(rt), lane, addr);
+        if (!ld1(vf, ReadVRegister(rt), lane, addr)) {
+          return;
+        }
       } else {
-        st1(vf, ReadVRegister(rt), lane, addr);
+        if (!st1(vf, ReadVRegister(rt), lane, addr)) return;
       }
       break;
     case NEONLoadStoreSingle2:
       reg_count = 2;
       if (replicating) {
         VIXL_ASSERT(do_load);
-        ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
+        if (!ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr)) {
+          return;
+        }
       } else if (do_load) {
-        ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
+        if (!ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr)) {
+          return;
+        }
       } else {
-        st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
+        if (!st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr)) return;
       }
       break;
     case NEONLoadStoreSingle3:
       reg_count = 3;
       if (replicating) {
         VIXL_ASSERT(do_load);
-        ld3r(vf,
-             ReadVRegister(rt),
-             ReadVRegister(rt2),
-             ReadVRegister(rt3),
-             addr);
+        if (!ld3r(vf,
+                  ReadVRegister(rt),
+                  ReadVRegister(rt2),
+                  ReadVRegister(rt3),
+                  addr)) {
+          return;
+        }
       } else if (do_load) {
-        ld3(vf,
-            ReadVRegister(rt),
-            ReadVRegister(rt2),
-            ReadVRegister(rt3),
-            lane,
-            addr);
+        if (!ld3(vf,
+                 ReadVRegister(rt),
+                 ReadVRegister(rt2),
+                 ReadVRegister(rt3),
+                 lane,
+                 addr)) {
+          return;
+        }
       } else {
-        st3(vf,
-            ReadVRegister(rt),
-            ReadVRegister(rt2),
-            ReadVRegister(rt3),
-            lane,
-            addr);
+        if (!st3(vf,
+                 ReadVRegister(rt),
+                 ReadVRegister(rt2),
+                 ReadVRegister(rt3),
+                 lane,
+                 addr)) {
+          return;
+        }
       }
       break;
     case NEONLoadStoreSingle4:
       reg_count = 4;
       if (replicating) {
         VIXL_ASSERT(do_load);
-        ld4r(vf,
-             ReadVRegister(rt),
-             ReadVRegister(rt2),
-             ReadVRegister(rt3),
-             ReadVRegister(rt4),
-             addr);
+        if (!ld4r(vf,
+                  ReadVRegister(rt),
+                  ReadVRegister(rt2),
+                  ReadVRegister(rt3),
+                  ReadVRegister(rt4),
+                  addr)) {
+          return;
+        }
       } else if (do_load) {
-        ld4(vf,
-            ReadVRegister(rt),
-            ReadVRegister(rt2),
-            ReadVRegister(rt3),
-            ReadVRegister(rt4),
-            lane,
-            addr);
+        if (!ld4(vf,
+                 ReadVRegister(rt),
+                 ReadVRegister(rt2),
+                 ReadVRegister(rt3),
+                 ReadVRegister(rt4),
+                 lane,
+                 addr)) {
+          return;
+        }
       } else {
-        st4(vf,
-            ReadVRegister(rt),
-            ReadVRegister(rt2),
-            ReadVRegister(rt3),
-            ReadVRegister(rt4),
-            lane,
-            addr);
+        if (!st4(vf,
+                 ReadVRegister(rt),
+                 ReadVRegister(rt2),
+                 ReadVRegister(rt3),
+                 ReadVRegister(rt4),
+                 lane,
+                 addr)) {
+          return;
+        }
       }
       break;
     default:
@@ -8345,7 +9128,9 @@
     int lane_size = LaneSizeInBytesFromFormat(vf);
     WriteXRegister(instr->GetRn(),
                    addr + ((rm == 31) ? (reg_count * lane_size)
-                                      : ReadXRegister(rm)));
+                                      : ReadXRegister(rm)),
+                   LogRegWrites,
+                   Reg31IsStackPointer);
   }
 }
 
@@ -8405,7 +9190,7 @@
         vform = q ? kFormat2D : kFormat1D;
         imm = 0;
         for (int i = 0; i < 8; ++i) {
-          if (imm8 & (1 << i)) {
+          if (imm8 & (uint64_t{1} << i)) {
             imm |= (UINT64_C(0xff) << (8 * i));
           }
         }
@@ -8885,78 +9670,76 @@
 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   VectorFormat vf = nfd.GetVectorFormat();
-  VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
-
   SimVRegister& rd = ReadVRegister(instr->GetRd());
   SimVRegister& rn = ReadVRegister(instr->GetRn());
   ByElementOp Op = NULL;
 
-  int rm_reg = instr->GetRm();
-  int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
-  if (instr->GetNEONSize() == 1) {
-    rm_reg &= 0xf;
-    index = (index << 1) | instr->GetNEONM();
+  std::pair<int, int> rm_and_index = instr->GetNEONMulRmAndIndex();
+  static const std::unordered_map<uint32_t, ByElementOp> handler = {
+      {"sqdmull_asisdelem_l"_h, &Simulator::sqdmull},
+      {"sqdmlal_asisdelem_l"_h, &Simulator::sqdmlal},
+      {"sqdmlsl_asisdelem_l"_h, &Simulator::sqdmlsl},
+      {"sqdmulh_asisdelem_r"_h, &Simulator::sqdmulh},
+      {"sqrdmulh_asisdelem_r"_h, &Simulator::sqrdmulh},
+      {"sqrdmlah_asisdelem_r"_h, &Simulator::sqrdmlah},
+      {"sqrdmlsh_asisdelem_r"_h, &Simulator::sqrdmlsh},
+      {"fmul_asisdelem_rh_h"_h, &Simulator::fmul},
+      {"fmul_asisdelem_r_sd"_h, &Simulator::fmul},
+      {"fmla_asisdelem_rh_h"_h, &Simulator::fmla},
+      {"fmla_asisdelem_r_sd"_h, &Simulator::fmla},
+      {"fmls_asisdelem_rh_h"_h, &Simulator::fmls},
+      {"fmls_asisdelem_r_sd"_h, &Simulator::fmls},
+      {"fmulx_asisdelem_rh_h"_h, &Simulator::fmulx},
+      {"fmulx_asisdelem_r_sd"_h, &Simulator::fmulx},
+  };  // Function-local static: built once, not on every visit.
+
+  const auto it = handler.find(form_hash_);  // Form hash -> helper.
+
+  if (it == handler.end()) {
+    VIXL_UNIMPLEMENTED();
+    return;  // Never call through a null Op if the macro does not abort.
+  } else {
+    Op = it->second;
   }
 
-  switch (instr->Mask(NEONScalarByIndexedElementMask)) {
-    case NEON_SQDMULL_byelement_scalar:
-      Op = &Simulator::sqdmull;
+  switch (form_hash_) {  // Pick the lane format; reject reserved sizes.
+    case "sqdmull_asisdelem_l"_h:
+    case "sqdmlal_asisdelem_l"_h:
+    case "sqdmlsl_asisdelem_l"_h:
+      if ((vf == kFormatB) || (vf == kFormatH)) {
+        VisitUnallocated(instr);
+        return;
+      }
       break;
-    case NEON_SQDMLAL_byelement_scalar:
-      Op = &Simulator::sqdmlal;
+    case "sqdmulh_asisdelem_r"_h:
+    case "sqrdmulh_asisdelem_r"_h:
+    case "sqrdmlah_asisdelem_r"_h:
+    case "sqrdmlsh_asisdelem_r"_h:
+      vf = nfd.GetVectorFormat(nfd.ScalarFormatMap());
+      if ((vf == kFormatB) || (vf == kFormatD)) {
+        VisitUnallocated(instr);
+        return;
+      }
       break;
-    case NEON_SQDMLSL_byelement_scalar:
-      Op = &Simulator::sqdmlsl;
-      break;
-    case NEON_SQDMULH_byelement_scalar:
-      Op = &Simulator::sqdmulh;
-      vf = vf_r;
-      break;
-    case NEON_SQRDMULH_byelement_scalar:
-      Op = &Simulator::sqrdmulh;
-      vf = vf_r;
-      break;
-    case NEON_SQRDMLAH_byelement_scalar:
-      Op = &Simulator::sqrdmlah;
-      vf = vf_r;
-      break;
-    case NEON_SQRDMLSH_byelement_scalar:
-      Op = &Simulator::sqrdmlsh;
-      vf = vf_r;
-      break;
-    default:
+    case "fmul_asisdelem_r_sd"_h:
+    case "fmla_asisdelem_r_sd"_h:
+    case "fmls_asisdelem_r_sd"_h:
+    case "fmulx_asisdelem_r_sd"_h:
       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
-      index = instr->GetNEONH();
-      if (instr->GetFPType() == 0) {
-        index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
-        rm_reg &= 0xf;
-        vf = kFormatH;
-      } else if ((instr->GetFPType() & 1) == 0) {
-        index = (index << 1) | instr->GetNEONL();
-      }
-      switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
-        case NEON_FMUL_H_byelement_scalar:
-        case NEON_FMUL_byelement_scalar:
-          Op = &Simulator::fmul;
-          break;
-        case NEON_FMLA_H_byelement_scalar:
-        case NEON_FMLA_byelement_scalar:
-          Op = &Simulator::fmla;
-          break;
-        case NEON_FMLS_H_byelement_scalar:
-        case NEON_FMLS_byelement_scalar:
-          Op = &Simulator::fmls;
-          break;
-        case NEON_FMULX_H_byelement_scalar:
-        case NEON_FMULX_byelement_scalar:
-          Op = &Simulator::fmulx;
-          break;
-        default:
-          VIXL_UNIMPLEMENTED();
-      }
+      break;
+    case "fmul_asisdelem_rh_h"_h:
+    case "fmla_asisdelem_rh_h"_h:
+    case "fmls_asisdelem_rh_h"_h:
+    case "fmulx_asisdelem_rh_h"_h:
+      vf = kFormatH;
+      break;
   }
 
-  (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
+  (this->*Op)(vf,
+              rd,
+              rn,
+              ReadVRegister(rm_and_index.first),
+              rm_and_index.second);
 }
 
 
@@ -9363,6 +10146,34 @@
   }
 }
 
+void Simulator::SimulateNEONSHA3(const Instruction* instr) {
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  SimVRegister& rm = ReadVRegister(instr->GetRm());
+  SimVRegister& ra = ReadVRegister(instr->GetRa());  // bcax/eor3 only.
+  SimVRegister temp;  // Holds the intermediate so rd is written last.
+
+  switch (form_hash_) {  // No default: unlisted forms are a no-op here.
+    case "bcax_vvv16_crypto4"_h:
+      bic(kFormat16B, temp, rm, ra);
+      eor(kFormat16B, rd, rn, temp);  // rd = rn ^ bic(rm, ra)
+      break;
+    case "eor3_vvv16_crypto4"_h:
+      eor(kFormat16B, temp, rm, ra);
+      eor(kFormat16B, rd, rn, temp);  // rd = rn ^ rm ^ ra
+      break;
+    case "rax1_vvv2_cryptosha512_3"_h:
+      ror(kFormat2D, temp, rm, 63);  // rol(1) => ror(63)
+      eor(kFormat2D, rd, rn, temp);  // rd = rn ^ rol(rm, 1)
+      break;
+    case "xar_vvv2_crypto3_imm6"_h:  // Keep last: declares rot unbraced.
+      int rot = instr->ExtractBits(15, 10);  // Six-bit field <15:10>.
+      eor(kFormat2D, temp, rn, rm);
+      ror(kFormat2D, rd, temp, rot);  // rd = ror(rn ^ rm, rot)
+      break;
+  }
+}
+
 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
   SimVRegister& zd = ReadVRegister(instr->GetRd());
   SimVRegister& zn = ReadVRegister(instr->GetRn());
@@ -11549,7 +12360,7 @@
   VectorFormat format = instr->GetSVEVectorFormat();
   int64_t imm = instr->GetImmSVEIntWideSigned();
   int shift = instr->ExtractBit(13) * 8;
-  imm *= 1 << shift;
+  imm *= uint64_t{1} << shift;
 
   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
     case DUP_z_i:
@@ -11787,11 +12598,11 @@
   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
-  uint64_t base = ReadXRegister(instr->GetRn()) + offset;
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset;
   VectorFormat unpack_vform =
       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
   SimVRegister temp;
-  ld1r(vform, unpack_vform, temp, base, is_signed);
+  if (!ld1r(vform, unpack_vform, temp, base, is_signed)) return;
   mov_zeroing(vform,
               ReadVRegister(instr->GetRt()),
               ReadPRegister(instr->GetPgLow8()),
@@ -11805,9 +12616,11 @@
       int pl = GetPredicateLengthInBytes();
       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
-      uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+      uint64_t address = base + multiplier * pl;
       for (int i = 0; i < pl; i++) {
-        pt.Insert(i, MemRead<uint8_t>(address + i));
+        VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address + i));
+        pt.Insert(i, value);
       }
       LogPRead(instr->GetPt(), address);
       break;
@@ -11825,9 +12638,11 @@
       int vl = GetVectorLengthInBytes();
       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
-      uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+      uint64_t address = base + multiplier * vl;
       for (int i = 0; i < vl; i++) {
-        zt.Insert(i, MemRead<uint8_t>(address + i));
+        VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address + i));
+        zt.Insert(i, value);
       }
       LogZRead(instr->GetRt(), address);
       break;
@@ -12063,9 +12878,10 @@
   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = ReadXRegister(instr->GetRm());
   offset <<= msize_in_bytes_log2;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEFaultTolerantLoadHelper(vform,
                              ReadPRegister(instr->GetPgLow8()),
@@ -12108,9 +12924,10 @@
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
   int vl = GetVectorLengthInBytes();
   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset =
       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEFaultTolerantLoadHelper(vform,
                              ReadPRegister(instr->GetPgLow8()),
@@ -12144,8 +12961,9 @@
   }
   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
   int vl = GetVectorLengthInBytes();
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredLoadHelper(vform,
                           pg,
@@ -12177,8 +12995,9 @@
       break;
   }
   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredLoadHelper(vform,
                           pg,
@@ -12209,7 +13028,7 @@
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
 
   for (unsigned i = 0; i < dwords; i++) {
-    ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
+    if (!ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes))) return;
   }
   mov_zeroing(vform, zt, pg, zt);
   dup_element(vform_dst, zt, zt, 0);
@@ -12236,7 +13055,7 @@
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
   offset <<= msz;
   for (unsigned i = 0; i < bytes; i++) {
-    ld1(kFormatVnB, zt, i, addr + offset + i);
+    if (!ld1(kFormatVnB, zt, i, addr + offset + i)) return;
   }
   mov_zeroing(vform, zt, pg, zt);
   dup_element(vform_dst, zt, zt, 0);
@@ -12293,7 +13112,7 @@
     case LD4H_z_p_br_contiguous:
     case LD4W_z_p_br_contiguous: {
       int msz = instr->ExtractBits(24, 23);
-      uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
+      uint64_t offset = ReadXRegister(instr->GetRm()) * (uint64_t{1} << msz);
       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
       LogicSVEAddressVector addr(
           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
@@ -12320,7 +13139,7 @@
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       SVEOffsetModifier mod =
           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
       LogicSVEAddressVector addr(base,
@@ -12350,7 +13169,7 @@
     case ST1W_z_p_bz_s_x32_unscaled: {
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       SVEOffsetModifier mod =
           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
       LogicSVEAddressVector addr(base,
@@ -12405,7 +13224,7 @@
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       LogicSVEAddressVector addr(base,
                                  &ReadVRegister(instr->GetRm()),
                                  kFormatVnD,
@@ -12434,7 +13253,7 @@
     case ST1W_z_p_bz_d_64_unscaled: {
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       LogicSVEAddressVector addr(base,
                                  &ReadVRegister(instr->GetRm()),
                                  kFormatVnD,
@@ -12462,7 +13281,7 @@
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       SVEOffsetModifier mod =
           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
       LogicSVEAddressVector addr(base,
@@ -12494,7 +13313,7 @@
     case ST1W_z_p_bz_d_x32_unscaled: {
       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
-      uint64_t base = ReadXRegister(instr->GetRn());
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       SVEOffsetModifier mod =
           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
       LogicSVEAddressVector addr(base,
@@ -12567,8 +13386,9 @@
   }
   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
   int vl = GetVectorLengthInBytes();
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
 }
@@ -12596,8 +13416,9 @@
       break;
   }
   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
 }
@@ -12614,11 +13435,12 @@
       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       uint64_t offset =
           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
       VectorFormat vform =
           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
-      LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+      LogicSVEAddressVector addr(base + offset);
       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
       SVEStructuredStoreHelper(vform,
                                ReadPRegister(instr->GetPgLow8()),
@@ -12639,11 +13461,12 @@
     case ST1D_z_p_br:
     case ST1H_z_p_br:
     case ST1W_z_p_br: {
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
       uint64_t offset = ReadXRegister(instr->GetRm());
       offset <<= instr->ExtractBits(24, 23);
       VectorFormat vform =
           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
-      LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+      LogicSVEAddressVector addr(base + offset);
       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
       SVEStructuredStoreHelper(vform,
                                ReadPRegister(instr->GetPgLow8()),
@@ -12725,7 +13548,7 @@
     case ST4H_z_p_br_contiguous:
     case ST4W_z_p_br_contiguous: {
       int msz = instr->ExtractBits(24, 23);
-      uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
+      uint64_t offset = ReadXRegister(instr->GetRm()) * (uint64_t{1} << msz);
       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
       LogicSVEAddressVector addr(
           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
@@ -12750,9 +13573,10 @@
       int pl = GetPredicateLengthInBytes();
       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
-      uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+      uint64_t address = base + multiplier * pl;
       for (int i = 0; i < pl; i++) {
-        MemWrite(address + i, pt.GetLane<uint8_t>(i));
+        if (!MemWrite(address + i, pt.GetLane<uint8_t>(i))) return;
       }
       LogPWrite(instr->GetPt(), address);
       break;
@@ -12770,9 +13594,10 @@
       int vl = GetVectorLengthInBytes();
       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
-      uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
+      uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+      uint64_t address = base + multiplier * vl;
       for (int i = 0; i < vl; i++) {
-        MemWrite(address + i, zt.GetLane<uint8_t>(i));
+        if (!MemWrite(address + i, zt.GetLane<uint8_t>(i))) return;
       }
       LogZWrite(instr->GetRt(), address);
       break;
@@ -13752,10 +14577,11 @@
   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset =
       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredLoadHelper(vform,
                           ReadPRegister(instr->GetPgLow8()),
@@ -13799,9 +14625,10 @@
   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   uint64_t offset = ReadXRegister(instr->GetRm());
   offset <<= msize_in_bytes_log2;
-  LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+  LogicSVEAddressVector addr(base + offset);
   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
   SVEStructuredLoadHelper(vform,
                           ReadPRegister(instr->GetPgLow8()),
@@ -13820,6 +14647,331 @@
   abort();
 }
 
+void Simulator::Simulate_XdSP_XnSP_Xm(const Instruction* instr) {
+  VIXL_ASSERT(form_hash_ == Hash("irg_64i_dp_2src"));  // Only IRG is expected.
+  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+  uint64_t rm = ReadXRegister(instr->GetRm());
+  uint64_t tag = GenerateRandomTag(rm & 0xffff);  // Only Xm<15:0> is passed.
+  uint64_t new_val = GetAddressWithAllocationTag(rn, tag);
+  WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
+}
+
+void Simulator::SimulateMTEAddSubTag(const Instruction* instr) {
+  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+  uint64_t rn_tag = GetAllocationTagFromAddress(rn);
+  uint64_t tag_offset = instr->ExtractBits(13, 10);  // 4-bit tag immediate.
+  // TODO: implement GCR_EL1.Exclude to provide a tag exclusion list.
+  uint64_t new_tag = ChooseNonExcludedTag(rn_tag, tag_offset);
+
+  uint64_t offset = instr->ExtractBits(21, 16) * kMTETagGranuleInBytes;
+  int carry = 0;
+  if (form_hash_ == Hash("subg_64_addsub_immtags")) {
+    offset = ~offset;  // rn - offset is computed as rn + ~offset + 1.
+    carry = 1;
+  } else {
+    VIXL_ASSERT(form_hash_ == Hash("addg_64_addsub_immtags"));
+  }
+  uint64_t new_val =
+      AddWithCarry(kXRegSize, /* set_flags = */ false, rn, offset, carry);
+  new_val = GetAddressWithAllocationTag(new_val, new_tag);
+  WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
+}
+
+void Simulator::SimulateMTETagMaskInsert(const Instruction* instr) {
+  VIXL_ASSERT(form_hash_ == Hash("gmi_64g_dp_2src"));  // Only GMI is expected.
+  uint64_t mask = ReadXRegister(instr->GetRm());
+  uint64_t tag = GetAllocationTagFromAddress(
+      ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
+  uint64_t mask_bit = uint64_t{1} << tag;  // One-hot bit selected by the tag.
+  WriteXRegister(instr->GetRd(), mask | mask_bit);  // Xd = Xm | (1 << tag).
+}
+
+void Simulator::SimulateMTESubPointer(const Instruction* instr) {
+  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+  uint64_t rm = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
+
+  VIXL_ASSERT((form_hash_ == Hash("subps_64s_dp_2src")) ||
+              (form_hash_ == Hash("subp_64s_dp_2src")));
+  bool set_flags = (form_hash_ == Hash("subps_64s_dp_2src"));
+  // Subtract using only bits 55:0 of each operand: rn - rm == rn + ~rm + 1.
+  rn = ExtractSignedBitfield64(55, 0, rn);
+  rm = ExtractSignedBitfield64(55, 0, rm);
+  uint64_t new_val = AddWithCarry(kXRegSize, set_flags, rn, ~rm, 1);
+  WriteXRegister(instr->GetRd(), new_val);
+}
+
+void Simulator::SimulateMTEStoreTagPair(const Instruction* instr) {
+  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+  uint64_t rt = ReadXRegister(instr->GetRt());
+  uint64_t rt2 = ReadXRegister(instr->GetRt2());
+  int offset = instr->GetImmLSPair() * static_cast<int>(kMTETagGranuleInBytes);
+
+  AddrMode addr_mode = Offset;
+  switch (form_hash_) {
+    case Hash("stgp_64_ldstpair_off"):
+      // Default is the offset mode.
+      break;
+    case Hash("stgp_64_ldstpair_post"):
+      addr_mode = PostIndex;
+      break;
+    case Hash("stgp_64_ldstpair_pre"):
+      addr_mode = PreIndex;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
+  if (!IsAligned(address, kMTETagGranuleInBytes)) {
+    VIXL_ALIGNMENT_EXCEPTION();
+  }
+
+  int tag = GetAllocationTagFromAddress(rn);  // Tag of the base register.
+  meta_data_.SetMTETag(address, tag);  // NOTE(review): no `instr`, cf. STG.
+
+  if (!MemWrite<uint64_t>(address, rt)) return;  // Stop on a failed write.
+  if (!MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2)) return;
+}
+
+void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
+  uint64_t rt = ReadXRegister(instr->GetRt(), Reg31IsStackPointer);
+  int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
+
+  AddrMode addr_mode = Offset;
+  switch (form_hash_) {
+    case Hash("st2g_64soffset_ldsttags"):
+    case Hash("stg_64soffset_ldsttags"):
+    case Hash("stz2g_64soffset_ldsttags"):
+    case Hash("stzg_64soffset_ldsttags"):
+      // Default is the offset mode.
+      break;
+    case Hash("st2g_64spost_ldsttags"):
+    case Hash("stg_64spost_ldsttags"):
+    case Hash("stz2g_64spost_ldsttags"):
+    case Hash("stzg_64spost_ldsttags"):
+      addr_mode = PostIndex;
+      break;
+    case Hash("st2g_64spre_ldsttags"):
+    case Hash("stg_64spre_ldsttags"):
+    case Hash("stz2g_64spre_ldsttags"):
+    case Hash("stzg_64spre_ldsttags"):
+      addr_mode = PreIndex;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  bool is_pair = false;  // Set for the st2g/stz2g forms (two granules).
+  switch (form_hash_) {
+    case Hash("st2g_64soffset_ldsttags"):
+    case Hash("st2g_64spost_ldsttags"):
+    case Hash("st2g_64spre_ldsttags"):
+    case Hash("stz2g_64soffset_ldsttags"):
+    case Hash("stz2g_64spost_ldsttags"):
+    case Hash("stz2g_64spre_ldsttags"):
+      is_pair = true;
+      break;
+    default:
+      break;
+  }
+
+  bool is_zeroing = false;  // Set for the stzg/stz2g forms (zero-fill data).
+  switch (form_hash_) {
+    case Hash("stz2g_64soffset_ldsttags"):
+    case Hash("stz2g_64spost_ldsttags"):
+    case Hash("stz2g_64spre_ldsttags"):
+    case Hash("stzg_64soffset_ldsttags"):
+    case Hash("stzg_64spost_ldsttags"):
+    case Hash("stzg_64spre_ldsttags"):
+      is_zeroing = true;
+      break;
+    default:
+      break;
+  }
+
+  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
+
+  if (is_zeroing) {  // NOTE(review): alignment is only checked on this path.
+    if (!IsAligned(address, kMTETagGranuleInBytes)) {
+      VIXL_ALIGNMENT_EXCEPTION();
+    }
+    VIXL_STATIC_ASSERT(kMTETagGranuleInBytes >= sizeof(uint64_t));
+    VIXL_STATIC_ASSERT(kMTETagGranuleInBytes % sizeof(uint64_t) == 0);
+
+    size_t fill_size = kMTETagGranuleInBytes;
+    if (is_pair) {
+      fill_size += kMTETagGranuleInBytes;
+    }
+
+    size_t fill_offset = 0;  // Zero the region one uint64_t at a time.
+    while (fill_offset < fill_size) {
+      if (!MemWrite<uint64_t>(address + fill_offset, 0)) return;
+      fill_offset += sizeof(uint64_t);
+    }
+  }
+
+  int tag = GetAllocationTagFromAddress(rt);  // The tag comes from Xt|SP.
+  meta_data_.SetMTETag(address, tag, instr);
+  if (is_pair) {
+    meta_data_.SetMTETag(address + kMTETagGranuleInBytes, tag, instr);
+  }
+}
+
+void Simulator::SimulateMTELoadTag(const Instruction* instr) {
+  uint64_t rt = ReadXRegister(instr->GetRt());
+  int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
+
+  switch (form_hash_) {
+    case Hash("ldg_64loffset_ldsttags"):  // The only supported form.
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, Offset);
+  address = AlignDown(address, kMTETagGranuleInBytes);  // Granule base.
+  uint64_t tag = meta_data_.GetMTETag(address, instr);
+  WriteXRegister(instr->GetRt(), GetAddressWithAllocationTag(rt, tag));
+}
+
+void Simulator::SimulateCpyFP(const Instruction* instr) {
+  MOPSPHelper<"cpy"_h>(instr);  // Unlike SimulateCpyP, no direction is chosen.
+  LogSystemRegister(NZCV);
+}
+
+void Simulator::SimulateCpyP(const Instruction* instr) {
+  MOPSPHelper<"cpy"_h>(instr);
+
+  int d = instr->GetRd();
+  int n = instr->GetRn();
+  int s = instr->GetRs();
+
+  // Determine copy direction. For cases in which direction is implementation
+  // defined, use forward.
+  bool is_backwards = false;
+  uint64_t xs = ReadXRegister(s);
+  uint64_t xd = ReadXRegister(d);
+  uint64_t xn = ReadXRegister(n);
+
+  // Ignore the top byte of addresses for comparisons. We can use xn as is,
+  // as it should have zero in bits 63:55.
+  uint64_t xs_tbi = ExtractUnsignedBitfield64(55, 0, xs);
+  uint64_t xd_tbi = ExtractUnsignedBitfield64(55, 0, xd);
+  VIXL_ASSERT(ExtractUnsignedBitfield64(63, 55, xn) == 0);
+  if ((xs_tbi < xd_tbi) && ((xs_tbi + xn) > xd_tbi)) {
+    is_backwards = true;  // Source starts below and runs into the destination.
+    WriteXRegister(s, xs + xn);  // Both pointers now sit one past the end.
+    WriteXRegister(d, xd + xn);
+  }
+
+  ReadNzcv().SetN(is_backwards ? 1 : 0);  // Read back by SimulateCpyM.
+  LogSystemRegister(NZCV);
+}
+
+void Simulator::SimulateCpyM(const Instruction* instr) {
+  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
+  VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "cpy"_h));
+
+  int d = instr->GetRd();
+  int n = instr->GetRn();
+  int s = instr->GetRs();
+
+  uint64_t xd = ReadXRegister(d);
+  uint64_t xn = ReadXRegister(n);
+  uint64_t xs = ReadXRegister(s);
+  bool is_backwards = ReadN();  // The N flag selects a descending copy.
+
+  int step = 1;
+  if (is_backwards) {
+    step = -1;
+    xs--;  // Start on the byte just below the incoming pointers.
+    xd--;
+  }
+
+  while (xn--) {  // Copy one byte per iteration.
+    VIXL_DEFINE_OR_RETURN(temp, MemRead<uint8_t>(xs));
+    if (!MemWrite<uint8_t>(xd, temp)) return;
+    LogMemTransfer(xd, xs, temp);
+    xs += step;
+    xd += step;
+  }
+
+  if (is_backwards) {
+    xs++;  // Undo the final step so the pointers sit on the last byte copied.
+    xd++;
+  }
+
+  WriteXRegister(d, xd);
+  WriteXRegister(n, 0);  // Nothing is left to copy.
+  WriteXRegister(s, xs);
+}
+
+void Simulator::SimulateCpyE(const Instruction* instr) {
+  USE(instr);  // `instr` is otherwise referenced only inside VIXL_ASSERT.
+  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
+  VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "cpy"_h));
+  // This implementation does nothing in the epilogue; all copying is completed
+  // in the "main" part.
+}
+
+void Simulator::SimulateSetP(const Instruction* instr) {
+  MOPSPHelper<"set"_h>(instr);  // Same helper as the cpy/setg "P" handlers.
+  LogSystemRegister(NZCV);
+}
+
+void Simulator::SimulateSetM(const Instruction* instr) {
+  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
+  VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "set"_h));
+
+  uint64_t xd = ReadXRegister(instr->GetRd());
+  uint64_t xn = ReadXRegister(instr->GetRn());
+  uint64_t xs = ReadXRegister(instr->GetRs());
+  // Write the low byte of Xs xn times; a failed write leaves Xd/Xn unchanged.
+  while (xn--) {
+    LogWrite(instr->GetRs(), GetPrintRegPartial(kPrintRegLaneSizeB), xd);
+    if (!MemWrite<uint8_t>(xd++, static_cast<uint8_t>(xs))) return;
+  }
+  WriteXRegister(instr->GetRd(), xd);  // One past the last byte written.
+  WriteXRegister(instr->GetRn(), 0);   // Nothing is left to set.
+}
+
+void Simulator::SimulateSetE(const Instruction* instr) {
+  USE(instr);  // `instr` is otherwise referenced only inside VIXL_ASSERT.
+  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
+  VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "set"_h));
+  // This implementation does nothing in the epilogue; all setting is completed
+  // in the "main" part.
+}
+
+void Simulator::SimulateSetGP(const Instruction* instr) {
+  MOPSPHelper<"setg"_h>(instr);
+
+  uint64_t xd = ReadXRegister(instr->GetRd());
+  uint64_t xn = ReadXRegister(instr->GetRn());
+  // The address (if xn > 0) and the size must both be granule-aligned.
+  if ((xn > 0) && !IsAligned(xd, kMTETagGranuleInBytes)) {
+    VIXL_ALIGNMENT_EXCEPTION();
+  }
+
+  if (!IsAligned(xn, kMTETagGranuleInBytes)) {
+    VIXL_ALIGNMENT_EXCEPTION();
+  }
+
+  LogSystemRegister(NZCV);
+}
+
+void Simulator::SimulateSetGM(const Instruction* instr) {
+  uint64_t xd = ReadXRegister(instr->GetRd());
+  uint64_t xn = ReadXRegister(instr->GetRn());
+  // Tag whole granules only, so a misaligned size cannot wrap xn around.
+  int tag = GetAllocationTagFromAddress(xd);
+  while (xn >= kMTETagGranuleInBytes) {
+    meta_data_.SetMTETag(xd, tag);
+    xd += kMTETagGranuleInBytes;
+    xn -= kMTETagGranuleInBytes;
+  }
+  SimulateSetM(instr);  // Then write the data and update Xd/Xn as for SETM.
+}
 
 void Simulator::DoTrace(const Instruction* instr) {
   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
@@ -13989,22 +15141,46 @@
   VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
   // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
   // to call are passed inlined in the assembly.
-  uintptr_t call_wrapper_address =
-      MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset);
-  uintptr_t function_address =
-      MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset);
-  RuntimeCallType call_type = static_cast<RuntimeCallType>(
-      MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
+  VIXL_DEFINE_OR_RETURN(call_wrapper_address,
+                        MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset));
+  VIXL_DEFINE_OR_RETURN(function_address,
+                        MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset));
+  VIXL_DEFINE_OR_RETURN(call_type,
+                        MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
   auto runtime_call_wrapper =
       reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);
 
-  if (call_type == kCallRuntime) {
-    WriteRegister(kLinkRegCode,
-                  instr->GetInstructionAtOffset(kRuntimeCallLength));
+  if (static_cast<RuntimeCallType>(call_type) == kCallRuntime) {
+    const Instruction* addr = instr->GetInstructionAtOffset(kRuntimeCallLength);
+    WriteLr(addr);
+    GCSPush(reinterpret_cast<uint64_t>(addr));
   }
   runtime_call_wrapper(this, function_address);
   // Read the return address from `lr` and write it into `pc`.
-  WritePc(ReadRegister<Instruction*>(kLinkRegCode));
+  uint64_t addr = ReadRegister<uint64_t>(kLinkRegCode);
+  if (IsGCSCheckEnabled()) {
+    uint64_t expected_lr = GCSPeek();
+    char msg[128];
+    if (expected_lr != 0) {
+      if ((expected_lr & 0x3) != 0) {
+        snprintf(msg,
+                 sizeof(msg),
+                 "GCS contains misaligned return address: 0x%016" PRIx64 "\n",
+                 expected_lr);
+        ReportGCSFailure(msg);
+      } else if ((addr != 0) && (addr != expected_lr)) {
+        snprintf(msg,
+                 sizeof(msg),
+                 "GCS mismatch: lr = 0x%016" PRIx64 ", gcs = 0x%016" PRIx64
+                 "\n",
+                 addr,
+                 expected_lr);
+        ReportGCSFailure(msg);
+      }
+      GCSPop();
+    }
+  }
+  WritePc(reinterpret_cast<Instruction*>(addr));
 }
 #else
 void Simulator::DoRuntimeCall(const Instruction* instr) {
@@ -14029,7 +15205,7 @@
   // Read the kNone-terminated list of features.
   CPUFeatures parameters;
   while (true) {
-    ElementType feature = MemRead<ElementType>(instr + offset);
+    VIXL_DEFINE_OR_RETURN(feature, MemRead<ElementType>(instr + offset));
     offset += element_size;
     if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
     parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
@@ -14072,6 +15248,40 @@
   saved_cpu_features_.pop_back();
 }
 
+#ifdef VIXL_HAS_SIMULATED_MMAP
+void* Simulator::Mmap(
+    void* address, size_t length, int prot, int flags, int fd, off_t offset) {
+  // The underlying system `mmap` in the simulated environment doesn't recognize
+  // PROT_BTI and PROT_MTE. Although the kernel probably just ignores the bits
+  // it doesn't know, masking those protections out before calling is safer.
+  int internal_prot = prot;
+  prot &= ~(PROT_BTI | PROT_MTE);
+
+  void* mapped = mmap(address, length, prot, flags, fd, offset);
+  // Return MAP_FAILED untouched so that callers can still detect the failure.
+  if (mapped == MAP_FAILED) return mapped;
+  if ((internal_prot & PROT_MTE) == 0) return mapped;
+
+  // The address returned by `mmap` isn't tagged: tag the mapping and pointer.
+  uint64_t address2 = reinterpret_cast<uint64_t>(mapped);
+  int tag = static_cast<int>(GenerateRandomTag());
+  SetGranuleTag(address2, tag, length);
+  address2 = GetAddressWithAllocationTag(address2, tag);
+  return reinterpret_cast<void*>(address2);
+}
+
+
+int Simulator::Munmap(void* address, size_t length, int prot) {
+  if (prot & PROT_MTE) {  // `prot` only selects the untagging path below.
+    // Untag the address since `munmap` doesn't recognize the memory tagging
+    // managed by the Simulator.
+    address = AddressUntag(address);
+    CleanGranuleTag(reinterpret_cast<char*>(address), length);
+  }
+
+  return munmap(address, length);  // Result of the host `munmap`.
+}
+#endif  // VIXL_HAS_SIMULATED_MMAP
 
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 1fdbb6f..2483fd7 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -28,15 +28,18 @@
 #define VIXL_AARCH64_SIMULATOR_AARCH64_H_
 
 #include <memory>
+#include <mutex>
+#include <random>
 #include <unordered_map>
 #include <vector>
 
+#include "../cpu-features.h"
 #include "../globals-vixl.h"
 #include "../utils-vixl.h"
 
-#include "cpu-features.h"
 #include "abi-aarch64.h"
 #include "cpu-features-auditor-aarch64.h"
+#include "debugger-aarch64.h"
 #include "disasm-aarch64.h"
 #include "instructions-aarch64.h"
 #include "simulator-constants-aarch64.h"
@@ -53,9 +56,42 @@
 #endif
 #endif
 
+// The hosts that the Simulator runs on may not have these flags defined.
+#ifndef PROT_BTI
+#define PROT_BTI 0x10
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE 0x20
+#endif
+
 namespace vixl {
 namespace aarch64 {
 
+class Simulator;
+struct RuntimeCallStructHelper;
+
+enum class MemoryAccessResult { Success = 0, Failure = 1 };
+
+// Try to access a piece of memory at the given address. Accessing that memory
+// might raise a signal which, if handled by a custom signal handler, should
+// set up the native and simulated context in order to continue. Return whether
+// the memory access failed (i.e. raised a signal) or succeeded.
+MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size);
+
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+// Access a byte of memory from the address at the given offset. If the memory
+// could be accessed then return MemoryAccessResult::Success. If the memory
+// could not be accessed, and therefore raised a signal, set up the simulated
+// context and return MemoryAccessResult::Failure.
+//
+// If a signal is raised then it is expected that the signal handler will place
+// MemoryAccessResult::Failure in the native return register and the address of
+// _vixl_internal_AccessMemory_continue into the native instruction pointer.
+extern "C" MemoryAccessResult _vixl_internal_ReadMemory(uintptr_t address,
+                                                        uintptr_t offset);
+extern "C" uintptr_t _vixl_internal_AccessMemory_continue();
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
+
 class SimStack {
  public:
   SimStack() {}
@@ -124,7 +160,7 @@
 
   // Allocate the stack, locking the parameters.
   Allocated Allocate() {
-    size_t align_to = 1 << align_log2_;
+    size_t align_to = uint64_t{1} << align_log2_;
     size_t l = AlignUp(limit_guard_size_, align_to);
     size_t u = AlignUp(usable_size_, align_to);
     size_t b = AlignUp(base_guard_size_, align_to);
@@ -153,51 +189,249 @@
   static const size_t kDefaultUsableSize = 8 * 1024;
 };
 
+// Armv8.5 MTE helpers.
+inline int GetAllocationTagFromAddress(uint64_t address) {
+  return static_cast<int>(ExtractUnsignedBitfield64(59, 56, address));
+}
+
+template <typename T>
+T AddressUntag(T address) {
+  // Cast the address using a C-style cast. A reinterpret_cast would be
+  // appropriate, but it can't cast one integral type to another.
+  uint64_t bits = (uint64_t)address;
+  return (T)(bits & ~kAddressTagMask);
+}
+
+// A callback function, called when a function has been intercepted if a
+// BranchInterception entry exists in branch_interceptions. The address of
+// the intercepted function is passed to the callback. For usage see
+// BranchInterception.
+using InterceptionCallback = std::function<void(uint64_t)>;
+
+class MetaDataDepot {
+ public:
+  class MetaDataMTE {
+   public:
+    explicit MetaDataMTE(int tag) : tag_(tag) {}
+
+    int GetTag() const { return tag_; }
+    void SetTag(int tag) {
+      VIXL_ASSERT(IsUint4(tag));
+      tag_ = tag;
+    }
+
+    static bool IsActive() { return is_active; }
+    static void SetActive(bool value) { is_active = value; }
+
+   private:
+    static bool is_active;
+    int16_t tag_;
+
+    friend class MetaDataDepot;
+  };
+
+  // Generate a key for metadata recording from an untagged address.
+  template <typename T>
+  uint64_t GenerateMTEkey(T address) const {
+    // Cast the address using a C-style cast. A reinterpret_cast would be
+    // appropriate, but it can't cast one integral type to another.
+    return (uint64_t)(AddressUntag(address)) >> kMTETagGranuleInBytesLog2;
+  }
+
+  template <typename R, typename T>
+  R GetAttribute(T map, uint64_t key) {
+    auto pair = map->find(key);
+    R value = (pair == map->end()) ? nullptr : &pair->second;
+    return value;
+  }
+
+  template <typename T>
+  int GetMTETag(T address, Instruction const* pc = nullptr) {
+    uint64_t key = GenerateMTEkey(address);
+    MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);
+
+    if (!m) {
+      std::stringstream sstream;
+      sstream << std::hex << "MTE ERROR : instruction at 0x"
+              << reinterpret_cast<uint64_t>(pc)
+              << " touched a unallocated memory location 0x"
+              << (uint64_t)(address) << ".\n";
+      VIXL_ABORT_WITH_MSG(sstream.str().c_str());
+    }
+
+    return m->GetTag();
+  }
+
+  template <typename T>
+  void SetMTETag(T address, int tag, Instruction const* pc = nullptr) {
+    VIXL_ASSERT(IsAligned((uintptr_t)address, kMTETagGranuleInBytes));
+    uint64_t key = GenerateMTEkey(address);
+    MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);
+
+    if (!m) {
+      metadata_mte_.insert({key, MetaDataMTE(tag)});
+    } else {
+      // Overwrite
+      if (m->GetTag() == tag) {
+        std::stringstream sstream;
+        sstream << std::hex << "MTE WARNING : instruction at 0x"
+                << reinterpret_cast<uint64_t>(pc)
+                << ", the same tag is assigned to the address 0x"
+                << (uint64_t)(address) << ".\n";
+        VIXL_WARNING(sstream.str().c_str());
+      }
+      m->SetTag(tag);
+    }
+  }
+
+  template <typename T>
+  size_t CleanMTETag(T address) {
+    VIXL_ASSERT(
+        IsAligned(reinterpret_cast<uintptr_t>(address), kMTETagGranuleInBytes));
+    uint64_t key = GenerateMTEkey(address);
+    return metadata_mte_.erase(key);
+  }
+
+  size_t GetTotalCountMTE() { return metadata_mte_.size(); }
+
+  // A pure virtual struct that allows the templated BranchInterception struct
+  // to be stored. For more information see BranchInterception.
+  struct BranchInterceptionAbstract {
+    virtual ~BranchInterceptionAbstract() {}
+    // Call the callback_ if one exists, otherwise do a RuntimeCall.
+    virtual void operator()(Simulator* simulator) const = 0;
+  };
+
+  // An entry denoting a function to intercept when branched to during
+  // simulator execution. When a function is intercepted the callback will be
+  // called if one exists otherwise the function will be passed to
+  // RuntimeCall.
+  template <typename R, typename... P>
+  struct BranchInterception : public BranchInterceptionAbstract {
+    BranchInterception(R (*function)(P...),
+                       InterceptionCallback callback = nullptr)
+        : function_(function), callback_(callback) {}
+
+    void operator()(Simulator* simulator) const VIXL_OVERRIDE;
+
+   private:
+    // Pointer to the function that will be intercepted.
+    R (*function_)(P...);
+
+    // Function to be called instead of function_
+    InterceptionCallback callback_;
+  };
+
+  // Register a new BranchInterception object. When 'function' is later
+  // branched to (e.g. "blr function"), 'callback' is called instead if one
+  // was provided; otherwise a runtime call is performed on 'function'.
+  //
+  // For example: this can be used to always perform runtime calls on
+  // non-AArch64 functions without using the macroassembler.
+  //
+  // Note: only unconditional branches to registers are currently supported to
+  // be intercepted, e.g: "br"/"blr".
+  //
+  // TODO: support intercepting other branch types.
+  template <typename R, typename... P>
+  void RegisterBranchInterception(R (*function)(P...),
+                                  InterceptionCallback callback = nullptr) {
+    uintptr_t addr = reinterpret_cast<uintptr_t>(function);
+    std::unique_ptr<BranchInterceptionAbstract> intercept =
+        std::make_unique<BranchInterception<R, P...>>(function, callback);
+    branch_interceptions_.insert(std::make_pair(addr, std::move(intercept)));
+  }
+
+  // Search for a branch interception registered for the branch_target
+  // address. If one is found, return it; otherwise return nullptr.
+  BranchInterceptionAbstract* FindBranchInterception(uint64_t branch_target) {
+    // Look up the target address; return the interception if one is found.
+    auto search = branch_interceptions_.find(branch_target);
+    if (search != branch_interceptions_.end()) {
+      return search->second.get();
+    } else {
+      return nullptr;
+    }
+  }
+
+  void ResetState() { branch_interceptions_.clear(); }
+
+ private:
+  // Tag recorded for each allocated tag granule, keyed by GenerateMTEkey().
+  std::unordered_map<uint64_t, class MetaDataMTE> metadata_mte_;
+
+  // Store a map of addresses to be intercepted and their corresponding branch
+  // interception object, see 'BranchInterception'.
+  std::unordered_map<uintptr_t, std::unique_ptr<BranchInterceptionAbstract>>
+      branch_interceptions_;
+};
+
+
 // Representation of memory, with typed getters and setters for access.
 class Memory {
  public:
-  explicit Memory(SimStack::Allocated stack) : stack_(std::move(stack)) {}
+  explicit Memory(SimStack::Allocated stack) : stack_(std::move(stack)) {
+    metadata_depot_ = nullptr;
+  }
 
   const SimStack::Allocated& GetStack() { return stack_; }
 
-  template <typename T>
-  T AddressUntag(T address) const {
-    // Cast the address using a C-style cast. A reinterpret_cast would be
-    // appropriate, but it can't cast one integral type to another.
-    uint64_t bits = (uint64_t)address;
-    return (T)(bits & ~kAddressTagMask);
+  template <typename A>
+  bool IsMTETagsMatched(A address, Instruction const* pc = nullptr) const {
+    if (MetaDataDepot::MetaDataMTE::IsActive()) {
+      // Cast the address using a C-style cast. A reinterpret_cast would be
+      // appropriate, but it can't cast one integral type to another.
+      uint64_t addr = (uint64_t)address;
+      int pointer_tag = GetAllocationTagFromAddress(addr);
+      int memory_tag = metadata_depot_->GetMTETag(AddressUntag(addr), pc);
+      return pointer_tag == memory_tag;
+    }
+    return true;
   }
 
   template <typename T, typename A>
-  T Read(A address) const {
+  std::optional<T> Read(A address, Instruction const* pc = nullptr) const {
     T value;
-    address = AddressUntag(address);
     VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
                        (sizeof(value) == 4) || (sizeof(value) == 8) ||
                        (sizeof(value) == 16));
-    auto base = reinterpret_cast<const char*>(address);
+    auto base = reinterpret_cast<const char*>(AddressUntag(address));
     if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
       VIXL_ABORT_WITH_MSG("Attempt to read from stack guard region");
     }
+    if (!IsMTETagsMatched(address, pc)) {
+      VIXL_ABORT_WITH_MSG("Tag mismatch.");
+    }
+    if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
+        MemoryAccessResult::Failure) {
+      return std::nullopt;
+    }
     memcpy(&value, base, sizeof(value));
     return value;
   }
 
   template <typename T, typename A>
-  void Write(A address, T value) const {
-    address = AddressUntag(address);
+  bool Write(A address, T value, Instruction const* pc = nullptr) const {
     VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
                        (sizeof(value) == 4) || (sizeof(value) == 8) ||
                        (sizeof(value) == 16));
-    auto base = reinterpret_cast<char*>(address);
+    auto base = reinterpret_cast<char*>(AddressUntag(address));
     if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
       VIXL_ABORT_WITH_MSG("Attempt to write to stack guard region");
     }
+    if (!IsMTETagsMatched(address, pc)) {
+      VIXL_ABORT_WITH_MSG("Tag mismatch.");
+    }
+    if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
+        MemoryAccessResult::Failure) {
+      return false;
+    }
     memcpy(base, &value, sizeof(value));
+    return true;
   }
 
   template <typename A>
-  uint64_t ReadUint(int size_in_bytes, A address) const {
+  std::optional<uint64_t> ReadUint(int size_in_bytes, A address) const {
     switch (size_in_bytes) {
       case 1:
         return Read<uint8_t>(address);
@@ -213,7 +447,7 @@
   }
 
   template <typename A>
-  int64_t ReadInt(int size_in_bytes, A address) const {
+  std::optional<int64_t> ReadInt(int size_in_bytes, A address) const {
     switch (size_in_bytes) {
       case 1:
         return Read<int8_t>(address);
@@ -229,7 +463,7 @@
   }
 
   template <typename A>
-  void Write(int size_in_bytes, A address, uint64_t value) const {
+  bool Write(int size_in_bytes, A address, uint64_t value) const {
     switch (size_in_bytes) {
       case 1:
         return Write(address, static_cast<uint8_t>(value));
@@ -241,10 +475,18 @@
         return Write(address, value);
     }
     VIXL_UNREACHABLE();
+    return false;
+  }
+
+  void AppendMetaData(MetaDataDepot* metadata_depot) {
+    VIXL_ASSERT(metadata_depot != nullptr);
+    VIXL_ASSERT(metadata_depot_ == nullptr);
+    metadata_depot_ = metadata_depot;
   }
 
  private:
   SimStack::Allocated stack_;
+  MetaDataDepot* metadata_depot_;
 };
 
 // Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
@@ -442,7 +684,7 @@
 
   void SetAllBits() {
     int chunk_size = sizeof(ChunkType) * kBitsPerByte;
-    ChunkType bits = GetUintMask(chunk_size);
+    ChunkType bits = static_cast<ChunkType>(GetUintMask(chunk_size));
     for (int lane = 0;
          lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
          lane++) {
@@ -495,6 +737,8 @@
   SimPRegister& register_;
 };
 
+using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
+
 // Representation of a vector register, with typed getters and setters for lanes
 // and additional information to represent lane state.
 class LogicVRegister {
@@ -623,6 +867,16 @@
     }
   }
 
+  void SetUint(VectorFormat vform, int index, vixl_uint128_t value) const {
+    if (LaneSizeInBitsFromFormat(vform) <= 64) {
+      SetUint(vform, index, value.second);
+      return;
+    }
+    VIXL_ASSERT((vform == kFormat1Q) || (vform == kFormatVnQ));
+    SetUint(kFormatVnD, 2 * index, value.second);
+    SetUint(kFormatVnD, 2 * index + 1, value.first);
+  }
+
   void SetUintArray(VectorFormat vform, const uint64_t* src) const {
     ClearForWrite(vform);
     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
@@ -1026,6 +1280,10 @@
   uint32_t seed_;
 };
 
+class Debugger;
+
+template <uint32_t mode>
+uint64_t CryptoOp(uint64_t x, uint64_t y, uint64_t z);
 
 class Simulator : public DecoderVisitor {
  public:
@@ -1042,7 +1300,7 @@
 
 
 #if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
-    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
+    (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
   // Templated `RunFrom` version taking care of passing arguments and returning
   // the result value.
   // This allows code like:
@@ -1101,6 +1359,8 @@
   static const Instruction* kEndOfSimAddress;
 
   // Simulation helpers.
+  bool IsSimulationFinished() const { return pc_ == kEndOfSimAddress; }
+
   const Instruction* ReadPc() const { return pc_; }
   VIXL_DEPRECATED("ReadPc", const Instruction* pc() const) { return ReadPc(); }
 
@@ -1109,7 +1369,7 @@
   void WritePc(const Instruction* new_pc,
                BranchLogMode log_mode = LogBranches) {
     if (log_mode == LogBranches) LogTakenBranch(new_pc);
-    pc_ = memory_.AddressUntag(new_pc);
+    pc_ = AddressUntag(new_pc);
     pc_modified_ = true;
   }
   VIXL_DEPRECATED("WritePc", void set_pc(const Instruction* new_pc)) {
@@ -1136,6 +1396,8 @@
   bool PcIsInGuardedPage() const { return guard_pages_; }
   void SetGuardedPages(bool guard_pages) { guard_pages_ = guard_pages; }
 
+  const Instruction* GetLastExecutedInstruction() const { return last_instr_; }
+
   void ExecuteInstruction() {
     // The program counter should always be aligned.
     VIXL_ASSERT(IsWordAligned(pc_));
@@ -1241,6 +1503,7 @@
   void SimulateSVESaturatingMulAddHigh(const Instruction* instr);
   void SimulateSVESaturatingMulHighIndex(const Instruction* instr);
   void SimulateSVEFPConvertLong(const Instruction* instr);
+  void SimulateSVEPmull128(const Instruction* instr);
   void SimulateMatrixMul(const Instruction* instr);
   void SimulateSVEFPMatrixMul(const Instruction* instr);
   void SimulateNEONMulByElementLong(const Instruction* instr);
@@ -1248,6 +1511,30 @@
   void SimulateNEONFPMulByElementLong(const Instruction* instr);
   void SimulateNEONComplexMulByElement(const Instruction* instr);
   void SimulateNEONDotProdByElement(const Instruction* instr);
+  void SimulateNEONSHA3(const Instruction* instr);
+  void SimulateMTEAddSubTag(const Instruction* instr);
+  void SimulateMTETagMaskInsert(const Instruction* instr);
+  void SimulateMTESubPointer(const Instruction* instr);
+  void SimulateMTELoadTag(const Instruction* instr);
+  void SimulateMTEStoreTag(const Instruction* instr);
+  void SimulateMTEStoreTagPair(const Instruction* instr);
+  void Simulate_XdSP_XnSP_Xm(const Instruction* instr);
+  void SimulateCpy(const Instruction* instr);
+  void SimulateCpyFP(const Instruction* instr);
+  void SimulateCpyP(const Instruction* instr);
+  void SimulateCpyM(const Instruction* instr);
+  void SimulateCpyE(const Instruction* instr);
+  void SimulateSetP(const Instruction* instr);
+  void SimulateSetM(const Instruction* instr);
+  void SimulateSetE(const Instruction* instr);
+  void SimulateSetGP(const Instruction* instr);
+  void SimulateSetGM(const Instruction* instr);
+  void SimulateSignedMinMax(const Instruction* instr);
+  void SimulateUnsignedMinMax(const Instruction* instr);
+  void SimulateSHA512(const Instruction* instr);
+
+  void VisitCryptoSM3(const Instruction* instr);
+  void VisitCryptoSM4(const Instruction* instr);
 
   // Integer register accessors.
 
@@ -1778,60 +2065,66 @@
   }
 
   template <typename T, typename A>
-  T MemRead(A address) const {
-    return memory_.Read<T>(address);
+  std::optional<T> MemRead(A address) const {
+    Instruction const* pc = ReadPc();
+    return memory_.Read<T>(address, pc);
   }
 
   template <typename T, typename A>
-  void MemWrite(A address, T value) const {
-    return memory_.Write(address, value);
+  bool MemWrite(A address, T value) const {
+    Instruction const* pc = ReadPc();
+    return memory_.Write(address, value, pc);
   }
 
   template <typename A>
-  uint64_t MemReadUint(int size_in_bytes, A address) const {
+  std::optional<uint64_t> MemReadUint(int size_in_bytes, A address) const {
     return memory_.ReadUint(size_in_bytes, address);
   }
 
   template <typename A>
-  int64_t MemReadInt(int size_in_bytes, A address) const {
+  std::optional<int64_t> MemReadInt(int size_in_bytes, A address) const {
     return memory_.ReadInt(size_in_bytes, address);
   }
 
   template <typename A>
-  void MemWrite(int size_in_bytes, A address, uint64_t value) const {
+  bool MemWrite(int size_in_bytes, A address, uint64_t value) const {
     return memory_.Write(size_in_bytes, address, value);
   }
 
-  void LoadLane(LogicVRegister dst,
+  bool LoadLane(LogicVRegister dst,
                 VectorFormat vform,
                 int index,
                 uint64_t addr) const {
     unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
-    LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
+    return LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
   }
 
-  void LoadUintToLane(LogicVRegister dst,
+  bool LoadUintToLane(LogicVRegister dst,
                       VectorFormat vform,
                       unsigned msize_in_bytes,
                       int index,
                       uint64_t addr) const {
-    dst.SetUint(vform, index, MemReadUint(msize_in_bytes, addr));
+    VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadUint(msize_in_bytes, addr));
+    dst.SetUint(vform, index, value);
+    return true;
   }
 
-  void LoadIntToLane(LogicVRegister dst,
+  bool LoadIntToLane(LogicVRegister dst,
                      VectorFormat vform,
                      unsigned msize_in_bytes,
                      int index,
                      uint64_t addr) const {
-    dst.SetInt(vform, index, MemReadInt(msize_in_bytes, addr));
+    VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadInt(msize_in_bytes, addr));
+    dst.SetInt(vform, index, value);
+    return true;
   }
 
-  void StoreLane(const LogicVRegister& src,
+  bool StoreLane(const LogicVRegister& src,
                  VectorFormat vform,
                  int index,
                  uint64_t addr) const {
     unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
-    MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
+    return MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
   }
 
   uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
@@ -1842,12 +2135,14 @@
       return ReadCPURegister<T>(operand.GetCPURegister());
     } else {
       VIXL_ASSERT(operand.IsMemOperand());
-      return MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
+      auto res = MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
+      VIXL_ASSERT(res);
+      return *res;
     }
   }
 
   template <typename T>
-  void WriteGenericOperand(GenericOperand operand,
+  bool WriteGenericOperand(GenericOperand operand,
                            T value,
                            RegLogMode log_mode = LogRegWrites) {
     if (operand.IsCPURegister()) {
@@ -1863,8 +2158,9 @@
       WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
     } else {
       VIXL_ASSERT(operand.IsMemOperand());
-      MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
+      return MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
     }
+    return true;
   }
 
   bool ReadN() const { return nzcv_.GetN() != 0; }
@@ -2240,12 +2536,16 @@
   // Other state updates, including system registers.
   void PrintSystemRegister(SystemRegister id);
   void PrintTakenBranch(const Instruction* target);
+  void PrintGCS(bool is_push, uint64_t addr, size_t entry);
   void LogSystemRegister(SystemRegister id) {
     if (ShouldTraceSysRegs()) PrintSystemRegister(id);
   }
   void LogTakenBranch(const Instruction* target) {
     if (ShouldTraceBranches()) PrintTakenBranch(target);
   }
+  void LogGCS(bool is_push, uint64_t addr, size_t entry) {
+    if (ShouldTraceSysRegs()) PrintGCS(is_push, addr, entry);
+  }
 
   // Trace memory accesses.
 
@@ -2275,6 +2575,14 @@
   void PrintPWrite(int rt_code, uintptr_t address) {
     PrintPAccess(rt_code, "->", address);
   }
+  void PrintWriteU64(uint64_t x, uintptr_t address) {
+    fprintf(stream_,
+            "#      0x%016" PRIx64 " -> %s0x%016" PRIxPTR "%s\n",
+            x,
+            clr_memory_address,
+            address,
+            clr_normal);
+  }
 
   // Like Print* (above), but respect GetTraceParameters().
   void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
@@ -2309,7 +2617,12 @@
   void LogPWrite(int rt_code, uintptr_t address) {
     if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
   }
-
+  void LogWriteU64(uint64_t x, uintptr_t address) {
+    if (ShouldTraceWrites()) PrintWriteU64(x, address);
+  }
+  void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
+    if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value);
+  }
   // Helpers for the above, where the access operation is parameterised.
   // - For loads, set op = "<-".
   // - For stores, set op = "->".
@@ -2321,6 +2634,7 @@
                     PrintRegisterFormat format,
                     const char* op,
                     uintptr_t address);
+  void PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value);
   // Simple, unpredicated SVE accesses always access the whole vector, and never
   // know the lane type, so these don't accept a `format`.
   void PrintZAccess(int rt_code, const char* op, uintptr_t address);
@@ -2588,6 +2902,51 @@
                    PointerType type);
   uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type);
   uint64_t StripPAC(uint64_t ptr, PointerType type);
+  void PACHelper(int dst,
+                 int src,
+                 PACKey key,
+                 decltype(&Simulator::AddPAC) pac_fn);
+
+  // Armv8.5 MTE helpers.
+  uint64_t ChooseNonExcludedTag(uint64_t tag,
+                                uint64_t offset,
+                                uint64_t exclude = 0) {
+    VIXL_ASSERT(IsUint4(tag) && IsUint4(offset) && IsUint16(exclude));
+
+    if (exclude == 0xffff) {
+      return 0;
+    }
+
+    if (offset == 0) {
+      while ((exclude & (uint64_t{1} << tag)) != 0) {
+        tag = (tag + 1) % 16;
+      }
+    }
+
+    while (offset > 0) {
+      offset--;
+      tag = (tag + 1) % 16;
+      while ((exclude & (uint64_t{1} << tag)) != 0) {
+        tag = (tag + 1) % 16;
+      }
+    }
+    return tag;
+  }
+
+  uint64_t GetAddressWithAllocationTag(uint64_t addr, uint64_t tag) {
+    VIXL_ASSERT(IsUint4(tag));
+    return (addr & ~(UINT64_C(0xf) << 56)) | (tag << 56);
+  }
+
+#if __linux__
+#define VIXL_HAS_SIMULATED_MMAP
+  // Create or remove a mapping with memory protection. Memory attributes such
+  // as MTE and BTI are represented by metadata in Simulator.
+  void* Mmap(
+      void* address, size_t length, int prot, int flags, int fd, off_t offset);
+
+  int Munmap(void* address, size_t length, int prot);
+#endif
 
   // The common CPUFeatures interface with the set of available features.
 
@@ -2610,7 +2969,7 @@
 // Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect
 // in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
 #if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
-    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
+    (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
 
 #define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
 
@@ -2656,6 +3015,7 @@
   R DoRuntimeCall(R (*function)(P...),
                   std::tuple<P...> arguments,
                   local_index_sequence<I...>) {
+    USE(arguments);
     return function(std::get<I>(arguments)...);
   }
 
@@ -2667,7 +3027,10 @@
     R return_value = DoRuntimeCall(function,
                                    argument_operands,
                                    __local_index_sequence_for<P...>{});
-    WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
+    bool succeeded =
+        WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
+    USE(succeeded);
+    VIXL_ASSERT(succeeded);
   }
 
   template <typename R, typename... P>
@@ -2786,6 +3149,106 @@
 
   SimPRegister& GetPTrue() { return pregister_all_true_; }
 
+  // Erase each granule's MTE tag in the region and return the number erased.
+  template <typename T>
+  size_t CleanGranuleTag(T address, size_t length = kMTETagGranuleInBytes) {
+    size_t count = 0;
+    for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
+      count +=
+          meta_data_.CleanMTETag(reinterpret_cast<uintptr_t>(address) + offset);
+    }
+    size_t expected =
+        length / kMTETagGranuleInBytes + (length % kMTETagGranuleInBytes != 0);
+    // Warn when fewer granules than expected (length rounded up to whole
+    // granules) had a tag recorded, i.e. the region was not fully tagged.
+    if (count != expected) {
+      std::stringstream sstream;
+      sstream << std::hex
+              << "MTE WARNING : the memory region being unmapped "
+                 "starting at address 0x"
+              << reinterpret_cast<uint64_t>(address)
+              << " is not fully MTE protected.\n";
+      VIXL_WARNING(sstream.str().c_str());
+    }
+    return count;
+  }
+
+  template <typename T>
+  void SetGranuleTag(T address,
+                     int tag,
+                     size_t length = kMTETagGranuleInBytes) {
+    for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
+      meta_data_.SetMTETag((uintptr_t)(address) + offset, tag);
+    }
+  }
+
+  template <typename T>
+  int GetGranuleTag(T address) {
+    return meta_data_.GetMTETag(address);
+  }
+
+  // Generate a random address tag, and any tags specified in the input are
+  // excluded from the selection.
+  uint64_t GenerateRandomTag(uint16_t exclude = 0);
+
+  // Register a new BranchInterception object. When 'function' is later
+  // branched to (e.g. "blr function"), 'callback' is called instead if one
+  // was provided; otherwise a runtime call is performed on 'function'.
+  //
+  // For example: this can be used to always perform runtime calls on
+  // non-AArch64 functions without using the macroassembler.
+  template <typename R, typename... P>
+  void RegisterBranchInterception(R (*function)(P...),
+                                  InterceptionCallback callback = nullptr) {
+    meta_data_.RegisterBranchInterception(*function, callback);
+  }
+
+  // Return the current output stream in use by the simulator.
+  FILE* GetOutputStream() const { return stream_; }
+
+  bool IsDebuggerEnabled() const { return debugger_enabled_; }
+
+  void SetDebuggerEnabled(bool enabled) { debugger_enabled_ = enabled; }
+
+  Debugger* GetDebugger() const { return debugger_.get(); }
+
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+  // Returns true if the faulting instruction address (usually the program
+  // counter or instruction pointer) comes from an internal VIXL memory access.
+  // This can be used by signal handlers to check if a signal was raised from
+  // the simulator (via TryMemoryAccess) before the actual
+  // access occurs.
+  bool IsSimulatedMemoryAccess(uintptr_t fault_pc) const {
+    return (fault_pc ==
+            reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory));
+  }
+
+  // Get the instruction address of the internal VIXL memory access continuation
+  // label. Signal handlers can resume execution at this address to return to
+  // TryMemoryAccess which will continue simulation.
+  uintptr_t GetSignalReturnAddress() const {
+    return reinterpret_cast<uintptr_t>(&_vixl_internal_AccessMemory_continue);
+  }
+
+  // Replace the fault address reported by the kernel with the actual faulting
+  // address.
+  //
+  // This is required because TryMemoryAccess reads a section of
+  // memory 1 byte at a time meaning the fault address reported may not be the
+  // base address of memory being accessed.
+  void ReplaceFaultAddress(siginfo_t* siginfo, void* context) {
+#ifdef __x86_64__
+    // The base address being accessed is passed in as the first argument to
+    // _vixl_internal_ReadMemory.
+    ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
+    siginfo->si_addr = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RDI]);
+#else
+    USE(siginfo);
+    USE(context);
+#endif  // __x86_64__
+  }
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
+
  protected:
   const char* clr_normal;
   const char* clr_flag_name;
@@ -2866,8 +3329,9 @@
                                             uint64_t left,
                                             uint64_t right,
                                             int carry_in);
-  using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
   vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y);
+  vixl_uint128_t Lsl128(vixl_uint128_t x, unsigned shift) const;
+  vixl_uint128_t Eor128(vixl_uint128_t x, vixl_uint128_t y) const;
   vixl_uint128_t Mul64(uint64_t x, uint64_t y);
   vixl_uint128_t Neg128(vixl_uint128_t x);
   void LogicalHelper(const Instruction* instr, int64_t op2);
@@ -2897,13 +3361,45 @@
                                       AddrMode addr_mode);
   void NEONLoadStoreSingleStructHelper(const Instruction* instr,
                                        AddrMode addr_mode);
+  template <uint32_t mops_type>
+  void MOPSPHelper(const Instruction* instr) {
+    VIXL_ASSERT(instr->IsConsistentMOPSTriplet<mops_type>());
 
-  uint64_t AddressUntag(uint64_t address) { return address & ~kAddressTagMask; }
+    int d = instr->GetRd();
+    int n = instr->GetRn();
+    int s = instr->GetRs();
 
-  template <typename T>
-  T* AddressUntag(T* address) {
-    uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
-    return reinterpret_cast<T*>(AddressUntag(address_raw));
+    // Aliased registers and xzr are disallowed for Xd and Xn.
+    if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) {
+      VisitUnallocated(instr);
+    }
+
+    // Additionally, Xs may not be xzr for cpy.
+    if ((mops_type == "cpy"_h) && (s == 31)) {
+      VisitUnallocated(instr);
+    }
+
+    // Bits 31 and 30 must be zero.
+    if (instr->ExtractBits(31, 30) != 0) {
+      VisitUnallocated(instr);
+    }
+
+    // Saturate copy count.
+    uint64_t xn = ReadXRegister(n);
+    int saturation_bits = (mops_type == "cpy"_h) ? 55 : 63;
+    if ((xn >> saturation_bits) != 0) {
+      xn = (UINT64_C(1) << saturation_bits) - 1;
+      if (mops_type == "setg"_h) {
+        // Align saturated value to granule.
+        xn &= ~UINT64_C(kMTETagGranuleInBytes - 1);
+      }
+      WriteXRegister(n, xn);
+    }
+
+    ReadNzcv().SetN(0);
+    ReadNzcv().SetZ(0);
+    ReadNzcv().SetC(1);  // Indicates "option B" implementation.
+    ReadNzcv().SetV(0);
   }
 
   int64_t ShiftOperand(unsigned reg_size,
@@ -2917,92 +3413,95 @@
   uint64_t PolynomialMult(uint64_t op1,
                           uint64_t op2,
                           int lane_size_in_bits) const;
+  vixl_uint128_t PolynomialMult128(uint64_t op1,
+                                   uint64_t op2,
+                                   int lane_size_in_bits) const;
 
-  void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
-  void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
-  void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
-  void ld1r(VectorFormat vform,
+  bool ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+  bool ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
+  bool ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+  bool ld1r(VectorFormat vform,
             VectorFormat unpack_vform,
             LogicVRegister dst,
             uint64_t addr,
             bool is_signed = false);
-  void ld2(VectorFormat vform,
+  bool ld2(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            uint64_t addr);
-  void ld2(VectorFormat vform,
+  bool ld2(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            int index,
            uint64_t addr);
-  void ld2r(VectorFormat vform,
+  bool ld2r(VectorFormat vform,
             LogicVRegister dst1,
             LogicVRegister dst2,
             uint64_t addr);
-  void ld3(VectorFormat vform,
+  bool ld3(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            uint64_t addr);
-  void ld3(VectorFormat vform,
+  bool ld3(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            int index,
            uint64_t addr);
-  void ld3r(VectorFormat vform,
+  bool ld3r(VectorFormat vform,
             LogicVRegister dst1,
             LogicVRegister dst2,
             LogicVRegister dst3,
             uint64_t addr);
-  void ld4(VectorFormat vform,
+  bool ld4(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            LogicVRegister dst4,
            uint64_t addr);
-  void ld4(VectorFormat vform,
+  bool ld4(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            LogicVRegister dst4,
            int index,
            uint64_t addr);
-  void ld4r(VectorFormat vform,
+  bool ld4r(VectorFormat vform,
             LogicVRegister dst1,
             LogicVRegister dst2,
             LogicVRegister dst3,
             LogicVRegister dst4,
             uint64_t addr);
-  void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
-  void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
-  void st2(VectorFormat vform,
+  bool st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
+  bool st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
+  bool st2(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            uint64_t addr);
-  void st2(VectorFormat vform,
+  bool st2(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            int index,
            uint64_t addr);
-  void st3(VectorFormat vform,
+  bool st3(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            LogicVRegister src3,
            uint64_t addr);
-  void st3(VectorFormat vform,
+  bool st3(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            LogicVRegister src3,
            int index,
            uint64_t addr);
-  void st4(VectorFormat vform,
+  bool st4(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            LogicVRegister src3,
            LogicVRegister src4,
            uint64_t addr);
-  void st4(VectorFormat vform,
+  bool st4(VectorFormat vform,
            LogicVRegister src,
            LogicVRegister src2,
            LogicVRegister src3,
@@ -3288,6 +3787,10 @@
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int rotation);
+  LogicVRegister rol(VectorFormat vform,
+                     LogicVRegister dst,
+                     const LogicVRegister& src,
+                     int rotation);
   LogicVRegister ext(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
@@ -4012,6 +4515,95 @@
                          LogicVRegister srcdst,
                          const LogicVRegister& src1,
                          const LogicVRegister& src2);
+
+  template <unsigned N>
+  static void SHARotateEltsLeftOne(uint64_t (&x)[N]) {
+    VIXL_STATIC_ASSERT(N == 4);
+    uint64_t temp = x[3];
+    x[3] = x[2];
+    x[2] = x[1];
+    x[1] = x[0];
+    x[0] = temp;
+  }
+
+  template <uint32_t mode>
+  LogicVRegister sha1(LogicVRegister srcdst,
+                      const LogicVRegister& src1,
+                      const LogicVRegister& src2) {
+    uint64_t y = src1.Uint(kFormat4S, 0);
+    uint64_t sd[4] = {};
+    srcdst.UintArray(kFormat4S, sd);
+
+    for (unsigned i = 0; i < ArrayLength(sd); i++) {
+      uint64_t t = CryptoOp<mode>(sd[1], sd[2], sd[3]);
+
+      y += RotateLeft(sd[0], 5, kSRegSize) + t;
+      y += src2.Uint(kFormat4S, i);
+
+      sd[1] = RotateLeft(sd[1], 30, kSRegSize);
+
+      // y:sd = ROL(y:sd, 32)
+      SHARotateEltsLeftOne(sd);
+      std::swap(sd[0], y);
+    }
+
+    srcdst.SetUintArray(kFormat4S, sd);
+    return srcdst;
+  }
+
+  LogicVRegister sha2h(LogicVRegister srcdst,
+                       const LogicVRegister& src1,
+                       const LogicVRegister& src2,
+                       bool part1);
+  LogicVRegister sha2su0(LogicVRegister srcdst, const LogicVRegister& src1);
+  LogicVRegister sha2su1(LogicVRegister srcdst,
+                         const LogicVRegister& src1,
+                         const LogicVRegister& src2);
+  LogicVRegister sha512h(LogicVRegister srcdst,
+                         const LogicVRegister& src1,
+                         const LogicVRegister& src2);
+  LogicVRegister sha512h2(LogicVRegister srcdst,
+                          const LogicVRegister& src1,
+                          const LogicVRegister& src2);
+  LogicVRegister sha512su0(LogicVRegister srcdst, const LogicVRegister& src1);
+  LogicVRegister sha512su1(LogicVRegister srcdst,
+                           const LogicVRegister& src1,
+                           const LogicVRegister& src2);
+
+
+  LogicVRegister aes(LogicVRegister srcdst,
+                     const LogicVRegister& src1,
+                     bool decrypt);
+  LogicVRegister aesmix(LogicVRegister srcdst,
+                        const LogicVRegister& src1,
+                        bool inverse);
+
+  LogicVRegister sm3partw1(LogicVRegister dst,
+                           const LogicVRegister& src1,
+                           const LogicVRegister& src2);
+  LogicVRegister sm3partw2(LogicVRegister dst,
+                           const LogicVRegister& src1,
+                           const LogicVRegister& src2);
+  LogicVRegister sm3ss1(LogicVRegister dst,
+                        const LogicVRegister& src1,
+                        const LogicVRegister& src2,
+                        const LogicVRegister& src3);
+  LogicVRegister sm3tt1(LogicVRegister srcdst,
+                        const LogicVRegister& src1,
+                        const LogicVRegister& src2,
+                        int index,
+                        bool is_a);
+  LogicVRegister sm3tt2(LogicVRegister srcdst,
+                        const LogicVRegister& src1,
+                        const LogicVRegister& src2,
+                        int index,
+                        bool is_a);
+
+  LogicVRegister sm4(LogicVRegister dst,
+                     const LogicVRegister& src1,
+                     const LogicVRegister& src2,
+                     bool is_key);
+
 #define NEON_3VREG_LOGIC_LIST(V) \
   V(addhn)                       \
   V(addhn2)                      \
@@ -4425,7 +5017,7 @@
   uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
   uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
 
-  void SysOp_W(int op, int64_t val);
+  bool SysOp_W(int op, int64_t val);
 
   template <typename T>
   T FPRecipSqrtEstimate(T op);
@@ -4579,7 +5171,8 @@
                                 unsigned zt_code,
                                 const LogicSVEAddressVector& addr);
   // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
-  void SVEStructuredLoadHelper(VectorFormat vform,
+  // Returns false if a load failed.
+  bool SVEStructuredLoadHelper(VectorFormat vform,
                                const LogicPRegister& pg,
                                unsigned zt_code,
                                const LogicSVEAddressVector& addr,
@@ -4737,7 +5330,7 @@
   const Instruction* pc_;
 
   // Pointer to the last simulated instruction, used for checking the validity
-  // of the current instruction with movprfx.
+  // of the current instruction against the previous one (e.g. a movprfx).
   Instruction const* last_instr_;
 
   // Branch type register, used for branch target identification.
@@ -4777,10 +5370,12 @@
 
   bool CanReadMemory(uintptr_t address, size_t size);
 
+#ifndef _WIN32
   // CanReadMemory needs placeholder file descriptors, so we use a pipe. We can
   // save some system call overhead by opening them on construction, rather than
   // on every call to CanReadMemory.
   int placeholder_pipe_fd_[2];
+#endif
 
   template <typename T>
   static T FPDefaultNaN();
@@ -4859,14 +5454,186 @@
   CPUFeaturesAuditor cpu_features_auditor_;
   std::vector<CPUFeatures> saved_cpu_features_;
 
-  // State for *rand48 functions, used to simulate randomness with repeatable
+  // linear_congruential_engine, used to simulate randomness with repeatable
   // behaviour (so that tests are deterministic). This is used to simulate RNDR
   // and RNDRRS, as well as to simulate a source of entropy for architecturally
   // undefined behaviour.
-  uint16_t rand_state_[3];
+  std::linear_congruential_engine<uint64_t,
+                                  0x5DEECE66D,
+                                  0xB,
+                                  static_cast<uint64_t>(1) << 48>
+      rand_gen_;
 
   // A configurable size of SVE vector registers.
   unsigned vector_length_;
+
+  // DC ZVA enable (= 0) status and block size.
+  unsigned dczid_ = (0 << 4) | 4;  // 2^4 words => 64-byte block size.
+
+  // Representation of memory attributes such as MTE tagging and BTI page
+  // protection in addition to branch interceptions.
+  MetaDataDepot meta_data_;
+
+  // True if the debugger is enabled and might get entered.
+  bool debugger_enabled_;
+
+  // Debugger for the simulator.
+  std::unique_ptr<Debugger> debugger_;
+
+  // The Guarded Control Stack is represented using a vector, where the more
+  // recently stored addresses are at higher-numbered indices.
+  using GuardedControlStack = std::vector<uint64_t>;
+
+  // The GCSManager handles the synchronisation of GCS across multiple
+  // Simulator instances. Each Simulator has its own stack, but all share
+  // a GCSManager instance. This allows exchanging stacks between Simulators
+  // in a threaded application.
+  class GCSManager {
+   public:
+    // Allocate a new Guarded Control Stack and return its GCS id.
+    uint64_t AllocateStack() {
+      const std::lock_guard<std::mutex> lock(stacks_mtx_);
+      GuardedControlStack* new_stack = new GuardedControlStack;
+      // Reuse the first free slot, or append a new one if there is none.
+      uint64_t result = 0;
+      while ((result < stacks_.size()) && (stacks_[result] != nullptr)) {
+        result++;
+      }
+      if (result == stacks_.size()) {
+        stacks_.push_back(new_stack);
+      } else {
+        stacks_[result] = new_stack;
+      }
+
+      // Shift the index to look like a stack pointer aligned to a page.
+      result <<= kPageSizeLog2;
+
+      // Push the tagged index onto the new stack as a seal.
+      new_stack->push_back(result + 1);
+      return result;
+    }
+
+    // Free a Guarded Control Stack and set the stacks_ slot to null.
+    void FreeStack(uint64_t gcs) {
+      const std::lock_guard<std::mutex> lock(stacks_mtx_);
+      uint64_t gcs_index = GetGCSIndex(gcs);
+      // An index beyond the end of stacks_ is treated like an empty slot.
+      GuardedControlStack* gcsptr =
+          (gcs_index < stacks_.size()) ? stacks_[gcs_index] : nullptr;
+      if (gcsptr == nullptr) {
+        VIXL_ABORT_WITH_MSG("Tried to free unallocated GCS ");
+      } else {
+        delete gcsptr;
+        stacks_[gcs_index] = nullptr;
+      }
+    }
+
+    // Get a pointer to the GCS vector using a GCS id. Returns nullptr if the
+    // id does not refer to an allocated stack.
+    GuardedControlStack* GetGCSPtr(uint64_t gcs) const {
+      const std::lock_guard<std::mutex> lock(stacks_mtx_);
+      uint64_t gcs_index = GetGCSIndex(gcs);
+      return (gcs_index < stacks_.size()) ? stacks_[gcs_index] : nullptr;
+    }
+
+   private:
+    uint64_t GetGCSIndex(uint64_t gcs) const { return gcs >> kPageSizeLog2; }
+
+    std::vector<GuardedControlStack*> stacks_;
+    mutable std::mutex stacks_mtx_;  // mutable: GetGCSPtr() const locks it.
+  };
+
+  // A GCS id indicating no GCS has been allocated.
+  static const uint64_t kGCSNoStack = kPageSize - 1;
+  uint64_t gcs_;
+  bool gcs_enabled_;
+
+ public:
+  GCSManager& GetGCSManager() {
+    static GCSManager manager;
+    return manager;
+  }
+
+  void EnableGCSCheck() { gcs_enabled_ = true; }
+  void DisableGCSCheck() { gcs_enabled_ = false; }
+  bool IsGCSCheckEnabled() const { return gcs_enabled_; }
+
+ private:
+  bool IsAllocatedGCS(uint64_t gcs) const { return gcs != kGCSNoStack; }
+  // Free the active stack, if one is allocated, then activate a new stack and
+  // pop its seal so that it starts out empty.
+  void ResetGCSState() {
+    GCSManager& manager = GetGCSManager();
+    if (IsAllocatedGCS(gcs_)) manager.FreeStack(gcs_);
+    ActivateGCS(manager.AllocateStack());
+    GCSPop();  // Remove seal.
+  }
+
+  // Look up the stack identified by `gcs` in the GCSManager returned by
+  // GetGCSManager().
+  GuardedControlStack* GetGCSPtr(uint64_t gcs) {
+    return GetGCSManager().GetGCSPtr(gcs);
+  }
+  GuardedControlStack* GetActiveGCSPtr() { return GetGCSPtr(gcs_); }
+
+  uint64_t ActivateGCS(uint64_t gcs) {
+    uint64_t outgoing_gcs = gcs_;
+    gcs_ = gcs;
+    return outgoing_gcs;
+  }
+
+  void GCSPush(uint64_t addr) {
+    GuardedControlStack* stack = GetActiveGCSPtr();
+    stack->push_back(addr);  // The new entry is now the last element.
+    LogGCS(/* is_push = */ true, addr, stack->size() - 1);
+  }
+
+  // Remove the most recent entry from the active GCS, log the pop and return
+  // the entry. Returns 0, without logging, if the stack is empty.
+  uint64_t GCSPop() {
+    GuardedControlStack* stack = GetActiveGCSPtr();
+    if (stack->empty()) return 0;
+    size_t top_index = stack->size() - 1;
+    uint64_t popped = (*stack)[top_index];
+    stack->pop_back();
+    LogGCS(/* is_push = */ false, popped, top_index);
+    return popped;
+  }
+
+  // Return the most recent entry of the active GCS without removing it.
+  uint64_t GCSPeek() {
+    GuardedControlStack* stack = GetActiveGCSPtr();
+    if (!stack->empty()) {
+      return stack->back();
+    }
+    return 0;  // The stack holds no entries.
+  }
+
+  // Print `msg` and up to eight of the most recent entries of the active GCS,
+  // then call VIXL_ABORT_WITH_MSG. Does nothing if GCS checking is disabled.
+  void ReportGCSFailure(const char* msg) {
+    if (!IsGCSCheckEnabled()) return;
+    GuardedControlStack* gcs = GetActiveGCSPtr();
+    printf("%s", msg);
+    if (gcs == nullptr) {
+      printf("GCS pointer is null\n");
+    } else {
+      printf("GCS records, most recent first:\n");
+      int most_recent_index = static_cast<int>(gcs->size()) - 1;
+      for (int i = 0; i < 8; i++) {
+        int index = most_recent_index - i;
+        if (index < 0) {
+          break;
+        }
+        // Read in place so that the stack itself is left unmodified.
+        uint64_t entry = (*gcs)[index];
+        printf(" gcs%" PRIu64 "[%d]: 0x%016" PRIx64 "\n", gcs_, index, entry);
+      }
+      printf("End of GCS records.\n");
+    }
+    VIXL_ABORT_WITH_MSG("GCS failed ");
+  }
+
 };
 
 #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
@@ -4877,6 +5644,17 @@
     : Simulator::emulated_index_sequence<I...> {};
 #endif
 
+template <typename R, typename... P>
+void MetaDataDepot::BranchInterception<R, P...>::operator()(
+    Simulator* simulator) const {
+  if (callback_ == nullptr) {
+    Simulator::RuntimeCallStructHelper<R, P...>::
+        Wrapper(simulator, reinterpret_cast<uint64_t>(function_));
+  } else {
+    callback_(reinterpret_cast<uint64_t>(function_));
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
 
diff --git a/src/aarch64/simulator-constants-aarch64.h b/src/aarch64/simulator-constants-aarch64.h
index e2389f1..1aa4f85 100644
--- a/src/aarch64/simulator-constants-aarch64.h
+++ b/src/aarch64/simulator-constants-aarch64.h
@@ -56,6 +56,8 @@
   kDisableCPUFeaturesOpcode,
   kSaveCPUFeaturesOpcode,
   kRestoreCPUFeaturesOpcode,
+  kMTEActive,
+  kMTEInactive,
   // Aliases.
   kDebugHltFirstOpcode = kUnreachableOpcode,
   kDebugHltLastOpcode = kLogOpcode
diff --git a/src/assembler-base-vixl.h b/src/assembler-base-vixl.h
index ee54dcb..7bd6af2 100644
--- a/src/assembler-base-vixl.h
+++ b/src/assembler-base-vixl.h
@@ -29,6 +29,12 @@
 
 #include "code-buffer-vixl.h"
 
+// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
+// definition.
+#if defined(_MSC_VER) && defined(mvn)
+#undef mvn
+#endif
+
 namespace vixl {
 
 class CodeBufferCheckScope;
diff --git a/src/code-buffer-vixl.cc b/src/code-buffer-vixl.cc
index b443e8d..2d14294 100644
--- a/src/code-buffer-vixl.cc
+++ b/src/code-buffer-vixl.cc
@@ -24,9 +24,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#ifdef VIXL_CODE_BUFFER_MMAP
 extern "C" {
 #include <sys/mman.h>
 }
+#endif
 
 #include "code-buffer-vixl.h"
 #include "utils-vixl.h"
@@ -119,11 +121,12 @@
 
 
 void CodeBuffer::EmitString(const char* string) {
-  VIXL_ASSERT(HasSpaceFor(strlen(string) + 1));
+  const auto len = strlen(string) + 1;
+  VIXL_ASSERT(HasSpaceFor(len));
   char* dst = reinterpret_cast<char*>(cursor_);
   dirty_ = true;
-  char* null_char = stpcpy(dst, string);
-  cursor_ = reinterpret_cast<byte*>(null_char) + 1;
+  memcpy(dst, string, len);
+  cursor_ = reinterpret_cast<byte*>(dst + len);
 }
 
 
diff --git a/src/code-buffer-vixl.h b/src/code-buffer-vixl.h
index 9a1efd4..cb9031f 100644
--- a/src/code-buffer-vixl.h
+++ b/src/code-buffer-vixl.h
@@ -124,8 +124,9 @@
   void Emit(T value) {
     VIXL_ASSERT(HasSpaceFor(sizeof(value)));
     dirty_ = true;
-    memcpy(cursor_, &value, sizeof(value));
-    cursor_ += sizeof(value);
+    byte* c = cursor_;
+    memcpy(c, &value, sizeof(value));
+    cursor_ = c + sizeof(value);
   }
 
   void UpdateData(size_t offset, const void* data, size_t size);
diff --git a/src/code-generation-scopes-vixl.h b/src/code-generation-scopes-vixl.h
index b7ea2d9..f019b68 100644
--- a/src/code-generation-scopes-vixl.h
+++ b/src/code-generation-scopes-vixl.h
@@ -68,14 +68,19 @@
                        size_t size,
                        BufferSpacePolicy check_policy = kReserveBufferSpace,
                        SizePolicy size_policy = kMaximumSize)
-      : assembler_(NULL), initialised_(false) {
+      : CodeBufferCheckScope() {
     Open(assembler, size, check_policy, size_policy);
   }
 
   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  CodeBufferCheckScope() : assembler_(NULL), initialised_(false) {
+  CodeBufferCheckScope()
+      : assembler_(NULL),
+        assert_policy_(kMaximumSize),
+        limit_(0),
+        previous_allow_assembler_(false),
+        initialised_(false) {
     // Nothing to do.
   }
 
@@ -152,14 +157,15 @@
   // constructed.
   EmissionCheckScope(MacroAssemblerInterface* masm,
                      size_t size,
-                     SizePolicy size_policy = kMaximumSize) {
+                     SizePolicy size_policy = kMaximumSize)
+      : EmissionCheckScope() {
     Open(masm, size, size_policy);
   }
 
   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  EmissionCheckScope() {}
+  EmissionCheckScope() : masm_(nullptr), pool_policy_(kBlockPools) {}
 
   virtual ~EmissionCheckScope() { Close(); }
 
@@ -250,14 +256,15 @@
   // constructed.
   ExactAssemblyScope(MacroAssemblerInterface* masm,
                      size_t size,
-                     SizePolicy size_policy = kExactSize) {
+                     SizePolicy size_policy = kExactSize)
+      : ExactAssemblyScope() {
     Open(masm, size, size_policy);
   }
 
   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  ExactAssemblyScope() {}
+  ExactAssemblyScope() : previous_allow_macro_assembler_(false) {}
 
   virtual ~ExactAssemblyScope() { Close(); }
 
diff --git a/src/compiler-intrinsics-vixl.cc b/src/compiler-intrinsics-vixl.cc
index f6234fa..b8ed1b2 100644
--- a/src/compiler-intrinsics-vixl.cc
+++ b/src/compiler-intrinsics-vixl.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "compiler-intrinsics-vixl.h"
+
 #include "utils-vixl.h"
 
 namespace vixl {
diff --git a/src/compiler-intrinsics-vixl.h b/src/compiler-intrinsics-vixl.h
index 50ed357..8d0849a 100644
--- a/src/compiler-intrinsics-vixl.h
+++ b/src/compiler-intrinsics-vixl.h
@@ -29,6 +29,7 @@
 #define VIXL_COMPILER_INTRINSICS_H
 
 #include <limits.h>
+
 #include "globals-vixl.h"
 
 namespace vixl {
@@ -112,7 +113,8 @@
   VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
 #if COMPILER_HAS_BUILTIN_CLRSB
   VIXL_ASSERT((LLONG_MIN <= value) && (value <= LLONG_MAX));
-  int ll_width = sizeof(long long) * kBitsPerByte;  // NOLINT(runtime/int)
+  int ll_width =
+      sizeof(long long) * kBitsPerByte;  // NOLINT(google-runtime-int)
   int result = __builtin_clrsbll(value) - (ll_width - width);
   // Check that the value fits in the specified width.
   VIXL_ASSERT(result >= 0);
diff --git a/src/cpu-features.cc b/src/cpu-features.cc
index 08db3f4..e1bd0f1 100644
--- a/src/cpu-features.cc
+++ b/src/cpu-features.cc
@@ -24,9 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "cpu-features.h"
+
 #include <ostream>
 
-#include "cpu-features.h"
 #include "globals-vixl.h"
 #include "utils-vixl.h"
 
diff --git a/src/cpu-features.h b/src/cpu-features.h
index ebd0578..1a041f6 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -166,6 +166,7 @@
   /* Memory Tagging Extension.                                              */ \
   V(kMTEInstructions,     "MTE (EL0 instructions)", NULL)                      \
   V(kMTE,                 "MTE",                    NULL)                      \
+  V(kMTE3,                "MTE (asymmetric)",       "mte3")                    \
   /* PAuth extensions.                                                      */ \
   V(kPAuthEnhancedPAC,    "PAuth EnhancedPAC",      NULL)                      \
   V(kPAuthEnhancedPAC2,   "PAuth EnhancedPAC2",     NULL)                      \
@@ -183,7 +184,25 @@
   /* Enhanced Counter Virtualization                                        */ \
   V(kECV,                 "ECV",                    "ecv")                     \
   /* Increased precision of Reciprocal Estimate and Square Root Estimate    */ \
-  V(kRPRES,               "RPRES",                  "rpres")
+  V(kRPRES,               "RPRES",                  "rpres")                   \
+  /* Memory operation instructions, for memcpy, memset                      */ \
+  V(kMOPS,                "Memory ops",             NULL)                      \
+  /* Scalable Matrix Extension (SME)                                        */ \
+  V(kSME,                 "SME",                    "sme")                     \
+  V(kSMEi16i64,           "SME (i16i64)",           "smei16i64")               \
+  V(kSMEf64f64,           "SME (f64f64)",           "smef64f64")               \
+  V(kSMEi8i32,            "SME (i8i32)",            "smei8i32")                \
+  V(kSMEf16f32,           "SME (f16f32)",           "smef16f32")               \
+  V(kSMEb16f32,           "SME (b16f32)",           "smeb16f32")               \
+  V(kSMEf32f32,           "SME (f32f32)",           "smef32f32")               \
+  V(kSMEfa64,             "SME (fa64)",             "smefa64")                 \
+  /* WFET and WFIT instruction support                                      */ \
+  V(kWFXT,                "WFXT",                   "wfxt")                    \
+  /* Extended BFloat16 instructions                                         */ \
+  V(kEBF16,               "EBF16",                  "ebf16")                   \
+  V(kSVE_EBF16,           "EBF16 (SVE)",            "sveebf16")                \
+  V(kCSSC,                "CSSC",                   "cssc")                    \
+  V(kGCS,                 "GCS",                    "gcs")
 // clang-format on
 
 
diff --git a/src/globals-vixl.h b/src/globals-vixl.h
index 4dc8c02..b096c7f 100644
--- a/src/globals-vixl.h
+++ b/src/globals-vixl.h
@@ -27,8 +27,8 @@
 #ifndef VIXL_GLOBALS_H
 #define VIXL_GLOBALS_H
 
-#if __cplusplus < 201402L
-#error VIXL requires C++14
+#if __cplusplus < 201703L
+#error VIXL requires C++17
 #endif
 
 // Get standard C99 macros for integer types.
@@ -158,7 +158,7 @@
 #endif
 // This is not as powerful as template based assertions, but it is simple.
 // It assumes that the descriptions are unique. If this starts being a problem,
-// we can switch to a different implemention.
+// we can switch to a different implementation.
 #define VIXL_CONCAT(a, b) a##b
 #if __cplusplus >= 201103L
 #define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
@@ -207,7 +207,7 @@
 #if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
 #define VIXL_FALLTHROUGH() [[clang::fallthrough]]
 // Fallthrough annotation for GCC >= 7.
-#elif __GNUC__ >= 7
+#elif defined(__GNUC__) && __GNUC__ >= 7
 #define VIXL_FALLTHROUGH() __attribute__((fallthrough))
 #else
 #define VIXL_FALLTHROUGH() \
@@ -215,6 +215,18 @@
   } while (0)
 #endif
 
+// Evaluate 'init' to an std::optional and return if it's empty. If 'init' is
+// not empty then define a variable 'name' with the value inside the
+// std::optional.
+#define VIXL_DEFINE_OR_RETURN(name, init) \
+  auto opt##name = init;                  \
+  if (!opt##name) return;                 \
+  auto name = *opt##name;
+#define VIXL_DEFINE_OR_RETURN_FALSE(name, init) \
+  auto opt##name = init;                        \
+  if (!opt##name) return false;                 \
+  auto name = *opt##name;
+
 #if __cplusplus >= 201103L
 #define VIXL_NO_RETURN [[noreturn]]
 #else
diff --git a/src/invalset-vixl.h b/src/invalset-vixl.h
index 8bd6035..b5a710f 100644
--- a/src/invalset-vixl.h
+++ b/src/invalset-vixl.h
@@ -1,4 +1,3 @@
-// Copyright 2015, VIXL authors
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -27,9 +26,8 @@
 #ifndef VIXL_INVALSET_H_
 #define VIXL_INVALSET_H_
 
-#include <cstring>
-
 #include <algorithm>
+#include <cstring>
 #include <vector>
 
 #include "globals-vixl.h"
@@ -92,6 +90,7 @@
  public:
   InvalSet();
   ~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
+  InvalSet(InvalSet&&);  // movable
 
   static const size_t kNPreallocatedElements = N_PREALLOCATED_ELEMENTS;
   static const KeyType kInvalidKey = INVALID_KEY;
@@ -112,7 +111,7 @@
   size_t size() const;
 
   // Returns true if no elements are stored in the set.
-  // Note that this does not mean the the backing storage is empty: it can still
+  // Note that this does not mean the backing storage is empty: it can still
   // contain invalid elements.
   bool empty() const;
 
@@ -244,8 +243,13 @@
 
 
 template <class S>
-class InvalSetIterator : public std::iterator<std::forward_iterator_tag,
-                                              typename S::_ElementType> {
+class InvalSetIterator {
+ public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = typename S::_ElementType;
+  using difference_type = std::ptrdiff_t;
+  using pointer = value_type*;
+  using reference = value_type&;
  private:
   // Redefine types to mirror the associated set types.
   typedef typename S::_ElementType ElementType;
@@ -321,6 +325,27 @@
 #endif
 }
 
+// Move constructor: adopts the contents of `other`, then clears `other`.
+template <TEMPLATE_INVALSET_P_DECL>
+InvalSet<TEMPLATE_INVALSET_P_DEF>::InvalSet(InvalSet&& other)
+    : valid_cached_min_(false), sorted_(true), size_(0), vector_(NULL) {
+  VIXL_ASSERT(other.monitor() == 0);
+  if (this == &other) return;
+  sorted_ = other.sorted_;
+  size_ = other.size_;
+#ifdef VIXL_DEBUG
+  monitor_ = 0;
+#endif
+  if (!other.IsUsingVector()) {
+    std::move(other.preallocated_,
+              other.preallocated_ + other.size_,
+              preallocated_);
+  } else {
+    vector_ = other.vector_;
+    other.vector_ = NULL;
+  }
+  other.clear();
+}
 
 template <TEMPLATE_INVALSET_P_DECL>
 InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet()
diff --git a/src/pool-manager-impl.h b/src/pool-manager-impl.h
index a1bcaaa..5baf66b 100644
--- a/src/pool-manager-impl.h
+++ b/src/pool-manager-impl.h
@@ -27,10 +27,10 @@
 #ifndef VIXL_POOL_MANAGER_IMPL_H_
 #define VIXL_POOL_MANAGER_IMPL_H_
 
-#include "pool-manager.h"
-
 #include <algorithm>
+
 #include "assembler-base-vixl.h"
+#include "pool-manager.h"
 
 namespace vixl {
 
@@ -487,7 +487,7 @@
 }
 
 template <typename T>
-PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
+PoolManager<T>::~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 #ifdef VIXL_DEBUG
   // Check for unbound objects.
   for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
@@ -517,6 +517,6 @@
   }
   return size;
 }
-}
+}  // namespace vixl
 
 #endif  // VIXL_POOL_MANAGER_IMPL_H_
diff --git a/src/pool-manager.h b/src/pool-manager.h
index 2e73f8e..f5101cc 100644
--- a/src/pool-manager.h
+++ b/src/pool-manager.h
@@ -27,11 +27,10 @@
 #ifndef VIXL_POOL_MANAGER_H_
 #define VIXL_POOL_MANAGER_H_
 
-#include <stdint.h>
-
 #include <cstddef>
 #include <limits>
 #include <map>
+#include <stdint.h>
 #include <vector>
 
 #include "globals-vixl.h"
@@ -369,8 +368,8 @@
 
   // Specify the possible locations where the object could be stored. AArch32's
   // PC offset, and T32's PC alignment calculations should be applied by the
-  // Assembler, not here. The PoolManager deals only with simple locationes.
-  // Including min_object_adddress_ is necessary to handle AArch32 some
+  // Assembler, not here. The PoolManager deals only with simple locations.
+  // Including min_object_address_ is necessary to handle some AArch32
   // instructions which have a minimum offset of 0, but also have the implicit
   // PC offset.
   // Note that this structure cannot handle sparse ranges, such as A32's ADR,
diff --git a/src/utils-vixl.cc b/src/utils-vixl.cc
index 41b5586..639a4b1 100644
--- a/src/utils-vixl.cc
+++ b/src/utils-vixl.cc
@@ -24,10 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <cstdio>
-
 #include "utils-vixl.h"
 
+#include <cstdio>
+
 namespace vixl {
 
 // The default NaN values (for FPCR.DN=1).
@@ -391,7 +391,7 @@
   }
 
   VIXL_UNREACHABLE();
-  return value;
+  return static_cast<float>(value);
 }
 
 // TODO: We should consider implementing a full FPToDouble(Float16)
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index 9e05c7e..c3919b5 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -30,6 +30,7 @@
 #include <cmath>
 #include <cstring>
 #include <limits>
+#include <type_traits>
 #include <vector>
 
 #include "compiler-intrinsics-vixl.h"
@@ -238,6 +239,11 @@
   return value & width_mask;
 }
 
+inline uint64_t RotateLeft(uint64_t value,
+                           unsigned int rotate,
+                           unsigned int width) {
+  return RotateRight(value, width - rotate, width);
+}
 
 // Wrapper class for passing FP16 values through the assembler.
 // This is purely to aid with type checking/casting.
@@ -282,17 +288,43 @@
   return RawbitsToDouble(bits);
 }
 
+// Some compilers dislike negating unsigned integers,
+// so we provide an equivalent.
+template <typename T>
+T UnsignedNegate(T value) {
+  VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
+  return ~value + 1;
+}
+
+template <typename T>
+bool CanBeNegated(T value) {
+  VIXL_STATIC_ASSERT(std::is_signed<T>::value);
+  return (value == std::numeric_limits<T>::min()) ? false : true;
+}
+
+// An absolute operation for signed integers that is defined for results outside
+// the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
+template <typename T>
+T Abs(T val) {
+  // TODO: this static assertion is for signed integer inputs, as that's the
+  // only type tested. However, the code should work for all numeric inputs.
+  // Remove the assertion and this comment when more tests are available.
+  VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
+  return ((val >= -std::numeric_limits<T>::max()) && (val < 0)) ? -val : val;
+}
+
 // Convert unsigned to signed numbers in a well-defined way (using two's
 // complement representations).
 inline int64_t RawbitsToInt64(uint64_t bits) {
   return (bits >= UINT64_C(0x8000000000000000))
-             ? (-static_cast<int64_t>(-bits - 1) - 1)
+             ? (-static_cast<int64_t>(UnsignedNegate(bits) - 1) - 1)
              : static_cast<int64_t>(bits);
 }
 
 inline int32_t RawbitsToInt32(uint32_t bits) {
-  return (bits >= UINT64_C(0x80000000)) ? (-static_cast<int32_t>(-bits - 1) - 1)
-                                        : static_cast<int32_t>(bits);
+  return (bits >= UINT64_C(0x80000000))
+             ? (-static_cast<int32_t>(UnsignedNegate(bits) - 1) - 1)
+             : static_cast<int32_t>(bits);
 }
 
 namespace internal {
@@ -318,7 +350,7 @@
   bool operator>(SimFloat16 rhs) const;
   bool operator==(SimFloat16 rhs) const;
   bool operator!=(SimFloat16 rhs) const;
-  // This is necessary for conversions peformed in (macro asm) Fmov.
+  // This is necessary for conversions performed in (macro asm) Fmov.
   bool operator==(double rhs) const;
   operator double() const;
 };
@@ -475,7 +507,9 @@
 }
 
 
-inline uint64_t LowestSetBit(uint64_t value) { return value & -value; }
+inline uint64_t LowestSetBit(uint64_t value) {
+  return value & UnsignedNegate(value);
+}
 
 
 template <typename T>
@@ -525,13 +559,14 @@
 template <typename T>
 T ReverseBytes(T value, int block_bytes_log2) {
   VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
-  VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
+  VIXL_ASSERT((uint64_t{1} << block_bytes_log2) <= sizeof(value));
   // Split the 64-bit value into an 8-bit array, where b[0] is the least
   // significant byte, and b[7] is the most significant.
   uint8_t bytes[8];
   uint64_t mask = UINT64_C(0xff00000000000000);
   for (int i = 7; i >= 0; i--) {
-    bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
+    bytes[i] =
+        static_cast<uint8_t>((static_cast<uint64_t>(value) & mask) >> (i * 8));
     mask >>= 8;
   }
 
@@ -588,6 +623,39 @@
   return IsAligned<4>(pointer);
 }
 
+template <unsigned BITS, typename T>
+bool IsRepeatingPattern(T value) {
+  VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
+  VIXL_ASSERT(IsMultiple(sizeof(value) * kBitsPerByte, BITS));
+  VIXL_ASSERT(IsMultiple(BITS, 2));
+  VIXL_STATIC_ASSERT(BITS >= 2);
+#if (defined(__x86_64__) || defined(__i386)) && __clang_major__ >= 17 && \
+    __clang_major__ <= 19
+  // Workaround for https://github.com/llvm/llvm-project/issues/108722
+  unsigned hbits = BITS / 2;
+  T midmask = (~static_cast<T>(0) >> BITS) << hbits;
+  // E.g. for bytes in a word (0xb3b2b1b0): .b3b2b1. == .b2b1b0.
+  return (((value >> hbits) & midmask) == ((value << hbits) & midmask));
+#else
+  return value == RotateRight(value, BITS, sizeof(value) * kBitsPerByte);
+#endif
+}
+
+template <typename T>
+bool AllBytesMatch(T value) {
+  return IsRepeatingPattern<kBitsPerByte>(value);
+}
+
+template <typename T>
+bool AllHalfwordsMatch(T value) {
+  return IsRepeatingPattern<kBitsPerByte * 2>(value);
+}
+
+template <typename T>
+bool AllWordsMatch(T value) {
+  return IsRepeatingPattern<kBitsPerByte * 4>(value);
+}
+
 // Increment a pointer until it has the specified alignment. The alignment must
 // be a power of two.
 template <class T>
@@ -829,7 +897,7 @@
   }
   int32_t GetSigned() const { return data_; }
   Uint32 operator~() const { return Uint32(~data_); }
-  Uint32 operator-() const { return Uint32(-data_); }
+  Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); }
   bool operator==(Uint32 value) const { return data_ == value.data_; }
   bool operator!=(Uint32 value) const { return data_ != value.data_; }
   bool operator>(Uint32 value) const { return data_ > value.data_; }
@@ -897,7 +965,7 @@
   Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
   Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
   Uint64 operator~() const { return Uint64(~data_); }
-  Uint64 operator-() const { return Uint64(-data_); }
+  Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); }
   bool operator==(Uint64 value) const { return data_ == value.data_; }
   bool operator!=(Uint64 value) const { return data_ != value.data_; }
   Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
@@ -1203,7 +1271,7 @@
     // For subnormal outputs, the shift must be adjusted by the exponent. The +1
     // is necessary because the exponent of a subnormal value (encoded as 0) is
     // the same as the exponent of the smallest normal value (encoded as 1).
-    shift += -exponent + 1;
+    shift += static_cast<int>(-exponent + 1);
 
     // Handle inputs that would produce a zero output.
     //
diff --git a/test/aarch32/config/cond-rd-operand-rn-shift-rs-t32.json b/test/aarch32/config/cond-rd-operand-rn-shift-rs-t32.json
index 6e8f4c8..4d0064d 100644
--- a/test/aarch32/config/cond-rd-operand-rn-shift-rs-t32.json
+++ b/test/aarch32/config/cond-rd-operand-rn-shift-rs-t32.json
@@ -97,7 +97,7 @@
       "type": "assembler",
       "test-cases": [
         {
-          "name": "Unconditionnal",
+          "name": "Unconditional",
           "operands": [
             "cond", "rd", "rn", "shift", "rs"
           ],
diff --git a/test/aarch32/config/cond-rd-rn-operand-rm-t32.json b/test/aarch32/config/cond-rd-rn-operand-rm-t32.json
index 0affe92..b810c3c 100644
--- a/test/aarch32/config/cond-rd-rn-operand-rm-t32.json
+++ b/test/aarch32/config/cond-rd-rn-operand-rm-t32.json
@@ -192,7 +192,7 @@
       "type": "assembler",
       "test-cases": [
         {
-          "name": "Unconditionnal",
+          "name": "Unconditional",
           "operands": [
             "cond", "rd", "rn", "rm"
           ],
diff --git a/test/aarch32/config/data-types.json b/test/aarch32/config/data-types.json
index c3409b0..aa773d6 100644
--- a/test/aarch32/config/data-types.json
+++ b/test/aarch32/config/data-types.json
@@ -1100,7 +1100,7 @@
       "identifier": "OffsetLowerThan4096",
       "type": "int32_t",
       // These variants are a random sample of 500 integers out of all integers
-      // from 1 to 4094 (included). We've added 0 and 4095 explicitely.
+      // from 1 to 4094 (included). We've added 0 and 4095 explicitly.
       "variants": [
         "0",
         "4095",
@@ -1700,7 +1700,7 @@
       ],
       "default": "NoFlag"
     },
-    // TODO: Consider having a seperate list for inputs for which we are only
+    // TODO: Consider having a separate list for inputs for which we are only
     // interested in recording the value after the instruction has executed.
     // This applies to `Q` and `GE`.
     {
@@ -1781,7 +1781,7 @@
       "identifier": "RegisterOffsetLowerThan4096",
       "type": "Register",
       // These values are a random sample of 500 integers out of all integers
-      // from 1 to 4094 (included). We've added 0 and 4095 explicitely.
+      // from 1 to 4094 (included). We've added 0 and 4095 explicitly.
       "values": [
         "0",
         "4095",
diff --git a/test/aarch32/config/template-assembler-aarch32.cc.in b/test/aarch32/config/template-assembler-aarch32.cc.in
index a602860..456668d 100644
--- a/test/aarch32/config/template-assembler-aarch32.cc.in
+++ b/test/aarch32/config/template-assembler-aarch32.cc.in
@@ -49,7 +49,7 @@
   ${instruction_list_declaration}
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/config/template-assembler-negative-aarch32.cc.in b/test/aarch32/config/template-assembler-negative-aarch32.cc.in
index 8a7cd63..006ff01 100644
--- a/test/aarch32/config/template-assembler-negative-aarch32.cc.in
+++ b/test/aarch32/config/template-assembler-negative-aarch32.cc.in
@@ -49,7 +49,7 @@
   ${instruction_list_declaration}
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/config/template-macro-assembler-aarch32.cc.in b/test/aarch32/config/template-macro-assembler-aarch32.cc.in
index ab37208..c76b311 100644
--- a/test/aarch32/config/template-macro-assembler-aarch32.cc.in
+++ b/test/aarch32/config/template-macro-assembler-aarch32.cc.in
@@ -53,7 +53,7 @@
   ${instruction_list_declaration}
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/config/template-simulator-aarch32.cc.in b/test/aarch32/config/template-simulator-aarch32.cc.in
index 9e24225..619bb8d 100644
--- a/test/aarch32/config/template-simulator-aarch32.cc.in
+++ b/test/aarch32/config/template-simulator-aarch32.cc.in
@@ -113,7 +113,7 @@
   ${instruction_list_declaration}
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-assembler-aarch32.cc b/test/aarch32/test-assembler-aarch32.cc
index 418bc11..d97e18b 100644
--- a/test/aarch32/test-assembler-aarch32.cc
+++ b/test/aarch32/test-assembler-aarch32.cc
@@ -177,17 +177,23 @@
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
 // No simulator yet. We can't test the results.
 
-#define ASSERT_EQUAL_32(expected, result)
+#define ASSERT_EQUAL_32(expected, result) \
+  USE(expected, result)
 
-#define ASSERT_EQUAL_64(expected, result)
+#define ASSERT_EQUAL_64(expected, result) \
+  USE(expected, result)
 
-#define ASSERT_EQUAL_128(expected_h, expected_l, result)
+#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
+  USE(expected_h, expected_l, result)
 
-#define ASSERT_EQUAL_FP32(expected, result)
+#define ASSERT_EQUAL_FP32(expected, result) \
+  USE(expected, result)
 
-#define ASSERT_EQUAL_FP64(expected, result)
+#define ASSERT_EQUAL_FP64(expected, result) \
+  USE(expected, result)
 
-#define ASSERT_EQUAL_NZCV(expected)
+#define ASSERT_EQUAL_NZCV(expected) \
+  USE(expected)
 
 #else
 
@@ -1490,7 +1496,7 @@
     VIXL_CHECK(masm->GetCursorOffset() == end);
   }
 
-  // Check that the pool has not been emited along the way.
+  // Check that the pool has not been emitted along the way.
   CHECK_POOL_SIZE(8);
   // This extra instruction should trigger an emit of the pool.
   __ Nop();
@@ -3634,8 +3640,6 @@
   const int label_count = 15;
   const int literal_count = 31;
   Label* labels;
-  uint64_t* literal_values;
-  Literal<uint64_t>* literals[literal_count];
 
   // Use multiple iterations, as each produces a different predictably random
   // sequence.
@@ -3653,7 +3657,7 @@
   // below). The cases are split in 4 groups:
   //
   //   - 0..3: Generate various amount of nops.
-  //   - 4..7: Generate various load intstructions with literals.
+  //   - 4..7: Generate various load instructions with literals.
   //   - 8..14: Generate various branch instructions.
   //   - 15..19: Generate various amount of nops.
   //
@@ -3679,12 +3683,13 @@
         labels = new Label[label_count];
 
         // Create new literal values.
-        literal_values = new uint64_t[literal_count];
+        std::vector<uint64_t> literal_values;
+        std::vector<Literal<uint64_t>> literals;
         for (int lit = 0; lit < literal_count; lit++) {
           // TODO: Generate pseudo-random data for literals. At the moment, the
           // disassembler breaks if we do this.
-          literal_values[lit] = lit;
-          literals[lit] = new Literal<uint64_t>(literal_values[lit]);
+          literal_values.push_back(lit);
+          literals.emplace_back(Literal<uint64_t>(literal_values[lit]));
         }
 
         for (;;) {
@@ -3736,13 +3741,13 @@
               __ Nop();
               break;
             case 4:
-              __ Ldr(r2, literals[literal_index]);
+              __ Ldr(r2, &literals[literal_index]);
               __ Cmp(r2, static_cast<uint32_t>(literal_values[literal_index]));
               __ B(ne, &fail);
               __ Mov(r2, 0);
               break;
             case 5:
-              __ Ldrb(r2, literals[literal_index]);
+              __ Ldrb(r2, &literals[literal_index]);
               __ Cmp(r2,
                      static_cast<uint32_t>(literal_values[literal_index]) &
                          0xff);
@@ -3750,7 +3755,7 @@
               __ Mov(r2, 0);
               break;
             case 6:
-              __ Ldrd(r2, r3, literals[literal_index]);
+              __ Ldrd(r2, r3, &literals[literal_index]);
               __ Cmp(r2, static_cast<uint32_t>(literal_values[literal_index]));
               __ B(ne, &fail);
               __ Mov(r2, 0);
@@ -3761,7 +3766,7 @@
               __ Mov(r3, 0);
               break;
             case 7:
-              __ Vldr(s0, literals[literal_index]);
+              __ Vldr(s0, &literals[literal_index]);
               __ Vmov(s1, static_cast<uint32_t>(literal_values[literal_index]));
               __ Vcmp(s0, s1);
               __ B(ne, &fail);
@@ -3875,9 +3880,6 @@
         // independent.
         masm.FinalizeCode(MacroAssembler::kFallThrough);
         delete[] labels;
-        for (int lit = 0; lit < literal_count; lit++) {
-          delete literals[lit];
-        }
       }
     }
   }
@@ -4849,7 +4851,7 @@
 
 // Generate a "B" and a "Cbz" which have the same checkpoint. Without proper
 // management (i.e. if the veneers were only generated at the shared
-// checkpoint), one one of the branches would be out of range.
+// checkpoint), one of the branches would be out of range.
 TEST_T32(veneer_simultaneous) {
   SETUP();
 
@@ -5109,7 +5111,7 @@
   __ Ldr(r11, literal);
 
   // The range for ldr is 4095, the range for cbz is 127. Generate nops
-  // to have the ldr becomming out of range just before the cbz.
+  // so that the ldr goes out of range just before the cbz.
   const int NUM_NOPS = 2044;
   const int NUM_RANGE = 58;
 
@@ -5184,7 +5186,7 @@
       __ add(r1, r1, 3);
     }
     __ Bind(&labels[test_num]);
-    // Emit the literal pool if it has not beeen emitted (it's the case for
+    // Emit the literal pool if it has not been emitted (it's the case for
     // the lower values of test_num).
     __ EmitLiteralPool(PoolManager<int32_t>::kBranchRequired);
   }
diff --git a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-a32.cc b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-a32.cc
index f9e09bf..497ceac 100644
--- a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-a32.cc
+++ b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-a32.cc
@@ -63,7 +63,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-t32.cc b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-t32.cc
index 1ef8aff..36182a3 100644
--- a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-t32.cc
+++ b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-f32-only-t32.cc
@@ -63,7 +63,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-a32.cc b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-a32.cc
index 1bb0f55..d2cc92c 100644
--- a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-a32.cc
+++ b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-a32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-t32.cc b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-t32.cc
index d2de584..94ef61f 100644
--- a/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-t32.cc
+++ b/test/aarch32/test-assembler-cond-dt-drt-drd-drn-drm-float-not-f16-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-memop-immediate-512-a32.cc b/test/aarch32/test-assembler-cond-rd-memop-immediate-512-a32.cc
index b4f3b52..251d5ec 100644
--- a/test/aarch32/test-assembler-cond-rd-memop-immediate-512-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-memop-immediate-512-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-memop-immediate-8192-a32.cc b/test/aarch32/test-assembler-cond-rd-memop-immediate-8192-a32.cc
index 2a06bc8..4489c43 100644
--- a/test/aarch32/test-assembler-cond-rd-memop-immediate-8192-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-memop-immediate-8192-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-memop-rs-a32.cc b/test/aarch32/test-assembler-cond-rd-memop-rs-a32.cc
index 00e19be..e88b260 100644
--- a/test/aarch32/test-assembler-cond-rd-memop-rs-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-memop-rs-a32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to31-a32.cc b/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to31-a32.cc
index 733f8de..3fe9c2f 100644
--- a/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to31-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to32-a32.cc b/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to32-a32.cc
index bdb57c5..b3300a7 100644
--- a/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-memop-rs-shift-amount-1to32-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-const-can-use-pc-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-const-can-use-pc-a32.cc
index 22021a4..714ea04 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-const-can-use-pc-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-const-can-use-pc-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-const-cannot-use-pc-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-const-cannot-use-pc-a32.cc
index ebbab48..ceb6dea 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-const-cannot-use-pc-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-const-cannot-use-pc-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-const-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-const-t32.cc
index c20cc62..837d208 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-const-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-const-t32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-imm16-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-imm16-t32.cc
index 0557e6f..35487e0 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-imm16-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-imm16-t32.cc
@@ -55,7 +55,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-a32.cc
index c64109a..2e2d861 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-a32.cc
@@ -66,7 +66,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-identical-low-registers-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-identical-low-registers-in-it-block-t32.cc
index eb0563f..ed6c6d8 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-identical-low-registers-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-identical-low-registers-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-in-it-block-t32.cc
index 11c29ca..2e3a61b 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-low-registers-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-low-registers-in-it-block-t32.cc
index be7e421..2dd7130 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-low-registers-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-low-registers-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-a32.cc
index a2c6125..4a37546 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-a32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-t32.cc
index 6372cde..398c32f 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-ror-amount-t32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-a32.cc
index ef95d1e..1aebeff 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-a32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-in-it-block-t32.cc
index 1a92f9b..f55bd9a 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-t32.cc
index 1ba217d..c9a4cd9 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to31-t32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-a32.cc
index ef719ee..5f94dbe 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-a32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-in-it-block-t32.cc
index 6ea6ef4..896d2e3 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-t32.cc
index 33d8ca2..f7fc177 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-amount-1to32-t32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-a32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-a32.cc
index 3276576..962d108 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-a32.cc
@@ -60,7 +60,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-in-it-block-t32.cc
index 5571d79..0134848 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-narrow-out-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-narrow-out-it-block-t32.cc
index df3f955..a182e9e 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-narrow-out-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-narrow-out-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-t32.cc
index 2035a1c..0fddef2 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-shift-rs-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-operand-rn-t32.cc b/test/aarch32/test-assembler-cond-rd-operand-rn-t32.cc
index 4c494fd..62a0ca6 100644
--- a/test/aarch32/test-assembler-cond-rd-operand-rn-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-operand-rn-t32.cc
@@ -66,7 +66,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-pc-operand-imm12-t32.cc b/test/aarch32/test-assembler-cond-rd-pc-operand-imm12-t32.cc
index fe7c799..c01fae7 100644
--- a/test/aarch32/test-assembler-cond-rd-pc-operand-imm12-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-pc-operand-imm12-t32.cc
@@ -55,7 +55,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-pc-operand-imm8-t32.cc b/test/aarch32/test-assembler-cond-rd-pc-operand-imm8-t32.cc
index 405a829..3b17bdd 100644
--- a/test/aarch32/test-assembler-cond-rd-pc-operand-imm8-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-pc-operand-imm8-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-a32.cc
index a0a7579..615e634 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-a32.cc
@@ -59,7 +59,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-const-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-const-a32.cc
index ea6f4ca..6cf4ba0 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-const-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-const-a32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-const-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-const-t32.cc
index 7edd26d..0d7b92a 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-const-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-const-t32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-imm12-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-imm12-t32.cc
index d8bc2ca..0adc38c 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-imm12-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-imm12-t32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-a32.cc
index 9b94950..1d2141d 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-a32.cc
@@ -86,7 +86,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-in-it-block-t32.cc
index 1c805b1..3993792 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-rd-is-rn-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-rd-is-rn-in-it-block-t32.cc
index f51718a..5013001 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-rd-is-rn-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-all-low-rd-is-rn-in-it-block-t32.cc
@@ -62,7 +62,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-in-it-block-t32.cc
index 0114858..b979b3a 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-is-sp-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-is-sp-in-it-block-t32.cc
index fae7287..a5c876c 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-is-sp-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rd-is-rn-is-sp-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rn-is-sp-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rn-is-sp-in-it-block-t32.cc
index 7de517d..4b015d0 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rn-is-sp-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-rn-is-sp-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-a32.cc
index 03fd978..3c4d6dd 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-a32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-t32.cc
index f3590bb..41c4c14 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-ror-amount-t32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
index f7bd97d..7d0375f 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
index ebe495d..885a3cf 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
index a0ca121..803d67f 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
index 318b6f9..254f915 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-rs-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-rs-a32.cc
index d181fcc..7f41a0b 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-rs-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-shift-rs-a32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-t32.cc
index ed0b7fe..a9c9e8f 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-operand-rm-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-operand-rm-t32.cc
@@ -86,7 +86,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-rm-a32.cc b/test/aarch32/test-assembler-cond-rd-rn-rm-a32.cc
index 3f6c7fc..a0a5315 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-rm-a32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-rm-a32.cc
@@ -110,7 +110,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-rm-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-rm-t32.cc
index c7812c8..f052c2e 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-rm-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-rm-t32.cc
@@ -109,7 +109,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-rn-t32.cc b/test/aarch32/test-assembler-cond-rd-rn-t32.cc
index bc39853..bebef77 100644
--- a/test/aarch32/test-assembler-cond-rd-rn-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-rn-t32.cc
@@ -59,7 +59,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rd-sp-operand-imm8-t32.cc b/test/aarch32/test-assembler-cond-rd-sp-operand-imm8-t32.cc
index bd1f020..57fecaa 100644
--- a/test/aarch32/test-assembler-cond-rd-sp-operand-imm8-t32.cc
+++ b/test/aarch32/test-assembler-cond-rd-sp-operand-imm8-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-operand-imm8-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rdlow-operand-imm8-in-it-block-t32.cc
index 217af34..ed99425 100644
--- a/test/aarch32/test-assembler-cond-rdlow-operand-imm8-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-operand-imm8-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-operand-imm8-t32.cc b/test/aarch32/test-assembler-cond-rdlow-operand-imm8-t32.cc
index 1332b8f..50fddd9 100644
--- a/test/aarch32/test-assembler-cond-rdlow-operand-imm8-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-operand-imm8-t32.cc
@@ -55,7 +55,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-in-it-block-t32.cc
index ef6f0d2..bb71431 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-t32.cc
index 9d303cb..029e37f 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm3-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-in-it-block-t32.cc
index 11bb597..1a6a704 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-in-it-block-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-t32.cc
index b946215..2cd95af 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-imm8-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-in-it-block-t32.cc
index ded8ee0..1ad13e3 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-t32.cc
index 5f7f55c..f6b06c3 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-operand-immediate-zero-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-in-it-block-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-in-it-block-t32.cc
index fc8fe31..bcf9dcb 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-in-it-block-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-in-it-block-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-t32.cc b/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-t32.cc
index 279c443..e45a648 100644
--- a/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-t32.cc
+++ b/test/aarch32/test-assembler-cond-rdlow-rnlow-rmlow-t32.cc
@@ -52,7 +52,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-cond-sp-sp-operand-imm7-t32.cc b/test/aarch32/test-assembler-cond-sp-sp-operand-imm7-t32.cc
index fd9d3da..93c66fe 100644
--- a/test/aarch32/test-assembler-cond-sp-sp-operand-imm7-t32.cc
+++ b/test/aarch32/test-assembler-cond-sp-sp-operand-imm7-t32.cc
@@ -54,7 +54,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-negative-cond-rd-rn-operand-rm-shift-rs-a32.cc b/test/aarch32/test-assembler-negative-cond-rd-rn-operand-rm-shift-rs-a32.cc
index 18d4c02..d88aa9f 100644
--- a/test/aarch32/test-assembler-negative-cond-rd-rn-operand-rm-shift-rs-a32.cc
+++ b/test/aarch32/test-assembler-negative-cond-rd-rn-operand-rm-shift-rs-a32.cc
@@ -72,7 +72,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-rd-rn-rm-a32.cc b/test/aarch32/test-assembler-rd-rn-rm-a32.cc
index c838c75..05662a2 100644
--- a/test/aarch32/test-assembler-rd-rn-rm-a32.cc
+++ b/test/aarch32/test-assembler-rd-rn-rm-a32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-assembler-rd-rn-rm-t32.cc b/test/aarch32/test-assembler-rd-rn-rm-t32.cc
index a49c67f..7da4587 100644
--- a/test/aarch32/test-assembler-rd-rn-rm-t32.cc
+++ b/test/aarch32/test-assembler-rd-rn-rm-t32.cc
@@ -58,7 +58,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-disasm-a32.cc b/test/aarch32/test-disasm-a32.cc
index c6acac9..95bb26e 100644
--- a/test/aarch32/test-disasm-a32.cc
+++ b/test/aarch32/test-disasm-a32.cc
@@ -505,11 +505,11 @@
 
   // - Immediate form. We can always re-use `rn`.
 
-  // No need for temporay registers.
+  // No need for temporary registers.
   COMPARE_T32(Rsc(r0, r1, 1),
               "mvn r0, r1\n"
               "adc r0, #1\n");
-  // No need for temporay registers.
+  // No need for temporary registers.
   COMPARE_T32(Rscs(r0, r0, 2),
               "mvn r0, r0\n"
               "adcs r0, #2\n");
@@ -568,7 +568,7 @@
 
   // - Shifted register form.
 
-  // No need for temporay registers.
+  // No need for temporary registers.
   COMPARE_T32(Rsc(r0, r1, Operand(r2, LSL, 1)),
               "mvn r0, r1\n"
               "adc r0, r2, lsl #1\n");
@@ -1508,7 +1508,7 @@
 TEST(macro_assembler_InstructionCondSizeRROp) {
   SETUP();
 
-  // Special case for Orr <-> Orn correspondance.
+  // Special case for Orr <-> Orn correspondence.
 
   COMPARE_T32(Orr(r0, r1, 0x00ffffff), "orn r0, r1, #0xff000000\n");
   COMPARE_T32(Orrs(r0, r1, 0x00ffffff), "orns r0, r1, #0xff000000\n");
@@ -1700,6 +1700,41 @@
 }
 
 
+TEST(macro_assembler_b_cond_t32) {
+  SETUP();
+
+#ifdef VIXL_INCLUDE_TARGET_T32
+  // Ensure backward conditional branches are encoded or veneered correctly.
+  __ UseT32();
+  int pc_off = __ GetArchitectureStatePCOffset();
+
+  // Largest encodable backwards offset: a single `bne` is expected.
+  int curs = __ GetCursorOffset() + pc_off;
+  Label label_neg1m(curs - 1048576);  // 1048576 == 2^20.
+  COMPARE_T32(B(ne, &label_neg1m), "bne 0xfff00004\n");
+
+  // Next largest cannot be encoded: expect inverted `beq` then plain `b`.
+  curs = __ GetCursorOffset() + pc_off;
+  Label label_neg1m_plus_inst(curs - (1048576 + 2));
+  COMPARE_T32(B(ne, &label_neg1m_plus_inst), "beq 0x00000006\n"
+                                             "b 0xfff00002\n");
+
+  // Offset that requires largest unconditional branch in veneer.
+  curs = __ GetCursorOffset() + pc_off;
+  Label label_neg16m(curs - (16777216 - 2));  // 16777216 == 2^24.
+  COMPARE_T32(B(ne, &label_neg16m), "beq 0x00000006\n"
+                                    "b 0xff000006\n");
+
+  // Next largest cannot be veneered: the failure message below is expected.
+  curs = __ GetCursorOffset() + pc_off;
+  Label label_neg16m_plus_inst(curs - 16777216);
+  MUST_FAIL_TEST_T32(B(ne, &label_neg16m_plus_inst),
+                     "Conditional branch too far for veneer.\n");
+#endif
+
+  CLEANUP();
+}
+
 #ifdef VIXL_NEGATIVE_TESTING
 TEST(assembler_crc_negative) {
   SETUP();
diff --git a/test/aarch32/test-macro-assembler-cond-rd-rn-a32.cc b/test/aarch32/test-macro-assembler-cond-rd-rn-a32.cc
index be77bd9..46706e3 100644
--- a/test/aarch32/test-macro-assembler-cond-rd-rn-a32.cc
+++ b/test/aarch32/test-macro-assembler-cond-rd-rn-a32.cc
@@ -63,7 +63,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-macro-assembler-cond-rd-rn-pc-a32.cc b/test/aarch32/test-macro-assembler-cond-rd-rn-pc-a32.cc
index 830edd1..47af778 100644
--- a/test/aarch32/test-macro-assembler-cond-rd-rn-pc-a32.cc
+++ b/test/aarch32/test-macro-assembler-cond-rd-rn-pc-a32.cc
@@ -56,7 +56,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-macro-assembler-cond-rd-rn-t32.cc b/test/aarch32/test-macro-assembler-cond-rd-rn-t32.cc
index d88cbf3..eccd35e 100644
--- a/test/aarch32/test-macro-assembler-cond-rd-rn-t32.cc
+++ b/test/aarch32/test-macro-assembler-cond-rd-rn-t32.cc
@@ -63,7 +63,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` has
 // various layouts across generated tests so it absolutely cannot be shared.
diff --git a/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-a32.cc b/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-a32.cc
index a8417e0..db9ce9c 100644
--- a/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-a32.cc
+++ b/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-a32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-t32.cc b/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-t32.cc
index b9c97b6..a9843c5 100644
--- a/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-t32.cc
+++ b/test/aarch32/test-simulator-cond-dt-drt-drd-drn-drm-float-f64-t32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-memop-immediate-512-a32.cc b/test/aarch32/test-simulator-cond-rd-memop-immediate-512-a32.cc
index 4fa3175..3945393 100644
--- a/test/aarch32/test-simulator-cond-rd-memop-immediate-512-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-memop-immediate-512-a32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-memop-immediate-8192-a32.cc b/test/aarch32/test-simulator-cond-rd-memop-immediate-8192-a32.cc
index 7936fa4..18a8156 100644
--- a/test/aarch32/test-simulator-cond-rd-memop-immediate-8192-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-memop-immediate-8192-a32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-memop-rs-a32.cc b/test/aarch32/test-simulator-cond-rd-memop-rs-a32.cc
index 3feb4fc..d8fdf9a 100644
--- a/test/aarch32/test-simulator-cond-rd-memop-rs-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-memop-rs-a32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to31-a32.cc b/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to31-a32.cc
index ce16a6f..de4b428 100644
--- a/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to31-a32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to32-a32.cc b/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to32-a32.cc
index 797ec6c..ab4c4d7 100644
--- a/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-memop-rs-shift-amount-1to32-a32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-const-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-const-a32.cc
index 8211042..207996d 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-const-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-const-a32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-const-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-const-t32.cc
index 08de626..291afec 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-const-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-const-t32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-imm16-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-imm16-t32.cc
index 10fe377..0b4c89e 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-imm16-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-imm16-t32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-a32.cc
index b8131d5..bceb77d 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-a32.cc
@@ -130,7 +130,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-a32.cc
index cd3addb..4b55b01 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-a32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-t32.cc
index 3d6ea35..ca10bb6 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-ror-amount-t32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-a32.cc
index 41c1eea..1af354c 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-a32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-t32.cc
index 83d79e8..8d67b8d 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to31-t32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-a32.cc
index 7327a5f..c189843 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-a32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-t32.cc
index af87801..3c9fb1a 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-amount-1to32-t32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-a32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-a32.cc
index 3df8bcc..daa5ba4 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-a32.cc
@@ -124,7 +124,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-t32.cc
index 9c3fc8f..ced940f 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-shift-rs-t32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-operand-rn-t32.cc b/test/aarch32/test-simulator-cond-rd-operand-rn-t32.cc
index d410510..c60c666 100644
--- a/test/aarch32/test-simulator-cond-rd-operand-rn-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-operand-rn-t32.cc
@@ -130,7 +130,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-a32.cc
index 25530f7..33776c3 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-a32.cc
@@ -123,7 +123,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-const-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-const-a32.cc
index e2836d1..7862115 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-const-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-const-a32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-const-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-const-t32.cc
index 6fe473a..1c640a6 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-const-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-const-t32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-imm12-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-imm12-t32.cc
index bee7d00..f6644d3 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-imm12-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-imm12-t32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-a32.cc
index 606e6bb..8f13e0a 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-a32.cc
@@ -150,7 +150,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-a32.cc
index 536c0ef..fdedd4b 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-a32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-t32.cc
index ea0eebb..ce8881a 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-ror-amount-t32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
index b654ac3..18fa62e 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-a32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
index bf35690..0566348 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to31-t32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
index ab0b3cd..bba5896 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-a32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
index 44506e7..f0cf8b3 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-amount-1to32-t32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-rs-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-rs-a32.cc
index f3a64b6..7070d6d 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-rs-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-shift-rs-a32.cc
@@ -136,7 +136,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-t32.cc
index ebca6dc..8527ce0 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-operand-rm-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-operand-rm-t32.cc
@@ -150,7 +150,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-a32.cc
index 5b4fbe1..f4acc7f 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-a32.cc
@@ -174,7 +174,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-ge-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-ge-a32.cc
index 0ad798e..1f147c9 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-ge-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-ge-a32.cc
@@ -128,7 +128,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-ge-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-ge-t32.cc
index 1f1fced..385552e 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-ge-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-ge-t32.cc
@@ -128,7 +128,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-q-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-q-a32.cc
index a6e19c9..7df7810 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-q-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-q-a32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-q-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-q-t32.cc
index e0339c7..3a4fbe6 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-q-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-q-t32.cc
@@ -120,7 +120,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-sel-a32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-sel-a32.cc
index c497839..f1228b5 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-sel-a32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-sel-a32.cc
@@ -116,7 +116,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-sel-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-sel-t32.cc
index 399f42e..9b31427 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-sel-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-sel-t32.cc
@@ -116,7 +116,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-rm-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-rm-t32.cc
index 25622b5..627d0c3 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-rm-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-rm-t32.cc
@@ -173,7 +173,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rd-rn-t32.cc b/test/aarch32/test-simulator-cond-rd-rn-t32.cc
index f7b2d44..7dd893b 100644
--- a/test/aarch32/test-simulator-cond-rd-rn-t32.cc
+++ b/test/aarch32/test-simulator-cond-rd-rn-t32.cc
@@ -123,7 +123,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rdlow-operand-imm8-t32.cc b/test/aarch32/test-simulator-cond-rdlow-operand-imm8-t32.cc
index 501009a..c23ccb9 100644
--- a/test/aarch32/test-simulator-cond-rdlow-operand-imm8-t32.cc
+++ b/test/aarch32/test-simulator-cond-rdlow-operand-imm8-t32.cc
@@ -119,7 +119,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rdlow-rnlow-operand-immediate-t32.cc b/test/aarch32/test-simulator-cond-rdlow-rnlow-operand-immediate-t32.cc
index 3609ab3..fdf6d2e 100644
--- a/test/aarch32/test-simulator-cond-rdlow-rnlow-operand-immediate-t32.cc
+++ b/test/aarch32/test-simulator-cond-rdlow-rnlow-operand-immediate-t32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-cond-rdlow-rnlow-rmlow-t32.cc b/test/aarch32/test-simulator-cond-rdlow-rnlow-rmlow-t32.cc
index e6893ab..bb92981 100644
--- a/test/aarch32/test-simulator-cond-rdlow-rnlow-rmlow-t32.cc
+++ b/test/aarch32/test-simulator-cond-rdlow-rnlow-rmlow-t32.cc
@@ -118,7 +118,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-rd-rn-rm-a32.cc b/test/aarch32/test-simulator-rd-rn-rm-a32.cc
index 5ad3ba4..97d1c35 100644
--- a/test/aarch32/test-simulator-rd-rn-rm-a32.cc
+++ b/test/aarch32/test-simulator-rd-rn-rm-a32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-simulator-rd-rn-rm-t32.cc b/test/aarch32/test-simulator-rd-rn-rm-t32.cc
index de34ff0..2d52cb7 100644
--- a/test/aarch32/test-simulator-rd-rn-rm-t32.cc
+++ b/test/aarch32/test-simulator-rd-rn-rm-t32.cc
@@ -122,7 +122,7 @@
 
 
 // The following definitions are defined again in each generated test, therefore
-// we need to place them in an anomymous namespace. It expresses that they are
+// we need to place them in an anonymous namespace. It expresses that they are
 // local to this file only, and the compiler is not allowed to share these types
 // across test files during template instantiation. Specifically, `Operands` and
 // `Inputs` have various layouts across generated tests so they absolutely
diff --git a/test/aarch32/test-utils-aarch32.cc b/test/aarch32/test-utils-aarch32.cc
index 3c6574e..ad4cbd5 100644
--- a/test/aarch32/test-utils-aarch32.cc
+++ b/test/aarch32/test-utils-aarch32.cc
@@ -41,7 +41,7 @@
   Register dump_base = r0;
   Register tmp = r1;
 
-  // Check that the the dump registers can be used
+  // Check that the dump registers can be used
   VIXL_STATIC_ASSERT(sizeof(dump_.r_[0]) == kRegSizeInBytes);
   VIXL_STATIC_ASSERT(sizeof(dump_.d_[0]) == kDRegSizeInBytes);
 
diff --git a/test/aarch64/examples/test-examples.cc b/test/aarch64/examples/test-examples.cc
index 3cbbe8d..7bf4af4 100644
--- a/test/aarch64/examples/test-examples.cc
+++ b/test/aarch64/examples/test-examples.cc
@@ -29,8 +29,8 @@
 #include "non-const-visitor.h"
 #include "test-runner.h"
 #include "test-utils.h"
-#include "../test-utils-aarch64.h"
 
+#include "../test-utils-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
 #define TEST(name) TEST_(EXAMPLE_##name)
@@ -129,7 +129,7 @@
                              masm.GetLabelAddress<uint64_t>(&Func));          \
     simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&test));             \
                                                                               \
-    /* Check that callee-saved regsiters are preserved. */                    \
+    /* Check that callee-saved registers are preserved. */                    \
     VIXL_CHECK(saved_xregs[0] == simulator.ReadXRegister(19));                \
     VIXL_CHECK(saved_xregs[1] == simulator.ReadXRegister(20));                \
     VIXL_CHECK(saved_xregs[2] == simulator.ReadXRegister(21));                \
diff --git a/test/aarch64/test-abi.cc b/test/aarch64/test-abi.cc
index e823c7e..0b9a1b0 100644
--- a/test/aarch64/test-abi.cc
+++ b/test/aarch64/test-abi.cc
@@ -25,9 +25,9 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "test-runner.h"
-#include "test-utils-aarch64.h"
 
 #include "aarch64/abi-aarch64.h"
+#include "test-utils-aarch64.h"
 
 #if (__cplusplus >= 201103L) && !defined(VIXL_HAS_ABI_SUPPORT)
 #error "C++11 should be sufficient to provide ABI support."
@@ -51,9 +51,9 @@
   VIXL_CHECK(abi.GetReturnGenericOperand<char>().Equals(GenericOperand(w0)));
   VIXL_CHECK(abi.GetReturnGenericOperand<int8_t>().Equals(GenericOperand(w0)));
   VIXL_CHECK(abi.GetReturnGenericOperand<uint8_t>().Equals(GenericOperand(w0)));
-  VIXL_CHECK(
-      abi.GetReturnGenericOperand<short>().Equals(  // NOLINT(runtime/int)
-          GenericOperand(w0)));
+  VIXL_CHECK(abi.GetReturnGenericOperand<short>()
+                 .Equals(  // NOLINT(google-runtime-int)
+                     GenericOperand(w0)));
   VIXL_CHECK(abi.GetReturnGenericOperand<int16_t>().Equals(GenericOperand(w0)));
   VIXL_CHECK(
       abi.GetReturnGenericOperand<uint16_t>().Equals(GenericOperand(w0)));
@@ -92,7 +92,7 @@
   CHECK_NEXT_PARAMETER_REG(uint64_t, x4);
   CHECK_NEXT_PARAMETER_REG(void*, x5);
   CHECK_NEXT_PARAMETER_REG(uint32_t, w6);
-  typedef short my_type;  // NOLINT(runtime/int)
+  typedef short my_type;  // NOLINT(google-runtime-int)
   CHECK_NEXT_PARAMETER_REG(my_type, w7);
   CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 0), kWRegSizeInBytes);
   CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 8), kWRegSizeInBytes);
@@ -102,14 +102,14 @@
   CHECK_NEXT_PARAMETER_REG(double, d7);
   CHECK_NEXT_PARAMETER_MEM(double, MemOperand(sp, 16), kDRegSizeInBytes);
   CHECK_NEXT_PARAMETER_MEM(bool, MemOperand(sp, 24), kWRegSizeInBytes);
-  CHECK_NEXT_PARAMETER_MEM(short,  // NOLINT(runtime/int)
+  CHECK_NEXT_PARAMETER_MEM(short,  // NOLINT(google-runtime-int)
                            MemOperand(sp, 32),
                            kWRegSizeInBytes);
   CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 40), kSRegSizeInBytes);
   CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 48), kSRegSizeInBytes);
   VIXL_CHECK(abi.GetStackSpaceRequired() == 56);
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
-#endif  // VIXL_ABI_SUPORT
+#endif  // VIXL_HAS_ABI_SUPPORT
diff --git a/test/aarch64/test-api-aarch64.cc b/test/aarch64/test-api-aarch64.cc
index b25fa7c..3ac9efb 100644
--- a/test/aarch64/test-api-aarch64.cc
+++ b/test/aarch64/test-api-aarch64.cc
@@ -27,14 +27,15 @@
 #include <cstdio>
 #include <cstring>
 #include <string>
+#include <thread>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/registers-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 
 #define __ masm.
 #define TEST(name) TEST_(AARCH64_API_##name)
@@ -510,7 +511,7 @@
 }
 
 
-TEST(areconsecutive) {
+TEST(are_consecutive) {
   VIXL_CHECK(AreConsecutive(b0, NoVReg));
   VIXL_CHECK(AreConsecutive(b1, b2));
   VIXL_CHECK(AreConsecutive(b3, b4, b5));
@@ -1593,7 +1594,7 @@
     temps.Exclude(ZRegister(12), ZRegister(13, kHRegSize), z14);
     temps.Exclude(CPURegList(z16, z17, z18));
     helper.RecordActionsAndCheck(0x77700);
-    // Exluding a register again has no effect.
+    // Excluding a register again has no effect.
     temps.Exclude(ZRegister(18));
     temps.Exclude(ZRegister(17, kFormatVnB));
     temps.Exclude(CPURegister(z16));
@@ -1763,6 +1764,24 @@
   VIXL_CHECK(s.IsAccessInGuardRegion(s.GetLimit() - 1280, 2048));
   VIXL_CHECK(s.IsAccessInGuardRegion(s.GetLimit() - 1280, 10000));
 }
+
+void AllocateAndFreeGCS() {
+  Decoder d;
+  Simulator s(&d);
+
+  for (int i = 0; i < 100000; i++) {
+    uint64_t gcs = s.GetGCSManager().AllocateStack();
+    s.GetGCSManager().FreeStack(gcs);
+  }
+}
+
+TEST(sim_gcs_manager) {
+  std::thread t1(AllocateAndFreeGCS);
+  std::thread t2(AllocateAndFreeGCS);
+
+  t1.join();
+  t2.join();
+}
 #endif
 
 }  // namespace aarch64
diff --git a/test/aarch64/test-api-movprfx-aarch64.cc b/test/aarch64/test-api-movprfx-aarch64.cc
index 535ae0b..7263a08 100644
--- a/test/aarch64/test-api-movprfx-aarch64.cc
+++ b/test/aarch64/test-api-movprfx-aarch64.cc
@@ -30,10 +30,10 @@
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/assembler-aarch64.h"
 #include "aarch64/instructions-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 
 #define __ assm.
 #define TEST(name) TEST_(AARCH64_API_##name)
@@ -50,7 +50,7 @@
     instr_form_ = (*metadata)["form"];
   }
 
-  std::string MoveForm() { return std::move(instr_form_); }
+  std::string MoveForm() { return instr_form_; }
 
  private:
   std::string instr_form_;
@@ -1424,6 +1424,9 @@
     __ movprfx(z15, z18);
     __ eor(z15.VnH(), z15.VnH(), 4);
 
+    __ movprfx(z17, z30);
+    __ ext(z17.VnB(), z17.VnB(), z18.VnB(), 2);
+
     __ movprfx(z19, z28);
     __ incd(z19.VnD(), SVE_MUL3);
 
@@ -2409,7 +2412,7 @@
   {
     // We have to use the Assembler directly to generate movprfx, so we need
     // to manually reserve space for the code we're about to emit.
-    static const size_t kPairCount = 133;
+    static const size_t kPairCount = 134;
     CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
 
     __ movprfx(z29, z30);
@@ -2580,6 +2583,9 @@
     __ movprfx(z31, z0);
     __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0);
 
+    __ movprfx(z4, z5);
+    __ splice_con(z4.VnB(), p7.Merging(), z0.VnB(), z1.VnB());
+
     __ movprfx(z18, z19);
     __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB());
 
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc
index 4ca1a56..a86b32e 100644
--- a/test/aarch64/test-assembler-aarch64.cc
+++ b/test/aarch64/test-assembler-aarch64.cc
@@ -24,23 +24,23 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <sys/mman.h>
+#include "test-assembler-aarch64.h"
 
 #include <cfloat>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <sys/mman.h>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
-#include "test-assembler-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -1634,11 +1634,19 @@
   START();
 
   Register pointer = x24;
-  Register modifier = x25;
+  Register retry_limit = x25;
+  Register modifier = x26;
+  Label retry;
 
+  // There is a small but not negligible chance (1 in 127 runs) that the PAC
+  // codes for keys A and B will collide, so retry a few times with different
+  // pointers.
   __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
   __ Mov(modifier, 0x477d469dec0b8760);
 
+  __ Bind(&retry);
+
   // Generate PACs using keys A and B.
   __ Mov(x0, pointer);
   __ Pacia(x0, modifier);
@@ -1660,21 +1668,24 @@
   __ Mov(x5, x0);
   __ Autib(x5, modifier);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x4, ZFlag, ne);
+  __ Ccmp(pointer, x5, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
 
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
     ASSERT_NOT_EQUAL_64(x0, x1);
 
     // Pointers correctly authenticated.
@@ -1682,8 +1693,13 @@
     ASSERT_EQUAL_64(pointer, x3);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0020000012345678, x4);
     ASSERT_EQUAL_64(0x0040000012345678, x5);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x4);
+    ASSERT_NOT_EQUAL_64(pointer, x5);
+#endif
   }
 }
 
@@ -1694,8 +1710,16 @@
   START();
 
   Register pointer = x24;
+  Register retry_limit = x25;
+  Label retry;
 
+  // There is a small but not negligible chance (1 in 127 runs) that the PAC
+  // codes for keys A and B will collide, so retry a few times with different
+  // pointers.
   __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
+
+  __ Bind(&retry);
 
   // Generate PACs using keys A and B.
   __ Mov(x0, pointer);
@@ -1718,21 +1742,24 @@
   __ Mov(x5, x0);
   __ Autizb(x5);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x4, ZFlag, ne);
+  __ Ccmp(pointer, x5, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
 
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
     ASSERT_NOT_EQUAL_64(x0, x1);
 
     // Pointers correctly authenticated.
@@ -1740,8 +1767,13 @@
     ASSERT_EQUAL_64(pointer, x3);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0020000012345678, x4);
     ASSERT_EQUAL_64(0x0040000012345678, x5);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x4);
+    ASSERT_NOT_EQUAL_64(pointer, x5);
+#endif
   }
 }
 
@@ -1752,11 +1784,19 @@
   START();
 
   Register pointer = x24;
-  Register modifier = x25;
+  Register retry_limit = x25;
+  Register modifier = x26;
+  Label retry;
 
+  // There is a small but not negligible chance (1 in 127 runs) that the PAC
+  // codes for keys A and B will collide, so retry a few times with different
+  // pointers.
   __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
   __ Mov(modifier, 0x477d469dec0b8760);
 
+  __ Bind(&retry);
+
   // Generate PACs using keys A and B.
   __ Mov(x0, pointer);
   __ Pacda(x0, modifier);
@@ -1778,21 +1818,24 @@
   __ Mov(x5, x0);
   __ Autdb(x5, modifier);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x4, ZFlag, ne);
+  __ Ccmp(pointer, x5, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
 
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
     ASSERT_NOT_EQUAL_64(x0, x1);
 
     // Pointers correctly authenticated.
@@ -1800,8 +1843,13 @@
     ASSERT_EQUAL_64(pointer, x3);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0020000012345678, x4);
     ASSERT_EQUAL_64(0x0040000012345678, x5);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x4);
+    ASSERT_NOT_EQUAL_64(pointer, x5);
+#endif
   }
 }
 
@@ -1812,8 +1860,16 @@
   START();
 
   Register pointer = x24;
+  Register retry_limit = x25;
+  Label retry;
 
+  // There is a small but not negligible chance (1 in 127 runs) that the PAC
+  // codes for keys A and B will collide, so retry a few times with different
+  // pointers.
   __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
+
+  __ Bind(&retry);
 
   // Generate PACs using keys A and B.
   __ Mov(x0, pointer);
@@ -1836,21 +1892,24 @@
   __ Mov(x5, x0);
   __ Autdzb(x5);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x4, ZFlag, ne);
+  __ Ccmp(pointer, x5, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
 
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
     ASSERT_NOT_EQUAL_64(x0, x1);
 
     // Pointers correctly authenticated.
@@ -1858,8 +1917,13 @@
     ASSERT_EQUAL_64(pointer, x3);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0020000012345678, x4);
     ASSERT_EQUAL_64(0x0040000012345678, x5);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x4);
+    ASSERT_NOT_EQUAL_64(pointer, x5);
+#endif
   }
 }
 
@@ -1870,11 +1934,19 @@
   START();
 
   Register pointer = x24;
-  Register modifier = x25;
+  Register retry_limit = x25;
+  Register modifier = x26;
+  Label retry;
 
+  // There is a small but not negligible chance (1 in 127 runs) that the PAC
+  // codes for keys A and B will collide, so retry a few times with different
+  // pointers.
   __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
   __ Mov(modifier, 0x477d469dec0b8760);
 
+  __ Bind(&retry);
+
   // Generate generic PAC.
   __ Pacga(x0, pointer, modifier);
 
@@ -1890,25 +1962,24 @@
   __ Xpaci(x3);
   __ Xpacd(x4);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0xffffffff00000000);
-  __ And(x1, x1, 0x007f000000000000);
-  __ And(x2, x2, 0x007f000000000000);
+  // Retry on collisions.
+  __ Cmp(x1, x2);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x2, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
 
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-
-    ASSERT_NOT_EQUAL_64(0, x1);
-    ASSERT_NOT_EQUAL_64(0, x2);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
+    ASSERT_NOT_EQUAL_64(pointer, x2);
     ASSERT_NOT_EQUAL_64(x1, x2);
 
     ASSERT_EQUAL_64(pointer, x3);
@@ -1916,6 +1987,34 @@
   }
 }
 
+TEST(pac_sp_modifier) {  // sp and a copy of sp must be equivalent modifiers.
+  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
+
+  START();
+
+  __ Mov(x0, 0x0000000012345678);
+  __ Mov(x1, x0);
+  __ Mov(x10, sp);
+
+  // Sign the same pointer twice: with a copy of sp (x10), then with sp itself.
+  __ Pacia(x0, x10);
+  __ Pacia(x1, sp);
+
+  // Authenticate the signed pointers, exchanging the (equal) modifiers.
+  __ Mov(x2, x0);
+  __ Mov(x3, x1);
+  __ Autia(x2, sp);
+  __ Autia(x3, x10);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(x0, x1);  // Equal modifiers give equal signed pointers.
+    ASSERT_EQUAL_64(x2, x3);  // ...and equal authentication results.
+  }
+}
 
 TEST(label) {
   SETUP();
@@ -2548,13 +2647,18 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
 #endif
+    // On hardware, we'll run the test anyway, but mark it as SKIPPED until
+    // we've implemented a mechanism for marking Guarded pages.
+
     RUN();
 
     ASSERT_EQUAL_64(42, x0);
     ASSERT_EQUAL_64(84, x1);
+
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -2587,7 +2691,11 @@
   END();
 
   if (CAN_RUN()) {
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
+#else
+    printf("SKIPPED: negative PAuth tests are unimplemented on hardware.");
+#endif
   }
 }
 #endif  // VIXL_NEGATIVE_TESTING
@@ -2623,7 +2731,11 @@
   END();
 
   if (CAN_RUN()) {
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
+#else
+    printf("SKIPPED: negative PAuth tests are unimplemented on hardware.");
+#endif
   }
 }
 #endif  // VIXL_NEGATIVE_TESTING
@@ -2858,6 +2970,372 @@
   }
 }
 
+enum MTEStgAttribute {  // Bit flags for the `attr` of MTEStoreTagHelper.
+  StgNoSideEffect = 0,  // Default: only the single stored tag is checked.
+  StgPairTag = 1,       // Also check the tag of the following granule.
+  StgZeroing = 2,       // Also check that the tagged data reads back as zero.
+  StgPairReg = 4        // NOTE(review): not used by the helper; purpose unclear.
+};
+
+// Test helper for the tag-store macros (st2g, stg, stz2g and stzg) and ldg.
+template <typename Op>
+static void MTEStoreTagHelper(Op op,
+                              AddrMode addr_mode,
+                              int attr = StgNoSideEffect) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+  START();
+
+  // Sum `size_in_bytes` of data into x10, using x9 as scratch (both reserved).
+  // This emits nothing when the size is zero, i.e. for stg and st2g.
+  auto LoadDataAndSum = [&](Register reg, int off, unsigned size_in_bytes) {
+    for (unsigned j = 0; j < size_in_bytes / kXRegSizeInBytes; j++) {
+      __ Ldr(x9, MemOperand(reg, off));
+      __ Add(x10, x9, x10);
+      off += kXRegSizeInBytes;
+    }
+  };
+
+  // Initialize registers to zero.
+  for (int i = 0; i < 29; i++) {
+    __ Mov(XRegister(i), 0);
+  }
+
+  Register base = x28;
+  Register base_tag = x27;
+  uint32_t* data_ptr = nullptr;
+  const int data_size = 640;
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  data_ptr = reinterpret_cast<uint32_t*>(
+      simulator.Mmap(NULL,
+                     data_size * sizeof(uint32_t),
+                     PROT_READ | PROT_WRITE | PROT_MTE,
+                     MAP_PRIVATE | MAP_ANONYMOUS,
+                     -1,
+                     0));
+
+  VIXL_ASSERT(data_ptr != nullptr);
+  uint32_t* untagged_ptr = AddressUntag(data_ptr);
+  memset(untagged_ptr, 0xae, data_size * sizeof(uint32_t));
+#else
+// TODO: Port the memory allocation to work on MTE supported platform natively.
+// Note that `CAN_RUN` prevents running in MTE-unsupported environments.
+#endif
+
+  __ Mov(base, reinterpret_cast<uint64_t>(&data_ptr[data_size / 2]));
+
+  VIXL_STATIC_ASSERT(kMTETagGranuleInBytes == 16);
+  const int tag_granule = kMTETagGranuleInBytes;
+  int size = ((attr & StgZeroing) != 0) ? tag_granule : 0;
+  // lsb of MTE tag field.
+  const int tag_lsb = 56;
+
+  for (int i = 1; i < 7; i++) {
+    uint64_t tag = static_cast<uint64_t>(i) << tag_lsb;
+    // Alternate the sign to test both positive and negative offsets.
+    int sign = ((i % 2) == 0) ? -1 : 1;
+    int offset = sign * 2 * i * tag_granule;
+    __ Mov(XRegister(i), tag);
+    (masm.*op)(XRegister(i), MemOperand(base, offset, addr_mode));
+
+    // The address tag has been changed after the execution of store tag
+    // instructions, so update the pointer tag as well.
+    __ Bic(base_tag, base, 0x0f00000000000000);
+    __ Orr(base_tag, base_tag, XRegister(i));
+
+    switch (addr_mode) {
+      case Offset:
+        __ Ldg(XRegister(i + 10), MemOperand(base_tag, offset));
+        LoadDataAndSum(base_tag, offset, size);
+        if ((attr & StgPairTag) != 0) {
+          __ Ldg(XRegister(i + 20), MemOperand(base_tag, offset + tag_granule));
+          LoadDataAndSum(base_tag, offset + tag_granule, size);
+        }
+        break;
+
+      case PreIndex:
+        __ Ldg(XRegister(i + 10), MemOperand(base_tag));
+        LoadDataAndSum(base_tag, 0, size);
+        if ((attr & StgPairTag) != 0) {
+          __ Ldg(XRegister(i + 20), MemOperand(base_tag, tag_granule));
+          LoadDataAndSum(base_tag, tag_granule, size);
+        }
+        break;
+
+      case PostIndex:
+        __ Ldg(XRegister(i + 10), MemOperand(base_tag, -offset));
+        LoadDataAndSum(base_tag, -offset, size);
+        if ((attr & StgPairTag) != 0) {
+          __ Ldg(XRegister(i + 20),
+                 MemOperand(base_tag, -offset + tag_granule));
+          LoadDataAndSum(base_tag, -offset + tag_granule, size);
+        }
+        break;
+
+      default:
+        VIXL_UNIMPLEMENTED();
+        break;
+    }
+  }
+
+  int pos_offset = 304;
+  int neg_offset = -256;
+
+  // Backup stack pointer and others.
+  __ Mov(x7, sp);
+  __ Mov(base_tag, base);
+
+  // Test the cases where operand is the stack pointer.
+  __ Mov(x8, 11UL << tag_lsb);
+  __ Mov(sp, x8);
+  (masm.*op)(sp, MemOperand(base, neg_offset, addr_mode));
+
+  // Synthesise a new address with new tag and assign to the stack pointer.
+  __ Add(sp, base_tag, 32);
+  (masm.*op)(x8, MemOperand(sp, pos_offset, addr_mode));
+
+  switch (addr_mode) {
+    case Offset:
+      __ Ldg(x17, MemOperand(base, neg_offset));
+      __ Ldg(x19, MemOperand(sp, pos_offset));
+      if ((attr & StgPairTag) != 0) {
+        __ Ldg(x18, MemOperand(base, neg_offset + tag_granule));
+        __ Ldg(x20, MemOperand(sp, pos_offset + tag_granule));
+      }
+      break;
+    case PreIndex:
+      __ Ldg(x17, MemOperand(base));
+      __ Ldg(x19, MemOperand(sp));
+      if ((attr & StgPairTag) != 0) {
+        __ Ldg(x18, MemOperand(base, tag_granule));
+        __ Ldg(x20, MemOperand(sp, tag_granule));
+      }
+      break;
+    case PostIndex:
+      __ Ldg(x17, MemOperand(base, -neg_offset));
+      __ Ldg(x19, MemOperand(sp, -pos_offset));
+      if ((attr & StgPairTag) != 0) {
+        __ Ldg(x18, MemOperand(base, -neg_offset + tag_granule));
+        __ Ldg(x20, MemOperand(sp, -pos_offset + tag_granule));
+      }
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  // Restore stack pointer.
+  __ Mov(sp, x7);
+
+  END();
+
+  if (CAN_RUN()) {
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+    VIXL_UNIMPLEMENTED();
+#endif
+    RUN();
+
+    ASSERT_EQUAL_64(1UL << tag_lsb, x11);
+    ASSERT_EQUAL_64(2UL << tag_lsb, x12);
+    ASSERT_EQUAL_64(3UL << tag_lsb, x13);
+    ASSERT_EQUAL_64(4UL << tag_lsb, x14);
+    ASSERT_EQUAL_64(5UL << tag_lsb, x15);
+    ASSERT_EQUAL_64(6UL << tag_lsb, x16);
+    ASSERT_EQUAL_64(11UL << tag_lsb, x17);
+    ASSERT_EQUAL_64(11UL << tag_lsb, x19);
+
+    if ((attr & StgPairTag) != 0) {
+      ASSERT_EQUAL_64(1UL << tag_lsb, x21);
+      ASSERT_EQUAL_64(2UL << tag_lsb, x22);
+      ASSERT_EQUAL_64(3UL << tag_lsb, x23);
+      ASSERT_EQUAL_64(4UL << tag_lsb, x24);
+      ASSERT_EQUAL_64(5UL << tag_lsb, x25);
+      ASSERT_EQUAL_64(6UL << tag_lsb, x26);
+      ASSERT_EQUAL_64(11UL << tag_lsb, x18);
+      ASSERT_EQUAL_64(11UL << tag_lsb, x20);
+    }
+
+    if ((attr & StgZeroing) != 0) {
+      ASSERT_EQUAL_64(0, x10);
+    }
+  }
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  // Unmap the same number of bytes that was passed to Mmap above.
+  simulator.Munmap(data_ptr, data_size * sizeof(uint32_t), PROT_MTE);
+#endif
+}
+
+TEST(st2g_ldg) {  // StgPairTag: the following granule's tag is checked too.
+  MTEStoreTagHelper(&MacroAssembler::St2g, Offset, StgPairTag);
+  MTEStoreTagHelper(&MacroAssembler::St2g, PreIndex, StgPairTag);
+  MTEStoreTagHelper(&MacroAssembler::St2g, PostIndex, StgPairTag);
+}
+
+TEST(stg_ldg) {  // Default attributes: only the single stored tag is checked.
+  MTEStoreTagHelper(&MacroAssembler::Stg, Offset);
+  MTEStoreTagHelper(&MacroAssembler::Stg, PreIndex);
+  MTEStoreTagHelper(&MacroAssembler::Stg, PostIndex);
+}
+
+TEST(stz2g_ldg) {  // Checks both granules' tags and that their data is zero.
+  MTEStoreTagHelper(&MacroAssembler::Stz2g, Offset, StgPairTag | StgZeroing);
+  MTEStoreTagHelper(&MacroAssembler::Stz2g, PreIndex, StgPairTag | StgZeroing);
+  MTEStoreTagHelper(&MacroAssembler::Stz2g, PostIndex, StgPairTag | StgZeroing);
+}
+
+TEST(stzg_ldg) {  // Checks the stored tag and that the granule's data is zero.
+  MTEStoreTagHelper(&MacroAssembler::Stzg, Offset, StgZeroing);
+  MTEStoreTagHelper(&MacroAssembler::Stzg, PreIndex, StgZeroing);
+  MTEStoreTagHelper(&MacroAssembler::Stzg, PostIndex, StgZeroing);
+}
+
+TEST(stgp_ldg) {  // stgp stores a tag and a register pair; ldg reads the tag.
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+  START();
+
+  // Initialize registers to zero.
+  for (int i = 0; i < 29; i++) {
+    __ Mov(XRegister(i), 0);
+  }
+
+  // Sum two consecutive X-sized loads into x15, using x14 as scratch.
+  auto LoadDataAndSum = [&](Register reg, int off) {
+    __ Ldr(x14, MemOperand(reg, off));
+    __ Add(x15, x14, x15);
+    __ Ldr(x14, MemOperand(reg, off + static_cast<int>(kXRegSizeInBytes)));
+    __ Add(x15, x14, x15);
+  };
+
+  Register base = x28;
+  uint32_t* data_ptr = nullptr;
+  const int data_size = 640;
+  uint64_t init_tag = 17;
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  data_ptr = reinterpret_cast<uint32_t*>(
+      simulator.Mmap(NULL,
+                     data_size * sizeof(uint32_t),
+                     PROT_READ | PROT_WRITE | PROT_MTE,
+                     MAP_PRIVATE | MAP_ANONYMOUS,
+                     -1,
+                     0));
+
+  VIXL_ASSERT(data_ptr != nullptr);
+  init_tag = CPU::GetPointerTag(data_ptr);
+  uint32_t* untagged_ptr = AddressUntag(data_ptr);
+  memset(untagged_ptr, 0xc9, data_size * sizeof(uint32_t));
+#else
+// TODO: Port the memory allocation to work on MTE supported platform natively.
+// Note that `CAN_RUN` prevents running in MTE-unsupported environments.
+#endif
+
+  __ Mov(base, reinterpret_cast<uint64_t>(&data_ptr[data_size / 2]));
+
+  // lsb of MTE tag field.
+  const int tag_lsb = 56;
+  for (int i = 0; i < 11; i++) {
+    // <63..60> <59..56> <55........5> <4..0>
+    //        0       i             0      i
+    __ Mov(XRegister(i), i | (static_cast<uint64_t>(i) << tag_lsb));
+  }
+
+  // Backup stack pointer.
+  __ Mov(x0, sp);
+
+  int offset = -16;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x1, x2, MemOperand(base, offset, Offset));
+  // Make sure `ldg` works well with address that isn't tag-granule aligned.
+  __ Add(x29, base, 8);
+  __ Ldg(x18, MemOperand(x29, offset));
+  LoadDataAndSum(base, offset);
+
+  offset = -304;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x2, x3, MemOperand(base, offset, Offset));
+  __ Add(x29, base, 4);
+  __ Ldg(x19, MemOperand(x29, offset));
+  LoadDataAndSum(base, offset);
+
+  offset = 128;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x3, x4, MemOperand(base, offset, Offset));
+  __ Mov(sp, base);
+  __ Ldg(x20, MemOperand(sp, offset));
+  LoadDataAndSum(base, offset);
+
+  offset = -48;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x4, x5, MemOperand(base, offset, PreIndex));
+  __ Add(x29, base, 8);
+  __ Ldg(x21, MemOperand(x29));
+  LoadDataAndSum(base, 0);
+
+  offset = 64;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x5, x6, MemOperand(base, offset, PreIndex));
+  __ Add(x29, base, 4);
+  __ Ldg(x22, MemOperand(x29));
+  LoadDataAndSum(base, 0);
+
+  offset = -288;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x6, x7, MemOperand(base, offset, PreIndex));
+  __ Mov(sp, base);
+  __ Ldg(x23, MemOperand(sp));
+  LoadDataAndSum(base, 0);
+
+  offset = -96;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x7, x8, MemOperand(base, offset, PostIndex));
+  __ Add(x29, base, 8);
+  __ Ldg(x24, MemOperand(x29, -offset));
+  LoadDataAndSum(base, -offset);
+
+  offset = 80;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x8, x9, MemOperand(base, offset, PostIndex));
+  __ Add(x29, base, 4);
+  __ Ldg(x25, MemOperand(x29, -offset));
+  LoadDataAndSum(base, -offset);
+
+  offset = -224;
+  __ Addg(base, base, 0, 1);
+  __ Stgp(x9, x10, MemOperand(base, offset, PostIndex));
+  __ Mov(sp, base);
+  __ Ldg(x26, MemOperand(sp, -offset));
+  LoadDataAndSum(base, -offset);
+
+  __ Mov(sp, x0);
+
+  END();
+
+  if (CAN_RUN()) {
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+    VIXL_UNIMPLEMENTED();
+#endif
+    RUN();
+
+    const uint64_t k = kMTETagGranuleInBytes;  // NOTE(review): tag modulus.
+    USE(k);
+    ASSERT_EQUAL_64(((init_tag + 1) % k) << tag_lsb, x18);
+    ASSERT_EQUAL_64(((init_tag + 2) % k) << tag_lsb, x19);
+    ASSERT_EQUAL_64(((init_tag + 3) % k) << tag_lsb, x20);
+    ASSERT_EQUAL_64(((init_tag + 4) % k) << tag_lsb, x21);
+    ASSERT_EQUAL_64(((init_tag + 5) % k) << tag_lsb, x22);
+    ASSERT_EQUAL_64(((init_tag + 6) % k) << tag_lsb, x23);
+    ASSERT_EQUAL_64(((init_tag + 7) % k) << tag_lsb, x24);
+    ASSERT_EQUAL_64(((init_tag + 8) % k) << tag_lsb, x25);
+    ASSERT_EQUAL_64(((init_tag + 9) % k) << tag_lsb, x26);
+
+    // We store 1, 2, 2, 3, 3, 4, ..., 9, 9, 10 to memory, so the total sum of
+    // these values is 1 + (2 * (2 + 9) * 8 / 2) + 10 = 99.
+    ASSERT_EQUAL_64((99UL << tag_lsb | 99UL), x15);
+  }
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  simulator.Munmap(data_ptr, data_size * sizeof(uint32_t), PROT_MTE);
+#endif
+}
 
 TEST(ldr_str_offset) {
   SETUP();
@@ -3260,7 +3738,11 @@
   END();
 
   if (CAN_RUN()) {
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
+#else
+    printf("SKIPPED: negative PAuth tests are unimplemented on hardware.");
+#endif
   }
 }
 #endif  // VIXL_NEGATIVE_TESTING
@@ -4193,6 +4675,11 @@
   for (int op = 0; op < (1 << ImmPrefetchOperation_width); op++) {
     // Unallocated prefetch operations are ignored, so test all of them.
     // We have to use the Assembler directly for this.
+
+    // Prefetch operations of the form 0b11xxx are allocated to another
+    // instruction.
+    if (op >= 0b11000) continue;
+
     ExactAssemblyScope guard(&masm, inputs.GetCount() * kInstructionSize);
     CPURegList loop = inputs;
     while (!loop.IsEmpty()) {
@@ -5438,6 +5925,10 @@
   START();
   __ Mov(x0, 0x0123456789abcdef);
 
+  // `rmif` leaves unmodified any flag whose mask bit is clear, so NZCV must be
+  // initialised to a known state to make the test reproducible.
+  __ Msr(NZCV, x0);
+
   // Set NZCV to 0b1011 (0xb)
   __ Rmif(x0, 0, NCVFlag);
   __ Mrs(x1, NZCV);
@@ -5484,6 +5975,9 @@
   __ Mov(x7, 0x10001);
   __ Mov(x8, 0xfffffffff);
 
+  // These instructions don't modify 'C', so give it a consistent value.
+  __ Ands(xzr, xzr, 0);
+
   __ Setf8(w0);
   __ Mrs(x9, NZCV);
   __ Setf8(w1);
@@ -6765,6 +7259,9 @@
   __ Cinc(x7, x7, hs);  // C
   __ Cinc(x7, x7, vc);  // !V
 
+  Register old_fpcr = x15;
+  __ Mrs(old_fpcr, FPCR);
+
   // All core FPCR fields must be writable.
   __ Mov(x8, fpcr_core);
   __ Msr(FPCR, x8);
@@ -6799,6 +7296,8 @@
   __ Mrs(x10, FPCR);
 #endif
 
+  __ Msr(FPCR, old_fpcr);
+
   END();
 
   if (CAN_RUN()) {
@@ -6827,23 +7326,32 @@
   temps.Exclude(x16, x17);
   temps.Include(x10, x11);
 
-  // Backup stack pointer.
+  Register pointer = x21;
+  Register retry_limit = x22;
+  Label retry;
+
+  __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
+
+  // Back up stack pointer.
   __ Mov(x20, sp);
 
   // Modifiers
   __ Mov(x16, 0x477d469dec0b8760);
   __ Mov(sp, 0x477d469dec0b8760);
 
+  __ Bind(&retry);
+
   // Generate PACs using the 3 system instructions.
-  __ Mov(x17, 0x0000000012345678);
+  __ Mov(x17, pointer);
   __ Pacia1716();
   __ Mov(x0, x17);
 
-  __ Mov(lr, 0x0000000012345678);
+  __ Mov(lr, pointer);
   __ Paciaz();
   __ Mov(x1, lr);
 
-  __ Mov(lr, 0x0000000012345678);
+  __ Mov(lr, pointer);
   __ Paciasp();
   __ Mov(x2, lr);
 
@@ -6878,41 +7386,51 @@
   __ Xpaclri();
   __ Mov(x9, lr);
 
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x2, ZFlag, ne);
+  __ Ccmp(pointer, x6, ZFlag, ne);
+  __ Ccmp(pointer, x7, ZFlag, ne);
+  __ Ccmp(pointer, x8, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
+
   // Restore stack pointer.
   __ Mov(sp, x20);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
-  __ And(x2, x2, 0x007f000000000000);
-
   END();
 
   if (CAN_RUN()) {
     RUN();
 
-    // Check PAC codes have been generated and aren't equal.
-    // NOTE: with a different ComputePAC implementation, there may be a
-    // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
-    ASSERT_NOT_EQUAL_64(0, x2);
+    // Check PAC codes have been generated.
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
+    ASSERT_NOT_EQUAL_64(pointer, x2);
     ASSERT_NOT_EQUAL_64(x0, x1);
     ASSERT_EQUAL_64(x0, x2);
 
     // Pointers correctly authenticated.
-    ASSERT_EQUAL_64(0x0000000012345678, x3);
-    ASSERT_EQUAL_64(0x0000000012345678, x4);
-    ASSERT_EQUAL_64(0x0000000012345678, x5);
+    ASSERT_EQUAL_64(pointer, x3);
+    ASSERT_EQUAL_64(pointer, x4);
+    ASSERT_EQUAL_64(pointer, x5);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0020000012345678, x6);
     ASSERT_EQUAL_64(0x0020000012345678, x7);
     ASSERT_EQUAL_64(0x0020000012345678, x8);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x6);
+    ASSERT_NOT_EQUAL_64(pointer, x7);
+    ASSERT_NOT_EQUAL_64(pointer, x8);
+#endif
 
     // Pointer with code stripped.
-    ASSERT_EQUAL_64(0x0000000012345678, x9);
+    ASSERT_EQUAL_64(pointer, x9);
   }
 }
 
@@ -6927,13 +7445,22 @@
   temps.Exclude(x16, x17);
   temps.Include(x10, x11);
 
-  // Backup stack pointer.
+  Register pointer = x21;
+  Register retry_limit = x22;
+  Label retry;
+
+  __ Mov(pointer, 0x0000000012345678);
+  __ Mov(retry_limit, 0x0000000012345678 + 32);
+
+  // Back up stack pointer.
   __ Mov(x20, sp);
 
   // Modifiers
   __ Mov(x16, 0x477d469dec0b8760);
   __ Mov(sp, 0x477d469dec0b8760);
 
+  __ Bind(&retry);
+
   // Generate PACs using the 3 system instructions.
   __ Mov(x17, 0x0000000012345678);
   __ Pacib1716();
@@ -6978,15 +7505,21 @@
   __ Xpaclri();
   __ Mov(x9, lr);
 
+  // Retry on collisions.
+  __ Cmp(x0, x1);
+  __ Ccmp(pointer, x0, ZFlag, ne);
+  __ Ccmp(pointer, x1, ZFlag, ne);
+  __ Ccmp(pointer, x2, ZFlag, ne);
+  __ Ccmp(pointer, x6, ZFlag, ne);
+  __ Ccmp(pointer, x7, ZFlag, ne);
+  __ Ccmp(pointer, x8, ZFlag, ne);
+  __ Ccmp(pointer, retry_limit, ZFlag, eq);
+  __ Cinc(pointer, pointer, ne);
+  __ B(ne, &retry);
+
   // Restore stack pointer.
   __ Mov(sp, x20);
 
-  // Mask out just the PAC code bits.
-  // TODO: use Simulator::CalculatePACMask in a nice way.
-  __ And(x0, x0, 0x007f000000000000);
-  __ And(x1, x1, 0x007f000000000000);
-  __ And(x2, x2, 0x007f000000000000);
-
   END();
 
   if (CAN_RUN()) {
@@ -6995,24 +7528,30 @@
     // Check PAC codes have been generated and aren't equal.
     // NOTE: with a different ComputePAC implementation, there may be a
     // collision.
-    ASSERT_NOT_EQUAL_64(0, x0);
-    ASSERT_NOT_EQUAL_64(0, x1);
-    ASSERT_NOT_EQUAL_64(0, x2);
+    ASSERT_NOT_EQUAL_64(pointer, x0);
+    ASSERT_NOT_EQUAL_64(pointer, x1);
+    ASSERT_NOT_EQUAL_64(pointer, x2);
     ASSERT_NOT_EQUAL_64(x0, x1);
     ASSERT_EQUAL_64(x0, x2);
 
     // Pointers correctly authenticated.
-    ASSERT_EQUAL_64(0x0000000012345678, x3);
-    ASSERT_EQUAL_64(0x0000000012345678, x4);
-    ASSERT_EQUAL_64(0x0000000012345678, x5);
+    ASSERT_EQUAL_64(pointer, x3);
+    ASSERT_EQUAL_64(pointer, x4);
+    ASSERT_EQUAL_64(pointer, x5);
 
     // Pointers corrupted after failing to authenticate.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     ASSERT_EQUAL_64(0x0040000012345678, x6);
     ASSERT_EQUAL_64(0x0040000012345678, x7);
     ASSERT_EQUAL_64(0x0040000012345678, x8);
+#else
+    ASSERT_NOT_EQUAL_64(pointer, x6);
+    ASSERT_NOT_EQUAL_64(pointer, x7);
+    ASSERT_NOT_EQUAL_64(pointer, x8);
+#endif
 
     // Pointer with code stripped.
-    ASSERT_EQUAL_64(0x0000000012345678, x9);
+    ASSERT_EQUAL_64(pointer, x9);
   }
 }
 
@@ -7097,11 +7636,12 @@
   __ Blr(x0);
   __ Adr(ipreg, &jump_call_target);
   __ Blr(ipreg);
-  __ Adr(lr, &done);  // Make Ret return to done label.
+  __ Mov(lr, 0);  // Zero lr so we branch to done.
   __ Br(ipreg);
   __ Bind(&call_target, EmitBTI_c);
   __ Ret();
   __ Bind(&jump_call_target, EmitBTI_jc);
+  __ Cbz(lr, &done);
   __ Ret();
   __ Bind(&done);
   END();
@@ -7109,10 +7649,15 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
 #endif
+    // On hardware, we'll run the test anyway, but mark it as SKIPPED until
+    // we've implemented a mechanism for marking Guarded pages.
+
     RUN();
+
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -7125,36 +7670,42 @@
   SETUP_WITH_FEATURES(CPUFeatures::kBTI);
 
   Label start, none, c, j, jc;
+  Label jump_to_c, call_to_j;
   START();
   __ B(&start);
   __ Bind(&none, EmitBTI);
   __ Bind(&c, EmitBTI_c);
   __ Bind(&j, EmitBTI_j);
   __ Bind(&jc, EmitBTI_jc);
-  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&none) == 4 * kInstructionSize);
+  __ Hint(BTI);
+  __ Hint(BTI_c);
+  __ Hint(BTI_j);
+  __ Hint(BTI_jc);
+  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&none) == 8 * kInstructionSize);
+  __ Cmp(x1, 1);
+  __ B(lt, &jump_to_c);
+  __ B(eq, &call_to_j);
   __ Ret();
 
-  Label jump_to_c, call_to_j;
   __ Bind(&start);
   __ Adr(x0, &none);
-  __ Adr(lr, &jump_to_c);
+  __ Mov(x1, 0);
   __ Br(x0);
 
   __ Bind(&jump_to_c);
   __ Adr(x0, &c);
-  __ Adr(lr, &call_to_j);
+  __ Mov(x1, 1);
   __ Br(x0);
 
   __ Bind(&call_to_j);
   __ Adr(x0, &j);
+  __ Mov(x1, 2);
   __ Blr(x0);
   END();
 
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(false);
-#else
-    VIXL_UNIMPLEMENTED();
 #endif
     RUN();
   }
@@ -7178,12 +7729,12 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
-#endif
     MUST_FAIL_WITH_MESSAGE(RUN(),
                            "Executing non-BTI instruction with wrong "
                            "BType.");
+#else
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -7202,12 +7753,12 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
-#endif
     MUST_FAIL_WITH_MESSAGE(RUN(),
                            "Executing non-BTI instruction with wrong "
                            "BType.");
+#else
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -7226,12 +7777,12 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
-#endif
     MUST_FAIL_WITH_MESSAGE(RUN(),
                            "Executing non-BTI instruction with wrong "
                            "BType.");
+#else
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -7251,10 +7802,10 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
-#endif
     MUST_FAIL_WITH_MESSAGE(RUN(), "Executing BTI c with wrong BType.");
+#else
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -7274,10 +7825,10 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
-#endif
     MUST_FAIL_WITH_MESSAGE(RUN(), "Executing BTI j with wrong BType.");
+#else
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 #endif  // VIXL_NEGATIVE_TESTING
@@ -7302,12 +7853,17 @@
   if (CAN_RUN()) {
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     simulator.SetGuardedPages(true);
-#else
-    VIXL_UNIMPLEMENTED();
 #endif
+    // On hardware, we'll run the test anyway, but mark it as SKIPPED until
+    // we've implemented a mechanism for marking Guarded pages.
+
     RUN();
 
     ASSERT_EQUAL_64(4, x0);
+
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+    printf("SKIPPED: marking guarded pages is unimplemented on hardware");
+#endif
   }
 }
 
@@ -9145,7 +9701,7 @@
 
 
 TEST(blr_lr) {
-  // A simple test to check that the simulator correcty handle "blr lr".
+  // A simple test to check that the simulator correctly handles "blr lr".
   SETUP();
 
   START();
@@ -12140,6 +12696,26 @@
   }
 }
 
+TEST(system_dc_mte) {  // Issue each tag-related DC operation on one address.
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+  const char* msg = "DC MTE test!";
+  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
+
+  START();
+  __ Mov(x20, msg_addr);  // Address operand shared by every DC operation.
+  __ Dc(CGVAC, x20);
+  __ Dc(CGDVAC, x20);
+  __ Dc(CGVAP, x20);
+  __ Dc(CGDVAP, x20);
+  __ Dc(CIGVAC, x20);
+  __ Dc(CIGDVAC, x20);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_64(msg_addr, x20);  // x20 must be left unmodified.
+  }
+}
 
 // We currently disable tests for CRC32 instructions when running natively.
 // Support for this family of instruction is optional, and so native platforms
@@ -12940,6 +13516,76 @@
   END();
 }
 
+// Checks veneer emission for a backward branch of `type` over `limit` nops.
+static void VeneerBackwardBranchHelper(ImmBranchType type, int limit) {
+  SETUP();
+  START();
+  // This is a code generation test. The code generated is not executed.
+
+  __ Mov(x0, 1);
+
+  // Non-veneer case: generate 'limit' instructions, plus the branch itself.
+  Label start0;
+  __ Bind(&start0);
+  for (int i = 0; i < limit; i++) {
+    __ Nop();
+  }
+  switch (type) {
+    case CompareBranchType:
+      __ Cbz(x0, &start0);
+      break;
+    case TestBranchType:
+      __ Tbz(x0, 0, &start0);
+      break;
+    default:
+      VIXL_ASSERT(type == CondBranchType);
+      __ B(eq, &start0);
+  }
+  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start0) ==
+             ((limit + 1) * kInstructionSize));
+
+  // Veneer case: As above, plus one extra nop and a branch for the veneer; we
+  // expect a total of limit + 3 instructions.
+  //
+  //  start1:
+  //    nop x (limit + 1)
+  //    tbnz skip_veneer
+  //    b start1
+  //  skip_veneer:
+  //
+  Label start1;
+  __ Bind(&start1);
+  for (int i = 0; i < limit; i++) {
+    __ Nop();
+  }
+  __ Nop();  // One extra instruction to exceed branch range.
+  switch (type) {
+    case CompareBranchType:
+      __ Cbz(x0, &start1);  // Target start1 so the range is exceeded by one.
+      break;
+    case TestBranchType:
+      __ Tbz(x0, 0, &start1);
+      break;
+    default:
+      VIXL_ASSERT(type == CondBranchType);
+      __ B(eq, &start1);
+  }
+  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start1) ==
+             ((limit + 3) * kInstructionSize));
+
+  END();
+  DISASSEMBLE();
+}
+
+TEST(veneer_backward_tbz) { VeneerBackwardBranchHelper(TestBranchType, 8192); }
+
+TEST(veneer_backward_cbz) {
+  VeneerBackwardBranchHelper(CompareBranchType, 262144);
+}
+
+TEST(veneer_backward_bcond) {
+  VeneerBackwardBranchHelper(CondBranchType, 262144);
+}
 
 TEST(ldr_literal_explicit) {
   SETUP();
@@ -13242,6 +13888,8 @@
 
 void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; }
 
+int32_t runtime_call_no_args() { return 1; }
+
 enum RuntimeCallTestEnum { Enum0 };
 
 RuntimeCallTestEnum runtime_call_enum(RuntimeCallTestEnum e) { return e; }
@@ -13362,6 +14010,10 @@
   __ Mov(x0, reinterpret_cast<uint64_t>(&value));
   __ CallRuntime(runtime_call_store_at_address);
 
+  __ Mov(w0, 0);
+  __ CallRuntime(runtime_call_no_args);
+  __ Mov(w25, w0);
+
   END();
 
 #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || \
@@ -13377,9 +14029,79 @@
     ASSERT_EQUAL_64(0, x22);
     ASSERT_EQUAL_32(124, w23);
     ASSERT_EQUAL_64(0, x24);
+    ASSERT_EQUAL_32(1, w25);
   }
 #endif  // #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || ...
 }
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+void void_func() {}
+uint32_t uint32_func() { return 2; }
+void void_param_func(uint32_t x) { USE(x); }
+uint32_t uint32_param_func(uint32_t x) { return ++x; }
+
+void void_placeholder() {}
+uint32_t uint32_placeholder() { return 4; }
+void void_param_placeholder(uint32_t x) { USE(x); }
+uint32_t uint32_param_placeholder(uint32_t x) { return ++x; }
+
+#define DO_TEST_BRANCH_INTERCEPTION(func)        \
+  __ Mov(x16, reinterpret_cast<uint64_t>(func)); \
+  __ Blr(x16);
+
+// Checks simulator branch interception, both default and callback-based.
+TEST(branch_interception) {
+  SETUP();
+  START();
+  // Test default branch interception, i.e. do a runtime call to the function.
+  DO_TEST_BRANCH_INTERCEPTION(void_func);
+  DO_TEST_BRANCH_INTERCEPTION(uint32_func);
+  __ Mov(w20, w0);
+  DO_TEST_BRANCH_INTERCEPTION(void_param_func);
+  __ Mov(w0, 2);
+  DO_TEST_BRANCH_INTERCEPTION(uint32_param_func);
+  __ Mov(w21, w0);
+
+  // Test interceptions with callbacks; each must leave the callback's 1 in w0.
+  DO_TEST_BRANCH_INTERCEPTION(void_placeholder);
+  __ Mov(w22, w0);
+  DO_TEST_BRANCH_INTERCEPTION(uint32_placeholder);
+  __ Mov(w23, w0);
+  __ Mov(w0, 4);
+  DO_TEST_BRANCH_INTERCEPTION(void_param_placeholder);
+  __ Mov(w24, w0);
+  DO_TEST_BRANCH_INTERCEPTION(uint32_param_placeholder);
+  __ Mov(w25, w0);
+
+  END();
+
+  simulator.RegisterBranchInterception(void_func);
+  simulator.RegisterBranchInterception(uint32_func);
+  simulator.RegisterBranchInterception(void_param_func);
+  simulator.RegisterBranchInterception(uint32_param_func);
+
+  auto callback = [&simulator](uint64_t original_target) {
+    USE(original_target);
+    simulator.WriteWRegister(0, 1);
+  };
+
+  simulator.RegisterBranchInterception(void_placeholder, callback);
+  simulator.RegisterBranchInterception(uint32_placeholder, callback);
+  simulator.RegisterBranchInterception(void_param_placeholder, callback);
+  simulator.RegisterBranchInterception(uint32_param_placeholder, callback);
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_32(2, w20);
+    ASSERT_EQUAL_32(3, w21);
+    ASSERT_EQUAL_32(1, w22);
+    ASSERT_EQUAL_32(1, w23);
+    ASSERT_EQUAL_32(1, w24);
+    ASSERT_EQUAL_32(1, w25);
+  }
+}
+#endif  // #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 #endif  // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT
 
 
@@ -13416,6 +14138,1244 @@
   masm.FinalizeCode();
 }
 
+
+TEST(mte_addg_subg) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+
+  START();
+  __ Mov(x0, 0x5555000055555555);
+
+  // Add/subtract an address offset, changing tag each time.
+  __ Addg(x1, x0, 16, 2);
+  __ Subg(x2, x1, 16, 1);
+
+  // Add/subtract address offsets, keep tag.
+  __ Addg(x3, x0, 1008, 0);
+  __ Subg(x4, x3, 1008, 0);
+
+  // Change tag only. Check wraparound.
+  __ Addg(x5, x0, 0, 15);
+  __ Subg(x6, x0, 0, 14);
+
+  // Do nothing.
+  __ Addg(x7, x0, 0, 0);
+  __ Subg(x8, x0, 0, 0);
+
+  // Use stack pointer as source/destination.
+  __ Mov(x20, sp);  // Store original sp.
+
+  __ Subg(sp, sp, 32, 0);  // Claim 32 bytes.
+  __ Sub(x9, sp, x20);     // Subtract original sp and store difference.
+
+  __ Mov(sp, x20);  // Restore original sp.
+  __ Claim(32);
+  __ Addg(sp, sp, 32, 0);  // Drop 32 bytes.
+  __ Sub(x10, sp, x20);    // Subtract original sp and store difference.
+
+  __ Mov(sp, x20);        // Restore sp (should be no-op)
+  __ Addg(sp, sp, 0, 1);  // Tag the sp.
+  __ Sub(x11, sp, x20);  // Subtract original sp and store for later comparison.
+  __ Mov(sp, x20);       // Restore sp.
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0x5755000055555565, x1);
+    ASSERT_EQUAL_64(0x5855000055555555, x2);
+    ASSERT_EQUAL_64(0x5555000055555945, x3);
+    ASSERT_EQUAL_64(0x5555000055555555, x4);
+    ASSERT_EQUAL_64(0x5455000055555555, x5);
+    ASSERT_EQUAL_64(0x5355000055555555, x6);
+    ASSERT_EQUAL_64(0x5555000055555555, x7);
+    ASSERT_EQUAL_64(0x5555000055555555, x8);
+    ASSERT_EQUAL_64(-32, x9);
+    ASSERT_EQUAL_64(0, x10);
+    ASSERT_EQUAL_64(UINT64_C(1) << 56, x11);
+  }
+}
+
+TEST(mte_subp) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+
+  START();
+  __ Mov(x0, 0x5555555555555555);
+  __ Mov(x1, -42);
+
+  // Test subp with equivalent sbfx/sub(s) operations.
+  __ Sbfx(x10, x0, 0, 56);
+  __ Sbfx(x11, x1, 0, 56);
+
+  __ Subp(x4, x0, x1);
+  __ Sub(x5, x10, x11);
+
+  __ Subp(x6, x1, x0);
+  __ Sub(x7, x11, x10);
+
+  __ Subps(x8, x0, x1);
+  __ Mrs(x18, NZCV);
+  __ Subs(x9, x10, x11);
+  __ Mrs(x19, NZCV);
+
+  __ Cmpp(x1, x0);
+  __ Mrs(x20, NZCV);
+  __ Cmp(x11, x10);
+  __ Mrs(x21, NZCV);
+
+  // Test equal pointers with mismatched tags compare equal and produce a zero
+  // difference with subps.
+  __ Mov(x2, 0x20);  // Exclude tag 5.
+  __ Irg(x3, x0, x2);
+  __ Subps(x22, x0, x3);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(x5, x4);
+    ASSERT_EQUAL_64(x7, x6);
+    ASSERT_EQUAL_64(x9, x8);
+    ASSERT_EQUAL_64(x19, x18);
+    ASSERT_EQUAL_64(x20, x21);
+    ASSERT_EQUAL_64(0, x22);
+    ASSERT_EQUAL_NZCV(ZCFlag);
+  }
+}
+
+TEST(mte_gmi) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+
+  START();
+  __ Mov(x0, 0xaaaa);
+  __ Mov(x20, 0x12345678);
+
+  __ Gmi(x0, x20, x0);  // Add mask bit 0.
+  __ Addg(x20, x20, 0, 1);
+  __ Gmi(x1, x20, x0);  // No effect.
+  __ Addg(x20, x20, 0, 1);
+  __ Gmi(x2, x20, x1);  // Add mask bit 2.
+  __ Addg(x20, x20, 0, 1);
+  __ Gmi(x3, x20, x2);  // No effect.
+  __ Addg(x20, x20, 0, 1);
+  __ Gmi(x4, x20, x3);  // Add mask bit 4.
+  __ Addg(x20, x20, 0, 1);
+  __ Gmi(x5, x20, x4);  // No effect.
+  __ Addg(x20, x20, 0, 9);
+  __ Gmi(x6, x20, x5);   // Add mask bit 14.
+  __ Gmi(x7, x20, xzr);  // Only mask bit 14.
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0xaaab, x0);
+    ASSERT_EQUAL_64(0xaaab, x1);
+    ASSERT_EQUAL_64(0xaaaf, x2);
+    ASSERT_EQUAL_64(0xaaaf, x3);
+    ASSERT_EQUAL_64(0xaabf, x4);
+    ASSERT_EQUAL_64(0xaabf, x5);
+    ASSERT_EQUAL_64(0xeabf, x6);
+    ASSERT_EQUAL_64(0x4000, x7);
+  }
+}
+
+TEST(mte_irg) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+
+  START();
+  __ Mov(x10, 8);
+  __ Mov(x0, 0x5555555555555555);
+  // Insert a random tag repeatedly. If the loop doesn't exit in the expected
+  // way, it's statistically likely that a random tag was never inserted.
+  Label loop, failed, done;
+  __ Bind(&loop);
+  __ Irg(x1, x0);
+  __ Sub(x10, x10, 1);
+  __ Cbz(x10, &failed);  // Exit if loop count exceeded.
+  __ Cmp(x1, 0x5555555555555555);
+  __ B(eq, &loop);  // Loop if the tag hasn't changed.
+
+  // Check non-tag bits have not changed.
+  __ Bic(x1, x1, 0x0f00000000000000);
+  __ Subs(x1, x1, 0x5055555555555555);
+  __ B(&done);
+
+  __ Bind(&failed);
+  __ Mov(x1, 1);
+
+  __ Bind(&done);
+
+  // Insert random tags, excluding oddly-numbered tags, and set a bit in a
+  // result register for each tag used.
+  // After 128 rounds, it's statistically likely that all even bits in the
+  // least-significant half word will be set.
+  __ Mov(x3, 0);
+  __ Mov(x4, 1);
+  __ Mov(x10, 128);
+  __ Mov(x11, 0xaaaa);
+
+  Label loop2;
+  __ Bind(&loop2);
+  __ Irg(x2, x1, x11);
+  __ Lsr(x2, x2, 56);
+  __ Lsl(x2, x4, x2);
+  __ Orr(x3, x3, x2);
+  __ Subs(x10, x10, 1);
+  __ B(ne, &loop2);
+  __ Mov(x2, x3);
+
+  // Check that excluding all tags results in zero tag insertion.
+  __ Mov(x3, 0xffffffffffffffff);
+  __ Irg(x3, x3, x3);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0, x1);
+    ASSERT_EQUAL_64(0x5555, x2);
+    ASSERT_EQUAL_64(0xf0ffffffffffffff, x3);
+  }
+}
+
+TEST(mops_set) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  uint8_t dst[16];
+  memset(dst, 0x55, ArrayLength(dst));
+  uintptr_t dst_addr = reinterpret_cast<uintptr_t>(dst);
+
+  START();
+  __ Mov(x0, dst_addr);
+  __ Add(x1, x0, 1);
+  __ Mov(x2, 13);
+  __ Mov(x3, 0x1234aa);
+
+  // Set 13 bytes dst[1] onwards to 0xaa.
+  __ Setp(x1, x2, x3);
+  __ Setm(x1, x2, x3);
+  __ Sete(x1, x2, x3);
+  __ Mrs(x20, NZCV);
+
+  // x2 is now zero, so this should do nothing.
+  __ Setp(x1, x2, x3);
+  __ Setm(x1, x2, x3);
+  __ Sete(x1, x2, x3);
+  __ Mrs(x21, NZCV);
+
+  // Set dst[15] to zero using the masm helper.
+  __ Add(x1, x0, 15);
+  __ Mov(x2, 1);
+  __ Set(x1, x2, xzr);
+  __ Mrs(x22, NZCV);
+
+  // Load dst for comparison.
+  __ Ldp(x10, x11, MemOperand(x0));
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //            NZCV    Xd                Xn
+    //  Option A: ....    end of buffer     0
+    //  Option B: ..C.    end of buffer     0
+
+    std::vector<uint64_t> allowed_flags = {NoFlag, CFlag};
+
+    RUN();
+    ASSERT_EQUAL_64(allowed_flags, x20);
+    ASSERT_EQUAL_64(allowed_flags, x21);
+    ASSERT_EQUAL_64(allowed_flags, x22);
+    ASSERT_EQUAL_64(dst_addr + 16, x1);
+    ASSERT_EQUAL_64(0, x2);
+    ASSERT_EQUAL_64(0x1234aa, x3);
+    ASSERT_EQUAL_64(0xaaaa'aaaa'aaaa'aa55, x10);
+    ASSERT_EQUAL_64(0x0055'aaaa'aaaa'aaaa, x11);
+  }
+}
+
+TEST(mops_setn) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  // In simulation, non-temporal set is handled by the same code as normal set,
+  // so only a basic test is required beyond that already provided above.
+
+  uint8_t dst[16] = {0x55};
+  uintptr_t dst_addr = reinterpret_cast<uintptr_t>(dst);
+
+  START();
+  __ Mov(x0, dst_addr);
+  __ Mov(x1, x0);
+  __ Mov(x2, 16);
+  __ Mov(x3, 0x42);
+  __ Setn(x1, x2, x3);
+  __ Mrs(x20, NZCV);
+  __ Ldp(x10, x11, MemOperand(x0));
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //            NZCV    Xd                Xn
+    //  Option A: ....    end of buffer     0
+    //  Option B: ..C.    end of buffer     0
+
+    std::vector<uint64_t> allowed_flags = {NoFlag, CFlag};
+
+    RUN();
+    ASSERT_EQUAL_64(allowed_flags, x20);
+    ASSERT_EQUAL_64(dst_addr + 16, x1);
+    ASSERT_EQUAL_64(0, x2);
+    ASSERT_EQUAL_64(0x42, x3);
+    ASSERT_EQUAL_64(0x4242'4242'4242'4242, x10);
+    ASSERT_EQUAL_64(0x4242'4242'4242'4242, x11);
+  }
+}
+
+TEST(mops_setg) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS, CPUFeatures::kMTE);
+
+  uint8_t* dst = nullptr;
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  const int dst_size = 32;
+  dst = reinterpret_cast<uint8_t*>(
+      simulator.Mmap(NULL,
+                     dst_size * sizeof(uint8_t),
+                     PROT_READ | PROT_WRITE | PROT_MTE,
+                     MAP_PRIVATE | MAP_ANONYMOUS,
+                     -1,
+                     0));
+
+  VIXL_ASSERT(dst != nullptr);
+  uint8_t* untagged_ptr = AddressUntag(dst);
+  memset(untagged_ptr, 0xc9, dst_size);
+#else
+// TODO: Port the memory allocation to work on MTE supported platform natively.
+// Note that `CAN_RUN` prevents running in MTE-unsupported environments.
+#endif
+
+  uintptr_t dst_addr = reinterpret_cast<uintptr_t>(dst);
+  uint64_t tag_mask = 0xf0ff'ffff'ffff'ffff;
+
+  START();
+  __ Mov(x0, dst_addr);
+  __ Gmi(x2, x0, xzr);
+  __ Irg(x1, x0, x2);  // Choose new tag for setg destination.
+  __ Mov(x2, 16);
+  __ Mov(x3, 0x42);
+  __ Setg(x1, x2, x3);
+  __ Mrs(x20, NZCV);
+
+  __ Ubfx(x4, x1, 56, 4);  // Extract new tag.
+  __ Bfi(x0, x4, 56, 4);   // Tag dst_addr so set region can be loaded.
+  __ Ldp(x10, x11, MemOperand(x0));
+
+  __ Mov(x0, dst_addr);
+  __ Ldp(x12, x13, MemOperand(x0, 16));  // Unset region has original tag.
+
+  __ And(x1, x1, tag_mask);  // Strip tag for repeatable checks.
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //            NZCV    Xd                Xn
+    //  Option A: ....    end of buffer     0
+    //  Option B: ..C.    end of buffer     0
+
+    std::vector<uint64_t> allowed_flags = {NoFlag, CFlag};
+
+    RUN();
+    ASSERT_EQUAL_64(allowed_flags, x20);
+    ASSERT_EQUAL_64((dst_addr & tag_mask) + 16, x1);
+    ASSERT_EQUAL_64(0, x2);
+    ASSERT_EQUAL_64(0x42, x3);
+    ASSERT_EQUAL_64(0x4242'4242'4242'4242, x10);
+    ASSERT_EQUAL_64(0x4242'4242'4242'4242, x11);
+    ASSERT_EQUAL_64(0xc9c9'c9c9'c9c9'c9c9, x12);
+    ASSERT_EQUAL_64(0xc9c9'c9c9'c9c9'c9c9, x13);
+  }
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  simulator.Munmap(dst, dst_size, PROT_MTE);
+#endif
+}
+
+TEST(mops_cpy) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  uint8_t buf[16];
+  uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+
+  for (unsigned i = 0; i < ArrayLength(buf); i++) {
+    buf[i] = i;
+  }
+
+  START();
+  __ Mov(x0, buf_addr);
+
+  // Copy first eight bytes into second eight.
+  __ Mov(x1, x0);     // src = &buf[0]
+  __ Add(x2, x0, 8);  // dst = &buf[8]
+  __ Mov(x3, 8);      // count = 8
+  __ Cpyp(x2, x1, x3);
+  __ Cpym(x2, x1, x3);
+  __ Cpye(x2, x1, x3);
+  __ Ldp(x10, x11, MemOperand(x0));
+  __ Mrs(x20, NZCV);
+
+  // Copy first eight bytes to overlapping offset, forcing backwards copy.
+  __ Mov(x4, x0);     // src = &buf[0]
+  __ Add(x5, x0, 4);  // dst = &buf[4]
+  __ Mov(x6, 8);      // count = 8
+  __ Cpy(x5, x4, x6);
+  __ Ldp(x12, x13, MemOperand(x0));
+  __ Mrs(x21, NZCV);
+
+  // Copy last eight bytes to overlapping offset, forcing forwards copy.
+  __ Add(x7, x0, 8);  // src = &buf[8]
+  __ Add(x8, x0, 6);  // dst = &buf[6]
+  __ Mov(x9, 8);      // count = 8
+  __ Cpy(x8, x7, x9);
+  __ Ldp(x14, x15, MemOperand(x0));
+  __ Mrs(x22, NZCV);
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //                        NZCV    Xs/Xd               Xn
+    //  Option A (forwards) : ....    ends of buffers     0
+    //  Option A (backwards): ....    starts of buffers   0
+    //  Option B (forwards) : ..C.    ends of buffers     0
+    //  Option B (backwards): N.C.    starts of buffers   0
+
+    std::vector<uint64_t> allowed_backwards_flags = {NoFlag, NCFlag};
+    std::vector<uint64_t> allowed_forwards_flags = {NoFlag, CFlag};
+
+    RUN();
+    // IMPLEMENTATION DEFINED direction
+    if (static_cast<uintptr_t>(core.xreg(2)) > buf_addr) {
+      // Forwards
+      ASSERT_EQUAL_64(buf_addr + 8, x1);
+      ASSERT_EQUAL_64(buf_addr + 16, x2);
+      ASSERT_EQUAL_64(allowed_forwards_flags, x20);
+    } else {
+      // Backwards
+      ASSERT_EQUAL_64(buf_addr, x1);
+      ASSERT_EQUAL_64(buf_addr + 8, x2);
+      ASSERT_EQUAL_64(allowed_backwards_flags, x20);
+    }
+    ASSERT_EQUAL_64(0, x3);  // Xn
+    ASSERT_EQUAL_64(0x0706'0504'0302'0100, x10);
+    ASSERT_EQUAL_64(0x0706'0504'0302'0100, x11);
+
+    ASSERT_EQUAL_64(buf_addr, x4);      // Xs
+    ASSERT_EQUAL_64(buf_addr + 4, x5);  // Xd
+    ASSERT_EQUAL_64(0, x6);             // Xn
+    ASSERT_EQUAL_64(0x0302'0100'0302'0100, x12);
+    ASSERT_EQUAL_64(0x0706'0504'0706'0504, x13);
+    ASSERT_EQUAL_64(allowed_backwards_flags, x21);
+
+    ASSERT_EQUAL_64(buf_addr + 16, x7);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 14, x8);  // Xd
+    ASSERT_EQUAL_64(0, x9);              // Xn
+    ASSERT_EQUAL_64(0x0504'0100'0302'0100, x14);
+    ASSERT_EQUAL_64(0x0706'0706'0504'0706, x15);
+    ASSERT_EQUAL_64(allowed_forwards_flags, x22);
+  }
+}
+
+TEST(mops_cpyn) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  // In simulation, non-temporal cpy is handled by the same code as normal cpy,
+  // so only a basic test is required beyond that already provided above.
+
+  uint8_t buf[16];
+  uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+
+  for (unsigned i = 0; i < ArrayLength(buf); i++) {
+    buf[i] = i;
+  }
+
+  START();
+  __ Mov(x0, buf_addr);
+
+  __ Add(x1, x0, 1);  // src = &buf[1]
+  __ Mov(x2, x0);     // dst = &buf[0]
+  __ Mov(x3, 15);     // count = 15
+  __ Cpyn(x2, x1, x3);
+  __ Ldp(x10, x11, MemOperand(x0));
+  __ Mrs(x20, NZCV);
+
+  __ Add(x4, x0, 1);  // src = &buf[1]
+  __ Mov(x5, x0);     // dst = &buf[0]
+  __ Mov(x6, 15);     // count = 15
+  __ Cpyrn(x5, x4, x6);
+  __ Ldp(x12, x13, MemOperand(x0));
+  __ Mrs(x21, NZCV);
+
+  __ Add(x7, x0, 1);  // src = &buf[1]
+  __ Mov(x8, x0);     // dst = &buf[0]
+  __ Mov(x9, 15);     // count = 15
+  __ Cpywn(x8, x7, x9);
+  __ Ldp(x14, x15, MemOperand(x0));
+  __ Mrs(x22, NZCV);
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //                        NZCV    Xs/Xd               Xn
+    //  Option A (forwards) : ....    ends of buffers     0
+    //  Option A (backwards): ....    starts of buffers   0
+    //  Option B (forwards) : ..C.    ends of buffers     0
+    //  Option B (backwards): N.C.    starts of buffers   0
+    //
+    // All cases overlap to force a forwards copy.
+
+    std::vector<uint64_t> allowed_forwards_flags = {NoFlag, CFlag};
+
+    RUN();
+    ASSERT_EQUAL_64(buf_addr + 16, x1);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x2);  // Xd
+    ASSERT_EQUAL_64(0, x3);              // Xn
+    ASSERT_EQUAL_64(allowed_forwards_flags, x20);
+    ASSERT_EQUAL_64(0x0807'0605'0403'0201, x10);
+    ASSERT_EQUAL_64(0x0f0f'0e0d'0c0b'0a09, x11);
+
+    ASSERT_EQUAL_64(buf_addr + 16, x4);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x5);  // Xd
+    ASSERT_EQUAL_64(0, x6);              // Xn
+    ASSERT_EQUAL_64(allowed_forwards_flags, x21);
+    ASSERT_EQUAL_64(0x0908'0706'0504'0302, x12);
+    ASSERT_EQUAL_64(0x0f0f'0f0e'0d0c'0b0a, x13);
+
+    ASSERT_EQUAL_64(buf_addr + 16, x7);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x8);  // Xd
+    ASSERT_EQUAL_64(0, x9);              // Xn
+    ASSERT_EQUAL_64(allowed_forwards_flags, x22);
+    ASSERT_EQUAL_64(0x0a09'0807'0605'0403, x14);
+    ASSERT_EQUAL_64(0x0f0f'0f0f'0e0d'0c0b, x15);
+  }
+}
+
+TEST(mops_cpyf) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  uint8_t buf[16];
+  uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+
+  for (unsigned i = 0; i < ArrayLength(buf); i++) {
+    buf[i] = i;
+  }
+
+  // As `mops_cpy`, but `cpyf` always copies forwards, so is only useful for
+  // non-overlapping buffers, or those where the source address is greater than
+  // the destination address.
+
+  START();
+  __ Mov(x0, buf_addr);
+
+  // Copy first eight bytes into second eight, without overlap.
+  __ Mov(x1, x0);     // src = &buf[0]
+  __ Add(x2, x0, 8);  // dst = &buf[8]
+  __ Mov(x3, 8);      // count = 8
+  __ Cpyfp(x2, x1, x3);
+  __ Cpyfm(x2, x1, x3);
+  __ Cpyfe(x2, x1, x3);
+  __ Ldp(x10, x11, MemOperand(x0));
+  __ Mrs(x20, NZCV);
+
+  // Copy last eight bytes to overlapping offset where src > dst.
+  __ Add(x4, x0, 8);  // src = &buf[8]
+  __ Add(x5, x0, 6);  // dst = &buf[6]
+  __ Mov(x6, 8);      // count = 8
+  __ Cpyf(x5, x4, x6);
+  __ Ldp(x12, x13, MemOperand(x0));
+  __ Mrs(x21, NZCV);
+
+  // Copy first eight bytes to overlapping offset where src < dst.
+  __ Mov(x7, x0);     // src = &buf[0]
+  __ Add(x8, x0, 4);  // dst = &buf[4]
+  __ Mov(x9, 8);      // count = 8
+  __ Cpyf(x8, x7, x9);
+  // The only testable result is the first and last four bytes, which are not
+  // written at all.
+  __ Ldr(w14, MemOperand(x0));
+  __ Ldr(w15, MemOperand(x0, 12));
+  __ Mrs(x22, NZCV);
+
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //            NZCV    Xs/Xd               Xn
+    //  Option A: ....    ends of buffers     0
+    //  Option B: ..C.    ends of buffers     0
+
+    std::vector<uint64_t> allowed_forwards_flags = {NoFlag, CFlag};
+
+    RUN();
+
+    // No overlap.
+    ASSERT_EQUAL_64(buf_addr + 8, x1);   // Xs
+    ASSERT_EQUAL_64(buf_addr + 16, x2);  // Xd
+    ASSERT_EQUAL_64(0, x3);              // Xn
+    ASSERT_EQUAL_64(allowed_forwards_flags, x20);
+    ASSERT_EQUAL_64(0x0706'0504'0302'0100, x10);
+    ASSERT_EQUAL_64(0x0706'0504'0302'0100, x11);
+
+    // Overlap, src > dst.
+    ASSERT_EQUAL_64(buf_addr + 16, x4);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 14, x5);  // Xd
+    ASSERT_EQUAL_64(0, x6);              // Xn
+    ASSERT_EQUAL_64(0x0100'0504'0302'0100, x12);
+    ASSERT_EQUAL_64(0x0706'0706'0504'0302, x13);
+    ASSERT_EQUAL_64(allowed_forwards_flags, x21);
+
+    // Overlap, src < dst.
+    ASSERT_EQUAL_64(buf_addr + 8, x7);   // Xs
+    ASSERT_EQUAL_64(buf_addr + 12, x8);  // Xd
+    ASSERT_EQUAL_64(0, x9);              // Xn
+    // We can only reliably test that the operation didn't write outside the
+    // specified region.
+    ASSERT_EQUAL_32(0x0302'0100, w14);
+    ASSERT_EQUAL_32(0x0706'0706, w15);
+    ASSERT_EQUAL_64(allowed_forwards_flags, x22);
+  }
+}
+
+TEST(mops_cpyfn) {
+  SETUP_WITH_FEATURES(CPUFeatures::kMOPS);
+
+  // In simulation, non-temporal cpy is handled by the same code as normal cpy,
+  // so only a basic test is required beyond that already provided above.
+
+  uint8_t buf[16];
+  uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+
+  for (unsigned i = 0; i < ArrayLength(buf); i++) {
+    buf[i] = i;
+  }
+
+  START();
+  __ Mov(x0, buf_addr);
+
+  __ Add(x1, x0, 1);  // src = &buf[1]
+  __ Mov(x2, x0);     // dst = &buf[0]
+  __ Mov(x3, 15);     // count = 15
+  __ Cpyfn(x2, x1, x3);
+  __ Ldp(x10, x11, MemOperand(x0));
+  __ Mrs(x20, NZCV);
+
+  __ Add(x4, x0, 1);  // src = &buf[1]
+  __ Mov(x5, x0);     // dst = &buf[0]
+  __ Mov(x6, 15);     // count = 15
+  __ Cpyfrn(x5, x4, x6);
+  __ Ldp(x12, x13, MemOperand(x0));
+  __ Mrs(x21, NZCV);
+
+  __ Add(x7, x0, 1);  // src = &buf[1]
+  __ Mov(x8, x0);     // dst = &buf[0]
+  __ Mov(x9, 15);     // count = 15
+  __ Cpyfwn(x8, x7, x9);
+  __ Ldp(x14, x15, MemOperand(x0));
+  __ Mrs(x22, NZCV);
+  END();
+
+  if (CAN_RUN()) {
+    // Permitted results:
+    //            NZCV    Xs/Xd               Xn
+    //  Option A: ....    ends of buffers     0
+    //  Option B: ..C.    ends of buffers     0
+
+    std::vector<uint64_t> allowed_flags = {NoFlag, CFlag};
+
+    RUN();
+    ASSERT_EQUAL_64(buf_addr + 16, x1);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x2);  // Xd
+    ASSERT_EQUAL_64(0, x3);              // Xn
+    ASSERT_EQUAL_64(allowed_flags, x20);
+    ASSERT_EQUAL_64(0x0807'0605'0403'0201, x10);
+    ASSERT_EQUAL_64(0x0f0f'0e0d'0c0b'0a09, x11);
+
+    ASSERT_EQUAL_64(buf_addr + 16, x4);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x5);  // Xd
+    ASSERT_EQUAL_64(0, x6);              // Xn
+    ASSERT_EQUAL_64(allowed_flags, x21);
+    ASSERT_EQUAL_64(0x0908'0706'0504'0302, x12);
+    ASSERT_EQUAL_64(0x0f0f'0f0e'0d0c'0b0a, x13);
+
+    ASSERT_EQUAL_64(buf_addr + 16, x7);  // Xs
+    ASSERT_EQUAL_64(buf_addr + 15, x8);  // Xd
+    ASSERT_EQUAL_64(0, x9);              // Xn
+    ASSERT_EQUAL_64(allowed_flags, x22);
+    ASSERT_EQUAL_64(0x0a09'0807'0605'0403, x14);
+    ASSERT_EQUAL_64(0x0f0f'0f0f'0e0d'0c0b, x15);
+  }
+}
+
+TEST(cssc_abs) {
+  SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+  START();
+  __ Mov(x0, -1);
+  __ Mov(x1, 1);
+  __ Mov(x2, 0);
+  __ Mov(x3, 0x7fff'ffff);
+  __ Mov(x4, 0x8000'0000);
+  __ Mov(x5, 0x8000'0001);
+  __ Mov(x6, 0x7fff'ffff'ffff'ffff);
+  __ Mov(x7, 0x8000'0000'0000'0000);
+  __ Mov(x8, 0x8000'0000'0000'0001);
+
+  __ Abs(w10, w0);
+  __ Abs(x11, x0);
+  __ Abs(w12, w1);
+  __ Abs(x13, x1);
+  __ Abs(w14, w2);
+  __ Abs(x15, x2);
+
+  __ Abs(w19, w3);
+  __ Abs(x20, x3);
+  __ Abs(w21, w4);
+  __ Abs(x22, x4);
+  __ Abs(w23, w5);
+  __ Abs(x24, x5);
+  __ Abs(w25, w6);
+  __ Abs(x26, x6);
+  __ Abs(w27, w7);
+  __ Abs(x28, x7);
+  __ Abs(w29, w8);
+  __ Abs(x30, x8);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(1, x10);
+    ASSERT_EQUAL_64(1, x11);
+    ASSERT_EQUAL_64(1, x12);
+    ASSERT_EQUAL_64(1, x13);
+    ASSERT_EQUAL_64(0, x14);
+    ASSERT_EQUAL_64(0, x15);
+    ASSERT_EQUAL_64(0x7fff'ffff, x19);
+    ASSERT_EQUAL_64(0x7fff'ffff, x20);
+    ASSERT_EQUAL_64(0x8000'0000, x21);
+    ASSERT_EQUAL_64(0x8000'0000, x22);
+    ASSERT_EQUAL_64(0x7fff'ffff, x23);
+    ASSERT_EQUAL_64(0x8000'0001, x24);
+    ASSERT_EQUAL_64(1, x25);
+    ASSERT_EQUAL_64(0x7fff'ffff'ffff'ffff, x26);
+    ASSERT_EQUAL_64(0, x27);
+    ASSERT_EQUAL_64(0x8000'0000'0000'0000, x28);
+    ASSERT_EQUAL_64(1, x29);
+    ASSERT_EQUAL_64(0x7fff'ffff'ffff'ffff, x30);
+  }
+}
+
+TEST(cssc_cnt) {
+  SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+  START();
+  __ Mov(x0, -1);
+  __ Mov(x1, 1);
+  __ Mov(x2, 0);
+  __ Mov(x3, 0x7fff'ffff);
+  __ Mov(x4, 0x8000'0000);
+  __ Mov(x5, 0x8000'0001);
+  __ Mov(x6, 0x7fff'ffff'ffff'ffff);
+  __ Mov(x7, 0x4242'4242'aaaa'aaaa);
+
+  __ Cnt(w10, w0);
+  __ Cnt(x11, x0);
+  __ Cnt(w12, w1);
+  __ Cnt(x13, x1);
+  __ Cnt(w14, w2);
+  __ Cnt(x15, x2);
+  __ Cnt(w19, w3);
+  __ Cnt(x20, x3);
+  __ Cnt(w21, w4);
+  __ Cnt(x22, x4);
+  __ Cnt(w23, w5);
+  __ Cnt(x24, x5);
+  __ Cnt(w25, w6);
+  __ Cnt(x26, x6);
+  __ Cnt(w27, w7);
+  __ Cnt(x28, x7);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(32, x10);
+    ASSERT_EQUAL_64(64, x11);
+    ASSERT_EQUAL_64(1, x12);
+    ASSERT_EQUAL_64(1, x13);
+    ASSERT_EQUAL_64(0, x14);
+    ASSERT_EQUAL_64(0, x15);
+    ASSERT_EQUAL_64(31, x19);
+    ASSERT_EQUAL_64(31, x20);
+    ASSERT_EQUAL_64(1, x21);
+    ASSERT_EQUAL_64(1, x22);
+    ASSERT_EQUAL_64(2, x23);
+    ASSERT_EQUAL_64(2, x24);
+    ASSERT_EQUAL_64(32, x25);
+    ASSERT_EQUAL_64(63, x26);
+    ASSERT_EQUAL_64(16, x27);
+    ASSERT_EQUAL_64(24, x28);
+  }
+}
+
+TEST(cssc_ctz) {
+  SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+  START();
+  __ Mov(x0, -1);
+  __ Mov(x1, 1);
+  __ Mov(x2, 2);
+  __ Mov(x3, 0x7fff'ff00);
+  __ Mov(x4, 0x8000'4000);
+  __ Mov(x5, 0x4000'0001);
+  __ Mov(x6, 0x0000'0001'0000'0000);
+  __ Mov(x7, 0x4200'0000'0000'0000);
+
+  __ Ctz(w10, w0);
+  __ Ctz(x11, x0);
+  __ Ctz(w12, w1);
+  __ Ctz(x13, x1);
+  __ Ctz(w14, w2);
+  __ Ctz(x15, x2);
+  __ Ctz(w19, w3);
+  __ Ctz(x20, x3);
+  __ Ctz(w21, w4);
+  __ Ctz(x22, x4);
+  __ Ctz(w23, w5);
+  __ Ctz(x24, x5);
+  __ Ctz(w25, w6);
+  __ Ctz(x26, x6);
+  __ Ctz(w27, w7);
+  __ Ctz(x28, x7);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0, x10);
+    ASSERT_EQUAL_64(0, x11);
+    ASSERT_EQUAL_64(0, x12);
+    ASSERT_EQUAL_64(0, x13);
+    ASSERT_EQUAL_64(1, x14);
+    ASSERT_EQUAL_64(1, x15);
+    ASSERT_EQUAL_64(8, x19);
+    ASSERT_EQUAL_64(8, x20);
+    ASSERT_EQUAL_64(14, x21);
+    ASSERT_EQUAL_64(14, x22);
+    ASSERT_EQUAL_64(0, x23);
+    ASSERT_EQUAL_64(0, x24);
+    ASSERT_EQUAL_64(32, x25);
+    ASSERT_EQUAL_64(32, x26);
+    ASSERT_EQUAL_64(32, x27);
+    ASSERT_EQUAL_64(57, x28);
+  }
+}
+
+using MinMaxOp = void (MacroAssembler::*)(const Register&,
+                                          const Register&,
+                                          const Operand&);
+
+static void MinMaxHelper(MinMaxOp op,
+                         bool is_signed,
+                         uint64_t a,
+                         uint64_t b,
+                         uint32_t wexp,
+                         uint64_t xexp) {
+  SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+  START();
+  __ Mov(x0, a);
+  __ Mov(x1, b);
+  if ((is_signed && IsInt8(b)) || (!is_signed && IsUint8(b))) {
+    (masm.*op)(w10, w0, b);
+    (masm.*op)(x11, x0, b);
+  } else {
+    (masm.*op)(w10, w0, w1);
+    (masm.*op)(x11, x0, x1);
+  }
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_64(wexp, x10);
+    ASSERT_EQUAL_64(xexp, x11);
+  }
+}
+
+TEST(cssc_umin) {
+  MinMaxOp op = &MacroAssembler::Umin;
+  uint32_t s32min = 0x8000'0000;
+  uint32_t s32max = 0x7fff'ffff;
+  uint64_t s64min = 0x8000'0000'0000'0000;
+  uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+  MinMaxHelper(op, false, 0, 0, 0, 0);
+  MinMaxHelper(op, false, 128, 255, 128, 128);
+  MinMaxHelper(op, false, 0, 0xffff'ffff'ffff'ffff, 0, 0);
+  MinMaxHelper(op, false, s32max, s32min, s32max, s32max);
+  MinMaxHelper(op, false, s32min, s32max, s32max, s32max);
+  MinMaxHelper(op, false, s64max, s32min, s32min, s32min);
+  MinMaxHelper(op, false, s64min, s64max, 0, s64max);
+}
+
+TEST(cssc_umax) {
+  MinMaxOp op = &MacroAssembler::Umax;
+  uint32_t s32min = 0x8000'0000;
+  uint32_t s32max = 0x7fff'ffff;
+  uint64_t s64min = 0x8000'0000'0000'0000;
+  uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+  MinMaxHelper(op, false, 0, 0, 0, 0);
+  MinMaxHelper(op, false, 128, 255, 255, 255);
+  MinMaxHelper(op,
+               false,
+               0,
+               0xffff'ffff'ffff'ffff,
+               0xffff'ffff,
+               0xffff'ffff'ffff'ffff);
+  MinMaxHelper(op, false, s32max, s32min, s32min, s32min);
+  MinMaxHelper(op, false, s32min, s32max, s32min, s32min);
+  MinMaxHelper(op, false, s64max, s32min, 0xffff'ffff, s64max);
+  MinMaxHelper(op, false, s64min, s64max, 0xffff'ffff, s64min);
+}
+
+TEST(cssc_smin) {
+  MinMaxOp op = &MacroAssembler::Smin;
+  uint32_t s32min = 0x8000'0000;
+  uint32_t s32max = 0x7fff'ffff;
+  uint64_t s64min = 0x8000'0000'0000'0000;
+  uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+  MinMaxHelper(op, true, 0, 0, 0, 0);
+  MinMaxHelper(op, true, 128, 255, 128, 128);
+  MinMaxHelper(op,
+               true,
+               0,
+               0xffff'ffff'ffff'ffff,
+               0xffff'ffff,
+               0xffff'ffff'ffff'ffff);
+  MinMaxHelper(op, true, s32max, s32min, s32min, s32max);
+  MinMaxHelper(op, true, s32min, s32max, s32min, s32max);
+  MinMaxHelper(op, true, s64max, s32min, s32min, s32min);
+  MinMaxHelper(op, true, s64min, s64max, 0xffff'ffff, s64min);
+}
+
+TEST(cssc_smax) {
+  MinMaxOp op = &MacroAssembler::Smax;  // Macro under test.
+  uint32_t s32min = 0x8000'0000;            // INT32_MIN bit pattern.
+  uint32_t s32max = 0x7fff'ffff;            // INT32_MAX bit pattern.
+  uint64_t s64min = 0x8000'0000'0000'0000;  // INT64_MIN bit pattern.
+  uint64_t s64max = 0x7fff'ffff'ffff'ffff;  // INT64_MAX bit pattern.
+
+  MinMaxHelper(op, true, 0, 0, 0, 0);  // true: presumably "signed" - confirm.
+  MinMaxHelper(op, true, 128, 255, 255, 255);
+  MinMaxHelper(op, true, 0, 0xffff'ffff'ffff'ffff, 0, 0);  // All-ones is -1.
+  MinMaxHelper(op, true, s32max, s32min, s32max, s32min);
+  MinMaxHelper(op, true, s32min, s32max, s32max, s32min);  // Operands swapped.
+  MinMaxHelper(op, true, s64max, s32min, 0xffff'ffff, s64max);
+  MinMaxHelper(op, true, s64min, s64max, 0, s64max);
+}
+
+static void ChkfeatHelper(uint64_t initial,     // Value given to Chkfeat.
+                          uint64_t chkfeat,     // Expected value afterwards.
+                          CPUFeatures require) {  // Features for SETUP.
+  SETUP_WITH_FEATURES(require);
+
+  START();
+  __ Mov(x16, initial);
+  __ Chkfeat(x16);  // First run: operate on x16 directly.
+  __ Mov(x0, x16);  // Copy the result out of x16 for the check below.
+
+  __ Mov(x1, initial);
+  __ Chkfeat(x1);  // Second run: same input, but in a register other than x16.
+  END();
+
+  if (CAN_RUN()) {
+    RUN_WITHOUT_SEEN_FEATURE_CHECK();
+    ASSERT_EQUAL_64(chkfeat, x0);  // x16 form produced the expected value.
+    ASSERT_EQUAL_64(x0, x1);       // x1 form produced the same value.
+  }
+}
+
+TEST(chkfeat) { ChkfeatHelper(0x0, 0x0, CPUFeatures::None()); }  // 0 stays 0.
+
+TEST(chkfeat_gcs) { ChkfeatHelper(0x1, 0x0, CPUFeatures::kGCS); }  // 0x1 -> 0x0.
+
+TEST(chkfeat_unused) {
+  // Bits 1-63 are reserved. This test ensures that they are unmodified by
+  // `chkfeat`, but it will need to be updated if these bits are assigned in the
+  // future.
+  ChkfeatHelper(0xffff'ffff'ffff'fffe,  // Initial value: bits 1-63 set.
+                0xffff'ffff'ffff'fffe,  // Expected value: unchanged.
+                CPUFeatures::None());
+}
+
+TEST(gcs_feature_off) {
+  SETUP();  // No CPU features are requested for this test.
+
+  START();
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  simulator.DisableGCSCheck();
+#else
+// TODO: Disable GCS via operating system for this test, here and in the
+// gcs_off_pac_on test below.
+#endif
+  __ Mov(x16, 0x0123'4567'89ab'cdef);
+  __ Chkfeat(x16);  // x16 is expected to be left unchanged (asserted below).
+
+  // This sequence would fail with GCS enabled.
+  Label lab, end;
+  __ Bl(&lab);
+  __ B(&end);  // The return address set by the Bl above; never used.
+
+  __ Bind(&lab);
+  __ Adr(lr, &end);  // Replace the return address with `end`.
+  __ Ret();
+
+  __ Bind(&end);
+  END();
+
+  if (CAN_RUN()) {
+    // TODO: This will currently fail on GCS-supporting hardware.
+    RUN();
+    ASSERT_EQUAL_64(0x0123'4567'89ab'cdef, x16);  // Same value as loaded above.
+  }
+}
+
+TEST(gcs_gcspushm) {
+  SETUP_WITH_FEATURES(CPUFeatures::kGCS);
+
+  Label ret;
+  START();
+  __ Adr(x0, &ret);  // x0 = address of `ret`.
+  __ Gcspushm(x0);   // Presumably pushes x0 as a GCS return entry - confirm.
+  __ Ret(x0);        // Return to `ret` without a preceding Bl.
+  __ Nop();          // Skipped when the Ret above lands on `ret`.
+  __ Bind(&ret);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();  // No assertions: the test only checks that the run completes.
+  }
+}
+
+TEST(gcs_gcspopm) {
+  SETUP_WITH_FEATURES(CPUFeatures::kGCS);
+
+  Label lab, ret;
+  START();
+  __ Adr(x0, &ret);  // x0 = address of `ret`, bound directly after the Bl.
+  __ Bl(&lab);       // The return address of this call is therefore `ret`.
+  __ Bind(&ret);
+  __ Nop();
+  __ Bind(&lab);
+  __ Gcspopm(x1);  // x1 receives the popped entry; compared with x0 below.
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_64(x0, x1);  // Popped entry equals the Bl return address.
+  }
+}
+
+TEST(gcs_gcsss1) {
+  SETUP_WITH_FEATURES(CPUFeatures::kGCS);
+
+  START();
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  uint64_t new_gcs = simulator.GetGCSManager().AllocateStack();
+  __ Mov(x0, new_gcs);  // x0 = value returned by AllocateStack().
+#else
+// TODO: Request new GCS from the operating system. x0 is not set on this path.
+#endif
+
+  // Partial stack swap to check GCS has changed, and a token is at the top
+  // of the new stack.
+  __ Gcsss1(x0);
+  __ Gcspopm(x1);  // x1 = entry popped after the switch.
+
+  __ Bic(x0, x0, 7);  // Clear the low three bits of new GCS.
+  __ Bic(x2, x1, 7);  // Clear the low three bits of old GCS.
+  __ Cmp(x0, x2);
+  __ Cset(x0, eq);    // x0 = 1 if the masked values are equal, otherwise 0.
+  __ And(x1, x1, 7);  // In progress token.
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_64(0, x0);  // GCS must not be equal.
+    ASSERT_EQUAL_64(5, x1);  // In progress token must be present.
+  }
+}
+
+// TODO: Add extra tests for combinations of PAC and GCS enabled.
+TEST(gcs_stack_swap) {
+  SETUP_WITH_FEATURES(CPUFeatures::kGCS);
+
+  START();
+  Label stack_swap, sub_fn, end;
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  uint64_t new_gcs = simulator.GetGCSManager().AllocateStack();
+  __ Mov(x0, new_gcs);
+#else
+// TODO: Request new GCS from the operating system.
+#endif
+  __ Bl(&stack_swap);
+  __ B(&end);  // Reached when stack_swap returns; skips the function bodies.
+
+  __ Bind(&stack_swap);
+  __ Gcsss1(x0);  // x0 = new GCS.
+  __ Gcsss2(x1);  // x1 = old GCS.
+  __ Mov(x29, lr);  // Preserve lr, which the Bl below overwrites.
+  __ Bl(&sub_fn);
+  __ Mov(lr, x29);  // Recover the return address for the final Ret.
+  __ Gcsss1(x1);  // Restore old GCS.
+  __ Gcsss2(x0);
+  __ Ret();
+
+  __ Bind(&sub_fn);
+  __ Mov(x2, 42);  // Marker value asserted after the run.
+  __ Ret();
+
+  __ Bind(&end);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_64(42, x2);  // sub_fn executed.
+  }
+}
+
+TEST(gcs_off_pac_on) {
+  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
+
+  START();
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  simulator.DisableGCSCheck();
+#else
+// TODO: Disable GCS via operating system for this test, and enable for native.
+#endif
+  __ Mov(x16, 1);
+  __ Chkfeat(x16);
+  __ Mov(x1, x16);  // Keep the Chkfeat result; asserted to still be 1 below.
+
+  Label fn1, after_fn1;
+
+  __ Mov(x28, sp);  // Save sp and lr; both are restored before END().
+  __ Mov(x29, lr);
+  __ Mov(sp, 0x477d469dec0b8760);  // Fixed sp value; presumably the PAC modifier.
+
+  __ Mov(x0, 0);  // Overwritten with 42 only if fn1 runs.
+  __ B(&after_fn1);  // Jump over the body of fn1.
+
+  __ Bind(&fn1);
+  __ Mov(x0, 42);
+  __ Paciasp();  // Sign lr, then return through the authenticating Retaa.
+  __ Retaa();
+
+  __ Bind(&after_fn1);
+  __ Bl(&fn1);
+
+  __ Mov(sp, x28);
+  __ Mov(lr, x29);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(42, x0);  // fn1 ran and returned.
+    ASSERT_EQUAL_64(1, x1);   // Chkfeat left the value 1 unchanged.
+  }
+}
+
+#ifdef VIXL_NEGATIVE_TESTING
+TEST(gcs_negative_test) {
+  SETUP_WITH_FEATURES(CPUFeatures::kGCS);
+
+  Label fn, bad_return_addr, done;
+  START();
+  __ Bl(&fn);
+  __ Nop();  // GCS enforces that fn() returns here...
+
+  __ Bind(&bad_return_addr);
+  __ B(&done);  // ... but this test attempts to return here.
+
+  __ Bind(&fn);
+  __ Adr(lr, &bad_return_addr);  // Replace the return address set by the Bl.
+  __ Ret();
+
+  __ Bind(&done);
+  END();
+
+  if (CAN_RUN()) {
+    MUST_FAIL_WITH_MESSAGE(RUN(), "GCS failed");  // The run itself must fail.
+  }
+}
+#endif  // VIXL_NEGATIVE_TESTING
+
+TEST(dc_zva) {  // Checks that DC ZVA zeroes the 64-byte block around an address.
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+
+  const int zva_blocksize = 64;  // Assumed blocksize.
+  uint8_t buf[2 * zva_blocksize];  // Twice the block size: an aligned block fits.
+  uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+  uintptr_t aligned_addr = AlignUp(buf_addr, zva_blocksize);
+
+  START();
+  // Skip this test if the ZVA blocksize is not 64 bytes.
+  // Set up initial register values to allow the test to pass when skipped.
+  Label skip;
+  __ Movi(q0.V16B(), 0);
+  __ Movi(q1.V16B(), 0);
+  __ Movi(q2.V16B(), 0);
+  __ Movi(q3.V16B(), 0);
+
+  __ Mrs(x1, DCZID_EL0);  // x1 is read again on the host side after the run.
+  __ Cmp(x1, 4);  // 4 => DC ZVA enabled with 64-byte blocks.
+  __ B(ne, &skip);
+
+  // Fill aligned region with a pattern.
+  __ Mov(x0, aligned_addr);
+  __ Movi(q0.V16B(), 0x55);
+  __ Movi(q1.V16B(), 0xaa);
+  __ Movi(q2.V16B(), 0x55);
+  __ Movi(q3.V16B(), 0xaa);
+  __ St4(q0.V16B(), q1.V16B(), q2.V16B(), q3.V16B(), MemOperand(x0));
+
+  // Misalign the address to check DC ZVA re-aligns.
+  __ Add(x0, x0, 42);
+
+  // Clear the aligned region.
+  __ Dc(ZVA, x0);
+
+  // Reload the aligned region to check contents.
+  __ Mov(x0, aligned_addr);
+  __ Ld1(q0.V16B(), q1.V16B(), q2.V16B(), q3.V16B(), MemOperand(x0));
+
+  __ Bind(&skip);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    if (core.xreg(1) == 4) {  // Same condition the generated code tested.
+      ASSERT_EQUAL_128(0, 0, q0);
+      ASSERT_EQUAL_128(0, 0, q1);
+      ASSERT_EQUAL_128(0, 0, q2);
+      ASSERT_EQUAL_128(0, 0, q3);
+    } else {
+      printf("SKIPPED: DC ZVA chunksize not 64-bytes\n");  // End the line.
+    }
+  }
+}
+
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 // Test the pseudo-instructions that control CPUFeatures dynamically in the
 // Simulator. These are used by the test infrastructure itself, but in a fairly
@@ -13687,5 +15647,26 @@
 #endif
 #endif
 
+TEST(scalar_movi) {
+  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kNEON);
+  START();
+
+  // Make sure that V0 is initialized to a non-zero value.
+  __ Movi(v0.V16B(), 0xFF);
+  // This constant value can't be encoded in a MOVI instruction,
+  // so the program would use a fallback path that must set the
+  // upper 64 bits of the destination vector to 0.
+  __ Movi(v0.V1D(), 0xDECAFC0FFEE);
+  __ Mov(x0, v0.V2D(), 1);  // x0 = lane 1 of v0, i.e. its upper 64 bits.
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_64(0, x0);  // Upper half was cleared by the V1D Movi.
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-assembler-aarch64.h b/test/aarch64/test-assembler-aarch64.h
index c3f3264..ee7467b 100644
--- a/test/aarch64/test-assembler-aarch64.h
+++ b/test/aarch64/test-assembler-aarch64.h
@@ -132,6 +132,12 @@
     SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);        \
     __ PushCalleeSavedRegisters();                                            \
   }                                                                           \
+  /* The infrastructure code hasn't been covered at the moment, e.g. */       \
+  /* prologue/epilogue. Suppress tagging mis-match exception before  */       \
+  /* this point. */                                                           \
+  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                        \
+    __ Hlt(DebugHltOpcode::kMTEActive);                                       \
+  }                                                                           \
   {                                                                           \
     int trace_parameters = 0;                                                 \
     if (Test::trace_reg()) trace_parameters |= LOG_STATE;                     \
@@ -151,6 +157,9 @@
   /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
   USE(offset_before_infrastructure_end);                                 \
   __ Trace(LOG_ALL, TRACE_DISABLE);                                      \
+  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                   \
+    __ Hlt(DebugHltOpcode::kMTEInactive);                                \
+  }                                                                      \
   {                                                                      \
     SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);   \
     core.Dump(&masm);                                                    \
@@ -163,7 +172,7 @@
   RUN_WITHOUT_SEEN_FEATURE_CHECK();                                            \
   {                                                                            \
     /* We expect the test to use all of the features it requested, plus the */ \
-    /* features that the instructure code requires.                         */ \
+    /* features that the infrastructure code requires.                      */ \
     CPUFeatures const& expected_features =                                     \
         simulator.GetCPUFeatures()->With(CPUFeatures::kNEON);                  \
     CPUFeatures const& seen = simulator.GetSeenFeatures();                     \
diff --git a/test/aarch64/test-assembler-fp-aarch64.cc b/test/aarch64/test-assembler-fp-aarch64.cc
index 4ae9ec7..2201029 100644
--- a/test/aarch64/test-assembler-fp-aarch64.cc
+++ b/test/aarch64/test-assembler-fp-aarch64.cc
@@ -24,22 +24,21 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <sys/mman.h>
-
 #include <cfloat>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <sys/mman.h>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-assembler-aarch64.h"
 
 namespace vixl {
@@ -3671,720 +3670,280 @@
   }
 }
 
+typedef void (MacroAssembler::*FcvtFn2)(const Register& rd,  // Two-operand form.
+                                        const VRegister& vn);
+typedef void (MacroAssembler::*FcvtFn3)(const Register& rd,  // Form with fbits.
+                                        const VRegister& vn,
+                                        int fbits);
+
+static void GenFcvt(MacroAssembler* m,  // Overload for two-operand macros.
+                    FcvtFn2 fn,
+                    const Register& rd,
+                    const VRegister& vn) {
+  (m->*fn)(rd, vn);  // Call the member function on `m`.
+}
+static void GenFcvt(MacroAssembler* m,  // Overload for macros taking fbits.
+                    FcvtFn3 fn,
+                    const Register& rd,
+                    const VRegister& vn) {
+  (m->*fn)(rd, vn, 0);  // fbits is always passed as 0.
+}
+
+template <typename F = FcvtFn2, typename T, size_t N>
+static void FcvtHelper(F fn,                  // Conversion macro to exercise.
+                       const T (&inputs)[N],  // Input i is loaded into s<i>/d<i>.
+                       const uint64_t (&expected)[N],  // Expected value of x<i>.
+                       int dstsize) {  // kWRegSize or kXRegSize.
+  VIXL_STATIC_ASSERT(N < 16);  // Input i uses register i; at most 15 inputs.
+
+  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  START();
+
+  for (unsigned i = 0; i < N; i++) {
+    Register wi = WRegister(i);
+    Register xi = XRegister(i);
+    VRegister si = SRegister(i);
+    VRegister di = DRegister(i);
+
+    if (std::is_same<float, T>::value) {  // float inputs go through s<i>.
+      __ Fmov(si, inputs[i]);
+      if (dstsize == kWRegSize) {
+        GenFcvt(&masm, fn, wi, si);
+      } else {
+        VIXL_ASSERT(dstsize == kXRegSize);
+        GenFcvt(&masm, fn, xi, si);
+      }
+    } else {  // Any other T goes through d<i>.
+      __ Fmov(di, inputs[i]);
+      if (dstsize == kWRegSize) {
+        GenFcvt(&masm, fn, wi, di);
+      } else {
+        VIXL_ASSERT(dstsize == kXRegSize);
+        GenFcvt(&masm, fn, xi, di);
+      }
+    }
+  }
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+
+    for (unsigned i = 0; i < N; i++) {
+      ASSERT_EQUAL_64(expected[i], XRegister(i));  // 64-bit compare, even for W.
+    }
+  }
+}
+
+// Largest float/double < INT32_MAX (0x7fffff80 and 0x7ffffffe respectively).
+static const float kLargestF32ltI32Max = RawbitsToFloat(0x4effffff);
+static const double kLargestF64ltI32Max = kWMaxInt - 1;
+
+// Smallest float/double > INT32_MIN (-0x7fffff80 and -0x7fffffff respectively).
+static const float kSmallestF32gtI32Min = RawbitsToFloat(0xceffffff);
+static const double kSmallestF64gtI32Min = kWMinInt + 1;
+
+// Largest float/double < INT64_MAX (0x7fffff8000000000, 0x7ffffffffffffc00).
+static const float kLargestF32ltI64Max = RawbitsToFloat(0x5effffff);
+static const double kLargestF64ltI64Max = RawbitsToDouble(0x43dfffffffffffff);
+
+// Smallest float/double > INT64_MIN (same raw bits as above, sign bit set).
+static const float kSmallestF32gtI64Min = RawbitsToFloat(0xdeffffff);
+static const double kSmallestF64gtI64Min = RawbitsToDouble(0xc3dfffffffffffff);
+
+// Largest float/double < UINT32_MAX (integer literals, converted implicitly).
+static const float kLargestF32ltU32Max = 0xffffff00;
+static const double kLargestF64ltU32Max = 0xfffffffe;
+
+// Largest float/double < UINT64_MAX (integer literals, converted implicitly).
+static const float kLargestF32ltU64Max = 0xffffff0000000000;
+static const double kLargestF64ltU64Max = 0xfffffffffffff800;
+
+TEST(fcvt_infinity) {
+  float inputs_s[] = {kFP32PositiveInfinity, kFP32NegativeInfinity};
+  double inputs_d[] = {kFP64PositiveInfinity, kFP64NegativeInfinity};
+  uint64_t expected_w[] = {0x7fffffff, 0x80000000};  // INT32_MAX, INT32_MIN.
+  uint64_t expected_x[] = {0x7fffffffffffffff, 0x8000000000000000};  // 64-bit.
+
+  // Test all combinations of signed fcvt variant, input size and output size.
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_s, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_s, expected_w, kWRegSize);
+
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_d, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_d, expected_w, kWRegSize);
+
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_s, expected_x, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_s, expected_x, kXRegSize);
+
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_d, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_d, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_d, expected_x, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_d, expected_x, kXRegSize);
+}
+
+TEST(fcvt_ws_minmax) {  // float input, W-sized result.
+  float inputs[] = {kLargestF32ltI32Max, kSmallestF32gtI32Min};
+  uint64_t expected[] = {0x7fffff80, 0x80000080};  // Same for every variant.
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs, expected, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs, expected, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs, expected, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs, expected, kWRegSize);
+
+  float inputs_u[] = {kLargestF32ltU32Max};  // Unsigned variants.
+  uint64_t expected_u[] = {0xffffff00};
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_u, expected_u, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_u, expected_u, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_u, expected_u, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_u, expected_u, kWRegSize);
+}
+
+TEST(fcvt_wd_minmax) {  // double input, W-sized result.
+  double inputs[] = {kLargestF64ltI32Max, kSmallestF64gtI32Min};
+  uint64_t expected[] = {0x7ffffffe, 0x80000001};  // Same for every variant.
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs, expected, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs, expected, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs, expected, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs, expected, kWRegSize);
+
+  double inputs_u[] = {kLargestF64ltU32Max};  // Unsigned variants.
+  uint64_t expected_u[] = {0xfffffffe};
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_u, expected_u, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_u, expected_u, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_u, expected_u, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_u, expected_u, kWRegSize);
+}
+
+TEST(fcvt_xs_minmax) {  // float input, X-sized result.
+  float inputs[] = {kLargestF32ltI64Max, kSmallestF32gtI64Min};
+  uint64_t expected[] = {0x7fffff8000000000, 0x8000008000000000};  // All variants.
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs, expected, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs, expected, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs, expected, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs, expected, kXRegSize);
+
+  float inputs_u[] = {kLargestF32ltU64Max};  // Unsigned variants.
+  uint64_t expected_u[] = {0xffffff0000000000};
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_u, expected_u, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_u, expected_u, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_u, expected_u, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_u, expected_u, kXRegSize);
+}
+
+TEST(fcvt_xd_minmax) {  // double input, X-sized result.
+  double inputs[] = {kLargestF64ltI64Max, kSmallestF64gtI64Min};
+  uint64_t expected[] = {0x7ffffffffffffc00, 0x8000000000000400};  // All variants.
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs, expected, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs, expected, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs, expected, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs, expected, kXRegSize);
+
+  double inputs_u[] = {kLargestF64ltU64Max};  // Unsigned variants.
+  uint64_t expected_u[] = {0xfffffffffffff800};
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_u, expected_u, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_u, expected_u, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_u, expected_u, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_u, expected_u, kXRegSize);
+}
 
 TEST(fcvtas) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 2.5, -2.5};
+  double inputs_d[] = {1.0, 1.1, 2.5, -2.5};
+  uint64_t expected_w[] = {1, 1, 3, 0xfffffffd};
+  uint64_t expected_x[] = {1, 1, 3, 0xfffffffffffffffd};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 2.5);
-  __ Fmov(s3, -2.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 2.5);
-  __ Fmov(d11, -2.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 2.5);
-  __ Fmov(s19, -2.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 2.5);
-  __ Fmov(d26, -2.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
-
-  __ Fcvtas(w0, s0);
-  __ Fcvtas(w1, s1);
-  __ Fcvtas(w2, s2);
-  __ Fcvtas(w3, s3);
-  __ Fcvtas(w4, s4);
-  __ Fcvtas(w5, s5);
-  __ Fcvtas(w6, s6);
-  __ Fcvtas(w7, s7);
-  __ Fcvtas(w8, d8);
-  __ Fcvtas(w9, d9);
-  __ Fcvtas(w10, d10);
-  __ Fcvtas(w11, d11);
-  __ Fcvtas(w12, d12);
-  __ Fcvtas(w13, d13);
-  __ Fcvtas(w14, d14);
-  __ Fcvtas(w15, d15);
-  __ Fcvtas(x17, s17);
-  __ Fcvtas(x18, s18);
-  __ Fcvtas(x19, s19);
-  __ Fcvtas(x20, s20);
-  __ Fcvtas(x21, s21);
-  __ Fcvtas(x22, s22);
-  __ Fcvtas(x23, s23);
-  __ Fcvtas(x24, d24);
-  __ Fcvtas(x25, d25);
-  __ Fcvtas(x26, d26);
-  __ Fcvtas(x27, d27);
-  __ Fcvtas(x28, d28);
-  __ Fcvtas(x29, d29);
-  __ Fcvtas(x30, d30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(3, x2);
-    ASSERT_EQUAL_64(0xfffffffd, x3);
-    ASSERT_EQUAL_64(0x7fffffff, x4);
-    ASSERT_EQUAL_64(0x80000000, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0x80000080, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(3, x10);
-    ASSERT_EQUAL_64(0xfffffffd, x11);
-    ASSERT_EQUAL_64(0x7fffffff, x12);
-    ASSERT_EQUAL_64(0x80000000, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(0x80000001, x15);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(3, x18);
-    ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
-    ASSERT_EQUAL_64(0x8000000000000000, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0x8000008000000000, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(3, x25);
-    ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
-    ASSERT_EQUAL_64(0x8000000000000000, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0x8000000000000400, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtas, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtau) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 2.5, -2.5, 0x100000000};
+  double inputs_d[] = {1.0, 1.1, 2.5, -2.5, 0x100000000};
+  uint64_t expected_w[] = {1, 1, 3, 0, 0xffffffff};
+  uint64_t expected_x[] = {1, 1, 3, 0, 0x100000000};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 2.5);
-  __ Fmov(s3, -2.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 2.5);
-  __ Fmov(d11, -2.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, 0xfffffffe);
-  __ Fmov(s16, 1.0);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 2.5);
-  __ Fmov(s19, -2.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 2.5);
-  __ Fmov(d26, -2.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
-  __ Fmov(s30, 0x100000000);
-
-  __ Fcvtau(w0, s0);
-  __ Fcvtau(w1, s1);
-  __ Fcvtau(w2, s2);
-  __ Fcvtau(w3, s3);
-  __ Fcvtau(w4, s4);
-  __ Fcvtau(w5, s5);
-  __ Fcvtau(w6, s6);
-  __ Fcvtau(w8, d8);
-  __ Fcvtau(w9, d9);
-  __ Fcvtau(w10, d10);
-  __ Fcvtau(w11, d11);
-  __ Fcvtau(w12, d12);
-  __ Fcvtau(w13, d13);
-  __ Fcvtau(w14, d14);
-  __ Fcvtau(w15, d15);
-  __ Fcvtau(x16, s16);
-  __ Fcvtau(x17, s17);
-  __ Fcvtau(x18, s18);
-  __ Fcvtau(x19, s19);
-  __ Fcvtau(x20, s20);
-  __ Fcvtau(x21, s21);
-  __ Fcvtau(x22, s22);
-  __ Fcvtau(x24, d24);
-  __ Fcvtau(x25, d25);
-  __ Fcvtau(x26, d26);
-  __ Fcvtau(x27, d27);
-  __ Fcvtau(x28, d28);
-  __ Fcvtau(x29, d29);
-  __ Fcvtau(w30, s30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(3, x2);
-    ASSERT_EQUAL_64(0, x3);
-    ASSERT_EQUAL_64(0xffffffff, x4);
-    ASSERT_EQUAL_64(0, x5);
-    ASSERT_EQUAL_64(0xffffff00, x6);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(3, x10);
-    ASSERT_EQUAL_64(0, x11);
-    ASSERT_EQUAL_64(0xffffffff, x12);
-    ASSERT_EQUAL_64(0, x13);
-    ASSERT_EQUAL_64(0xfffffffe, x14);
-    ASSERT_EQUAL_64(1, x16);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(3, x18);
-    ASSERT_EQUAL_64(0, x19);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x20);
-    ASSERT_EQUAL_64(0, x21);
-    ASSERT_EQUAL_64(0xffffff0000000000, x22);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(3, x25);
-    ASSERT_EQUAL_64(0, x26);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x27);
-    ASSERT_EQUAL_64(0, x28);
-    ASSERT_EQUAL_64(0xfffffffffffff800, x29);
-    ASSERT_EQUAL_64(0xffffffff, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtau, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtms) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5};
+  uint64_t expected_w[] = {1, 1, 1, 0xfffffffe};
+  uint64_t expected_x[] = {1, 1, 1, 0xfffffffffffffffe};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
-
-  __ Fcvtms(w0, s0);
-  __ Fcvtms(w1, s1);
-  __ Fcvtms(w2, s2);
-  __ Fcvtms(w3, s3);
-  __ Fcvtms(w4, s4);
-  __ Fcvtms(w5, s5);
-  __ Fcvtms(w6, s6);
-  __ Fcvtms(w7, s7);
-  __ Fcvtms(w8, d8);
-  __ Fcvtms(w9, d9);
-  __ Fcvtms(w10, d10);
-  __ Fcvtms(w11, d11);
-  __ Fcvtms(w12, d12);
-  __ Fcvtms(w13, d13);
-  __ Fcvtms(w14, d14);
-  __ Fcvtms(w15, d15);
-  __ Fcvtms(x17, s17);
-  __ Fcvtms(x18, s18);
-  __ Fcvtms(x19, s19);
-  __ Fcvtms(x20, s20);
-  __ Fcvtms(x21, s21);
-  __ Fcvtms(x22, s22);
-  __ Fcvtms(x23, s23);
-  __ Fcvtms(x24, d24);
-  __ Fcvtms(x25, d25);
-  __ Fcvtms(x26, d26);
-  __ Fcvtms(x27, d27);
-  __ Fcvtms(x28, d28);
-  __ Fcvtms(x29, d29);
-  __ Fcvtms(x30, d30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(1, x2);
-    ASSERT_EQUAL_64(0xfffffffe, x3);
-    ASSERT_EQUAL_64(0x7fffffff, x4);
-    ASSERT_EQUAL_64(0x80000000, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0x80000080, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(1, x10);
-    ASSERT_EQUAL_64(0xfffffffe, x11);
-    ASSERT_EQUAL_64(0x7fffffff, x12);
-    ASSERT_EQUAL_64(0x80000000, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(0x80000001, x15);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(1, x18);
-    ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
-    ASSERT_EQUAL_64(0x8000000000000000, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0x8000008000000000, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(1, x25);
-    ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
-    ASSERT_EQUAL_64(0x8000000000000000, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0x8000000000000400, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtms, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtmu) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5};
+  uint64_t expected_w[] = {1, 1, 1, 0};
+  uint64_t expected_x[] = {1, 1, 1, 0};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
-
-  __ Fcvtmu(w0, s0);
-  __ Fcvtmu(w1, s1);
-  __ Fcvtmu(w2, s2);
-  __ Fcvtmu(w3, s3);
-  __ Fcvtmu(w4, s4);
-  __ Fcvtmu(w5, s5);
-  __ Fcvtmu(w6, s6);
-  __ Fcvtmu(w7, s7);
-  __ Fcvtmu(w8, d8);
-  __ Fcvtmu(w9, d9);
-  __ Fcvtmu(w10, d10);
-  __ Fcvtmu(w11, d11);
-  __ Fcvtmu(w12, d12);
-  __ Fcvtmu(w13, d13);
-  __ Fcvtmu(w14, d14);
-  __ Fcvtmu(x17, s17);
-  __ Fcvtmu(x18, s18);
-  __ Fcvtmu(x19, s19);
-  __ Fcvtmu(x20, s20);
-  __ Fcvtmu(x21, s21);
-  __ Fcvtmu(x22, s22);
-  __ Fcvtmu(x23, s23);
-  __ Fcvtmu(x24, d24);
-  __ Fcvtmu(x25, d25);
-  __ Fcvtmu(x26, d26);
-  __ Fcvtmu(x27, d27);
-  __ Fcvtmu(x28, d28);
-  __ Fcvtmu(x29, d29);
-  __ Fcvtmu(x30, d30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(1, x2);
-    ASSERT_EQUAL_64(0, x3);
-    ASSERT_EQUAL_64(0xffffffff, x4);
-    ASSERT_EQUAL_64(0, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(1, x10);
-    ASSERT_EQUAL_64(0, x11);
-    ASSERT_EQUAL_64(0xffffffff, x12);
-    ASSERT_EQUAL_64(0, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(1, x18);
-    ASSERT_EQUAL_64(0, x19);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x20);
-    ASSERT_EQUAL_64(0, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(1, x25);
-    ASSERT_EQUAL_64(0, x26);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x27);
-    ASSERT_EQUAL_64(0, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtmu, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtns) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5};
+  uint64_t expected_w[] = {1, 1, 2, 0xfffffffe};
+  uint64_t expected_x[] = {1, 1, 2, 0xfffffffffffffffe};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
-
-  __ Fcvtns(w0, s0);
-  __ Fcvtns(w1, s1);
-  __ Fcvtns(w2, s2);
-  __ Fcvtns(w3, s3);
-  __ Fcvtns(w4, s4);
-  __ Fcvtns(w5, s5);
-  __ Fcvtns(w6, s6);
-  __ Fcvtns(w7, s7);
-  __ Fcvtns(w8, d8);
-  __ Fcvtns(w9, d9);
-  __ Fcvtns(w10, d10);
-  __ Fcvtns(w11, d11);
-  __ Fcvtns(w12, d12);
-  __ Fcvtns(w13, d13);
-  __ Fcvtns(w14, d14);
-  __ Fcvtns(w15, d15);
-  __ Fcvtns(x17, s17);
-  __ Fcvtns(x18, s18);
-  __ Fcvtns(x19, s19);
-  __ Fcvtns(x20, s20);
-  __ Fcvtns(x21, s21);
-  __ Fcvtns(x22, s22);
-  __ Fcvtns(x23, s23);
-  __ Fcvtns(x24, d24);
-  __ Fcvtns(x25, d25);
-  __ Fcvtns(x26, d26);
-  __ Fcvtns(x27, d27);
-  __ Fcvtns(x28, d28);
-  __ Fcvtns(x29, d29);
-  __ Fcvtns(x30, d30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(2, x2);
-    ASSERT_EQUAL_64(0xfffffffe, x3);
-    ASSERT_EQUAL_64(0x7fffffff, x4);
-    ASSERT_EQUAL_64(0x80000000, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0x80000080, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(2, x10);
-    ASSERT_EQUAL_64(0xfffffffe, x11);
-    ASSERT_EQUAL_64(0x7fffffff, x12);
-    ASSERT_EQUAL_64(0x80000000, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(0x80000001, x15);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(2, x18);
-    ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
-    ASSERT_EQUAL_64(0x8000000000000000, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0x8000008000000000, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(2, x25);
-    ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
-    ASSERT_EQUAL_64(0x8000000000000000, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0x8000000000000400, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtns, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtnu) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5, 0x100000000};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5, 0x100000000};
+  uint64_t expected_w[] = {1, 1, 2, 0, 0xffffffff};
+  uint64_t expected_x[] = {1, 1, 2, 0, 0x100000000};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, 0xfffffffe);
-  __ Fmov(s16, 1.0);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
-  __ Fmov(s30, 0x100000000);
-
-  __ Fcvtnu(w0, s0);
-  __ Fcvtnu(w1, s1);
-  __ Fcvtnu(w2, s2);
-  __ Fcvtnu(w3, s3);
-  __ Fcvtnu(w4, s4);
-  __ Fcvtnu(w5, s5);
-  __ Fcvtnu(w6, s6);
-  __ Fcvtnu(w8, d8);
-  __ Fcvtnu(w9, d9);
-  __ Fcvtnu(w10, d10);
-  __ Fcvtnu(w11, d11);
-  __ Fcvtnu(w12, d12);
-  __ Fcvtnu(w13, d13);
-  __ Fcvtnu(w14, d14);
-  __ Fcvtnu(w15, d15);
-  __ Fcvtnu(x16, s16);
-  __ Fcvtnu(x17, s17);
-  __ Fcvtnu(x18, s18);
-  __ Fcvtnu(x19, s19);
-  __ Fcvtnu(x20, s20);
-  __ Fcvtnu(x21, s21);
-  __ Fcvtnu(x22, s22);
-  __ Fcvtnu(x24, d24);
-  __ Fcvtnu(x25, d25);
-  __ Fcvtnu(x26, d26);
-  __ Fcvtnu(x27, d27);
-  __ Fcvtnu(x28, d28);
-  __ Fcvtnu(x29, d29);
-  __ Fcvtnu(w30, s30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(2, x2);
-    ASSERT_EQUAL_64(0, x3);
-    ASSERT_EQUAL_64(0xffffffff, x4);
-    ASSERT_EQUAL_64(0, x5);
-    ASSERT_EQUAL_64(0xffffff00, x6);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(2, x10);
-    ASSERT_EQUAL_64(0, x11);
-    ASSERT_EQUAL_64(0xffffffff, x12);
-    ASSERT_EQUAL_64(0, x13);
-    ASSERT_EQUAL_64(0xfffffffe, x14);
-    ASSERT_EQUAL_64(1, x16);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(2, x18);
-    ASSERT_EQUAL_64(0, x19);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x20);
-    ASSERT_EQUAL_64(0, x21);
-    ASSERT_EQUAL_64(0xffffff0000000000, x22);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(2, x25);
-    ASSERT_EQUAL_64(0, x26);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x27);
-    ASSERT_EQUAL_64(0, x28);
-    ASSERT_EQUAL_64(0xfffffffffffff800, x29);
-    ASSERT_EQUAL_64(0xffffffff, x30);
-  }
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_s, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_d, expected_w, kWRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_s, expected_x, kXRegSize);
+  FcvtHelper(&MacroAssembler::Fcvtnu, inputs_d, expected_x, kXRegSize);
 }
 
-
 TEST(fcvtzs) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5};
+  uint64_t expected_w[] = {1, 1, 1, 0xffffffff};
+  uint64_t expected_x[] = {1, 1, 1, 0xffffffffffffffff};
 
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_s, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_d, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_s, expected_x, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzs, inputs_d, expected_x, kXRegSize);
+}
 
-  __ Fcvtzs(w0, s0);
-  __ Fcvtzs(w1, s1);
-  __ Fcvtzs(w2, s2);
-  __ Fcvtzs(w3, s3);
-  __ Fcvtzs(w4, s4);
-  __ Fcvtzs(w5, s5);
-  __ Fcvtzs(w6, s6);
-  __ Fcvtzs(w7, s7);
-  __ Fcvtzs(w8, d8);
-  __ Fcvtzs(w9, d9);
-  __ Fcvtzs(w10, d10);
-  __ Fcvtzs(w11, d11);
-  __ Fcvtzs(w12, d12);
-  __ Fcvtzs(w13, d13);
-  __ Fcvtzs(w14, d14);
-  __ Fcvtzs(w15, d15);
-  __ Fcvtzs(x17, s17);
-  __ Fcvtzs(x18, s18);
-  __ Fcvtzs(x19, s19);
-  __ Fcvtzs(x20, s20);
-  __ Fcvtzs(x21, s21);
-  __ Fcvtzs(x22, s22);
-  __ Fcvtzs(x23, s23);
-  __ Fcvtzs(x24, d24);
-  __ Fcvtzs(x25, d25);
-  __ Fcvtzs(x26, d26);
-  __ Fcvtzs(x27, d27);
-  __ Fcvtzs(x28, d28);
-  __ Fcvtzs(x29, d29);
-  __ Fcvtzs(x30, d30);
-  END();
+TEST(fcvtzu) {
+  float inputs_s[] = {1.0, 1.1, 1.5, -1.5};
+  double inputs_d[] = {1.0, 1.1, 1.5, -1.5};
+  uint64_t expected_w[] = {1, 1, 1, 0};
+  uint64_t expected_x[] = {1, 1, 1, 0};
 
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(1, x2);
-    ASSERT_EQUAL_64(0xffffffff, x3);
-    ASSERT_EQUAL_64(0x7fffffff, x4);
-    ASSERT_EQUAL_64(0x80000000, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0x80000080, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(1, x10);
-    ASSERT_EQUAL_64(0xffffffff, x11);
-    ASSERT_EQUAL_64(0x7fffffff, x12);
-    ASSERT_EQUAL_64(0x80000000, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(0x80000001, x15);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(1, x18);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x19);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
-    ASSERT_EQUAL_64(0x8000000000000000, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0x8000008000000000, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(1, x25);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x26);
-    ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
-    ASSERT_EQUAL_64(0x8000000000000000, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0x8000000000000400, x30);
-  }
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_s, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_d, expected_w, kWRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_s, expected_x, kXRegSize);
+  FcvtHelper<FcvtFn3>(&MacroAssembler::Fcvtzu, inputs_d, expected_x, kXRegSize);
 }
 
 void FjcvtzsHelper(uint64_t value, uint64_t expected, uint32_t expected_z) {
@@ -4490,107 +4049,6 @@
   }
 }
 
-TEST(fcvtzu) {
-  SETUP_WITH_FEATURES(CPUFeatures::kFP);
-
-  START();
-  __ Fmov(s0, 1.0);
-  __ Fmov(s1, 1.1);
-  __ Fmov(s2, 1.5);
-  __ Fmov(s3, -1.5);
-  __ Fmov(s4, kFP32PositiveInfinity);
-  __ Fmov(s5, kFP32NegativeInfinity);
-  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
-  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
-  __ Fmov(d8, 1.0);
-  __ Fmov(d9, 1.1);
-  __ Fmov(d10, 1.5);
-  __ Fmov(d11, -1.5);
-  __ Fmov(d12, kFP64PositiveInfinity);
-  __ Fmov(d13, kFP64NegativeInfinity);
-  __ Fmov(d14, kWMaxInt - 1);
-  __ Fmov(d15, kWMinInt + 1);
-  __ Fmov(s17, 1.1);
-  __ Fmov(s18, 1.5);
-  __ Fmov(s19, -1.5);
-  __ Fmov(s20, kFP32PositiveInfinity);
-  __ Fmov(s21, kFP32NegativeInfinity);
-  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
-  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
-  __ Fmov(d24, 1.1);
-  __ Fmov(d25, 1.5);
-  __ Fmov(d26, -1.5);
-  __ Fmov(d27, kFP64PositiveInfinity);
-  __ Fmov(d28, kFP64NegativeInfinity);
-  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
-  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
-
-  __ Fcvtzu(w0, s0);
-  __ Fcvtzu(w1, s1);
-  __ Fcvtzu(w2, s2);
-  __ Fcvtzu(w3, s3);
-  __ Fcvtzu(w4, s4);
-  __ Fcvtzu(w5, s5);
-  __ Fcvtzu(w6, s6);
-  __ Fcvtzu(w7, s7);
-  __ Fcvtzu(w8, d8);
-  __ Fcvtzu(w9, d9);
-  __ Fcvtzu(w10, d10);
-  __ Fcvtzu(w11, d11);
-  __ Fcvtzu(w12, d12);
-  __ Fcvtzu(w13, d13);
-  __ Fcvtzu(w14, d14);
-  __ Fcvtzu(x17, s17);
-  __ Fcvtzu(x18, s18);
-  __ Fcvtzu(x19, s19);
-  __ Fcvtzu(x20, s20);
-  __ Fcvtzu(x21, s21);
-  __ Fcvtzu(x22, s22);
-  __ Fcvtzu(x23, s23);
-  __ Fcvtzu(x24, d24);
-  __ Fcvtzu(x25, d25);
-  __ Fcvtzu(x26, d26);
-  __ Fcvtzu(x27, d27);
-  __ Fcvtzu(x28, d28);
-  __ Fcvtzu(x29, d29);
-  __ Fcvtzu(x30, d30);
-  END();
-
-  if (CAN_RUN()) {
-    RUN();
-
-    ASSERT_EQUAL_64(1, x0);
-    ASSERT_EQUAL_64(1, x1);
-    ASSERT_EQUAL_64(1, x2);
-    ASSERT_EQUAL_64(0, x3);
-    ASSERT_EQUAL_64(0xffffffff, x4);
-    ASSERT_EQUAL_64(0, x5);
-    ASSERT_EQUAL_64(0x7fffff80, x6);
-    ASSERT_EQUAL_64(0, x7);
-    ASSERT_EQUAL_64(1, x8);
-    ASSERT_EQUAL_64(1, x9);
-    ASSERT_EQUAL_64(1, x10);
-    ASSERT_EQUAL_64(0, x11);
-    ASSERT_EQUAL_64(0xffffffff, x12);
-    ASSERT_EQUAL_64(0, x13);
-    ASSERT_EQUAL_64(0x7ffffffe, x14);
-    ASSERT_EQUAL_64(1, x17);
-    ASSERT_EQUAL_64(1, x18);
-    ASSERT_EQUAL_64(0, x19);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x20);
-    ASSERT_EQUAL_64(0, x21);
-    ASSERT_EQUAL_64(0x7fffff8000000000, x22);
-    ASSERT_EQUAL_64(0, x23);
-    ASSERT_EQUAL_64(1, x24);
-    ASSERT_EQUAL_64(1, x25);
-    ASSERT_EQUAL_64(0, x26);
-    ASSERT_EQUAL_64(0xffffffffffffffff, x27);
-    ASSERT_EQUAL_64(0, x28);
-    ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
-    ASSERT_EQUAL_64(0, x30);
-  }
-}
-
 // Test that scvtf and ucvtf can convert the 64-bit input into the expected
 // value. All possible values of 'fbits' are tested. The expected value is
 // modified accordingly in each case.
@@ -4692,7 +4150,7 @@
 
 TEST(scvtf_ucvtf_double) {
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
   TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
@@ -4847,7 +4305,7 @@
 
 TEST(scvtf_ucvtf_float) {
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
   TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
diff --git a/test/aarch64/test-assembler-neon-aarch64.cc b/test/aarch64/test-assembler-neon-aarch64.cc
index b1cf516..2155db4 100644
--- a/test/aarch64/test-assembler-neon-aarch64.cc
+++ b/test/aarch64/test-assembler-neon-aarch64.cc
@@ -24,22 +24,21 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <sys/mman.h>
-
 #include <cfloat>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <sys/mman.h>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-assembler-aarch64.h"
 
 namespace vixl {
@@ -5977,6 +5976,44 @@
   }
 }
 
+TEST(neon_2regmisc_xtn_regression_test) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+  // Narrowing instructions where the destination register is also the source.
+  START();
+  // Both D lanes of v0-v7 hold 0x55.. (even registers) or 0xaa.. (odd ones).
+  __ Movi(v0.V2D(), 0x5555555555555555, 0x5555555555555555);
+  __ Movi(v1.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
+  __ Movi(v2.V2D(), 0x5555555555555555, 0x5555555555555555);
+  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
+  __ Movi(v4.V2D(), 0x5555555555555555, 0x5555555555555555);
+  __ Movi(v5.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
+  __ Movi(v6.V2D(), 0x5555555555555555, 0x5555555555555555);
+  __ Movi(v7.V2D(), 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa);
+
+  __ Xtn(v0.V2S(), v0.V2D());
+  __ Xtn2(v1.V4S(), v1.V2D());
+  __ Sqxtn(v2.V2S(), v2.V2D());
+  __ Sqxtn2(v3.V4S(), v3.V2D());
+  __ Uqxtn(v4.V2S(), v4.V2D());
+  __ Uqxtn2(v5.V4S(), v5.V2D());
+  __ Sqxtun(v6.V2S(), v6.V2D());
+  __ Sqxtun2(v7.V4S(), v7.V2D());
+
+  END();
+  // Non-"2" forms must zero the upper 64 bits; "2" forms must keep the lower.
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_128(0x0000000000000000, 0x5555555555555555, q0);
+    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, q1);
+    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff7fffffff, q2);
+    ASSERT_EQUAL_128(0x8000000080000000, 0xaaaaaaaaaaaaaaaa, q3);
+    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q4);
+    ASSERT_EQUAL_128(0xffffffffffffffff, 0xaaaaaaaaaaaaaaaa, q5);
+    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q6);
+    ASSERT_EQUAL_128(0x0000000000000000, 0xaaaaaaaaaaaaaaaa, q7);
+  }
+}
+
 TEST(neon_3same_and) {
   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
 
@@ -10938,8 +10975,26 @@
   }
 }
 
+TEST(neon_pmull_regression_test) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+  // Pmull/Pmull2 with one register used as the destination and both sources.
+  START();
+  __ Movi(v0.V2D(), 0xdecafc0ffee);
+  __ Pmull(v0.V8H(), v0.V8B(), v0.V8B());
+
+  __ Movi(v1.V2D(), 0xaaaaaaaa55555555);
+  __ Pmull2(v1.V8H(), v1.V16B(), v1.V16B());
+  END();
+  // Each byte is polynomial-squared, e.g. 0x55 -> 0x1111 and 0xaa -> 0x4444.
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_128(0x0000000000515450, 0x4455500055555454, q0);
+    ASSERT_EQUAL_128(0x4444444444444444, 0x1111111111111111, q1);
+  }
+}
+
 TEST(zero_high_b) {
-  SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
+  SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON);
   START();
 
   __ Mov(x0, 0x55aa42ffaa42ff55);
@@ -10959,7 +11014,7 @@
   __ Ror(x0, x0, 8);
 
   {
-    ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
+    ExactAssemblyScope scope(&masm, 75 * kInstructionSize);
     __ movi(q9.V16B(), 0x55);
     __ dci(0x5e010409);  // mov b9, v0.b[0]
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
@@ -10977,14 +11032,6 @@
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
 
     __ movi(q9.V16B(), 0x55);
-    __ dci(0x7e008429);  // sqrdmlah b9, b1, b0
-    __ orr(q30.V16B(), q30.V16B(), q9.V16B());
-
-    __ movi(q9.V16B(), 0x55);
-    __ dci(0x7e008c29);  // sqrdmlsh b9, b1, b0
-    __ orr(q30.V16B(), q30.V16B(), q9.V16B());
-
-    __ movi(q9.V16B(), 0x55);
     __ dci(0x5e205c29);  // sqrshl b9, b1, b0
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
 
@@ -11784,10 +11831,7 @@
 }
 
 TEST(zero_high_d) {
-  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
-                      CPUFeatures::kNEON,
-                      CPUFeatures::kFP,
-                      CPUFeatures::kRDM);
+  SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kFP);
   START();
 
   __ Mov(x0, 0x55aa42ffaa42ff55);
@@ -11807,7 +11851,7 @@
   __ Ror(x0, x0, 8);
 
   {
-    ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
+    ExactAssemblyScope scope(&masm, 285 * kInstructionSize);
     __ movi(q9.V16B(), 0x55);
     __ dci(0x5ee0b809);  // abs d9, d0
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
@@ -12077,14 +12121,6 @@
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
 
     __ movi(q9.V16B(), 0x55);
-    __ dci(0x7ec08429);  // sqrdmlah d9, d1, d0
-    __ orr(q30.V16B(), q30.V16B(), q9.V16B());
-
-    __ movi(q9.V16B(), 0x55);
-    __ dci(0x7ec08c29);  // sqrdmlsh d9, d1, d0
-    __ orr(q30.V16B(), q30.V16B(), q9.V16B());
-
-    __ movi(q9.V16B(), 0x55);
     __ dci(0x5ee05c29);  // sqrshl d9, d1, d0
     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
 
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 053d5c8..cc49d5b 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -24,24 +24,23 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <sys/mman.h>
-#include <unistd.h>
-
 #include <cfloat>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <functional>
+#include <sys/mman.h>
+#include <unistd.h>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-assembler-aarch64.h"
 
 #define TEST_SVE(name) TEST_SVE_INNER("ASM", name)
@@ -287,7 +286,7 @@
                           CPUFeatures::kSVE);
   START();
 
-  // The Simulator has two mechansisms for writing V registers:
+  // The Simulator has two mechanisms for writing V registers:
   //  - Write*Register, calling through to SimRegisterBase::Write.
   //  - LogicVRegister::ClearForWrite followed by one or more lane updates.
   // Try to cover both variants.
@@ -4483,7 +4482,7 @@
   SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
   START();
 
-  ZRegister src_a = z31.WithLaneSize(lane_size_in_bits);
+  ZRegister src_a = z30.WithLaneSize(lane_size_in_bits);
   ZRegister src_b = z27.WithLaneSize(lane_size_in_bits);
   InsrHelper(&masm, src_a, zn_inputs);
   InsrHelper(&masm, src_b, zm_inputs);
@@ -7244,7 +7243,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   __ Index(z14.VnB(), 1, -3);
@@ -7416,7 +7415,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   __ Index(z10.VnB(), -4, 11);
@@ -7589,7 +7588,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   // We can test ld3 by comparing the values loaded with the values stored.
@@ -7795,7 +7794,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   // We can test ld3 by comparing the values loaded with the values stored.
@@ -8009,7 +8008,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   // We can test ld4 by comparing the values loaded with the values stored.
@@ -8259,7 +8258,7 @@
   uint8_t* data = new uint8_t[data_size];
   memset(data, 0, data_size);
 
-  // Set the base half-way through the buffer so we can use negative indeces.
+  // Set the base half-way through the buffer so we can use negative indices.
   __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
 
   // We can test ld4 by comparing the values loaded with the values stored.
@@ -8705,7 +8704,7 @@
                                 uint64_t* addresses = nullptr,
                                 uint64_t* max_address = nullptr) {
   // Use a fixed seed for nrand48() so that test runs are reproducible.
-  unsigned short seed[3] = {1, 2, 3};  // NOLINT(runtime/int)
+  unsigned short seed[3] = {1, 2, 3};  // NOLINT(google-runtime-int)
 
   // Fill a buffer with arbitrary data.
   for (size_t i = 0; i < buffer_size; i++) {
@@ -15382,7 +15381,7 @@
   PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
   __ Ptrue(pg_all_active);
 
-  // Test floating-point conversions with all lanes actived.
+  // Test floating-point conversions with all lanes activated.
   (masm.*macro_m)(zd_all_active.WithLaneSize(dst_type_size_in_bits),
                   pg_all_active.Merging(),
                   zn.WithLaneSize(src_type_size_in_bits));
@@ -15936,7 +15935,7 @@
   PRegisterWithLaneSize pg_all_active = p0.WithLaneSize(lane_size_in_bits);
   __ Ptrue(pg_all_active);
 
-  // Test integer conversions with all lanes actived.
+  // Test integer conversions with all lanes activated.
   __ Scvtf(zd_scvtf_all_active.WithLaneSize(dst_type_size_in_bits),
            pg_all_active.Merging(),
            zn.WithLaneSize(src_type_size_in_bits));
@@ -16006,7 +16005,7 @@
   // clang-format off
   CvtfTestDataSet data_set_1[] = {
     // Simple conversions of positive numbers which require no rounding; the
-    // results should not depened on the rounding mode, and ucvtf and scvtf should
+    // results should not depend on the rounding mode, and ucvtf and scvtf should
     // produce the same result.
     {0x0000, 0x0000, 0x0000},
     {0x0001, 0x3c00, 0x3c00},
@@ -16062,7 +16061,7 @@
   int src_lane_size = kSRegSize;
 
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   CvtfTestDataSet data_set_1[] = {
     {0x00000000, 0x00000000, 0x00000000},
@@ -16118,7 +16117,7 @@
   int src_lane_size = kDRegSize;
 
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   CvtfTestDataSet data_set_1[] = {
     {0x0000000000000000, 0x00000000, 0x00000000},
@@ -16178,7 +16177,7 @@
   int src_lane_size = kDRegSize;
 
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   CvtfTestDataSet data_set_1[] = {
     {0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
@@ -16237,7 +16236,7 @@
   int src_lane_size = kSRegSize;
 
   // Simple conversions of positive numbers which require no rounding; the
-  // results should not depened on the rounding mode, and ucvtf and scvtf should
+  // results should not depend on the rounding mode, and ucvtf and scvtf should
   // produce the same result.
   CvtfTestDataSet data_set_1[] = {
     {0x00000000, 0x0000000000000000, 0x0000000000000000},
@@ -16274,9 +16273,7 @@
 }
 
 TEST_SVE(sve_fadda) {
-  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
-                          CPUFeatures::kFP,
-                          CPUFeatures::kFPHalf);
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kFP);
   START();
 
   __ Ptrue(p0.VnB());
@@ -18122,7 +18119,7 @@
                               macro_m,
                               macro_z);
 
-  // The complementary of above precicate to get full input coverage.
+  // The complement of the above predicate, to get full input coverage.
   uint64_t pg_c_inputs[] = {0x5aa55aa55aa55aa5,
                             0x5aa55aa55aa55aa5,
                             0x5aa55aa55aa55aa5,
@@ -19732,10 +19729,943 @@
   }
 }
 
+TEST_SVE(neon_ins_zero_high_regression_test) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kSVE);
+  // v0 is the reference: z1 and z2 must equal z0 once ins has written them.
+  START();
+  __ Movi(v0.V2D(), 0x0f0e0d0c0b0a0908, 0x0706050403020100);
+
+  // Check that both forms of ins zero bits <VL-1:128>.
+  __ Index(z1.VnB(), 0, 1);
+  __ Ins(v1.V16B(), 0, wzr);  // Source is a general-purpose register.
+  __ Index(z2.VnB(), 0, 1);
+  __ Ins(v2.V16B(), 3, v2.V16B(), 3);  // Source is a vector element.
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z0, z1);
+    ASSERT_EQUAL_SVE(z0, z2);
+  }
+}
+
+TEST_SVE(neon_fcvt_zero_high_regression_test) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kFP,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kSVE);
+
+  START();
+  __ Mov(z1.VnD(), 0);
+  __ Mov(z2.VnD(), 0);
+  __ Mov(z3.VnD(), 0);
+  __ Mov(z4.VnD(), 0);
+  __ Mov(z5.VnD(), 0);
+  __ Mov(z6.VnD(), 0);
+  __ Mov(z10.VnD(), 0);  // Stays zero; the expected value for z1-z6.
+
+  Label done;
+  // Skip calculations for VL128, leaving z1-z6 zero so the checks still pass.
+  __ Rdvl(x0, 1);
+  __ Cmp(x0, 16);
+  __ B(eq, &done);
+  // Set up the source (v0) and fill each destination with an index pattern.
+  __ Movi(v0.V2D(), 0x3ff000003f800000);
+  __ Index(z1.VnB(), 0, 1);
+  __ Index(z2.VnB(), 0, 1);
+  __ Index(z3.VnB(), 0, 1);
+  __ Index(z4.VnB(), 0, 1);
+  __ Index(z5.VnB(), 0, 1);
+  __ Index(z6.VnB(), 0, 1);
+
+  // Test zeroing bits <VL-1:128> for fcvtl, fcvtn and fcvtxn.
+  __ Fcvtl(v1.V2D(), v0.V2S());
+  __ Fcvtl2(v2.V2D(), v0.V4S());
+
+  __ Fcvtn(v3.V2S(), v0.V2D());
+  __ Fcvtn2(v4.V4S(), v0.V2D());
+
+  __ Fcvtxn(v5.V2S(), v0.V2D());
+  __ Fcvtxn2(v6.V4S(), v0.V2D());
+  // Set the expected non-zero bits to zero: Ext discards the low bytes that
+  // each instruction writes and fills the vacated top bytes from zeroed z10.
+  __ Ext(z1.VnB(), z1.VnB(), z10.VnB(), kDRegSizeInBytes * 2);
+  __ Ext(z2.VnB(), z2.VnB(), z10.VnB(), kDRegSizeInBytes * 2);
+  __ Ext(z3.VnB(), z3.VnB(), z10.VnB(), kSRegSizeInBytes * 2);
+  __ Ext(z4.VnB(), z4.VnB(), z10.VnB(), kSRegSizeInBytes * 4);
+  __ Ext(z5.VnB(), z5.VnB(), z10.VnB(), kSRegSizeInBytes * 2);
+  __ Ext(z6.VnB(), z6.VnB(), z10.VnB(), kSRegSizeInBytes * 4);
+
+  __ Bind(&done);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z10, z1);
+    ASSERT_EQUAL_SVE(z10, z2);
+    ASSERT_EQUAL_SVE(z10, z3);
+    ASSERT_EQUAL_SVE(z10, z4);
+    ASSERT_EQUAL_SVE(z10, z5);
+    ASSERT_EQUAL_SVE(z10, z6);
+  }
+}
+
+// Seeds z0 with an index pattern, emits INST (which is expected to write v0),
+// then ORs z0 into the cumulative result register z10.
+#define TEST_ZEROING(INST)  \
+  __ Index(z0.VnB(), 0, 1); \
+  __ INST;                  \
+  __ Orr(z10.VnB(), z10.VnB(), z0.VnB());
+
+TEST_SVE(neon_zero_high) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kFP,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kNEONHalf,
+                          CPUFeatures::kSVE,
+                          CPUFeatures::kFcma,
+                          CPUFeatures::kFHM,
+                          CPUFeatures::kFrintToFixedSizedInt,
+                          CPUFeatures::kDotProduct,
+                          CPUFeatures::kRDM,
+                          CPUFeatures::kI8MM);
+  START();
+  __ Mov(z10.VnD(), 0);  // Initialise cumulative result register.
+
+  // Every instruction below writes v0, so only z0 needs to be accumulated.
+  TEST_ZEROING(Abs(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Abs(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Add(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Add(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Addhn2(v0.V16B(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Addhn(v0.V4H(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Addp(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Addp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(And(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Bic(v0.V8H(), 0, 0));
+  TEST_ZEROING(Bic(v0.V2S(), 255, 0));
+  TEST_ZEROING(Bic(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Bif(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Bit(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Bsl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cls(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cls(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Clz(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Clz(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmeq(v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Cmeq(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Cmeq(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmeq(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmge(v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Cmge(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Cmge(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmge(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmgt(v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Cmgt(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Cmgt(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmgt(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmhi(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmhi(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmhs(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmhs(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cmle(v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Cmle(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Cmlt(v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Cmlt(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Cmtst(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Cmtst(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Cnt(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Dup(v0.V2S(), w0));
+  TEST_ZEROING(Dup(v0.V8B(), w0));
+  TEST_ZEROING(Dup(v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Dup(v0.V8B(), v0.B(), 0));
+  TEST_ZEROING(Eor(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Ext(v0.V16B(), v0.V16B(), v0.V16B(), 0));
+  TEST_ZEROING(Ext(v0.V8B(), v0.V8B(), v0.V8B(), 4));
+  TEST_ZEROING(Fabd(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fabd(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fabs(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fabs(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Facge(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Facge(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Facgt(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Facgt(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fadd(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fadd(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Faddp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Faddp(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcadd(v0.V2S(), v0.V2S(), v0.V2S(), 90));
+  TEST_ZEROING(Fcadd(v0.V8H(), v0.V8H(), v0.V8H(), 90));
+  TEST_ZEROING(Fcmeq(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Fcmeq(v0.V8H(), v0.V8H(), 0));
+  TEST_ZEROING(Fcmeq(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcmeq(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcmge(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Fcmge(v0.V8H(), v0.V8H(), 0));
+  TEST_ZEROING(Fcmge(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcmge(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcmgt(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Fcmgt(v0.V8H(), v0.V8H(), 0));
+  TEST_ZEROING(Fcmgt(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcmgt(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcmla(v0.V4H(), v0.V4H(), v0.H(), 0, 0));
+  TEST_ZEROING(Fcmla(v0.V4S(), v0.V4S(), v0.S(), 0, 0));
+  TEST_ZEROING(Fcmla(v0.V4S(), v0.V4S(), v0.V4S(), 0));
+  TEST_ZEROING(Fcmla(v0.V4H(), v0.V4H(), v0.V4H(), 0));
+  TEST_ZEROING(Fcmle(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Fcmle(v0.V8H(), v0.V8H(), 0));
+  TEST_ZEROING(Fcmlt(v0.V2S(), v0.V2S(), 0));
+  TEST_ZEROING(Fcmlt(v0.V8H(), v0.V8H(), 0));
+  TEST_ZEROING(Fcvtas(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtas(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtau(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtau(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtl2(v0.V4S(), v0.V8H()));
+  TEST_ZEROING(Fcvtl(v0.V2D(), v0.V2S()));
+  TEST_ZEROING(Fcvtms(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtms(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtmu(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtmu(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtn2(v0.V8H(), v0.V4S()));
+  TEST_ZEROING(Fcvtn(v0.V2S(), v0.V2D()));
+  TEST_ZEROING(Fcvtns(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtns(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtnu(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtnu(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtps(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtps(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtpu(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtpu(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtxn(v0.V2S(), v0.V2D()));
+  TEST_ZEROING(Fcvtxn2(v0.V4S(), v0.V2D()));
+  TEST_ZEROING(Fcvtzs(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtzs(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fcvtzs(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Fcvtzu(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fcvtzu(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fcvtzu(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Fdiv(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fdiv(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmax(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fmax(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmaxnm(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fmaxnm(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmaxnmp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fmaxnmp(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmaxp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fmaxp(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmin(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fmin(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fminnm(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fminnm(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fminnmp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fminnmp(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fminp(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Fminp(v0.V8H(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Fmla(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Fmla(v0.V4H(), v0.V4H(), v0.H(), 2));
+  TEST_ZEROING(Fmla(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fmla(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmlal2(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Fmlal2(v0.V2S(), v0.V2H(), v0.H(), 2));
+  TEST_ZEROING(Fmlal2(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmlal(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Fmlal(v0.V2S(), v0.V2H(), v0.H(), 2));
+  TEST_ZEROING(Fmlal(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmls(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Fmls(v0.V4H(), v0.V4H(), v0.H(), 2));
+  TEST_ZEROING(Fmls(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fmls(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmlsl2(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Fmlsl2(v0.V2S(), v0.V2H(), v0.H(), 2));
+  TEST_ZEROING(Fmlsl2(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmlsl(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Fmlsl(v0.V2S(), v0.V2H(), v0.H(), 2));
+  TEST_ZEROING(Fmlsl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmov(v0.V2D(), 2.0000));
+  TEST_ZEROING(Fmov(v0.V4H(), 2.0000));
+  TEST_ZEROING(Fmov(v0.D(), 1, x1));
+  TEST_ZEROING(Fmul(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Fmul(v0.V4H(), v0.V4H(), v0.H(), 2));
+  TEST_ZEROING(Fmul(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fmul(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fmulx(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Fmulx(v0.V4H(), v0.V4H(), v0.H(), 2));
+  TEST_ZEROING(Fmulx(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fmulx(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fneg(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fneg(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frecpe(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frecpe(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frecps(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frecps(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frint32x(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frint32z(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frint64x(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frint64z(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frinta(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frinta(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frinti(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frinti(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frintm(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frintm(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frintn(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frintn(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frintp(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frintp(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frintx(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frintx(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frintz(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frintz(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frsqrte(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frsqrte(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Frsqrts(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Frsqrts(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fsqrt(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fsqrt(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Fsub(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Fsub(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Mov(v0.D(), 0, x0));
+  TEST_ZEROING(Mov(v0.S(), 0, w0));
+  TEST_ZEROING(Mov(v0.H(), 0, w0));
+  TEST_ZEROING(Mov(v0.B(), 0, w0));
+  TEST_ZEROING(Mov(v0.D(), 0, v0.D(), 0));
+  TEST_ZEROING(Mov(v0.S(), 0, v0.S(), 0));
+  TEST_ZEROING(Mov(v0.H(), 0, v0.H(), 0));
+  TEST_ZEROING(Mov(v0.B(), 0, v0.B(), 0));
+  TEST_ZEROING(Mla(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Mla(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Mla(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Mla(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Mls(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Mls(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Mls(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Mls(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Movi(v0.V2D(), 0xff));
+  TEST_ZEROING(Movi(v0.V2S(), 0xff));
+  TEST_ZEROING(Movi(v0.V4S(), 0x10, LSL, 8));
+  TEST_ZEROING(Movi(v0.V2S(), 0x10, LSL, 8));
+  TEST_ZEROING(Mul(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Mul(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Mul(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Mul(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Mvni(v0.V8H(), 0x10, LSL, 8));
+  TEST_ZEROING(Mvni(v0.V4H(), 0x10, LSL, 8));
+  TEST_ZEROING(Neg(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Neg(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Mvn(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Mvn(v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Orn(v0.V8B(), v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Orn(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Orr(v0.V8H(), 0x10, 8));
+  TEST_ZEROING(Orr(v0.V4H(), 0x10, 8));
+  TEST_ZEROING(Mov(v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Mov(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Pmul(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Pmull(v0.V8H(), v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Pmull2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Raddhn2(v0.V16B(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Raddhn(v0.V4H(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Rbit(v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Rbit(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Rsubhn2(v0.V16B(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Rsubhn(v0.V4H(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Saba(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Saba(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Saba(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sabal2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sabal(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sabd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sabd(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Sabd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sabdl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sabdl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sadalp(v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Saddl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Saddl(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Saddl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Saddw2(v0.V8H(), v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Saddw(v0.V4S(), v0.V4S(), v0.V4H()));
+  TEST_ZEROING(Scvtf(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Scvtf(v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Scvtf(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Sdot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
+  TEST_ZEROING(Sdot(v0.V2S(), v0.V8B(), v0.S4B(), 0));
+  TEST_ZEROING(Sdot(v0.V4S(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sdot(v0.V2S(), v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Shadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Shadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Shl(v0.V2D(), v0.V2D(), 56));
+  TEST_ZEROING(Shll2(v0.V8H(), v0.V16B(), 8));
+  TEST_ZEROING(Shll(v0.V2D(), v0.V2S(), 32));
+  TEST_ZEROING(Shsub(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Shsub(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sli(v0.V2D(), v0.V2D(), 56));
+  TEST_ZEROING(Sli(v0.V2S(), v0.V2S(), 16));
+  TEST_ZEROING(Smax(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smax(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Smaxp(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smaxp(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Smin(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smin(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sminp(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sminp(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Smlal2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smlal(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Smlal(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Smlsl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smlsl(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Smlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Smull2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Smull(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Smull(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Sqabs(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqabs(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqdmlal2(v0.V4S(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Sqdmlal(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Sqdmlal(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Sqdmlsl2(v0.V4S(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Sqdmlsl(v0.V2D(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Sqdmlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Sqdmulh(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Sqdmulh(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Sqdmulh(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sqdmulh(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqdmull2(v0.V2D(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sqdmull(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqdmull2(v0.V2D(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Sqdmull(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Sqneg(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqneg(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Sqrdmlah(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Sqrdmlah(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Sqrdmlah(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sqrdmlah(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqrdmlsh(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Sqrdmlsh(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Sqrdmlsh(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sqrdmlsh(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqrdmulh(v0.V4S(), v0.V4S(), v0.S(), 0));
+  TEST_ZEROING(Sqrdmulh(v0.V4H(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Sqrdmulh(v0.V4S(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sqrdmulh(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqrshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqrshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqshl(v0.V2D(), v0.V2D(), 56));
+  TEST_ZEROING(Sqshl(v0.V2S(), v0.V2S(), 16));
+  TEST_ZEROING(Sqshlu(v0.V2D(), v0.V2D(), 56));
+  TEST_ZEROING(Sqshlu(v0.V2S(), v0.V2S(), 16));
+  TEST_ZEROING(Sqsub(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sqsub(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sqxtn2(v0.V16B(), v0.V8H()));
+  TEST_ZEROING(Sqxtn(v0.V2S(), v0.V2D()));
+  TEST_ZEROING(Sqxtun2(v0.V16B(), v0.V8H()));
+  TEST_ZEROING(Sqxtun(v0.V2S(), v0.V2D()));
+  TEST_ZEROING(Srhadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Srhadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sri(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Sri(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Srshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Srshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Srshr(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Srshr(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Srsra(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Srsra(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Sshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Sshr(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Sshr(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Ssra(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Ssra(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Ssubl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Ssubl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Ssubw2(v0.V8H(), v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Ssubw(v0.V4S(), v0.V4S(), v0.V4H()));
+  TEST_ZEROING(Sub(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Sub(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Subhn2(v0.V16B(), v0.V8H(), v0.V8H()));
+  TEST_ZEROING(Subhn(v0.V4H(), v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Sudot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
+  TEST_ZEROING(Sudot(v0.V2S(), v0.V8B(), v0.S4B(), 2));
+  TEST_ZEROING(Suqadd(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Suqadd(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Tbl(v0.V8B(), {v0.V16B()}, v0.V8B()));
+  TEST_ZEROING(Tbl(v0.V16B(), {v0.V16B()}, v0.V16B()));
+  TEST_ZEROING(Tbx(v0.V8B(), {v0.V16B()}, v0.V8B()));
+  TEST_ZEROING(Tbx(v0.V16B(), {v0.V16B()}, v0.V16B()));
+  TEST_ZEROING(Trn1(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Trn1(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Trn2(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Trn2(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uaba(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uaba(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uabal2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uabal(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uabd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uabd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uabdl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uabdl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uadalp(v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Uadalp(v0.V2S(), v0.V4H()));
+  TEST_ZEROING(Uaddl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uaddl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uaddlp(v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Uaddlp(v0.V2S(), v0.V4H()));
+  TEST_ZEROING(Uaddw2(v0.V8H(), v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Uaddw(v0.V4S(), v0.V4S(), v0.V4H()));
+  TEST_ZEROING(Ucvtf(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Ucvtf(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Ucvtf(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Ucvtf(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Udot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
+  TEST_ZEROING(Udot(v0.V2S(), v0.V8B(), v0.S4B(), 0));
+  TEST_ZEROING(Udot(v0.V2S(), v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Udot(v0.V4S(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uhadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uhadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uhsub(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uhsub(v0.V2S(), v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Umax(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umax(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umaxp(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umaxp(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umin(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umin(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uminp(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uminp(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umlal2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umlal(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umlal(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Umlal(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Umlsl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umlsl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umlsl(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Umlsl(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Umull2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Umull(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Umull(v0.V2D(), v0.V2S(), v0.S(), 0));
+  TEST_ZEROING(Umull(v0.V4S(), v0.V4H(), v0.H(), 0));
+  TEST_ZEROING(Uqadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uqadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uqrshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uqrshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uqshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uqshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uqsub(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uqsub(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uqxtn2(v0.V16B(), v0.V8H()));
+  TEST_ZEROING(Uqxtn(v0.V2S(), v0.V2D()));
+  TEST_ZEROING(Urecpe(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Urecpe(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Urhadd(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Urhadd(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Urshl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Urshl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Urshr(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Urshr(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Ursqrte(v0.V4S(), v0.V4S()));
+  TEST_ZEROING(Ursqrte(v0.V2S(), v0.V2S()));
+  TEST_ZEROING(Ursra(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Ursra(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Usdot(v0.V4S(), v0.V16B(), v0.S4B(), 0));
+  TEST_ZEROING(Usdot(v0.V2S(), v0.V8B(), v0.S4B(), 1));
+  TEST_ZEROING(Usdot(v0.V4S(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Usdot(v0.V2S(), v0.V8B(), v0.V8B()));
+  TEST_ZEROING(Ushl(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Ushl(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Ushr(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Ushr(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Usqadd(v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Usqadd(v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Usra(v0.V2D(), v0.V2D(), 8));
+  TEST_ZEROING(Usra(v0.V2S(), v0.V2S(), 8));
+  TEST_ZEROING(Usubl2(v0.V8H(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Usubl(v0.V4S(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Usubw2(v0.V8H(), v0.V8H(), v0.V16B()));
+  TEST_ZEROING(Usubw(v0.V4S(), v0.V4S(), v0.V4H()));
+  TEST_ZEROING(Uzp1(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uzp1(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Uzp2(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Uzp2(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Xtn2(v0.V16B(), v0.V8H()));
+  TEST_ZEROING(Xtn(v0.V4H(), v0.V4S()));
+  TEST_ZEROING(Zip1(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Zip1(v0.V4H(), v0.V4H(), v0.V4H()));
+  TEST_ZEROING(Zip2(v0.V16B(), v0.V16B(), v0.V16B()));
+  TEST_ZEROING(Zip2(v0.V4H(), v0.V4H(), v0.V4H()));
+
+  __ Mov(z11.VnD(), 0);  // Expected result: all zero.
+  Label done, zero_127_to_0;
+  __ Rdvl(x0, 1);
+  __ Cmp(x0, 16);
+  __ B(gt, &zero_127_to_0);
+
+  // For 128-bit VL, there's nothing to be tested, so zero the whole register.
+  __ Mov(z10.VnD(), 0);
+  __ B(&done);
+
+  // Set the expected non-zero bits (the low 128 bits) to zero.
+  __ Bind(&zero_127_to_0);
+  __ Ext(z10.VnB(), z10.VnB(), z11.VnB(), kDRegSizeInBytes * 2);
+
+  __ Bind(&done);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z11, z10);
+  }
+}
+
+#undef TEST_ZEROING
+
+// Helpers for neon_load_zero_high: seed the first N destination registers with
+// an index pattern, emit INST, then OR those registers into z10.
+#define TEST_ZEROING_1(INST) \
+  __ Index(z0.VnB(), 0, 1);  \
+  __ INST;                   \
+  __ Orr(z10.VnB(), z10.VnB(), z0.VnB());
+#define TEST_ZEROING_2(INST)              \
+  __ Index(z0.VnB(), 0, 1);               \
+  __ Index(z1.VnB(), 0, 1);               \
+  __ INST;                                \
+  __ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z1.VnB());
+#define TEST_ZEROING_3(INST)              \
+  __ Index(z0.VnB(), 0, 1);               \
+  __ Index(z1.VnB(), 0, 1);               \
+  __ Index(z2.VnB(), 0, 1);               \
+  __ INST;                                \
+  __ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z1.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z2.VnB());
+#define TEST_ZEROING_4(INST)              \
+  __ Index(z0.VnB(), 0, 1);               \
+  __ Index(z1.VnB(), 0, 1);               \
+  __ Index(z2.VnB(), 0, 1);               \
+  __ Index(z3.VnB(), 0, 1);               \
+  __ INST;                                \
+  __ Orr(z10.VnB(), z10.VnB(), z0.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z1.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z2.VnB()); \
+  __ Orr(z10.VnB(), z10.VnB(), z3.VnB());
+
+TEST_SVE(neon_load_zero_high) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kSVE);
+  START();
+  __ Mov(z10.VnD(), 0);  // Initialise cumulative result register.
+
+  // Initialise x0 to point to the start of a buffer from which data is loaded.
+  // The contents do not need to be defined.
+  int data_size = 4 * kQRegSizeInBytes;
+  uint8_t* data = new uint8_t[data_size];
+  __ Mov(x0, reinterpret_cast<uintptr_t>(data));
+
+  MemOperand mop = MemOperand(x0);
+  TEST_ZEROING_1(Ld1(v0.V16B(), mop));
+  TEST_ZEROING_1(Ld1(v0.V4H(), mop));
+  TEST_ZEROING_2(Ld1(v0.V16B(), v1.V16B(), mop));
+  TEST_ZEROING_2(Ld1(v0.V4H(), v1.V4H(), mop));
+  TEST_ZEROING_3(Ld1(v0.V16B(), v1.V16B(), v2.V16B(), mop));
+  TEST_ZEROING_3(Ld1(v0.V4H(), v1.V4H(), v2.V4H(), mop));
+  TEST_ZEROING_4(Ld1(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
+  TEST_ZEROING_4(Ld1(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));
+  TEST_ZEROING_1(Ld1(v0.B(), 1, mop));
+  TEST_ZEROING_1(Ld1(v0.D(), 1, mop));
+  TEST_ZEROING_1(Ld1(v0.H(), 1, mop));
+  TEST_ZEROING_1(Ld1(v0.S(), 1, mop));
+  TEST_ZEROING_1(Ld1r(v0.V16B(), mop));
+  TEST_ZEROING_1(Ld1r(v0.V4H(), mop));
+  TEST_ZEROING_2(Ld2(v0.V16B(), v1.V16B(), mop));
+  TEST_ZEROING_2(Ld2(v0.V4H(), v1.V4H(), mop));
+  TEST_ZEROING_2(Ld2(v0.B(), v1.B(), 1, mop));
+  TEST_ZEROING_2(Ld2(v0.D(), v1.D(), 1, mop));
+  TEST_ZEROING_2(Ld2(v0.H(), v1.H(), 1, mop));
+  TEST_ZEROING_2(Ld2(v0.S(), v1.S(), 1, mop));
+  TEST_ZEROING_2(Ld2r(v0.V16B(), v1.V16B(), mop));
+  TEST_ZEROING_2(Ld2r(v0.V4H(), v1.V4H(), mop));
+  TEST_ZEROING_3(Ld3(v0.V16B(), v1.V16B(), v2.V16B(), mop));
+  TEST_ZEROING_3(Ld3(v0.V4H(), v1.V4H(), v2.V4H(), mop));
+  TEST_ZEROING_3(Ld3(v0.B(), v1.B(), v2.B(), 1, mop));
+  TEST_ZEROING_3(Ld3(v0.D(), v1.D(), v2.D(), 1, mop));
+  TEST_ZEROING_3(Ld3(v0.H(), v1.H(), v2.H(), 1, mop));
+  TEST_ZEROING_3(Ld3(v0.S(), v1.S(), v2.S(), 1, mop));
+  TEST_ZEROING_3(Ld3r(v0.V16B(), v1.V16B(), v2.V16B(), mop));
+  TEST_ZEROING_3(Ld3r(v0.V4H(), v1.V4H(), v2.V4H(), mop));
+  TEST_ZEROING_4(Ld4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
+  TEST_ZEROING_4(Ld4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));
+  TEST_ZEROING_4(Ld4(v0.B(), v1.B(), v2.B(), v3.B(), 1, mop));
+  TEST_ZEROING_4(Ld4(v0.D(), v1.D(), v2.D(), v3.D(), 1, mop));
+  TEST_ZEROING_4(Ld4(v0.H(), v1.H(), v2.H(), v3.H(), 1, mop));
+  TEST_ZEROING_4(Ld4(v0.S(), v1.S(), v2.S(), v3.S(), 1, mop));
+  TEST_ZEROING_4(Ld4r(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), mop));
+  TEST_ZEROING_4(Ld4r(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), mop));
+
+  __ Mov(z11.VnD(), 0);  // Expected result: all zero.
+  Label done, zero_127_to_0;
+  __ Rdvl(x0, 1);
+  __ Cmp(x0, 16);
+  __ B(gt, &zero_127_to_0);
+
+  // For 128-bit VL, there's nothing to be tested, so zero the whole register.
+  __ Mov(z10.VnD(), 0);
+  __ B(&done);
+
+  // Set the expected non-zero bits (the low 128 bits) to zero.
+  __ Bind(&zero_127_to_0);
+  __ Ext(z10.VnB(), z10.VnB(), z11.VnB(), kDRegSizeInBytes * 2);
+  __ Bind(&done);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z11, z10);
+  }
+  // The generated code reads the buffer during RUN(), so free it only now.
+  delete[] data;
+}
+
+#undef TEST_ZEROING_1
+#undef TEST_ZEROING_2
+#undef TEST_ZEROING_3
+#undef TEST_ZEROING_4
+
+TEST_SVE(sve_load_store_sp_base_regression_test) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
+  START();
+
+  __ Mov(x0, 0);
+  __ Mov(z0.VnB(), 0);
+  __ Ptrue(p0.VnB());
+
+  Label loop;
+  __ Mov(x1, 128);
+  __ Bind(&loop);
+  __ Push(xzr, xzr);
+  __ Sub(x1, x1, 1);
+  __ Cbnz(x1, &loop);
+
+  {
+    ExactAssemblyScope scope(&masm, 193 * kInstructionSize);
+
+    __ dci(0xa420a3e0);  // ld1b {z0.h}, p0/z, [sp]
+    __ dci(0xa440a3e0);  // ld1b {z0.s}, p0/z, [sp]
+    __ dci(0xa460a3e0);  // ld1b {z0.d}, p0/z, [sp]
+    __ dci(0xa400a3e0);  // ld1b {z0.b}, p0/z, [sp]
+    __ dci(0xa42043e0);  // ld1b {z0.h}, p0/z, [sp, x0]
+    __ dci(0xa44043e0);  // ld1b {z0.s}, p0/z, [sp, x0]
+    __ dci(0xa46043e0);  // ld1b {z0.d}, p0/z, [sp, x0]
+    __ dci(0xa40043e0);  // ld1b {z0.b}, p0/z, [sp, x0]
+    __ dci(0xc440c3e0);  // ld1b {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa5e0a3e0);  // ld1d {z0.d}, p0/z, [sp]
+    __ dci(0xa5e043e0);  // ld1d {z0.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xc5e0c3e0);  // ld1d {z0.d}, p0/z, [sp, z0.d, lsl #3]
+    __ dci(0xc5c0c3e0);  // ld1d {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa4a0a3e0);  // ld1h {z0.h}, p0/z, [sp]
+    __ dci(0xa4c0a3e0);  // ld1h {z0.s}, p0/z, [sp]
+    __ dci(0xa4e0a3e0);  // ld1h {z0.d}, p0/z, [sp]
+    __ dci(0xa4a043e0);  // ld1h {z0.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa4c043e0);  // ld1h {z0.s}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa4e043e0);  // ld1h {z0.d}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xc4e0c3e0);  // ld1h {z0.d}, p0/z, [sp, z0.d, lsl #1]
+    __ dci(0xc4c0c3e0);  // ld1h {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0x8440a3e0);  // ld1rb {z0.h}, p0/z, [sp]
+    __ dci(0x8440c3e0);  // ld1rb {z0.s}, p0/z, [sp]
+    __ dci(0x8440e3e0);  // ld1rb {z0.d}, p0/z, [sp]
+    __ dci(0x844083e0);  // ld1rb {z0.b}, p0/z, [sp]
+    __ dci(0x85c0e3e0);  // ld1rd {z0.d}, p0/z, [sp]
+    __ dci(0x84c0a3e0);  // ld1rh {z0.h}, p0/z, [sp]
+    __ dci(0x84c0c3e0);  // ld1rh {z0.s}, p0/z, [sp]
+    __ dci(0x84c0e3e0);  // ld1rh {z0.d}, p0/z, [sp]
+    __ dci(0xa40023e0);  // ld1rqb {z0.b}, p0/z, [sp]
+    __ dci(0xa40003e0);  // ld1rqb {z0.b}, p0/z, [sp, x0]
+    __ dci(0xa58023e0);  // ld1rqd {z0.d}, p0/z, [sp]
+    __ dci(0xa58003e0);  // ld1rqd {z0.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xa48023e0);  // ld1rqh {z0.h}, p0/z, [sp]
+    __ dci(0xa48003e0);  // ld1rqh {z0.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa50023e0);  // ld1rqw {z0.s}, p0/z, [sp]
+    __ dci(0xa50003e0);  // ld1rqw {z0.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0x85c0c3e0);  // ld1rsb {z0.h}, p0/z, [sp]
+    __ dci(0x85c0a3e0);  // ld1rsb {z0.s}, p0/z, [sp]
+    __ dci(0x85c083e0);  // ld1rsb {z0.d}, p0/z, [sp]
+    __ dci(0x8540a3e0);  // ld1rsh {z0.s}, p0/z, [sp]
+    __ dci(0x854083e0);  // ld1rsh {z0.d}, p0/z, [sp]
+    __ dci(0x84c083e0);  // ld1rsw {z0.d}, p0/z, [sp]
+    __ dci(0x8540c3e0);  // ld1rw {z0.s}, p0/z, [sp]
+    __ dci(0x8540e3e0);  // ld1rw {z0.d}, p0/z, [sp]
+    __ dci(0xa5c0a3e0);  // ld1sb {z0.h}, p0/z, [sp]
+    __ dci(0xa5a0a3e0);  // ld1sb {z0.s}, p0/z, [sp]
+    __ dci(0xa580a3e0);  // ld1sb {z0.d}, p0/z, [sp]
+    __ dci(0xa5c043e0);  // ld1sb {z0.h}, p0/z, [sp, x0]
+    __ dci(0xa5a043e0);  // ld1sb {z0.s}, p0/z, [sp, x0]
+    __ dci(0xa58043e0);  // ld1sb {z0.d}, p0/z, [sp, x0]
+    __ dci(0xc44083e0);  // ld1sb {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa520a3e0);  // ld1sh {z0.s}, p0/z, [sp]
+    __ dci(0xa500a3e0);  // ld1sh {z0.d}, p0/z, [sp]
+    __ dci(0xa52043e0);  // ld1sh {z0.s}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa50043e0);  // ld1sh {z0.d}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xc4e083e0);  // ld1sh {z0.d}, p0/z, [sp, z0.d, lsl #1]
+    __ dci(0xc4c083e0);  // ld1sh {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa480a3e0);  // ld1sw {z0.d}, p0/z, [sp]
+    __ dci(0xa48043e0);  // ld1sw {z0.d}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xc56083e0);  // ld1sw {z0.d}, p0/z, [sp, z0.d, lsl #2]
+    __ dci(0xc54083e0);  // ld1sw {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa540a3e0);  // ld1w {z0.s}, p0/z, [sp]
+    __ dci(0xa560a3e0);  // ld1w {z0.d}, p0/z, [sp]
+    __ dci(0xa54043e0);  // ld1w {z0.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xa56043e0);  // ld1w {z0.d}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xc560c3e0);  // ld1w {z0.d}, p0/z, [sp, z0.d, lsl #2]
+    __ dci(0xc540c3e0);  // ld1w {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa420e3e0);  // ld2b {z0.b, z1.b}, p0/z, [sp]
+    __ dci(0xa420c3e0);  // ld2b {z0.b, z1.b}, p0/z, [sp, x0]
+    __ dci(0xa5a0e3e0);  // ld2d {z0.d, z1.d}, p0/z, [sp]
+    __ dci(0xa5a0c3e0);  // ld2d {z0.d, z1.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xa4a0e3e0);  // ld2h {z0.h, z1.h}, p0/z, [sp]
+    __ dci(0xa4a0c3e0);  // ld2h {z0.h, z1.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa520e3e0);  // ld2w {z0.s, z1.s}, p0/z, [sp]
+    __ dci(0xa520c3e0);  // ld2w {z0.s, z1.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xa440e3e0);  // ld3b {z0.b, z1.b, z2.b}, p0/z, [sp]
+    __ dci(0xa440c3e0);  // ld3b {z0.b, z1.b, z2.b}, p0/z, [sp, x0]
+    __ dci(0xa5c0e3e0);  // ld3d {z0.d, z1.d, z2.d}, p0/z, [sp]
+    __ dci(0xa5c0c3e0);  // ld3d {z0.d, z1.d, z2.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xa4c0e3e0);  // ld3h {z0.h, z1.h, z2.h}, p0/z, [sp]
+    __ dci(0xa4c0c3e0);  // ld3h {z0.h, z1.h, z2.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa540e3e0);  // ld3w {z0.s, z1.s, z2.s}, p0/z, [sp]
+    __ dci(0xa540c3e0);  // ld3w {z0.s, z1.s, z2.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xa460e3e0);  // ld4b {z0.b, z1.b, z2.b, z3.b}, p0/z, [sp]
+    __ dci(0xa460c3e0);  // ld4b {z0.b, z1.b, z2.b, z3.b}, p0/z, [sp, x0]
+    __ dci(0xa5e0e3e0);  // ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [sp]
+    __ dci(
+        0xa5e0c3e0);  // ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xa4e0e3e0);  // ld4h {z0.h, z1.h, z2.h, z3.h}, p0/z, [sp]
+    __ dci(
+        0xa4e0c3e0);  // ld4h {z0.h, z1.h, z2.h, z3.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa560e3e0);  // ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [sp]
+    __ dci(
+        0xa560c3e0);  // ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xa42063e0);  // ldff1b {z0.h}, p0/z, [sp, x0]
+    __ dci(0xa44063e0);  // ldff1b {z0.s}, p0/z, [sp, x0]
+    __ dci(0xa46063e0);  // ldff1b {z0.d}, p0/z, [sp, x0]
+    __ dci(0xa40063e0);  // ldff1b {z0.b}, p0/z, [sp, x0]
+    __ dci(0xc440e3e0);  // ldff1b {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa5e063e0);  // ldff1d {z0.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xc5e0e3e0);  // ldff1d {z0.d}, p0/z, [sp, z0.d, lsl #3]
+    __ dci(0xc5c0e3e0);  // ldff1d {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa4a063e0);  // ldff1h {z0.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa4c063e0);  // ldff1h {z0.s}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa4e063e0);  // ldff1h {z0.d}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xc4e0e3e0);  // ldff1h {z0.d}, p0/z, [sp, z0.d, lsl #1]
+    __ dci(0xc4c0e3e0);  // ldff1h {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa5c063e0);  // ldff1sb {z0.h}, p0/z, [sp, x0]
+    __ dci(0xa5a063e0);  // ldff1sb {z0.s}, p0/z, [sp, x0]
+    __ dci(0xa58063e0);  // ldff1sb {z0.d}, p0/z, [sp, x0]
+    __ dci(0xc440a3e0);  // ldff1sb {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa52063e0);  // ldff1sh {z0.s}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa50063e0);  // ldff1sh {z0.d}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xc4e0a3e0);  // ldff1sh {z0.d}, p0/z, [sp, z0.d, lsl #1]
+    __ dci(0xc4c0a3e0);  // ldff1sh {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa48063e0);  // ldff1sw {z0.d}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xc560a3e0);  // ldff1sw {z0.d}, p0/z, [sp, z0.d, lsl #2]
+    __ dci(0xc540a3e0);  // ldff1sw {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa54063e0);  // ldff1w {z0.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xa56063e0);  // ldff1w {z0.d}, p0/z, [sp, x0, lsl #2]
+    __ dci(0xc560e3e0);  // ldff1w {z0.d}, p0/z, [sp, z0.d, lsl #2]
+    __ dci(0xc540e3e0);  // ldff1w {z0.d}, p0/z, [sp, z0.d]
+    __ dci(0xa430a3e0);  // ldnf1b {z0.h}, p0/z, [sp]
+    __ dci(0xa450a3e0);  // ldnf1b {z0.s}, p0/z, [sp]
+    __ dci(0xa470a3e0);  // ldnf1b {z0.d}, p0/z, [sp]
+    __ dci(0xa410a3e0);  // ldnf1b {z0.b}, p0/z, [sp]
+    __ dci(0xa5f0a3e0);  // ldnf1d {z0.d}, p0/z, [sp]
+    __ dci(0xa4b0a3e0);  // ldnf1h {z0.h}, p0/z, [sp]
+    __ dci(0xa4d0a3e0);  // ldnf1h {z0.s}, p0/z, [sp]
+    __ dci(0xa4f0a3e0);  // ldnf1h {z0.d}, p0/z, [sp]
+    __ dci(0xa5d0a3e0);  // ldnf1sb {z0.h}, p0/z, [sp]
+    __ dci(0xa5b0a3e0);  // ldnf1sb {z0.s}, p0/z, [sp]
+    __ dci(0xa590a3e0);  // ldnf1sb {z0.d}, p0/z, [sp]
+    __ dci(0xa530a3e0);  // ldnf1sh {z0.s}, p0/z, [sp]
+    __ dci(0xa510a3e0);  // ldnf1sh {z0.d}, p0/z, [sp]
+    __ dci(0xa490a3e0);  // ldnf1sw {z0.d}, p0/z, [sp]
+    __ dci(0xa550a3e0);  // ldnf1w {z0.s}, p0/z, [sp]
+    __ dci(0xa570a3e0);  // ldnf1w {z0.d}, p0/z, [sp]
+    __ dci(0xa400e3e0);  // ldnt1b {z0.b}, p0/z, [sp]
+    __ dci(0xa400c3e0);  // ldnt1b {z0.b}, p0/z, [sp, x0]
+    __ dci(0xa580e3e0);  // ldnt1d {z0.d}, p0/z, [sp]
+    __ dci(0xa580c3e0);  // ldnt1d {z0.d}, p0/z, [sp, x0, lsl #3]
+    __ dci(0xa480e3e0);  // ldnt1h {z0.h}, p0/z, [sp]
+    __ dci(0xa480c3e0);  // ldnt1h {z0.h}, p0/z, [sp, x0, lsl #1]
+    __ dci(0xa500e3e0);  // ldnt1w {z0.s}, p0/z, [sp]
+    __ dci(0xa500c3e0);  // ldnt1w {z0.s}, p0/z, [sp, x0, lsl #2]
+    __ dci(0x858043e0);  // ldr z0, [sp]
+    __ dci(0xe400e3e0);  // st1b {z0.b}, p0, [sp]
+    __ dci(0xe40043e0);  // st1b {z0.b}, p0, [sp, x0]
+    __ dci(0xe400a3e0);  // st1b {z0.d}, p0, [sp, z0.d]
+    __ dci(0xe5e0e3e0);  // st1d {z0.d}, p0, [sp]
+    __ dci(0xe5e043e0);  // st1d {z0.d}, p0, [sp, x0, lsl #3]
+    __ dci(0xe5a0a3e0);  // st1d {z0.d}, p0, [sp, z0.d, lsl #3]
+    __ dci(0xe580a3e0);  // st1d {z0.d}, p0, [sp, z0.d]
+    __ dci(0xe4e0e3e0);  // st1h {z0.d}, p0, [sp]
+    __ dci(0xe4e043e0);  // st1h {z0.d}, p0, [sp, x0, lsl #1]
+    __ dci(0xe4a0a3e0);  // st1h {z0.d}, p0, [sp, z0.d, lsl #1]
+    __ dci(0xe480a3e0);  // st1h {z0.d}, p0, [sp, z0.d]
+    __ dci(0xe560e3e0);  // st1w {z0.d}, p0, [sp]
+    __ dci(0xe56043e0);  // st1w {z0.d}, p0, [sp, x0, lsl #2]
+    __ dci(0xe430e3e0);  // st2b {z0.b, z1.b}, p0, [sp]
+    __ dci(0xe42063e0);  // st2b {z0.b, z1.b}, p0, [sp, x0]
+    __ dci(0xe5b0e3e0);  // st2d {z0.d, z1.d}, p0, [sp]
+    __ dci(0xe5a063e0);  // st2d {z0.d, z1.d}, p0, [sp, x0, lsl #3]
+    __ dci(0xe4b0e3e0);  // st2h {z0.h, z1.h}, p0, [sp]
+    __ dci(0xe4a063e0);  // st2h {z0.h, z1.h}, p0, [sp, x0, lsl #1]
+    __ dci(0xe530e3e0);  // st2w {z0.s, z1.s}, p0, [sp]
+    __ dci(0xe52063e0);  // st2w {z0.s, z1.s}, p0, [sp, x0, lsl #2]
+    __ dci(0xe450e3e0);  // st3b {z0.b, z1.b, z2.b}, p0, [sp]
+    __ dci(0xe44063e0);  // st3b {z0.b, z1.b, z2.b}, p0, [sp, x0]
+    __ dci(0xe5d0e3e0);  // st3d {z0.d, z1.d, z2.d}, p0, [sp]
+    __ dci(0xe5c063e0);  // st3d {z0.d, z1.d, z2.d}, p0, [sp, x0, lsl #3]
+    __ dci(0xe4d0e3e0);  // st3h {z0.h, z1.h, z2.h}, p0, [sp]
+    __ dci(0xe4c063e0);  // st3h {z0.h, z1.h, z2.h}, p0, [sp, x0, lsl #1]
+    __ dci(0xe550e3e0);  // st3w {z0.s, z1.s, z2.s}, p0, [sp]
+    __ dci(0xe54063e0);  // st3w {z0.s, z1.s, z2.s}, p0, [sp, x0, lsl #2]
+    __ dci(0xe470e3e0);  // st4b {z0.b, z1.b, z2.b, z3.b}, p0, [sp]
+    __ dci(0xe46063e0);  // st4b {z0.b, z1.b, z2.b, z3.b}, p0, [sp, x0]
+    __ dci(0xe5f0e3e0);  // st4d {z0.d, z1.d, z2.d, z3.d}, p0, [sp]
+    __ dci(0xe5e063e0);  // st4d {z0.d, z1.d, z2.d, z3.d}, p0, [sp, x0, lsl #3]
+    __ dci(0xe4f0e3e0);  // st4h {z0.h, z1.h, z2.h, z3.h}, p0, [sp]
+    __ dci(0xe4e063e0);  // st4h {z0.h, z1.h, z2.h, z3.h}, p0, [sp, x0, lsl #1]
+    __ dci(0xe570e3e0);  // st4w {z0.s, z1.s, z2.s, z3.s}, p0, [sp]
+    __ dci(0xe56063e0);  // st4w {z0.s, z1.s, z2.s, z3.s}, p0, [sp, x0, lsl #2]
+    __ dci(0xe410e3e0);  // stnt1b {z0.b}, p0, [sp]
+    __ dci(0xe40063e0);  // stnt1b {z0.b}, p0, [sp, x0]
+    __ dci(0xe590e3e0);  // stnt1d {z0.d}, p0, [sp]
+    __ dci(0xe58063e0);  // stnt1d {z0.d}, p0, [sp, x0, lsl #3]
+    __ dci(0xe490e3e0);  // stnt1h {z0.h}, p0, [sp]
+    __ dci(0xe48063e0);  // stnt1h {z0.h}, p0, [sp, x0, lsl #1]
+    __ dci(0xe510e3e0);  // stnt1w {z0.s}, p0, [sp]
+    __ dci(0xe50063e0);  // stnt1w {z0.s}, p0, [sp, x0, lsl #2]
+    __ dci(0x858003e0);  // ldr p0, [sp]
+    __ dci(0xe58003e0);  // str p0, [sp]
+    __ dci(0xe58043e0);  // str z0, [sp]
+  }
+
+  __ Drop(128 * 2 * kXRegSizeInBytes);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    // No checks are made here. The test is designed to ensure that the base
+    // register is interpreted correctly as sp, not xzr. If it is interpreted
+    // as xzr, the memory access to addresses near zero will fault, and the
+    // test will fail.
+  }
+}
+
 // Manually constructed simulator test to avoid creating a VL128 variant.
 
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
-void Testsve_fmatmul(Test* config) {
+void Test_sve_fmatmul(Test* config) {
   SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM);
 
   // Only double-precision matrix multiply is tested here. Single-precision is
@@ -19845,13 +20775,13 @@
   }
 }
 Test* test_sve_fmatmul_list[] =
-    {Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Testsve_fmatmul),
-     Test::MakeSVETest(384, "AARCH64_ASM_sve_fmatmul_vl384", &Testsve_fmatmul),
+    {Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Test_sve_fmatmul),
+     Test::MakeSVETest(384, "AARCH64_ASM_sve_fmatmul_vl384", &Test_sve_fmatmul),
      Test::MakeSVETest(2048,
                        "AARCH64_ASM_sve_fmatmul_vl2048",
-                       &Testsve_fmatmul)};
+                       &Test_sve_fmatmul)};
 
-void Testsve_ld1ro(Test* config) {
+void Test_sve_ld1ro(Test* config) {
   SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM);
   START();
 
@@ -19975,9 +20905,9 @@
   }
 }
 Test* test_sve_ld1ro_list[] =
-    {Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Testsve_ld1ro),
-     Test::MakeSVETest(384, "AARCH64_ASM_sve_ld1ro_vl384", &Testsve_ld1ro),
-     Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Testsve_ld1ro)};
+    {Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Test_sve_ld1ro),
+     Test::MakeSVETest(384, "AARCH64_ASM_sve_ld1ro_vl384", &Test_sve_ld1ro),
+     Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Test_sve_ld1ro)};
 #endif
 
 }  // namespace aarch64
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index f23da93..c018f49 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -30,9 +30,9 @@
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 
 #define __ masm->
 #define TEST(name) TEST_(AARCH64_CPUFeatures_##name)
@@ -3778,5 +3778,91 @@
 TEST_FP_FCMA_NEON_NEONHALF(fcmla_2, fcmla(v0.V4H(), v1.V4H(), v2.V4H(), 180))
 TEST_FP_FCMA_NEON_NEONHALF(fcmla_3, fcmla(v0.V8H(), v1.V8H(), v2.V8H(), 0))
 
+#define TEST_FEAT(NAME, ASM)                                            \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kPmull1Q), \
+                NEON_Pmull1Q_##NAME,                                    \
+                ASM)
+TEST_FEAT(pmull1q_0, pmull(v5.V1Q(), v6.V1D(), v7.V1D()))
+#undef TEST_FEAT
+
+#define TEST_NEON_SHA3(NAME, ASM)                                    \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3), \
+                NEON_SHA3_##NAME,                                    \
+                ASM)
+TEST_NEON_SHA3(bcax_0, bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
+TEST_NEON_SHA3(eor3_0, eor3(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
+TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
+TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
+
+#define TEST_NEON_SHA1(NAME, ASM)                                    \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA1), \
+                NEON_SHA1_##NAME,                                    \
+                ASM)
+TEST_NEON_SHA1(sha1c_0, sha1c(q0, s12, v20.V4S()))
+TEST_NEON_SHA1(sha1m_0, sha1m(q22, s2, v13.V4S()))
+TEST_NEON_SHA1(sha1p_0, sha1p(q31, s5, v15.V4S()))
+TEST_NEON_SHA1(sha1su0_0, sha1su0(v19.V4S(), v9.V4S(), v27.V4S()))
+TEST_NEON_SHA1(sha1h_0, sha1h(s12, s0))
+TEST_NEON_SHA1(sha1su1_0, sha1su1(v2.V4S(), v4.V4S()))
+
+#define TEST_FEAT(NAME, ASM)                                         \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA2), \
+                NEON_SHA2_##NAME,                                    \
+                ASM)
+TEST_FEAT(sha256h_0, sha256h(q0, q12, v20.V4S()))
+TEST_FEAT(sha256h2_0, sha256h2(q22, q2, v13.V4S()))
+TEST_FEAT(sha256su0_0, sha256su0(v2.V4S(), v4.V4S()))
+TEST_FEAT(sha256su1_0, sha256su1(v19.V4S(), v9.V4S(), v27.V4S()))
+#undef TEST_FEAT
+
+#define TEST_FEAT(NAME, ASM)                                           \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA512), \
+                NEON_SHA512_##NAME,                                    \
+                ASM)
+TEST_FEAT(sha512h_0, sha512h(q0, q12, v20.V2D()))
+TEST_FEAT(sha512h2_0, sha512h2(q22, q2, v13.V2D()))
+TEST_FEAT(sha512su0_0, sha512su0(v2.V2D(), v4.V2D()))
+TEST_FEAT(sha512su1_0, sha512su1(v19.V2D(), v9.V2D(), v27.V2D()))
+#undef TEST_FEAT
+
+#define TEST_FEAT(NAME, ASM)                                        \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kAES), \
+                NEON_AES_##NAME,                                    \
+                ASM)
+TEST_FEAT(aesd_0, aesd(v0.V16B(), v29.V16B()))
+TEST_FEAT(aese_0, aese(v0.V16B(), v29.V16B()))
+TEST_FEAT(aesimc_0, aesimc(v0.V16B(), v29.V16B()))
+TEST_FEAT(aesmc_0, aesmc(v0.V16B(), v29.V16B()))
+#undef TEST_FEAT
+
+#define TEST_FEAT(NAME, ASM)                                        \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSM3), \
+                NEON_SM3_##NAME,                                    \
+                ASM)
+TEST_FEAT(sm3partw1_0, sm3partw1(v12.V4S(), v13.V4S(), v14.V4S()))
+TEST_FEAT(sm3partw2_0, sm3partw2(v12.V4S(), v13.V4S(), v14.V4S()))
+TEST_FEAT(sm3ss1_0, sm3ss1(v13.V4S(), v15.V4S(), v17.V4S(), v21.V4S()))
+TEST_FEAT(sm3tt1a_0, sm3tt1a(v30.V4S(), v29.V4S(), v9.V4S(), 1))
+TEST_FEAT(sm3tt1b_0, sm3tt1b(v30.V4S(), v29.V4S(), v9.V4S(), 3))
+TEST_FEAT(sm3tt2a_0, sm3tt2a(v30.V4S(), v29.V4S(), v9.V4S(), 2))
+TEST_FEAT(sm3tt2b_0, sm3tt2b(v30.V4S(), v29.V4S(), v9.V4S(), 0))
+#undef TEST_FEAT
+
+#define TEST_FEAT(NAME, ASM)                                        \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSM4), \
+                NEON_SM4_##NAME,                                    \
+                ASM)
+TEST_FEAT(sm4e, sm4e(v12.V4S(), v13.V4S()))
+TEST_FEAT(sm4ekey, sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()))
+#undef TEST_FEAT
+
+#define TEST_FEAT(NAME, ASM)                                                \
+  TEST_TEMPLATE(CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEPmull128), \
+                SVE_PMULL128_##NAME,                                        \
+                ASM)
+TEST_FEAT(pmullb, pmullb(z12.VnQ(), z21.VnD(), z12.VnD()))
+TEST_FEAT(pmullt, pmullt(z12.VnQ(), z21.VnD(), z12.VnD()))
+#undef TEST_FEAT
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-debugger-aarch64.cc b/test/aarch64/test-debugger-aarch64.cc
new file mode 100644
index 0000000..4fb5df0
--- /dev/null
+++ b/test/aarch64/test-debugger-aarch64.cc
@@ -0,0 +1,368 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "test-debugger-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+// The debugger is only available to be tested in simulator builds.
+
+TEST(breakpoints_invalid) {
+  SETUP();
+
+  // Test invalid strings instead of numbers.
+  SETUP_CMD("break a", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break abcdef", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break A", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break ABCDEF", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break 0x", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break 0xg", "Error: Use `break <address>` to set a breakpoint");
+
+  // Test different numbers of parameters.
+  SETUP_CMD("break", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break 42 52", "Error: Use `break <address>` to set a breakpoint");
+
+  // Test out of range addresses.
+  SETUP_CMD("break 0xFFFFFFFFFFFFFFFF1",
+            "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("break 18446744073709551616",
+            "Error: Use `break <address>` to set a breakpoint");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(breakpoints_valid) {
+  SETUP();
+
+  // Test bottom boundary of addresses.
+  SETUP_CMD("break 0x0", "Breakpoint successfully added at: 0x0");
+  SETUP_CMD("break 0", "Breakpoint successfully removed at: 0x0");
+  SETUP_CMD("break 0x1", "Breakpoint successfully added at: 0x1");
+  SETUP_CMD("break 1", "Breakpoint successfully removed at: 0x1");
+
+  // Test top boundary of addresses.
+  SETUP_CMD("break 0xFFFFFFFFFFFFFFFF",
+            "Breakpoint successfully added at: 0xffffffffffffffff");
+  SETUP_CMD("break 18446744073709551615",
+            "Breakpoint successfully removed at: 0xffffffffffffffff");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(breakpoints_hit) {
+  SETUP();
+
+  // Test hitting a breakpoint.
+  std::string mov_addr = GET_INSTRUCTION_ADDRESS("mov x2, #0x2");
+  std::string break_cmd = "break ";
+  break_cmd += mov_addr;
+  std::string expected_trace = "Breakpoint successfully added at: ";
+  expected_trace += mov_addr;
+  SETUP_CMD(break_cmd, expected_trace);
+  SETUP_CMD("continue",
+            "Continuing...\n"
+            "Debugger hit breakpoint, breaking...");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(cmd_aliases) {
+  SETUP();
+
+  // Test all short form commands, to ensure they correctly run their long form
+  // counterparts.
+  SETUP_CMD("b", "Error: Use `break <address>` to set a breakpoint");
+  SETUP_CMD("s x",
+            "Error: use `step \\[number\\]` to step an optional number of"
+            " instructions");
+  SETUP_CMD("p",
+            "Error: use `print <register|all>` to print the contents of a"
+            " specific register or all registers.");
+  SETUP_CMD("t 1", "Error: use `trace` to toggle tracing of registers.");
+  SETUP_CMD("g 1",
+            "Error: use `gdb` to enter GDB from the simulator debugger.");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("c", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_single) {
+  SETUP();
+
+  // Test single stepping through the whole program.
+  SETUP_CMD("step", ".*mov x2, #0x2");
+  SETUP_CMD("step", ".*sub x3, x1, x2");
+  SETUP_CMD("step", ".*ret");
+  SETUP_CMD("step",
+            ".*Debugger at the end of simulation, leaving simulator...");
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_single_and_continue) {
+  SETUP();
+
+  // Test single stepping and then continuing.
+  SETUP_CMD("step", ".*mov x2, #0x2");
+  SETUP_CMD("continue", "Continuing...");
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_multi_1) {
+  SETUP();
+
+  // Test multi stepping a single instruction.
+  SETUP_CMD("step 1", ".*mov x2, #0x2");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_multi_2) {
+  SETUP();
+
+  // Test multi stepping two instructions.
+  SETUP_CMD("step 2",
+            ".*mov x2, #0x2\n"
+            ".*sub x3, x1, x2");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_multi_3) {
+  SETUP();
+
+  // Test multi stepping three instructions.
+  SETUP_CMD("step 3",
+            ".*mov x2, #0x2\n"
+            ".*sub x3, x1, x2\n"
+            ".*ret");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_multi_4) {
+  SETUP();
+
+  // Test stepping through the whole program in one go.
+  SETUP_CMD("step 4",
+            ".*mov x2, #0x2\n"
+            ".*sub x3, x1, x2\n"
+            ".*ret\n"
+            "Debugger at the end of simulation, leaving simulator...");
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_multi_5) {
+  SETUP();
+
+  // Test multi stepping past the end of the program.
+  SETUP_CMD("step 5",
+            ".*mov x2, #0x2\n"
+            ".*sub x3, x1, x2\n"
+            ".*ret\n"
+            "Debugger at the end of simulation, leaving simulator...");
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(stepping_invalid) {
+  SETUP();
+
+  // Test invalid arguments to step command.
+  SETUP_CMD("step 1 2",
+            "Error: use `step \\[number\\]` to step an optional number of"
+            " instructions");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(print_invalid) {
+  SETUP();
+
+  // Test invalid numbers of arguments to the print command.
+  SETUP_CMD("print",
+            "Error: use `print <register|all>` to print the contents of a"
+            " specific register or all registers.");
+  SETUP_CMD("print all all",
+            "Error: use `print <register|all>` to print the contents of a"
+            " specific register or all registers.");
+
+  // Test invalid types of registers.
+  SETUP_CMD("print alls",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print a",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print x",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print 0",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+
+  // Test registers that don't exist on AArch64.
+  SETUP_CMD("print w32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print W32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print x32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print X32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print v32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+  SETUP_CMD("print V32",
+            "Error: incorrect register format, use e.g: X0, x0, etc...");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(trace_invalid) {
+  SETUP();
+
+  // Test invalid arguments to trace command.
+  SETUP_CMD("trace 1 2", "Error: use `trace` to toggle tracing of registers.");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(trace_toggling) {
+  SETUP();
+
+  // Test toggling tracing.
+  SETUP_CMD("trace",
+            "Enabling disassembly, registers and memory write tracing");
+  SETUP_CMD("trace",
+            "Disabling disassembly, registers and memory write tracing");
+  SETUP_CMD("trace",
+            "Enabling disassembly, registers and memory write tracing");
+  SETUP_CMD("trace",
+            "Disabling disassembly, registers and memory write tracing");
+
+  // Continue to exit the debugger.
+  SETUP_CMD("continue", "Continuing...");
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(trace_full) {
+  SETUP();
+
+  // Test tracing the whole program.
+  SETUP_CMD("trace",
+            "Enabling disassembly, registers and memory write tracing");
+
+  std::string expected_trace = "Continuing...\n";
+  expected_trace += ".*add x1, x0, #0x5 \\(5\\)\n";
+  expected_trace += "(" + x_register_trace + "\\n){32}";
+  expected_trace += "(" + v_register_trace + "\\n){32}";
+  expected_trace += ".*mov x2, #0x2\n";
+  expected_trace += x_register_trace + "\n";
+  expected_trace += ".*sub x3, x1, x2\n";
+  expected_trace += x_register_trace + "\n";
+  expected_trace += ".*ret\n";
+  expected_trace += "# Branch to 0x0000000000000000.";
+  SETUP_CMD("continue", expected_trace);
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+TEST(trace_partial) {
+  SETUP();
+
+  // Test tracing a single step, then continuing with tracing disabled.
+  SETUP_CMD("trace",
+            "Enabling disassembly, registers and memory write tracing");
+
+  std::string expected_trace = ".*add x1, x0, #0x5 \\(5\\)\n";
+  expected_trace += "(" + x_register_trace + "\\n){32}";
+  expected_trace += "(" + v_register_trace + "\\n){32}";
+  expected_trace += ".*mov x2, #0x2\n";
+  SETUP_CMD("step", expected_trace);
+  SETUP_CMD("trace",
+            "Disabling disassembly, registers and memory write tracing");
+  SETUP_CMD("continue", "Continuing...\n");
+
+  RUN();
+
+  CHECK_OUTPUT();
+}
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/test/aarch64/test-debugger-aarch64.h b/test/aarch64/test-debugger-aarch64.h
new file mode 100644
index 0000000..b02d380
--- /dev/null
+++ b/test/aarch64/test-debugger-aarch64.h
@@ -0,0 +1,164 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests for the simulator debugger.
+
+#include <fstream>
+#include <regex>
+
+#include "test-runner.h"
+#include "test-utils.h"
+
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+#define __ masm->
+#define TEST(name) TEST_(AARCH64_DBG_##name)
+
+//
+// Regex for various types of printing/tracing output.
+//
+
+// Matches traced/printed general purpose register output from the simulator,
+// e.g:
+//   "#             x0: 0x000000000badbeef"
+const std::string x_register_trace = "#[\\s]+(x\\d{1,2}|lr|sp): 0x[0-9a-f]+";
+// Matches traced/printed vector register output from the simulator, e.g:
+//   "#             v0: 0x7ff0f0007f80f0017ff0f0007f80f000"
+const std::string v_register_trace = "#[\\s]+(v\\d{1,2}): 0x[0-9a-f]+";
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+// Run tests with the simulator.
+
+// Generate some basic code which immediately breaks into the debugger.
+// This serves as a sandbox for all debugger tests to run in.
+void GenerateDebuggerAsm(MacroAssembler* masm) {
+  // Create a breakpoint here to break into the debugger.
+  __ Brk(0);
+
+  // Do some arithmetic: x1 = x0 + 5, x2 = 2, x3 = x1 - x2.
+  __ Add(x1, x0, 5);
+  __ Mov(x2, 2);
+  __ Sub(x3, x1, x2);
+
+  __ Ret();
+}
+
+// Set up the assembler, a temp output file, and a debugger-enabled simulator.
+#define SETUP()                                                           \
+  MacroAssembler masm;                                                    \
+  masm.SetCPUFeatures(CPUFeatures::None());                               \
+  masm.SetGenerateSimulatorCode(true);                                    \
+  GenerateDebuggerAsm(&masm);                                             \
+  masm.FinalizeCode();                                                    \
+  Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>(); \
+  Decoder decoder;                                                        \
+  std::istringstream input_stream;                                        \
+  char ostream_filename[] = "/tmp/vixl-test-debugger-XXXXXX";             \
+  FILE* output_stream = fdopen(mkstemp(ostream_filename), "w");           \
+  /* Disassemble the generated code so we can use the addresses later. */ \
+  PrintDisassembler disassembler(output_stream);                          \
+  disassembler.DisassembleBuffer(start, masm.GetSizeOfCodeGenerated());   \
+  fflush(output_stream);                                                  \
+  Simulator simulator(&decoder, output_stream);                           \
+  simulator.GetDebugger()->SetInputStream(&input_stream);                 \
+  simulator.SetColouredTrace(Test::coloured_trace());                     \
+  simulator.SetCPUFeatures(CPUFeatures::None());                          \
+  simulator.SetDebuggerEnabled(true);                                     \
+  /* Setup a map so that commands and their output can be checked. */     \
+  std::unordered_map<std::string, std::string> command_map
+
+// Queue a command on the input stream and map it to its expected output for
+// CHECK_OUTPUT(). A repeated command keeps only its first expected output.
+#define SETUP_CMD(cmd, expected_output)             \
+  {                                                 \
+    std::string cmd_str(cmd);                       \
+    cmd_str += "\n";                                \
+    std::string exp_out(expected_output);           \
+    input_stream.str(input_stream.str() + cmd_str); \
+    command_map.insert({cmd_str, exp_out});         \
+  }
+
+// Run the simulator from `start`, then close the output file stream.
+#define RUN()               \
+  simulator.RunFrom(start); \
+  fclose(output_stream)
+
+// Read the output file and check that each command is directly followed by its
+// expected output (matched as a regex); the file is deleted afterwards.
+#define CHECK_OUTPUT()                                                       \
+  std::ifstream file_stream(ostream_filename);                               \
+  std::ostringstream ostream;                                                \
+  ostream << file_stream.rdbuf();                                            \
+  for (const auto& iter : command_map) {                                     \
+    std::string cmd = iter.first;                                            \
+    std::string expected = iter.second;                                      \
+    /* We assume the expected output follows the command that was issued. */ \
+    std::regex regex(cmd + expected);                                        \
+    if (!std::regex_search(ostream.str(), regex)) {                          \
+      printf("output = \n\"%s\"\n", ostream.str().c_str());                  \
+      /* Remove the trailing newline. */                                     \
+      cmd.erase(cmd.size() - 1, 1);                                          \
+      std::string err =                                                      \
+          cmd + " - failed: \"" + expected + "\" not found in output ";      \
+      VIXL_ABORT_WITH_MSG(err.c_str());                                      \
+    }                                                                        \
+  }                                                                          \
+  std::remove(ostream_filename)
+
+#define GET_INSTRUCTION_ADDRESS(instruction) \
+  GetInstructionAddress(ostream_filename, instruction)
+
+// Return the address ("0x" + hex digits, leading zeros dropped) of the first
+// match of `instruction` (used as a regex) in the contents of `filename`.
+std::string GetInstructionAddress(const std::string& filename,
+                                  const std::string& instruction) {
+  std::ifstream file_stream(filename);
+  std::ostringstream ostream;
+  ostream << file_stream.rdbuf();
+  const std::string contents = ostream.str();
+
+  // Capture the "0x" prefix and the significant address digits separately so
+  // that the zero padding between them is matched but not captured.
+  const std::string regex_str =
+      "(0x)0*([0-9a-f]+)  [0-9a-f]+\t\t" + instruction;
+  std::smatch sub_matches;
+  if (!std::regex_search(contents, sub_matches, std::regex(regex_str)) ||
+      sub_matches.size() != 3) {
+    std::string err = regex_str + " not found in output ";
+    VIXL_ABORT_WITH_MSG(err.c_str());
+  }
+  return sub_matches[1].str() + sub_matches[2].str();
+}
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 553168c..40abef1 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -25,6 +25,8 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
+#include "test-disasm-aarch64.h"
+
 #include <cstdio>
 #include <cstring>
 #include <string>
@@ -33,7 +35,6 @@
 
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
-#include "test-disasm-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -238,11 +239,6 @@
   COMPARE(cmn(sp, Operand(24)), "cmn sp, #0x18 (24)");
   COMPARE(adds(wzr, wsp, Operand(9)), "cmn wsp, #0x9 (9)");
 
-  // Instructions in the add/sub immediate space, but unallocated due to shift
-  // value out of range.
-  COMPARE(dci(0x11800400), "unallocated (Unallocated)");
-  COMPARE(dci(0x11c00400), "unallocated (Unallocated)");
-
   CLEANUP();
 }
 
@@ -976,6 +972,69 @@
   CLEANUP();
 }
 
+TEST(mte_load_store) {
+  SETUP();
+
+  COMPARE(ldg(x0, MemOperand(sp)), "ldg x0, [sp]");
+  COMPARE(ldg(x22, MemOperand(x3, 32)), "ldg x22, [x3, #32]");
+  COMPARE(st2g(x3, MemOperand(sp)), "st2g x3, [sp]");
+  COMPARE(st2g(sp, MemOperand(sp, -320)), "st2g sp, [sp, #-320]");
+  COMPARE(st2g(x30, MemOperand(x4, 160, PreIndex)), "st2g x30, [x4, #160]!");
+  COMPARE(st2g(x22, MemOperand(x24, 1600, PostIndex)),
+          "st2g x22, [x24], #1600");
+  COMPARE(stg(x0, MemOperand(x1)), "stg x0, [x1]");
+  COMPARE(stg(x2, MemOperand(x3, 16)), "stg x2, [x3, #16]");
+  COMPARE(stg(sp, MemOperand(x1, -16, PreIndex)), "stg sp, [x1, #-16]!");
+  COMPARE(stg(x4, MemOperand(sp, -256, PostIndex)), "stg x4, [sp], #-256");
+  COMPARE(stgp(x29, x14, MemOperand(sp)), "stgp x29, x14, [sp]");
+  COMPARE(stgp(x3, x4, MemOperand(x3, -1024)), "stgp x3, x4, [x3, #-1024]");
+  COMPARE(stgp(x17, x7, MemOperand(x18, -64, PreIndex)),
+          "stgp x17, x7, [x18, #-64]!");
+  COMPARE(stgp(x5, x21, MemOperand(sp, 1008, PostIndex)),
+          "stgp x5, x21, [sp], #1008");
+  COMPARE(stzg(x20, MemOperand(x1)), "stzg x20, [x1]");
+  COMPARE(stzg(x22, MemOperand(x3, -32)), "stzg x22, [x3, #-32]");
+  COMPARE(stzg(sp, MemOperand(x1, 32, PreIndex)), "stzg sp, [x1, #32]!");
+  COMPARE(stzg(x5, MemOperand(sp, -2560, PostIndex)), "stzg x5, [sp], #-2560");
+  COMPARE(stz2g(x6, MemOperand(x21)), "stz2g x6, [x21]");
+  COMPARE(stz2g(x18, MemOperand(x13, 16)), "stz2g x18, [x13, #16]");
+  COMPARE(stz2g(sp, MemOperand(sp, -384, PreIndex)), "stz2g sp, [sp, #-384]!");
+  COMPARE(stz2g(sp, MemOperand(x7, -256, PostIndex)), "stz2g sp, [x7], #-256");
+
+  COMPARE_MACRO(Ldg(x10, MemOperand(x17)), "ldg x10, [x17]");
+  COMPARE_MACRO(Ldg(x15, MemOperand(sp, -4096)), "ldg x15, [sp, #-4096]");
+  COMPARE_MACRO(St2g(x3, MemOperand(x15)), "st2g x3, [x15]");
+  COMPARE_MACRO(St2g(sp, MemOperand(sp, -1600)), "st2g sp, [sp, #-1600]");
+  COMPARE_MACRO(St2g(x30, MemOperand(x4, 3216, PreIndex)),
+                "st2g x30, [x4, #3216]!");
+  COMPARE_MACRO(St2g(x18, MemOperand(x2, 352, PostIndex)),
+                "st2g x18, [x2], #352");
+  COMPARE_MACRO(Stg(x10, MemOperand(x17)), "stg x10, [x17]");
+  COMPARE_MACRO(Stg(sp, MemOperand(x3, -4096)), "stg sp, [x3, #-4096]");
+  COMPARE_MACRO(Stg(sp, MemOperand(sp, 4080, PreIndex)),
+                "stg sp, [sp, #4080]!");
+  COMPARE_MACRO(Stg(x14, MemOperand(x20, 1024, PostIndex)),
+                "stg x14, [x20], #1024");
+  COMPARE_MACRO(Stgp(x3, x5, MemOperand(x7)), "stgp x3, x5, [x7]");
+  COMPARE_MACRO(Stgp(x8, x4, MemOperand(x2, -496)), "stgp x8, x4, [x2, #-496]");
+  COMPARE_MACRO(Stgp(x27, x7, MemOperand(sp, -672, PreIndex)),
+                "stgp x27, x7, [sp, #-672]!");
+  COMPARE_MACRO(Stgp(x14, x16, MemOperand(x0, 576, PostIndex)),
+                "stgp x14, x16, [x0], #576");
+  COMPARE_MACRO(Stz2g(x22, MemOperand(x12)), "stz2g x22, [x12]");
+  COMPARE_MACRO(Stz2g(sp, MemOperand(sp, -704)), "stz2g sp, [sp, #-704]");
+  COMPARE_MACRO(Stz2g(x3, MemOperand(x4, 272, PreIndex)),
+                "stz2g x3, [x4, #272]!");
+  COMPARE_MACRO(Stz2g(sp, MemOperand(sp, 1024, PostIndex)),
+                "stz2g sp, [sp], #1024");
+  COMPARE_MACRO(Stzg(x17, MemOperand(x11)), "stzg x17, [x11]");
+  COMPARE_MACRO(Stzg(x29, MemOperand(x3, -3552)), "stzg x29, [x3, #-3552]");
+  COMPARE_MACRO(Stzg(x2, MemOperand(x15, 784, PreIndex)),
+                "stzg x2, [x15, #784]!");
+  COMPARE_MACRO(Stzg(x8, MemOperand(x3, 1488, PostIndex)),
+                "stzg x8, [x3], #1488");
+  CLEANUP();
+}
 
 TEST(load_store) {
   SETUP();
@@ -1793,8 +1852,8 @@
 TEST(atomic_memory) {
   SETUP();
 
-// These macros generate tests for all the variations of the atomic memory
-// operations, e.g. ldadd, ldadda, ldaddb, staddl, etc.
+  // These macros generate tests for all the variations of the atomic memory
+  // operations, e.g. ldadd, ldadda, ldaddb, staddl, etc.
 
 #define AM_LOAD_X_TESTS(N, MN)                                     \
   COMPARE(ld##N(x0, x1, MemOperand(x2)), "ld" MN " x0, x1, [x2]"); \
@@ -1974,23 +2033,24 @@
   SETUP();
 
   // Test every encodable prefetch operation.
-  const char* expected[] = {
-      "prfm pldl1keep, ", "prfm pldl1strm, ", "prfm pldl2keep, ",
-      "prfm pldl2strm, ", "prfm pldl3keep, ", "prfm pldl3strm, ",
-      "prfm #0b00110, ",  "prfm #0b00111, ",  "prfm plil1keep, ",
-      "prfm plil1strm, ", "prfm plil2keep, ", "prfm plil2strm, ",
-      "prfm plil3keep, ", "prfm plil3strm, ", "prfm #0b01110, ",
-      "prfm #0b01111, ",  "prfm pstl1keep, ", "prfm pstl1strm, ",
-      "prfm pstl2keep, ", "prfm pstl2strm, ", "prfm pstl3keep, ",
-      "prfm pstl3strm, ", "prfm #0b10110, ",  "prfm #0b10111, ",
-      "prfm #0b11000, ",  "prfm #0b11001, ",  "prfm #0b11010, ",
-      "prfm #0b11011, ",  "prfm #0b11100, ",  "prfm #0b11101, ",
-      "prfm #0b11110, ",  "prfm #0b11111, ",
-  };
-  const int expected_count = sizeof(expected) / sizeof(expected[0]);
-  VIXL_STATIC_ASSERT((1 << ImmPrefetchOperation_width) == expected_count);
+  const char* expected[] = {"prfm pldl1keep, ", "prfm pldl1strm, ",
+                            "prfm pldl2keep, ", "prfm pldl2strm, ",
+                            "prfm pldl3keep, ", "prfm pldl3strm, ",
+                            "prfm #0b00110, ",  "prfm #0b00111, ",
+                            "prfm plil1keep, ", "prfm plil1strm, ",
+                            "prfm plil2keep, ", "prfm plil2strm, ",
+                            "prfm plil3keep, ", "prfm plil3strm, ",
+                            "prfm #0b01110, ",  "prfm #0b01111, ",
+                            "prfm pstl1keep, ", "prfm pstl1strm, ",
+                            "prfm pstl2keep, ", "prfm pstl2strm, ",
+                            "prfm pstl3keep, ", "prfm pstl3strm, ",
+                            "prfm #0b10110, ",  "prfm #0b10111, "};
 
   for (int op = 0; op < (1 << ImmPrefetchOperation_width); op++) {
+    // Prefetch operations of the form 0b11xxx are allocated to another
+    // instruction.
+    if (op >= 0b11000) continue;
+
     COMPARE_PREFIX(prfm(op, INT64_C(0)), expected[op]);
     COMPARE_PREFIX(prfm(op, MemOperand(x0, 0)), expected[op]);
     COMPARE_PREFIX(prfm(op, MemOperand(x0, x1)), expected[op]);
@@ -2551,6 +2611,7 @@
   COMPARE(mrs(x15, FPCR), "mrs x15, fpcr");
   COMPARE(mrs(x20, RNDR), "mrs x20, rndr");
   COMPARE(mrs(x5, RNDRRS), "mrs x5, rndrrs");
+  COMPARE(mrs(x9, DCZID_EL0), "mrs x9, dczid_el0");
 
   // Test mrs that use system registers we haven't named.
   COMPARE(dci(MRS | (0x5555 << 5)), "mrs x0, S3_2_c10_c10_5");
@@ -2631,6 +2692,20 @@
   CLEANUP();
 }
 
+TEST(system_dc_mte) {
+  SETUP();
+
+  COMPARE(dc(GVA, x0), "dc gva, x0");
+  COMPARE(dc(GZVA, x1), "dc gzva, x1");
+  COMPARE(dc(CGVAC, x2), "dc cgvac, x2");
+  COMPARE(dc(CGDVAC, x3), "dc cgdvac, x3");
+  COMPARE(dc(CGVAP, x3), "dc cgvap, x3");
+  COMPARE(dc(CGDVAP, x3), "dc cgdvap, x3");
+  COMPARE(dc(CIGVAC, x4), "dc cigvac, x4");
+  COMPARE(dc(CIGDVAC, x4), "dc cigdvac, x4");
+
+  CLEANUP();
+}
 
 TEST(system_nop) {
   SETUP();
@@ -3099,18 +3174,218 @@
   CLEANUP();
 }
 
+TEST(mte) {
+  SETUP();
+
+#if 0
+  COMPARE(ldg(x2, x2, int imm9), "ldg <Xt>, [<Xn|SP>{, #<simm>}]");
+  COMPARE(st2g(x3, int imm9), "st2g <Xt|SP>, [<Xn|SP>{, #<simm>}]");
+  COMPARE(st2g(x31, int imm9), "st2g <Xt|SP>, [<Xn|SP>], #<simm>");
+  COMPARE(st2g(x30, int imm9), "st2g <Xt|SP>, [<Xn|SP>, #<simm>]!");
+  COMPARE(stgp(x301, x302, x30, int imm7), "stgp <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]");
+  COMPARE(stgp(x201, x202, x20, int imm7), "stgp <Xt1>, <Xt2>, [<Xn|SP>], #<imm>");
+  COMPARE(stgp(x161, x162, x16, int imm7), "stgp <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!");
+  COMPARE(stg(x9, int imm9), "stg <Xt|SP>, [<Xn|SP>{, #<simm>}]");
+  COMPARE(stg(x20, int imm9), "stg <Xt|SP>, [<Xn|SP>], #<simm>");
+  COMPARE(stg(x29, int imm9), "stg <Xt|SP>, [<Xn|SP>, #<simm>]!");
+  COMPARE(stz2g(x9, int imm9), "stz2g <Xt|SP>, [<Xn|SP>{, #<simm>}]");
+  COMPARE(stz2g(x28, int imm9), "stz2g <Xt|SP>, [<Xn|SP>], #<simm>");
+  COMPARE(stz2g(x7, int imm9), "stz2g <Xt|SP>, [<Xn|SP>, #<simm>]!");
+  COMPARE(stzg(x20, int imm9), "stzg <Xt|SP>, [<Xn|SP>{, #<simm>}]");
+  COMPARE(stzg(x6, int imm9), "stzg <Xt|SP>, [<Xn|SP>], #<simm>");
+  COMPARE(stzg(x28, int imm9), "stzg <Xt|SP>, [<Xn|SP>, #<simm>]!");
+#endif
+
+  CLEANUP();
+}
+
+TEST(mte_dp) {
+  SETUP();
+
+  COMPARE(addg(x26, x27, 0, 0), "addg x26, x27, #0, #0");
+  COMPARE(addg(x26, x27, 512, 2), "addg x26, x27, #512, #2");
+  COMPARE(addg(x26, x27, 1008, 15), "addg x26, x27, #1008, #15");
+  COMPARE(addg(sp, x27, 1008, 15), "addg sp, x27, #1008, #15");
+  COMPARE(addg(x26, sp, 1008, 15), "addg x26, sp, #1008, #15");
+  COMPARE(addg(sp, sp, 1008, 15), "addg sp, sp, #1008, #15");
+  COMPARE(subg(x6, x7, 0, 0), "subg x6, x7, #0, #0");
+  COMPARE(subg(x6, x7, 640, 9), "subg x6, x7, #640, #9");
+  COMPARE(subg(x6, x7, 1008, 15), "subg x6, x7, #1008, #15");
+  COMPARE(subg(sp, x7, 1008, 15), "subg sp, x7, #1008, #15");
+  COMPARE(subg(x6, sp, 1008, 15), "subg x6, sp, #1008, #15");
+  COMPARE(subg(sp, sp, 1008, 15), "subg sp, sp, #1008, #15");
+  COMPARE(gmi(x3, x5, x4), "gmi x3, x5, x4");
+  COMPARE(gmi(x3, sp, x4), "gmi x3, sp, x4");
+  COMPARE(gmi(xzr, sp, xzr), "gmi xzr, sp, xzr");
+  COMPARE(irg(x24, x23, x22), "irg x24, x23, x22");
+  COMPARE(irg(sp, x23, x22), "irg sp, x23, x22");
+  COMPARE(irg(x24, sp, x22), "irg x24, sp, x22");
+  COMPARE(irg(sp, sp, x22), "irg sp, sp, x22");
+  COMPARE(irg(x24, x23, xzr), "irg x24, x23");
+  COMPARE(irg(x24, x23), "irg x24, x23");
+  COMPARE(subp(x28, x29, x30), "subp x28, x29, x30");
+  COMPARE(subp(x28, sp, x30), "subp x28, sp, x30");
+  COMPARE(subp(x28, x29, sp), "subp x28, x29, sp");
+  COMPARE(subp(x28, sp, sp), "subp x28, sp, sp");
+  COMPARE(subp(xzr, sp, sp), "subp xzr, sp, sp");
+  COMPARE(subps(x2, x10, x0), "subps x2, x10, x0");
+  COMPARE(subps(x2, sp, x0), "subps x2, sp, x0");
+  COMPARE(subps(x2, x10, sp), "subps x2, x10, sp");
+  COMPARE(subps(x2, sp, sp), "subps x2, sp, sp");
+  COMPARE(subps(xzr, sp, sp), "cmpp sp, sp");
+  COMPARE(subps(xzr, x2, sp), "cmpp x2, sp");
+  COMPARE(cmpp(x6, x30), "cmpp x6, x30");
+
+  CLEANUP();
+}
+
+TEST(mops) {
+  SETUP();
+
+  COMPARE_MACRO(Cpyen(x4, x5, x6), "cpyen [x4]!, [x5]!, x6!");
+  COMPARE_MACRO(Cpyern(x7, x6, x1), "cpyern [x7]!, [x6]!, x1!");
+  COMPARE_MACRO(Cpyewn(x26, x27, x28), "cpyewn [x26]!, [x27]!, x28!");
+  COMPARE_MACRO(Cpye(x14, x15, x19), "cpye [x14]!, [x15]!, x19!");
+  COMPARE_MACRO(Cpyfen(x13, x1, x9), "cpyfen [x13]!, [x1]!, x9!");
+  COMPARE_MACRO(Cpyfern(x24, x2, x10), "cpyfern [x24]!, [x2]!, x10!");
+  COMPARE_MACRO(Cpyfewn(x12, x2, x11), "cpyfewn [x12]!, [x2]!, x11!");
+  COMPARE_MACRO(Cpyfe(x3, x9, x12), "cpyfe [x3]!, [x9]!, x12!");
+  COMPARE_MACRO(Cpyfmn(x7, x27, x13), "cpyfmn [x7]!, [x27]!, x13!");
+  COMPARE_MACRO(Cpyfmrn(x19, x9, x14), "cpyfmrn [x19]!, [x9]!, x14!");
+  COMPARE_MACRO(Cpyfmwn(x1, x11, x15), "cpyfmwn [x1]!, [x11]!, x15!");
+  COMPARE_MACRO(Cpyfm(x1, x13, x16), "cpyfm [x1]!, [x13]!, x16!");
+  COMPARE_MACRO(Cpyfpn(x3, x13, x17), "cpyfpn [x3]!, [x13]!, x17!");
+  COMPARE_MACRO(Cpyfprn(x18, x8, x17), "cpyfprn [x18]!, [x8]!, x17!");
+  COMPARE_MACRO(Cpyfpwn(x9, x29, x18), "cpyfpwn [x9]!, [x29]!, x18!");
+  COMPARE_MACRO(Cpyfp(x4, x3, x19), "cpyfp [x4]!, [x3]!, x19!");
+  COMPARE_MACRO(Cpymn(x5, x15, x20), "cpymn [x5]!, [x15]!, x20!");
+  COMPARE_MACRO(Cpymrn(x12, x22, x21), "cpymrn [x12]!, [x22]!, x21!");
+  COMPARE_MACRO(Cpymwn(x12, x1, x22), "cpymwn [x12]!, [x1]!, x22!");
+  COMPARE_MACRO(Cpym(x1, x10, x23), "cpym [x1]!, [x10]!, x23!");
+  COMPARE_MACRO(Cpypn(x3, x26, x25), "cpypn [x3]!, [x26]!, x25!");
+  COMPARE_MACRO(Cpyprn(x4, x14, x24), "cpyprn [x4]!, [x14]!, x24!");
+  COMPARE_MACRO(Cpypwn(x9, x29, x26), "cpypwn [x9]!, [x29]!, x26!");
+  COMPARE_MACRO(Cpyp(x0, x30, x28), "cpyp [x0]!, [x30]!, x28!");
+
+  COMPARE_MACRO(Seten(x6, x26, x27), "seten [x6]!, x26!, x27");
+  COMPARE_MACRO(Sete(x3, x23, x1), "sete [x3]!, x23!, x1");
+  COMPARE_MACRO(Setgen(x6, x16, x2), "setgen [x6]!, x16!, x2");
+  COMPARE_MACRO(Setge(x4, x24, x3), "setge [x4]!, x24!, x3");
+  COMPARE_MACRO(Setgmn(x9, x29, x4), "setgmn [x9]!, x29!, x4");
+  COMPARE_MACRO(Setgm(x30, x3, x5), "setgm [x30]!, x3!, x5");
+  COMPARE_MACRO(Setgpn(x11, x1, x6), "setgpn [x11]!, x1!, x6");
+  COMPARE_MACRO(Setgp(x1, x16, x7), "setgp [x1]!, x16!, x7");
+  COMPARE_MACRO(Setmn(x4, x14, x8), "setmn [x4]!, x14!, x8");
+  COMPARE_MACRO(Setm(x8, x7, x9), "setm [x8]!, x7!, x9");
+  COMPARE_MACRO(Setpn(x2, x22, x10), "setpn [x2]!, x22!, x10");
+  COMPARE_MACRO(Setp(x7, x17, x11), "setp [x7]!, x17!, x11");
+
+  // Check unallocated bit patterns.
+  COMPARE_PREFIX(dci(0x1d000422), "cpyp [x2]!, [x0]!, x1!");
+  COMPARE_PREFIX(dci(0xdd000422), "unallocated");  // sz != 0
+  COMPARE_PREFIX(dci(0x1d000442), "unallocated");  // Xd == Xn
+  COMPARE_PREFIX(dci(0x1d020422), "unallocated");  // Xd == Xs
+  COMPARE_PREFIX(dci(0x1d000402), "unallocated");  // Xn == Xs
+  COMPARE_PREFIX(dci(0x1d00043f), "unallocated");  // Xd == 31
+  COMPARE_PREFIX(dci(0x1d0007e2), "unallocated");  // Xn == 31
+  COMPARE_PREFIX(dci(0x1d1f0422), "unallocated");  // Xs == 31
+
+  COMPARE_PREFIX(dci(0x19c02424), "setpn [x4]!, x1!, x0");
+  COMPARE_PREFIX(dci(0xd9c02424), "unallocated");  // sz != 0
+  COMPARE_PREFIX(dci(0x19c0e424), "unallocated");  // op2 == 0xe
+  COMPARE_PREFIX(dci(0x19c02400), "unallocated");  // Xd == Xn
+  COMPARE_PREFIX(dci(0x19c02420), "unallocated");  // Xd == Xs
+  COMPARE_PREFIX(dci(0x19c02404), "unallocated");  // Xn == Xs
+  COMPARE_PREFIX(dci(0x19c0243f), "unallocated");  // Xd == 31
+  COMPARE_PREFIX(dci(0x19c027e4), "unallocated");  // Xn == 31
+
+  CLEANUP();
+}
+
+TEST(cssc) {
+  SETUP();
+
+  COMPARE_MACRO(Abs(w0, w22), "abs w0, w22");
+  COMPARE_MACRO(Abs(x0, x23), "abs x0, x23");
+  COMPARE_MACRO(Abs(wzr, wzr), "abs wzr, wzr");
+  COMPARE_MACRO(Cnt(w21, w30), "cnt w21, w30");
+  COMPARE_MACRO(Cnt(x19, x9), "cnt x19, x9");
+  COMPARE_MACRO(Cnt(xzr, x30), "cnt xzr, x30");
+  COMPARE_MACRO(Ctz(w3, w5), "ctz w3, w5");
+  COMPARE_MACRO(Ctz(x3, x28), "ctz x3, x28");
+  COMPARE_MACRO(Ctz(w0, wzr), "ctz w0, wzr");
+
+  COMPARE_MACRO(Smax(w5, w9, w10), "smax w5, w9, w10");
+  COMPARE_MACRO(Smax(x6, x8, x9), "smax x6, x8, x9");
+  COMPARE_MACRO(Smin(w11, w8, w17), "smin w11, w8, w17");
+  COMPARE_MACRO(Smin(x12, x10, x20), "smin x12, x10, x20");
+  COMPARE_MACRO(Umax(w5, w9, w10), "umax w5, w9, w10");
+  COMPARE_MACRO(Umax(x6, x8, x9), "umax x6, x8, x9");
+  COMPARE_MACRO(Umin(w11, w8, w17), "umin w11, w8, w17");
+  COMPARE_MACRO(Umin(x12, x10, x20), "umin x12, x10, x20");
+
+  COMPARE_MACRO(Smax(w5, w9, 127), "smax w5, w9, #127");
+  COMPARE_MACRO(Smax(x6, x8, -128), "smax x6, x8, #-128");
+  COMPARE_MACRO(Smin(w19, w20, -1), "smin w19, w20, #-1");
+  COMPARE_MACRO(Smin(x30, xzr, 0), "smin x30, xzr, #0");
+  COMPARE_MACRO(Umax(w5, w9, 255), "umax w5, w9, #255");
+  COMPARE_MACRO(Umax(x6, x8, 128), "umax x6, x8, #128");
+  COMPARE_MACRO(Umin(wzr, w20, 1), "umin wzr, w20, #1");
+  COMPARE_MACRO(Umin(x30, xzr, 0), "umin x30, xzr, #0");
+
+  COMPARE_MACRO(Smax(w5, w6, 128),
+                "mov w16, #0x80\n"
+                "smax w5, w6, w16");
+  COMPARE_MACRO(Smax(x10, x11, -129),
+                "mov x16, #0xffffffffffffff7f\n"
+                "smax x10, x11, x16");
+  COMPARE_MACRO(Smin(w5, w6, 128),
+                "mov w16, #0x80\n"
+                "smin w5, w6, w16");
+  COMPARE_MACRO(Smin(x10, x11, -129),
+                "mov x16, #0xffffffffffffff7f\n"
+                "smin x10, x11, x16");
+  COMPARE_MACRO(Umax(w5, w6, 256),
+                "mov w16, #0x100\n"
+                "umax w5, w6, w16");
+  COMPARE_MACRO(Umax(x10, x11, 0x4242),
+                "mov x16, #0x4242\n"
+                "umax x10, x11, x16");
+  COMPARE_MACRO(Umin(w5, w6, 256),
+                "mov w16, #0x100\n"
+                "umin w5, w6, w16");
+  COMPARE_MACRO(Umin(x10, x11, 0x4242),
+                "mov x16, #0x4242\n"
+                "umin x10, x11, x16");
+  CLEANUP();
+}
+
+TEST(gcs) {
+  SETUP();
+
+  COMPARE_MACRO(Chkfeat(x16), "chkfeat x16");
+  COMPARE_MACRO(Gcspopm(x0), "gcspopm x0");
+  COMPARE_MACRO(Gcspopm(), "gcspopm");
+  COMPARE_MACRO(Gcspopm(xzr), "gcspopm");
+  COMPARE_MACRO(Gcsss1(x4), "gcsss1 x4");
+  COMPARE_MACRO(Gcsss2(x2), "gcsss2 x2");
+  COMPARE_MACRO(Gcspushm(x1), "gcspushm x1");
+
+  CLEANUP();
+}
+
 TEST(architecture_features) {
   SETUP();
 
   // ARMv8.1 - LOR
-  COMPARE_PREFIX(dci(0x08800000), "stllrb");  // STLLRB_SL32_ldstexcl
-  COMPARE_PREFIX(dci(0x08c00000), "ldlarb");  // LDLARB_LR32_ldstexcl
-  COMPARE_PREFIX(dci(0x48800000), "stllrh");  // STLLRH_SL32_ldstexcl
-  COMPARE_PREFIX(dci(0x48c00000), "ldlarh");  // LDLARH_LR32_ldstexcl
-  COMPARE_PREFIX(dci(0x88800000), "stllr");   // STLLR_SL32_ldstexcl
-  COMPARE_PREFIX(dci(0x88c00000), "ldlar");   // LDLAR_LR32_ldstexcl
-  COMPARE_PREFIX(dci(0xc8800000), "stllr");   // STLLR_SL64_ldstexcl
-  COMPARE_PREFIX(dci(0xc8c00000), "ldlar");   // LDLAR_LR64_ldstexcl
+  COMPARE_PREFIX(dci(0x089f7c00), "stllrb");  // STLLRB_SL32_ldstexcl
+  COMPARE_PREFIX(dci(0x08df7c00), "ldlarb");  // LDLARB_LR32_ldstexcl
+  COMPARE_PREFIX(dci(0x489f7c00), "stllrh");  // STLLRH_SL32_ldstexcl
+  COMPARE_PREFIX(dci(0x48df7c00), "ldlarh");  // LDLARH_LR32_ldstexcl
+  COMPARE_PREFIX(dci(0x889f7c00), "stllr");   // STLLR_SL32_ldstexcl
+  COMPARE_PREFIX(dci(0x88df7c00), "ldlar");   // LDLAR_LR32_ldstexcl
+  COMPARE_PREFIX(dci(0xc89f7c00), "stllr");   // STLLR_SL64_ldstexcl
+  COMPARE_PREFIX(dci(0xc8df7c00), "ldlar");   // LDLAR_LR64_ldstexcl
 
   // ARMv8.1 - LSE
   COMPARE_PREFIX(dci(0x08207c00), "casp");       // CASP_CP32_ldstexcl
@@ -3283,19 +3558,19 @@
   COMPARE_PREFIX(dci(0xf8e08000), "swpal");      // SWPAL_64_memop
 
   // ARMv8.1 - RDM
-  COMPARE_PREFIX(dci(0x2e008400), "sqrdmlah");  // SQRDMLAH_asimdsame2_only
-  COMPARE_PREFIX(dci(0x2e008c00), "sqrdmlsh");  // SQRDMLSH_asimdsame2_only
+  COMPARE_PREFIX(dci(0x2e808400), "sqrdmlah");  // SQRDMLAH_asimdsame2_only
+  COMPARE_PREFIX(dci(0x2e808c00), "sqrdmlsh");  // SQRDMLSH_asimdsame2_only
   COMPARE_PREFIX(dci(0x2f40d000), "sqrdmlah");  // SQRDMLAH_asimdelem_R
   COMPARE_PREFIX(dci(0x2f40f000), "sqrdmlsh");  // SQRDMLSH_asimdelem_R
-  COMPARE_PREFIX(dci(0x7e008400), "sqrdmlah");  // SQRDMLAH_asisdsame2_only
-  COMPARE_PREFIX(dci(0x7e008c00), "sqrdmlsh");  // SQRDMLSH_asisdsame2_only
+  COMPARE_PREFIX(dci(0x7e408400), "sqrdmlah");  // SQRDMLAH_asisdsame2_only
+  COMPARE_PREFIX(dci(0x7e408c00), "sqrdmlsh");  // SQRDMLSH_asisdsame2_only
   COMPARE_PREFIX(dci(0x7f40d000), "sqrdmlah");  // SQRDMLAH_asisdelem_R
   COMPARE_PREFIX(dci(0x7f40f000), "sqrdmlsh");  // SQRDMLSH_asisdelem_R
 
   // ARMv8.2 - DotProd
-  COMPARE_PREFIX(dci(0x0e009400), "sdot");  // SDOT_asimdsame2_D
+  COMPARE_PREFIX(dci(0x0e809400), "sdot");  // SDOT_asimdsame2_D
   COMPARE_PREFIX(dci(0x0f00e000), "sdot");  // SDOT_asimdelem_D
-  COMPARE_PREFIX(dci(0x2e009400), "udot");  // UDOT_asimdsame2_D
+  COMPARE_PREFIX(dci(0x2e809400), "udot");  // UDOT_asimdsame2_D
   COMPARE_PREFIX(dci(0x2f00e000), "udot");  // UDOT_asimdelem_D
 
   // ARMv8.2 - FHM
@@ -3515,42 +3790,39 @@
   COMPARE_PREFIX(dci(0xd503221f), "esb");  // ESB_HI_hints
 
   // ARMv8.2 - SHA3
-  // COMPARE_PREFIX(dci(0xce000000), "eor3");   // EOR3_VVV16_crypto4
-  // COMPARE_PREFIX(dci(0xce200000), "bcax");   // BCAX_VVV16_crypto4
-  // COMPARE_PREFIX(dci(0xce608c00), "rax1");   // RAX1_VVV2_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xce800000), "xar");   // XAR_VVV2_crypto3_imm6
+  COMPARE_PREFIX(dci(0xce000000), "eor3");  // EOR3_VVV16_crypto4
+  COMPARE_PREFIX(dci(0xce200000), "bcax");  // BCAX_VVV16_crypto4
+  COMPARE_PREFIX(dci(0xce608c00), "rax1");  // RAX1_VVV2_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce800000), "xar");   // XAR_VVV2_crypto3_imm6
 
   // ARMv8.2 - SHA512
-  // COMPARE_PREFIX(dci(0xce608000), "sha512h");   // SHA512H_QQV_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xce608400), "sha512h2");   //
-  // SHA512H2_QQV_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xce608800), "sha512su1");   //
-  // SHA512SU1_VVV2_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xcec08000), "sha512su0");   //
-  // SHA512SU0_VV2_cryptosha512_2
+  COMPARE_PREFIX(dci(0xce608000), "sha512h");   // SHA512H_QQV_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce608400), "sha512h2");  // SHA512H2_QQV_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce608800),
+                 "sha512su1");  // SHA512SU1_VVV2_cryptosha512_3
+  COMPARE_PREFIX(dci(0xcec08000), "sha512su0");  // SHA512SU0_VV2_cryptosha512_2
 
   // ARMv8.2 - SM3
-  // COMPARE_PREFIX(dci(0xce400000), "sm3ss1");   // SM3SS1_VVV4_crypto4
-  // COMPARE_PREFIX(dci(0xce408000), "sm3tt1a");   // SM3TT1A_VVV4_crypto3_imm2
-  // COMPARE_PREFIX(dci(0xce408400), "sm3tt1b");   // SM3TT1B_VVV4_crypto3_imm2
-  // COMPARE_PREFIX(dci(0xce408800), "sm3tt2a");   // SM3TT2A_VVV4_crypto3_imm2
-  // COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b");   // SM3TT2B_VVV_crypto3_imm2
-  // COMPARE_PREFIX(dci(0xce60c000), "sm3partw1");   //
-  // SM3PARTW1_VVV4_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xce60c400), "sm3partw2");   //
-  // SM3PARTW2_VVV4_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce400000), "sm3ss1");   // SM3SS1_VVV4_crypto4
+  COMPARE_PREFIX(dci(0xce408000), "sm3tt1a");  // SM3TT1A_VVV4_crypto3_imm2
+  COMPARE_PREFIX(dci(0xce408400), "sm3tt1b");  // SM3TT1B_VVV4_crypto3_imm2
+  COMPARE_PREFIX(dci(0xce408800), "sm3tt2a");  // SM3TT2A_VVV4_crypto3_imm2
+  COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b");  // SM3TT2B_VVV_crypto3_imm2
+  COMPARE_PREFIX(dci(0xce60c000),
+                 "sm3partw1");  // SM3PARTW1_VVV4_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce60c400),
+                 "sm3partw2");  // SM3PARTW2_VVV4_cryptosha512_3
 
   // ARMv8.2 - SM4
-  // COMPARE_PREFIX(dci(0xce60c800), "sm4ekey");   //
-  // SM4EKEY_VVV4_cryptosha512_3
-  // COMPARE_PREFIX(dci(0xcec08400), "sm4e");   // SM4E_VV4_cryptosha512_2
+  COMPARE_PREFIX(dci(0xce60c800), "sm4ekey");  // SM4EKEY_VVV4_cryptosha512_3
+  COMPARE_PREFIX(dci(0xcec08400), "sm4e");     // SM4E_VV4_cryptosha512_2
 
   // ARMv8.2 - SPE
   // COMPARE_PREFIX(dci(0xd503223f), "psb");   // PSB_HC_hints
 
   // ARMv8.3 - FCMA
   COMPARE_PREFIX(dci(0x2e40c400), "fcmla");  // FCMLA_asimdsame2_C
-  COMPARE_PREFIX(dci(0x2e00e400), "fcadd");  // FCADD_asimdsame2_C
+  COMPARE_PREFIX(dci(0x2e40e400), "fcadd");  // FCADD_asimdsame2_C
   COMPARE_PREFIX(dci(0x2f401000), "fcmla");  // FCMLA_asimdelem_C_H
   COMPARE_PREFIX(dci(0x6f801000), "fcmla");  // FCMLA_asimdelem_C_S
 
@@ -3558,10 +3830,10 @@
   COMPARE_PREFIX(dci(0x1e7e0000), "fjcvtzs");  // FJCVTZS_32D_float2int
 
   // ARMv8.3 - LRCPC
-  COMPARE_PREFIX(dci(0x38a0c000), "ldaprb");  // LDAPRB_32L_memop
-  COMPARE_PREFIX(dci(0x78a0c000), "ldaprh");  // LDAPRH_32L_memop
-  COMPARE_PREFIX(dci(0xb8a0c000), "ldapr");   // LDAPR_32L_memop
-  COMPARE_PREFIX(dci(0xf8a0c000), "ldapr");   // LDAPR_64L_memop
+  COMPARE_PREFIX(dci(0x38bfc000), "ldaprb");  // LDAPRB_32L_memop
+  COMPARE_PREFIX(dci(0x78bfc000), "ldaprh");  // LDAPRH_32L_memop
+  COMPARE_PREFIX(dci(0xb8bfc000), "ldapr");   // LDAPR_32L_memop
+  COMPARE_PREFIX(dci(0xf8bfc000), "ldapr");   // LDAPR_64L_memop
 
   // ARMv8.3 - PAuth
   COMPARE_PREFIX(dci(0x9ac03000), "pacga");      // PACGA_64P_dp_2src
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index 17957f5..f50e5a6 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -1792,6 +1792,34 @@
   COMPARE_MACRO(Pmul(v6.V16B(), v7.V16B(), v8.V16B()),
                 "pmul v6.16b, v7.16b, v8.16b");
 
+  // Check unallocated vector types for SDOT.
+  COMPARE(dci(0x0e009400), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x4e009400), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x0e409400), "unallocated (Unallocated)");  // 4H
+  COMPARE(dci(0x4e409400), "unallocated (Unallocated)");  // 8H
+  COMPARE(dci(0x0ec09400), "unallocated (Unallocated)");  // 1D
+  COMPARE(dci(0x4ec09400), "unallocated (Unallocated)");  // 2D
+
+  // Check unallocated vector types for UDOT.
+  COMPARE(dci(0x2e009400), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6e009400), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2e409400), "unallocated (Unallocated)");  // 4H
+  COMPARE(dci(0x6e409400), "unallocated (Unallocated)");  // 8H
+  COMPARE(dci(0x2ec09400), "unallocated (Unallocated)");  // 1D
+  COMPARE(dci(0x6ec09400), "unallocated (Unallocated)");  // 2D
+
+  // Check unallocated vector types for SQRDMLAH.
+  COMPARE(dci(0x2e008400), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6e008400), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2ec08400), "unallocated (Unallocated)");  // 1D
+  COMPARE(dci(0x6ec08400), "unallocated (Unallocated)");  // 2D
+
+  // Check unallocated vector types for SQRDMLSH.
+  COMPARE(dci(0x2e008c00), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6e008c00), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2ec08c00), "unallocated (Unallocated)");  // 1D
+  COMPARE(dci(0x6ec08c00), "unallocated (Unallocated)");  // 2D
+
   CLEANUP();
 }
 
@@ -1924,6 +1952,16 @@
   COMPARE(dci(0x2e00ec00), "unallocated (Unallocated)");  // opcode = 0x1101
   COMPARE(dci(0x2e00fc00), "unallocated (Unallocated)");  // opcode = 0x1111
 
+  // Check unallocated vector types for FCADD.
+  COMPARE(dci(0x2e00e400), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6e00e400), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2ec0e400), "unallocated (Unallocated)");  // 1D
+
+  // Check unallocated vector types for FCMLA.
+  COMPARE(dci(0x2e00c400), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6e00c400), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2ec0c400), "unallocated (Unallocated)");  // 1D
+
   CLEANUP();
 }
 
@@ -2251,6 +2289,8 @@
                 "mul v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Mul(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "mul v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Mul(v11.V2S(), v17.V2S(), v26.S(), 1),
+                "mul v11.2s, v17.2s, v26.s[1]");
 
   COMPARE_MACRO(Mla(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "mla v0.4h, v1.4h, v2.h[0]");
@@ -2260,6 +2300,8 @@
                 "mla v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Mla(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "mla v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Mla(v11.V2S(), v17.V2S(), v26.S(), 1),
+                "mla v11.2s, v17.2s, v26.s[1]");
 
   COMPARE_MACRO(Mls(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "mls v0.4h, v1.4h, v2.h[0]");
@@ -2269,6 +2311,8 @@
                 "mls v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Mls(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "mls v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Mls(v11.V2S(), v17.V2S(), v31.S(), 1),
+                "mls v11.2s, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Sqdmulh(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "sqdmulh v0.4h, v1.4h, v2.h[0]");
@@ -2278,8 +2322,11 @@
                 "sqdmulh v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqdmulh(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "sqdmulh v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Sqdmulh(v11.V2S(), v17.V2S(), v31.S(), 1),
+                "sqdmulh v11.2s, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqdmulh(h0, h1, v2.H(), 0), "sqdmulh h0, h1, v2.h[0]");
   COMPARE_MACRO(Sqdmulh(s0, s1, v2.S(), 0), "sqdmulh s0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqdmulh(s0, s1, v31.S(), 1), "sqdmulh s0, s1, v31.s[1]");
 
   COMPARE_MACRO(Sqrdmulh(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "sqrdmulh v0.4h, v1.4h, v2.h[0]");
@@ -2289,13 +2336,18 @@
                 "sqrdmulh v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqrdmulh(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "sqrdmulh v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Sqrdmulh(v11.V2S(), v17.V2S(), v31.S(), 1),
+                "sqrdmulh v11.2s, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqrdmulh(h0, h1, v2.H(), 0), "sqrdmulh h0, h1, v2.h[0]");
   COMPARE_MACRO(Sqrdmulh(s0, s1, v2.S(), 0), "sqrdmulh s0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqrdmulh(s0, s1, v31.S(), 1), "sqrdmulh s0, s1, v31.s[1]");
 
   COMPARE_MACRO(Sdot(v0.V2S(), v1.V8B(), v2.S4B(), 0),
                 "sdot v0.2s, v1.8b, v2.4b[0]");
   COMPARE_MACRO(Sdot(v2.V4S(), v3.V16B(), v15.S4B(), 3),
                 "sdot v2.4s, v3.16b, v15.4b[3]");
+  COMPARE_MACRO(Sdot(v11.V2S(), v17.V8B(), v31.S4B(), 1),
+                "sdot v11.2s, v17.8b, v31.4b[1]");
 
   COMPARE_MACRO(Sqrdmlah(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "sqrdmlah v0.4h, v1.4h, v2.h[0]");
@@ -2305,13 +2357,18 @@
                 "sqrdmlah v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqrdmlah(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "sqrdmlah v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Sqrdmlah(v11.V2S(), v17.V2S(), v31.S(), 1),
+                "sqrdmlah v11.2s, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqrdmlah(h0, h1, v2.H(), 0), "sqrdmlah h0, h1, v2.h[0]");
   COMPARE_MACRO(Sqrdmlah(s0, s1, v2.S(), 0), "sqrdmlah s0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqrdmlah(s0, s1, v31.S(), 1), "sqrdmlah s0, s1, v31.s[1]");
 
   COMPARE_MACRO(Udot(v0.V2S(), v1.V8B(), v2.S4B(), 0),
                 "udot v0.2s, v1.8b, v2.4b[0]");
   COMPARE_MACRO(Udot(v2.V4S(), v3.V16B(), v15.S4B(), 3),
                 "udot v2.4s, v3.16b, v15.4b[3]");
+  COMPARE_MACRO(Udot(v11.V2S(), v17.V8B(), v31.S4B(), 1),
+                "udot v11.2s, v17.8b, v31.4b[1]");
 
   COMPARE_MACRO(Sqrdmlsh(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "sqrdmlsh v0.4h, v1.4h, v2.h[0]");
@@ -2321,8 +2378,11 @@
                 "sqrdmlsh v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqrdmlsh(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "sqrdmlsh v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Sqrdmlsh(v11.V2S(), v17.V2S(), v31.S(), 1),
+                "sqrdmlsh v11.2s, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqrdmlsh(h0, h1, v2.H(), 0), "sqrdmlsh h0, h1, v2.h[0]");
   COMPARE_MACRO(Sqrdmlsh(s0, s1, v2.S(), 0), "sqrdmlsh s0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqrdmlsh(s0, s1, v31.S(), 1), "sqrdmlsh s0, s1, v31.s[1]");
 
   COMPARE_MACRO(Smull(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "smull v0.4s, v1.4h, v2.h[0]");
@@ -2332,6 +2392,8 @@
                 "smull v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Smull2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "smull2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Smull(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "smull v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Umull(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "umull v0.4s, v1.4h, v2.h[0]");
@@ -2341,6 +2403,8 @@
                 "umull v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Umull2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "umull2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Umull(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "umull v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Smlal(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "smlal v0.4s, v1.4h, v2.h[0]");
@@ -2350,6 +2414,8 @@
                 "smlal v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Smlal2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "smlal2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Smlal(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "smlal v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Umlal(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "umlal v0.4s, v1.4h, v2.h[0]");
@@ -2359,6 +2425,8 @@
                 "umlal v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Umlal2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "umlal2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Umlal(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "umlal v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Smlsl(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "smlsl v0.4s, v1.4h, v2.h[0]");
@@ -2368,6 +2436,8 @@
                 "smlsl v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Smlsl2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "smlsl2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Smlsl(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "smlsl v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Umlsl(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "umlsl v0.4s, v1.4h, v2.h[0]");
@@ -2377,6 +2447,8 @@
                 "umlsl v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Umlsl2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "umlsl2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Umlsl(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "umlsl v11.2d, v17.2s, v31.s[1]");
 
   COMPARE_MACRO(Sqdmull(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "sqdmull v0.4s, v1.4h, v2.h[0]");
@@ -2386,8 +2458,11 @@
                 "sqdmull v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqdmull2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "sqdmull2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Sqdmull(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "sqdmull v11.2d, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqdmull(s0, h1, v2.H(), 0), "sqdmull s0, h1, v2.h[0]");
   COMPARE_MACRO(Sqdmull(d0, s1, v2.S(), 0), "sqdmull d0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqdmull(d0, s1, v31.S(), 0), "sqdmull d0, s1, v31.s[0]");
 
   COMPARE_MACRO(Sqdmlal(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "sqdmlal v0.4s, v1.4h, v2.h[0]");
@@ -2397,8 +2472,11 @@
                 "sqdmlal v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqdmlal2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "sqdmlal2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Sqdmlal(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "sqdmlal v11.2d, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqdmlal(s0, h1, v2.H(), 0), "sqdmlal s0, h1, v2.h[0]");
   COMPARE_MACRO(Sqdmlal(d0, s1, v2.S(), 0), "sqdmlal d0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqdmlal(d0, s1, v31.S(), 0), "sqdmlal d0, s1, v31.s[0]");
 
   COMPARE_MACRO(Sqdmlsl(v0.V4S(), v1.V4H(), v2.H(), 0),
                 "sqdmlsl v0.4s, v1.4h, v2.h[0]");
@@ -2408,8 +2486,11 @@
                 "sqdmlsl v0.2d, v1.2s, v2.s[0]");
   COMPARE_MACRO(Sqdmlsl2(v2.V2D(), v3.V4S(), v4.S(), 3),
                 "sqdmlsl2 v2.2d, v3.4s, v4.s[3]");
+  COMPARE_MACRO(Sqdmlsl(v11.V2D(), v17.V2S(), v31.S(), 1),
+                "sqdmlsl v11.2d, v17.2s, v31.s[1]");
   COMPARE_MACRO(Sqdmlsl(s0, h1, v2.H(), 0), "sqdmlsl s0, h1, v2.h[0]");
   COMPARE_MACRO(Sqdmlsl(d0, s1, v2.S(), 0), "sqdmlsl d0, s1, v2.s[0]");
+  COMPARE_MACRO(Sqdmlsl(d0, s1, v31.S(), 0), "sqdmlsl d0, s1, v31.s[0]");
 
   // FMLAL and so on are special cases in that the {2} variants operate
   // independently from the lane count.
@@ -2434,10 +2515,14 @@
                 "sudot v10.2s, v21.8b, v31.4b[0]");
   COMPARE_MACRO(Sudot(v12.V4S(), v23.V16B(), v16.S4B(), 3),
                 "sudot v12.4s, v23.16b, v16.4b[3]");
+  COMPARE_MACRO(Sudot(v11.V2S(), v17.V8B(), v31.S4B(), 1),
+                "sudot v11.2s, v17.8b, v31.4b[1]");
   COMPARE_MACRO(Usdot(v10.V2S(), v21.V8B(), v31.S4B(), 0),
                 "usdot v10.2s, v21.8b, v31.4b[0]");
   COMPARE_MACRO(Usdot(v12.V4S(), v23.V16B(), v16.S4B(), 3),
                 "usdot v12.4s, v23.16b, v16.4b[3]");
+  COMPARE_MACRO(Usdot(v11.V2S(), v17.V8B(), v31.S4B(), 1),
+                "usdot v11.2s, v17.8b, v31.4b[1]");
 
   CLEANUP();
 }
@@ -2454,13 +2539,19 @@
                 "fmul v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Fmul(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "fmul v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Fmul(v2.V4S(), v3.V4S(), v31.S(), 3),
+                "fmul v2.4s, v3.4s, v31.s[3]");
   COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 0),
                 "fmul v0.2d, v1.2d, v2.d[0]");
   COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 1),
                 "fmul v0.2d, v1.2d, v2.d[1]");
-  COMPARE_MACRO(Fmul(d0, d1, v2.D(), 0), "fmul d0, d1, v2.d[0]");
-  COMPARE_MACRO(Fmul(s0, s1, v2.S(), 0), "fmul s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v31.D(), 1),
+                "fmul v0.2d, v1.2d, v31.d[1]");
   COMPARE_MACRO(Fmul(h0, h1, v2.H(), 0), "fmul h0, h1, v2.h[0]");
+  COMPARE_MACRO(Fmul(s0, s1, v2.S(), 0), "fmul s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmul(s0, s1, v31.S(), 0), "fmul s0, s1, v31.s[0]");
+  COMPARE_MACRO(Fmul(d0, d1, v2.D(), 0), "fmul d0, d1, v2.d[0]");
+  COMPARE_MACRO(Fmul(d0, d1, v31.D(), 0), "fmul d0, d1, v31.d[0]");
 
   COMPARE_MACRO(Fmla(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "fmla v0.4h, v1.4h, v2.h[0]");
@@ -2470,13 +2561,19 @@
                 "fmla v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Fmla(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "fmla v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Fmla(v2.V4S(), v3.V4S(), v31.S(), 3),
+                "fmla v2.4s, v3.4s, v31.s[3]");
   COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 0),
                 "fmla v0.2d, v1.2d, v2.d[0]");
-  COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 1),
-                "fmla v0.2d, v1.2d, v2.d[1]");
-  COMPARE_MACRO(Fmla(d0, d1, v2.D(), 0), "fmla d0, d1, v2.d[0]");
-  COMPARE_MACRO(Fmla(s0, s1, v2.S(), 0), "fmla s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v15.D(), 1),
+                "fmla v0.2d, v1.2d, v15.d[1]");
+  COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v31.D(), 1),
+                "fmla v0.2d, v1.2d, v31.d[1]");
   COMPARE_MACRO(Fmla(h0, h1, v2.H(), 0), "fmla h0, h1, v2.h[0]");
+  COMPARE_MACRO(Fmla(s0, s1, v2.S(), 0), "fmla s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmla(s0, s1, v31.S(), 0), "fmla s0, s1, v31.s[0]");
+  COMPARE_MACRO(Fmla(d0, d1, v2.D(), 0), "fmla d0, d1, v2.d[0]");
+  COMPARE_MACRO(Fmla(d0, d1, v31.D(), 0), "fmla d0, d1, v31.d[0]");
 
   COMPARE_MACRO(Fmls(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "fmls v0.4h, v1.4h, v2.h[0]");
@@ -2486,13 +2583,19 @@
                 "fmls v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Fmls(v2.V4S(), v3.V4S(), v15.S(), 3),
                 "fmls v2.4s, v3.4s, v15.s[3]");
+  COMPARE_MACRO(Fmls(v2.V4S(), v3.V4S(), v31.S(), 3),
+                "fmls v2.4s, v3.4s, v31.s[3]");
   COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 0),
                 "fmls v0.2d, v1.2d, v2.d[0]");
-  COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 1),
-                "fmls v0.2d, v1.2d, v2.d[1]");
-  COMPARE_MACRO(Fmls(d0, d1, v2.D(), 0), "fmls d0, d1, v2.d[0]");
-  COMPARE_MACRO(Fmls(s0, s1, v2.S(), 0), "fmls s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v15.D(), 1),
+                "fmls v0.2d, v1.2d, v15.d[1]");
+  COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v31.D(), 1),
+                "fmls v0.2d, v1.2d, v31.d[1]");
   COMPARE_MACRO(Fmls(h0, h1, v2.H(), 0), "fmls h0, h1, v2.h[0]");
+  COMPARE_MACRO(Fmls(s0, s1, v2.S(), 0), "fmls s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmls(s0, s1, v31.S(), 0), "fmls s0, s1, v31.s[0]");
+  COMPARE_MACRO(Fmls(d0, d1, v2.D(), 0), "fmls d0, d1, v2.d[0]");
+  COMPARE_MACRO(Fmls(d0, d1, v31.D(), 0), "fmls d0, d1, v31.d[0]");
 
   COMPARE_MACRO(Fmulx(v0.V4H(), v1.V4H(), v2.H(), 0),
                 "fmulx v0.4h, v1.4h, v2.h[0]");
@@ -2502,22 +2605,39 @@
                 "fmulx v0.2s, v1.2s, v2.s[0]");
   COMPARE_MACRO(Fmulx(v2.V4S(), v3.V4S(), v8.S(), 3),
                 "fmulx v2.4s, v3.4s, v8.s[3]");
+  COMPARE_MACRO(Fmulx(v2.V4S(), v3.V4S(), v31.S(), 3),
+                "fmulx v2.4s, v3.4s, v31.s[3]");
   COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 0),
                 "fmulx v0.2d, v1.2d, v2.d[0]");
-  COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 1),
-                "fmulx v0.2d, v1.2d, v2.d[1]");
-  COMPARE_MACRO(Fmulx(d0, d1, v2.D(), 0), "fmulx d0, d1, v2.d[0]");
-  COMPARE_MACRO(Fmulx(s0, s1, v2.S(), 0), "fmulx s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v15.D(), 1),
+                "fmulx v0.2d, v1.2d, v15.d[1]");
+  COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v31.D(), 1),
+                "fmulx v0.2d, v1.2d, v31.d[1]");
   COMPARE_MACRO(Fmulx(h0, h1, v2.H(), 0), "fmulx h0, h1, v2.h[0]");
+  COMPARE_MACRO(Fmulx(s0, s1, v2.S(), 0), "fmulx s0, s1, v2.s[0]");
+  COMPARE_MACRO(Fmulx(s0, s1, v31.S(), 0), "fmulx s0, s1, v31.s[0]");
+  COMPARE_MACRO(Fmulx(d0, d1, v2.D(), 0), "fmulx d0, d1, v2.d[0]");
+  COMPARE_MACRO(Fmulx(d0, d1, v31.D(), 0), "fmulx d0, d1, v31.d[0]");
 
   COMPARE_MACRO(Fcmla(v0.V4S(), v1.V4S(), v2.S(), 0, 270),
                 "fcmla v0.4s, v1.4s, v2.s[0], #270");
   COMPARE_MACRO(Fcmla(v0.V4S(), v1.V4S(), v2.S(), 1, 180),
                 "fcmla v0.4s, v1.4s, v2.s[1], #180");
+  COMPARE_MACRO(Fcmla(v0.V4S(), v1.V4S(), v31.S(), 1, 180),
+                "fcmla v0.4s, v1.4s, v31.s[1], #180");
   COMPARE_MACRO(Fcmla(v0.V4H(), v1.V4H(), v2.H(), 2, 90),
                 "fcmla v0.4h, v1.4h, v2.h[2], #90");
   COMPARE_MACRO(Fcmla(v0.V8H(), v1.V8H(), v2.H(), 3, 0),
                 "fcmla v0.8h, v1.8h, v2.h[3], #0");
+  COMPARE_MACRO(Fcmla(v0.V8H(), v1.V8H(), v31.H(), 3, 0),
+                "fcmla v0.8h, v1.8h, v31.h[3], #0");
+
+  // Check unallocated vector types for FCMLA.
+  COMPARE(dci(0x2f001000), "unallocated (Unallocated)");  // 8B
+  COMPARE(dci(0x6f001000), "unallocated (Unallocated)");  // 16B
+  COMPARE(dci(0x2f801000), "unallocated (Unallocated)");  // 2S
+  COMPARE(dci(0x2fc01000), "unallocated (Unallocated)");  // 1D
+  COMPARE(dci(0x6fc01000), "unallocated (Unallocated)");  // 2D
 
   CLEANUP();
 }
@@ -2829,6 +2949,10 @@
                 "pmull v0.8h, v1.8b, v2.8b");
   COMPARE_MACRO(Pmull2(v2.V8H(), v3.V16B(), v4.V16B()),
                 "pmull2 v2.8h, v3.16b, v4.16b");
+  COMPARE_MACRO(Pmull(v5.V1Q(), v6.V1D(), v7.V1D()),
+                "pmull v5.1q, v6.1d, v7.1d");
+  COMPARE_MACRO(Pmull2(v8.V1Q(), v9.V2D(), v10.V2D()),
+                "pmull2 v8.1q, v9.2d, v10.2d");
 
   CLEANUP();
 }
@@ -3124,7 +3248,7 @@
   COMPARE_MACRO(Movi(v2.V2D(), 0xff00ff00ff00ff, 0xff00ff00ff00ff),
                 "movi v2.2d, #0xff00ff00ff00ff");
   COMPARE_MACRO(Movi(v3.V2D(), 0xffff, 0xff00ff00ff00ff),
-                "movi v3.2d, #0xff00ff00ff00ff\n"
+                "movi d3, #0xff00ff00ff00ff\n"
                 "mov x16, #0xffff\n"
                 "mov v3.d[1], x16");
 
@@ -4392,6 +4516,100 @@
   CLEANUP();
 }
 
+TEST(neon_sha3) {  // Bcax, Eor3, Xar and Rax1 vs. their expected text.
+  SETUP();
+
+  COMPARE_MACRO(Bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()),
+                "bcax v0.16b, v1.16b, v2.16b, v3.16b");
+  COMPARE_MACRO(Eor3(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B()),
+                "eor3 v10.16b, v11.16b, v12.16b, v13.16b");
+  COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 42),
+                "xar v20.2d, v21.2d, v22.2d, #42");
+  COMPARE_MACRO(Rax1(v0.V2D(), v1.V2D(), v2.V2D()), "rax1 v0.2d, v1.2d, v2.2d");
+
+  CLEANUP();
+}
+
+TEST(neon_sha1) {  // Each Sha1* macro vs. its expected text.
+  SETUP();
+
+  COMPARE_MACRO(Sha1c(q0, s12, v20.V4S()), "sha1c q0, s12, v20.4s");
+  COMPARE_MACRO(Sha1m(q22, s2, v13.V4S()), "sha1m q22, s2, v13.4s");
+  COMPARE_MACRO(Sha1p(q31, s5, v15.V4S()), "sha1p q31, s5, v15.4s");
+  COMPARE_MACRO(Sha1su0(v19.V4S(), v9.V4S(), v27.V4S()),
+                "sha1su0 v19.4s, v9.4s, v27.4s");
+  COMPARE_MACRO(Sha1h(s12, s0), "sha1h s12, s0");
+  COMPARE_MACRO(Sha1su1(v2.V4S(), v4.V4S()), "sha1su1 v2.4s, v4.4s");
+
+  CLEANUP();
+}
+
+TEST(neon_sha2) {  // Each Sha256* macro vs. its expected text.
+  SETUP();
+
+  COMPARE_MACRO(Sha256h(q0, q12, v20.V4S()), "sha256h q0, q12, v20.4s");
+  COMPARE_MACRO(Sha256h2(q22, q2, v13.V4S()), "sha256h2 q22, q2, v13.4s");
+  COMPARE_MACRO(Sha256su0(v2.V4S(), v4.V4S()), "sha256su0 v2.4s, v4.4s");
+  COMPARE_MACRO(Sha256su1(v19.V4S(), v9.V4S(), v27.V4S()),
+                "sha256su1 v19.4s, v9.4s, v27.4s");
+
+  CLEANUP();
+}
+
+TEST(neon_sha512) {  // Each Sha512* macro vs. its expected text.
+  SETUP();
+
+  COMPARE_MACRO(Sha512h(q0, q12, v20.V2D()), "sha512h q0, q12, v20.2d");
+  COMPARE_MACRO(Sha512h2(q22, q2, v13.V2D()), "sha512h2 q22, q2, v13.2d");
+  COMPARE_MACRO(Sha512su0(v2.V2D(), v4.V2D()), "sha512su0 v2.2d, v4.2d");
+  COMPARE_MACRO(Sha512su1(v19.V2D(), v9.V2D(), v27.V2D()),
+                "sha512su1 v19.2d, v9.2d, v27.2d");
+
+  CLEANUP();
+}
+
+TEST(neon_aes) {  // Aesd, Aese, Aesimc and Aesmc vs. their expected text.
+  SETUP();
+
+  COMPARE_MACRO(Aesd(v0.V16B(), v29.V16B()), "aesd v0.16b, v29.16b");
+  COMPARE_MACRO(Aese(v0.V16B(), v29.V16B()), "aese v0.16b, v29.16b");
+  COMPARE_MACRO(Aesimc(v0.V16B(), v29.V16B()), "aesimc v0.16b, v29.16b");
+  COMPARE_MACRO(Aesmc(v0.V16B(), v29.V16B()), "aesmc v0.16b, v29.16b");
+
+  CLEANUP();
+}
+
+TEST(neon_sm3) {  // Sm3* macros; a trailing index n is expected as s[n].
+  SETUP();
+
+  COMPARE_MACRO(Sm3partw1(v12.V4S(), v13.V4S(), v14.V4S()),
+                "sm3partw1 v12.4s, v13.4s, v14.4s");
+  COMPARE_MACRO(Sm3partw2(v12.V4S(), v13.V4S(), v14.V4S()),
+                "sm3partw2 v12.4s, v13.4s, v14.4s");
+  COMPARE_MACRO(Sm3ss1(v13.V4S(), v15.V4S(), v17.V4S(), v21.V4S()),
+                "sm3ss1 v13.4s, v15.4s, v17.4s, v21.4s");
+  COMPARE_MACRO(Sm3tt1a(v30.V4S(), v29.V4S(), v9.V4S(), 1),
+                "sm3tt1a v30.4s, v29.4s, v9.s[1]");
+  COMPARE_MACRO(Sm3tt1b(v30.V4S(), v29.V4S(), v9.V4S(), 3),
+                "sm3tt1b v30.4s, v29.4s, v9.s[3]");
+  COMPARE_MACRO(Sm3tt2a(v30.V4S(), v29.V4S(), v9.V4S(), 2),
+                "sm3tt2a v30.4s, v29.4s, v9.s[2]");
+  COMPARE_MACRO(Sm3tt2b(v30.V4S(), v29.V4S(), v9.V4S(), 0),
+                "sm3tt2b v30.4s, v29.4s, v9.s[0]");
+
+  CLEANUP();
+}
+
+TEST(neon_sm4) {  // Sm4e and Sm4ekey vs. their expected text.
+  SETUP();
+
+  COMPARE_MACRO(Sm4e(v12.V4S(), v13.V4S()), "sm4e v12.4s, v13.4s");
+  COMPARE_MACRO(Sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()),
+                "sm4ekey v12.4s, v13.4s, v14.4s");
+
+  CLEANUP();
+}
+
 TEST(neon_unallocated_regression_test) {
   SETUP();
 
@@ -4487,8 +4705,6 @@
   COMPARE_PREFIX(dci(0x2efb9dbd), "unallocated");  // pmul v.und, v.und, v.und
   COMPARE_PREFIX(dci(0x4eace101), "unallocated");  // pmull v.d, v.s, v.s
   COMPARE_PREFIX(dci(0x0e6de3ad), "unallocated");  // pmull v.s, v.h, v.h
-  COMPARE_PREFIX(dci(0x4ee3e2c0), "unallocated");  // pmull v.und, v.d, v.d
-  COMPARE_PREFIX(dci(0x0eede060), "unallocated");  // pmull v.und, v.und, v.und
   COMPARE_PREFIX(dci(0x6ee00afd), "unallocated");  // rev v.d, v.d
   COMPARE_PREFIX(dci(0x4e601975), "unallocated");  // rev v.h, v.h
   COMPARE_PREFIX(dci(0x4ea019f3), "unallocated");  // rev v.s, v.s
@@ -4558,10 +4774,14 @@
   COMPARE_PREFIX(dci(0x6fd6d80f), "unallocated");  // sqrdmlah v.d, v.d, v.d[]
   COMPARE_PREFIX(dci(0x2fecdae5),
                  "unallocated");  // sqrdmlah v.und, v.und, v.d[]
+  COMPARE_PREFIX(dci(0x7e008429), "unallocated");  // sqrdmlah b9, b1, b0
+  COMPARE_PREFIX(dci(0x7ec08429), "unallocated");  // sqrdmlah d9, d1, d0
   COMPARE_PREFIX(dci(0x7fe0f992), "unallocated");  // sqrdmlsh d, d, v.d[]
   COMPARE_PREFIX(dci(0x6ff1f9df), "unallocated");  // sqrdmlsh v.d, v.d, v.d[]
   COMPARE_PREFIX(dci(0x2fcdfad1),
                  "unallocated");  // sqrdmlsh v.und, v.und, v.d[]
+  COMPARE_PREFIX(dci(0x7e008c29), "unallocated");  // sqrdmlsh b9, b1, b0
+  COMPARE_PREFIX(dci(0x7ec08c29), "unallocated");  // sqrdmlsh d9, d1, d0
   COMPARE_PREFIX(dci(0x7e23b7fa), "unallocated");  // sqrdmulh b, b, b
   COMPARE_PREFIX(dci(0x5f1ad272), "unallocated");  // sqrdmulh b, b, v.b[]
   COMPARE_PREFIX(dci(0x7ef8b6e0), "unallocated");  // sqrdmulh d, d, d
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 933e808..fbdff33 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -30,10 +30,10 @@
 #include <string>
 
 #include "test-runner.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-disasm-aarch64.h"
 #include "test-utils-aarch64.h"
 
@@ -2381,7 +2381,9 @@
   COMPARE_MACRO(Bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()),
                 "bic z17.b, p7/m, z17.b, z10.b");
   COMPARE_MACRO(Bic(z17.VnS(), p7.Merging(), z10.VnS(), z17.VnS()),
-                "bic z17.s, p7/m, z17.s, z10.s");
+                "mov z31.d, z17.d\n"
+                "movprfx z17.s, p7/m, z10.s\n"
+                "bic z17.s, p7/m, z17.s, z31.s");
   COMPARE_MACRO(Bic(z17.VnD(), p7.Merging(), z7.VnD(), z27.VnD()),
                 "movprfx z17.d, p7/m, z7.d\n"
                 "bic z17.d, p7/m, z17.d, z27.d");
@@ -7671,13 +7673,14 @@
   COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0),
           "sqdmullt z7.d, z4.s, z0.s[0]");
 
-  // Feature `SVEPmull128` is not supported.
-  // COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
-  //                "pmullb z12.q, z21.d, z12.d");
   COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()),
           "pmullb z12.h, z21.b, z12.b");
   COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()),
           "pmullt z31.d, z30.s, z26.s");
+  COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
+          "pmullb z12.q, z21.d, z12.d");
+  COMPARE(pmullt(z12.VnQ(), z21.VnD(), z12.VnD()),
+          "pmullt z12.q, z21.d, z12.d");
 
   COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s");
   COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()),
@@ -7699,6 +7702,10 @@
   COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b");
   COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h");
 
+  // Check related but undefined encodings.
+  COMPARE(dci(0x45806800), "unallocated (Unallocated)");  // pmullb s, h, h
+  COMPARE(dci(0x45806c00), "unallocated (Unallocated)");  // pmullt s, h, h
+
   CLEANUP();
 }
 
diff --git a/test/aarch64/test-metadata-aarch64.cc b/test/aarch64/test-metadata-aarch64.cc
new file mode 100644
index 0000000..350c959
--- /dev/null
+++ b/test/aarch64/test-metadata-aarch64.cc
@@ -0,0 +1,130 @@
+// Copyright 2022, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "test-runner.h"
+#include "test-utils.h"
+
+#include "aarch64/cpu-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
+#include "test-assembler-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+TEST(test_metadata_mte) {  // In-bounds accesses to a PROT_MTE mapping.
+  SETUP_WITH_FEATURES(CPUFeatures::kMTE);
+
+  size_t data_size = 320;  // Size in bytes of the mapping below.
+  void* tagged_address = simulator.Mmap(NULL,
+                                        data_size,
+                                        PROT_READ | PROT_WRITE | PROT_MTE,
+                                        MAP_PRIVATE | MAP_ANONYMOUS,
+                                        -1,
+                                        0);
+
+  START();
+
+  Register tagged_heap_ptr = x20;
+  __ Mov(tagged_heap_ptr, reinterpret_cast<uintptr_t>(tagged_address));
+  for (int i = 0; i < 10; i++) {  // Offsets 0, 32, ..., 288.
+    __ Ldr(w0, MemOperand(tagged_heap_ptr, i * 32));
+    __ Str(w0, MemOperand(tagged_heap_ptr, i * 32));
+  }
+  __ Ldr(x2, MemOperand(tagged_heap_ptr, 8));
+  __ Ldrb(w3, MemOperand(tagged_heap_ptr, 1));
+  __ Ldrh(w4, MemOperand(tagged_heap_ptr, 67));  // Odd offset.
+
+  __ Addg(x21, tagged_heap_ptr, 16, 2);  // x21 is not accessed in this test.
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+  }
+
+  simulator.Munmap(tagged_address, data_size, PROT_MTE);  // Always unmap.
+}
+
+#ifdef VIXL_NEGATIVE_TESTING
+TEST(test_metadata_mte_neg) {  // Bad MTE accesses; RUN() must fail.
+  CPUFeatures features(CPUFeatures::kMTE);
+  SETUP_WITH_FEATURES(features);
+  size_t data_size = 320;  // Size in bytes of each mapping below.
+  void* tagged_address = simulator.Mmap(NULL,
+                                        data_size,
+                                        PROT_READ | PROT_WRITE | PROT_MTE,
+                                        MAP_PRIVATE | MAP_ANONYMOUS,
+                                        -1,
+                                        0);
+
+  START();
+
+  Register tagged_heap_ptr = x20;
+  __ Mov(tagged_heap_ptr, reinterpret_cast<uintptr_t>(tagged_address));
+  __ Addg(x21, tagged_heap_ptr, 16, 2);
+
+  // x21's tag was changed by the Addg above, so it is now invalid here.
+  __ Ldr(w0, MemOperand(x21));
+  __ Str(w0, MemOperand(x21));
+
+  // Out-of-bounds accesses: at/after the end (320, 336) and before the start.
+  __ Ldr(w0, MemOperand(tagged_heap_ptr, 320));
+  __ Str(w0, MemOperand(tagged_heap_ptr, 336));
+  __ Ldr(w0, MemOperand(tagged_heap_ptr, -8));
+  __ Str(w0, MemOperand(tagged_heap_ptr, -16));
+
+  void* tagged_address_2 = simulator.Mmap(NULL,
+                                          data_size,
+                                          PROT_READ | PROT_WRITE | PROT_MTE,
+                                          MAP_PRIVATE | MAP_ANONYMOUS,
+                                          -1,
+                                          0);
+
+  __ Mov(x22, reinterpret_cast<uintptr_t>(tagged_address_2));
+  simulator.Munmap(tagged_address_2, data_size, PROT_MTE);
+
+  // Use-after-free: the Munmap above is a direct call made before RUN().
+  __ Ldr(w0, MemOperand(x22));
+
+  END();
+
+  if (CAN_RUN()) {
+    MUST_FAIL_WITH_MESSAGE(RUN(), "Tag mismatch.");
+  }
+
+  simulator.Munmap(tagged_address, data_size, PROT_MTE);  // Always unmap.
+}
+#endif  // VIXL_NEGATIVE_TESTING
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/test/aarch64/test-pointer-auth-aarch64.cc b/test/aarch64/test-pointer-auth-aarch64.cc
index 80ea873..b97a58c 100644
--- a/test/aarch64/test-pointer-auth-aarch64.cc
+++ b/test/aarch64/test-pointer-auth-aarch64.cc
@@ -27,6 +27,7 @@
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
 #include "test-runner.h"
+
 #include "aarch64/simulator-aarch64.h"
 
 #define TEST(name) TEST_(AARCH64_POINTER_AUTH_##name)
@@ -92,7 +93,7 @@
 
   VIXL_CHECK(sim.StripPAC(ptr_a, Simulator::kInstructionPointer) == ptr);
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
diff --git a/test/aarch64/test-simulator-aarch64.cc b/test/aarch64/test-simulator-aarch64.cc
index ac812fd..e9d8fdb 100644
--- a/test/aarch64/test-simulator-aarch64.cc
+++ b/test/aarch64/test-simulator-aarch64.cc
@@ -26,19 +26,17 @@
 
 #include <cfloat>
 #include <cstdio>
-
 #include <sstream>
 
 #include "test-runner.h"
 #include "test-utils.h"
 
-#include "aarch64/test-simulator-inputs-aarch64.h"
-#include "aarch64/test-simulator-traces-aarch64.h"
-#include "aarch64/test-utils-aarch64.h"
-
 #include "aarch64/cpu-features-auditor-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-simulator-inputs-aarch64.h"
+#include "aarch64/test-simulator-traces-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -69,24 +67,33 @@
   Simulator simulator(&decoder);                 \
   simulator.SetColouredTrace(Test::coloured_trace());
 
-#define START()                         \
-  masm.Reset();                         \
-  simulator.ResetState();               \
-  __ PushCalleeSavedRegisters();        \
-  if (Test::trace_reg()) {              \
-    __ Trace(LOG_STATE, TRACE_ENABLE);  \
-  }                                     \
-  if (Test::trace_write()) {            \
-    __ Trace(LOG_WRITE, TRACE_ENABLE);  \
-  }                                     \
-  if (Test::trace_sim()) {              \
-    __ Trace(LOG_DISASM, TRACE_ENABLE); \
+#define START()                                                         \
+  masm.Reset();                                                         \
+  simulator.ResetState();                                               \
+  __ PushCalleeSavedRegisters();                                        \
+  /* MTE checking does not yet cover infrastructure code such as the */ \
+  /* prologue/epilogue, so tag-mismatch exceptions stay suppressed   */ \
+  /* until this point. */                                               \
+  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
+    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
+  }                                                                     \
+  if (Test::trace_reg()) {                                              \
+    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
+  }                                                                     \
+  if (Test::trace_write()) {                                            \
+    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
+  }                                                                     \
+  if (Test::trace_sim()) {                                              \
+    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
   }
 
-#define END()                       \
-  __ Trace(LOG_ALL, TRACE_DISABLE); \
-  __ PopCalleeSavedRegisters();     \
-  __ Ret();                         \
+#define END()                                          \
+  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
+    __ Hlt(DebugHltOpcode::kMTEInactive);              \
+  }                                                    \
+  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
+  __ PopCalleeSavedRegisters();                        \
+  __ Ret();                                            \
   masm.FinalizeCode()
 
 #define TRY_RUN(skipped)                                                \
@@ -95,6 +102,95 @@
   /* The simulator can run every test. */                               \
   *skipped = false
 
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+// The signal handler needs access to the simulator.
+Simulator* gImplicitCheckSim;
+
+#ifdef __x86_64__
+#include <signal.h>
+#include <ucontext.h>
+void HandleSegFault(int sig, siginfo_t* info, void* context) {
+  USE(sig);
+  USE(info);
+  Simulator* sim = gImplicitCheckSim;
+
+  // Only faults raised by a simulated memory access are expected here.
+  ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
+  uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
+  VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
+
+  // Increment the fault counter held in the simulated x1 register.
+  int64_t counter = reinterpret_cast<int64_t>(sim->ReadXRegister(1));
+  sim->WriteXRegister(1, ++counter);
+
+  // Redirect the host PC to the VIXL memory access continuation point, so that
+  // execution resumes there when this handler returns.
+  uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
+  // Set the host RAX so that the memory access is reported as failed.
+  uc->uc_mcontext.gregs[REG_RAX] =
+      static_cast<greg_t>(MemoryAccessResult::Failure);
+}
+#endif  // __x86_64__
+
+// Start an implicit check test: install the SIGSEGV handler, zero the fault
+// counter (x1) and bind `l_start`. Note: each instruction after the start will
+// be expected to fault.
+#define START_IMPLICIT_CHECK()                                                \
+  gImplicitCheckSim = &simulator;                                             \
+  /* Set up a signal handler to count the number of faulting instructions. */ \
+  struct sigaction sa = {};                                                   \
+  sa.sa_sigaction = HandleSegFault;                                           \
+  sa.sa_flags = SA_SIGINFO; /* Use the three-argument handler. */             \
+  sigaction(SIGSEGV, &sa, NULL);                                              \
+  START();                                                                    \
+  __ Mov(x1, 0); /* Reset the counter. */                                     \
+  /* Use a consistent bad address. */                                         \
+  __ Mov(x15, xzr);                                                           \
+  __ Mov(ip0, xzr);                                                           \
+  /* Load an amount of data to load. */                                       \
+  __ Mov(ip1, 4096);                                                          \
+  [[maybe_unused]] MemOperand bad_memory = MemOperand(ip0);                   \
+  if (masm.GetCPUFeatures()->Has(CPUFeatures::kSVE)) {                        \
+    /* Turn on all lanes to ensure all loads/stores are tested. */            \
+    __ Ptrue(p0.VnB());                                                       \
+    __ Ptrue(p1.VnB());                                                       \
+    __ Ptrue(p2.VnB());                                                       \
+    __ Ptrue(p3.VnB());                                                       \
+    __ Ptrue(p4.VnB());                                                       \
+    __ Ptrue(p5.VnB());                                                       \
+    __ Ptrue(p6.VnB());                                                       \
+    __ Ptrue(p7.VnB());                                                       \
+    __ Ptrue(p8.VnB());                                                       \
+    __ Ptrue(p9.VnB());                                                       \
+    __ Ptrue(p10.VnB());                                                      \
+    __ Ptrue(p11.VnB());                                                      \
+    __ Ptrue(p12.VnB());                                                      \
+    __ Ptrue(p13.VnB());                                                      \
+    __ Ptrue(p14.VnB());                                                      \
+    __ Ptrue(p15.VnB());                                                      \
+  }                                                                           \
+  Label l_start, l_end;                                                       \
+  __ Bind(&l_start);
+
+#define END_IMPLICIT_CHECK() \
+  __ Bind(&l_end);           \
+  /* Return the counter. */  \
+  __ Mov(x0, x1);            \
+  END();
+
+#define TRY_RUN_IMPLICIT_CHECK()                                              \
+  bool skipped;                                                               \
+  TRY_RUN(&skipped);                                                          \
+  /* Implicit checks only run on the simulator; enforce it in all builds. */  \
+  VIXL_CHECK(!skipped);                                                       \
+  /* Check that each load/store instruction generated a segfault that was */  \
+  /* raised and dealt with. */                                                \
+  size_t result = simulator.ReadXRegister(0);                                 \
+  size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&l_start) - \
+                                 masm.GetSizeOfCodeGeneratedSince(&l_end);    \
+  VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
+
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
 
 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
 
@@ -2843,7 +2939,7 @@
         }
       }
     }
-    VIXL_ASSERT(counted_length == expected_length);
+    VIXL_CHECK(counted_length == expected_length);
     if (error_count > kErrorReportLimit) {
       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
     }
@@ -3000,23 +3096,31 @@
                     kFormat##vdform,                                         \
                     kFormat##vnform)
 
-#define CALL_TEST_NEON_HELPER_2Op(                               \
-    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
-  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),         \
-              &MacroAssembler::mnemonic,                         \
-              input_d,                                           \
-              input_n,                                           \
-              (sizeof(input_n) / sizeof(input_n[0])),            \
-              input_m,                                           \
-              (sizeof(input_m) / sizeof(input_m[0])),            \
-              kExpected_NEON_##mnemonic##_##vdform,              \
-              kExpectedCount_NEON_##mnemonic##_##vdform,         \
-              kFormat##vdform,                                   \
-              kFormat##vnform,                                   \
+#define CALL_TEST_NEON_HELPER_2Op(mnemonic,              \
+                                  vdform,                \
+                                  vnform,                \
+                                  vmform,                \
+                                  input_d,               \
+                                  input_n,               \
+                                  input_m)               \
+  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
+              &MacroAssembler::mnemonic,                 \
+              input_d,                                   \
+              input_n,                                   \
+              (sizeof(input_n) / sizeof(input_n[0])),    \
+              input_m,                                   \
+              (sizeof(input_m) / sizeof(input_m[0])),    \
+              kExpected_NEON_##mnemonic##_##vdform,      \
+              kExpectedCount_NEON_##mnemonic##_##vdform, \
+              kFormat##vdform,                           \
+              kFormat##vnform,                           \
               kFormat##vmform)
 
-#define CALL_TEST_NEON_HELPER_2OpImm(                                 \
-    mnemonic, vdform, vnform, input_n, input_m)                       \
+#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                        \
+                                     vdform,                          \
+                                     vnform,                          \
+                                     input_n,                         \
+                                     input_m)                         \
   Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                  &MacroAssembler::mnemonic,                           \
                  input_n,                                             \
@@ -3028,23 +3132,29 @@
                  kFormat##vdform,                                     \
                  kFormat##vnform)
 
-#define CALL_TEST_NEON_HELPER_ByElement(                                  \
-    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
-  TestByElementNEON(                                                      \
-      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(            \
-          vnform) "_" STRINGIFY(vmform),                                  \
-      &MacroAssembler::mnemonic,                                          \
-      input_d,                                                            \
-      input_n,                                                            \
-      (sizeof(input_n) / sizeof(input_n[0])),                             \
-      input_m,                                                            \
-      (sizeof(input_m) / sizeof(input_m[0])),                             \
-      indices,                                                            \
-      (sizeof(indices) / sizeof(indices[0])),                             \
-      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,         \
-      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,    \
-      kFormat##vdform,                                                    \
-      kFormat##vnform,                                                    \
+#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                      \
+                                        vdform,                        \
+                                        vnform,                        \
+                                        vmform,                        \
+                                        input_d,                       \
+                                        input_n,                       \
+                                        input_m,                       \
+                                        indices)                       \
+  TestByElementNEON(                                                   \
+      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(         \
+          vnform) "_" STRINGIFY(vmform),                               \
+      &MacroAssembler::mnemonic,                                       \
+      input_d,                                                         \
+      input_n,                                                         \
+      (sizeof(input_n) / sizeof(input_n[0])),                          \
+      input_m,                                                         \
+      (sizeof(input_m) / sizeof(input_m[0])),                          \
+      indices,                                                         \
+      (sizeof(indices) / sizeof(indices[0])),                          \
+      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,      \
+      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
+      kFormat##vdform,                                                 \
+      kFormat##vnform,                                                 \
       kFormat##vmform)
 
 #define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,           \
@@ -3534,16 +3644,21 @@
                                 kInputFloat16##input_m);          \
   }
 
-#define CALL_TEST_NEON_HELPER_3DIFF(                             \
-    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
-  {                                                              \
-    CALL_TEST_NEON_HELPER_2Op(mnemonic,                          \
-                              vdform,                            \
-                              vnform,                            \
-                              vmform,                            \
-                              input_d,                           \
-                              input_n,                           \
-                              input_m);                          \
+#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
+                                    vdform,   \
+                                    vnform,   \
+                                    vmform,   \
+                                    input_d,  \
+                                    input_n,  \
+                                    input_m)  \
+  {                                           \
+    CALL_TEST_NEON_HELPER_2Op(mnemonic,       \
+                              vdform,         \
+                              vnform,         \
+                              vmform,         \
+                              input_d,        \
+                              input_n,        \
+                              input_m);       \
   }
 
 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)     \
@@ -3774,14 +3889,17 @@
   }
 
 
-#define CALL_TEST_NEON_HELPER_2OPIMM(             \
-    mnemonic, vdform, vnform, input_n, input_imm) \
-  {                                               \
-    CALL_TEST_NEON_HELPER_2OpImm(mnemonic,        \
-                                 vdform,          \
-                                 vnform,          \
-                                 input_n,         \
-                                 input_imm);      \
+#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,  \
+                                     vdform,    \
+                                     vnform,    \
+                                     input_n,   \
+                                     input_imm) \
+  {                                             \
+    CALL_TEST_NEON_HELPER_2OpImm(mnemonic,      \
+                                 vdform,        \
+                                 vnform,        \
+                                 input_n,       \
+                                 input_imm);    \
   }
 
 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm)   \
@@ -4214,8 +4332,10 @@
                                                 vm_subvector_count);    \
   }
 
-#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(                        \
-    mnemonic, input_d, input_n, input_m)                               \
+#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic,               \
+                                               input_d,                \
+                                               input_n,                \
+                                               input_m)                \
   TEST(mnemonic##_2S_8B_B) {                                           \
     CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
                                                 2S,                    \
@@ -4239,17 +4359,23 @@
                                                 4);                    \
   }
 
-#define CALL_TEST_NEON_HELPER_BYELEMENT(                                  \
-    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
-  {                                                                       \
-    CALL_TEST_NEON_HELPER_ByElement(mnemonic,                             \
-                                    vdform,                               \
-                                    vnform,                               \
-                                    vmform,                               \
-                                    input_d,                              \
-                                    input_n,                              \
-                                    input_m,                              \
-                                    indices);                             \
+#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
+                                        vdform,   \
+                                        vnform,   \
+                                        vmform,   \
+                                        input_d,  \
+                                        input_n,  \
+                                        input_m,  \
+                                        indices)  \
+  {                                               \
+    CALL_TEST_NEON_HELPER_ByElement(mnemonic,     \
+                                    vdform,       \
+                                    vnform,       \
+                                    vmform,       \
+                                    input_d,      \
+                                    input_n,      \
+                                    input_m,      \
+                                    indices);     \
   }
 
 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
@@ -4465,8 +4591,10 @@
                                     kInputSIndices);                         \
   }
 
-#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(            \
-    mnemonic, input_d, input_n, input_m)                   \
+#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic,   \
+                                               input_d,    \
+                                               input_n,    \
+                                               input_m)    \
   TEST(mnemonic##_S_H_H) {                                 \
     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
                                     S,                     \
@@ -4489,21 +4617,28 @@
   }
 
 
-#define CALL_TEST_NEON_HELPER_2OP2IMM(                           \
-    mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
-  {                                                              \
-    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,  \
-                                     mnemonic,                   \
-                                     variant,                    \
-                                     variant,                    \
-                                     input_d,                    \
-                                     input_imm1,                 \
-                                     input_n,                    \
-                                     input_imm2);                \
+#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                 \
+                                      variant,                  \
+                                      input_d,                  \
+                                      input_imm1,               \
+                                      input_n,                  \
+                                      input_imm2)               \
+  {                                                             \
+    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
+                                     mnemonic,                  \
+                                     variant,                   \
+                                     variant,                   \
+                                     input_d,                   \
+                                     input_imm1,                \
+                                     input_n,                   \
+                                     input_imm2);               \
   }
 
-#define DEFINE_TEST_NEON_2OP2IMM(                               \
-    mnemonic, input_d, input_imm1, input_n, input_imm2)         \
+#define DEFINE_TEST_NEON_2OP2IMM(mnemonic,                      \
+                                 input_d,                       \
+                                 input_imm1,                    \
+                                 input_n,                       \
+                                 input_imm2)                    \
   TEST(mnemonic##_B) {                                          \
     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
                                   16B,                          \
@@ -4966,6 +5101,802 @@
 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
 
 
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+TEST(ImplicitCheck) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+  START_IMPLICIT_CHECK();
+
+  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+  // Invalid memory reads: every load below uses `bad_memory` and should fault.
+  __ ldar(w3, bad_memory);
+  __ ldar(x4, bad_memory);
+  __ ldarb(w5, bad_memory);
+  __ ldarb(x6, bad_memory);
+  __ ldarh(w7, bad_memory);
+  __ ldarh(x8, bad_memory);
+  __ ldaxp(w9, w10, bad_memory);
+  __ ldaxp(x11, x12, bad_memory);
+  __ ldaxr(w13, bad_memory);
+  __ ldaxr(x14, bad_memory);
+  __ ldaxrb(w15, bad_memory);
+  __ ldaxrb(x16, bad_memory);
+  __ ldaxrh(w17, bad_memory);
+  __ ldaxrh(x18, bad_memory);
+  __ ldnp(w19, w20, bad_memory);
+  __ ldnp(x21, x22, bad_memory);
+  __ ldp(w23, w24, bad_memory);
+  __ ldp(x25, x26, bad_memory);
+  __ ldpsw(x27, x28, bad_memory);
+  __ ldr(w29, bad_memory);
+  __ ldr(x2, bad_memory);
+  __ ldrb(w3, bad_memory);
+  __ ldrb(x4, bad_memory);
+  __ ldrh(w5, bad_memory);
+  __ ldrh(x6, bad_memory);
+  __ ldrsb(w7, bad_memory);
+  __ ldrsb(x8, bad_memory);
+  __ ldrsh(w9, bad_memory);
+  __ ldrsh(x10, bad_memory);
+  __ ldrsw(x11, bad_memory);
+  __ ldur(w12, bad_memory);
+  __ ldur(x13, bad_memory);
+  __ ldurb(w14, bad_memory);
+  __ ldurb(x15, bad_memory);
+  __ ldurh(w16, bad_memory);
+  __ ldurh(x17, bad_memory);
+  __ ldursb(w18, bad_memory);
+  __ ldursb(x19, bad_memory);
+  __ ldursh(w20, bad_memory);
+  __ ldursh(x21, bad_memory);
+  __ ldursw(x22, bad_memory);
+  __ ldxp(w23, w24, bad_memory);
+  __ ldxp(x25, x26, bad_memory);
+  __ ldxr(w27, bad_memory);
+  __ ldxr(x28, bad_memory);
+  __ ldxrb(w29, bad_memory);
+  __ ldxrb(x2, bad_memory);
+  __ ldxrh(w3, bad_memory);
+  __ ldxrh(x4, bad_memory);
+
+  // Invalid memory writes. Note: exclusive store instructions are not tested
+  // because they can fail due to the global monitor before trying to perform a
+  // memory store.
+  __ stlr(w18, bad_memory);
+  __ stlr(x19, bad_memory);
+  __ stlrb(w20, bad_memory);
+  __ stlrb(x21, bad_memory);
+  __ stlrh(w22, bad_memory);
+  __ stlrh(x23, bad_memory);
+  __ stnp(w14, w15, bad_memory);
+  __ stnp(x16, x17, bad_memory);
+  __ stp(w18, w19, bad_memory);
+  __ stp(x20, x21, bad_memory);
+  __ str(w22, bad_memory);
+  __ str(x23, bad_memory);
+  __ strb(w24, bad_memory);
+  __ strb(x25, bad_memory);
+  __ strh(w26, bad_memory);
+  __ strh(x27, bad_memory);
+  __ stur(w28, bad_memory);
+  __ stur(x29, bad_memory);
+  __ sturb(w2, bad_memory);
+  __ sturb(x3, bad_memory);
+  __ sturh(w4, bad_memory);
+  __ sturh(x5, bad_memory);
+
+  END_IMPLICIT_CHECK();
+  TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckNeon) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+  START_IMPLICIT_CHECK();
+
+  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+  __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+  __ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+  __ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
+  __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), bad_memory);
+  __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), bad_memory);
+  __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+  __ ld1(v17.V16B(), v18.V16B(), bad_memory);
+  __ ld1(v20.V16B(), v21.V16B(), bad_memory);
+  __ ld1(v28.V16B(), v29.V16B(), bad_memory);
+  __ ld1(v29.V16B(), bad_memory);
+  __ ld1(v21.V16B(), bad_memory);
+  __ ld1(v4.V16B(), bad_memory);
+  __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
+  __ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), bad_memory);
+  __ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), bad_memory);
+  __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), bad_memory);
+  __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), bad_memory);
+  __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), bad_memory);
+  __ ld1(v29.V1D(), v30.V1D(), bad_memory);
+  __ ld1(v31.V1D(), v0.V1D(), bad_memory);
+  __ ld1(v3.V1D(), v4.V1D(), bad_memory);
+  __ ld1(v28.V1D(), bad_memory);
+  __ ld1(v11.V1D(), bad_memory);
+  __ ld1(v29.V1D(), bad_memory);
+  __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+  __ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), bad_memory);
+  __ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), bad_memory);
+  __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
+  __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
+  __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
+  __ ld1(v18.V2D(), v19.V2D(), bad_memory);
+  __ ld1(v21.V2D(), v22.V2D(), bad_memory);
+  __ ld1(v17.V2D(), v18.V2D(), bad_memory);
+  __ ld1(v5.V2D(), bad_memory);
+  __ ld1(v6.V2D(), bad_memory);
+  __ ld1(v15.V2D(), bad_memory);
+  __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
+  __ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+  __ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
+  __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), bad_memory);
+  __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), bad_memory);
+  __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
+  __ ld1(v0.V2S(), v1.V2S(), bad_memory);
+  __ ld1(v13.V2S(), v14.V2S(), bad_memory);
+  __ ld1(v3.V2S(), v4.V2S(), bad_memory);
+  __ ld1(v26.V2S(), bad_memory);
+  __ ld1(v0.V2S(), bad_memory);
+  __ ld1(v11.V2S(), bad_memory);
+  __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
+  __ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
+  __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
+  __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), bad_memory);
+  __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
+  __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
+  __ ld1(v3.V4H(), v4.V4H(), bad_memory);
+  __ ld1(v3.V4H(), v4.V4H(), bad_memory);
+  __ ld1(v23.V4H(), v24.V4H(), bad_memory);
+  __ ld1(v26.V4H(), bad_memory);
+  __ ld1(v1.V4H(), bad_memory);
+  __ ld1(v14.V4H(), bad_memory);
+  __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), bad_memory);
+  __ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
+  __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), bad_memory);
+  __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
+  __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), bad_memory);
+  __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), bad_memory);
+  __ ld1(v20.V4S(), v21.V4S(), bad_memory);
+  __ ld1(v30.V4S(), v31.V4S(), bad_memory);
+  __ ld1(v11.V4S(), v12.V4S(), bad_memory);
+  __ ld1(v15.V4S(), bad_memory);
+  __ ld1(v12.V4S(), bad_memory);
+  __ ld1(v0.V4S(), bad_memory);
+  __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), bad_memory);
+  __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
+  __ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
+  __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), bad_memory);
+  __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
+  __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
+  __ ld1(v10.V8B(), v11.V8B(), bad_memory);
+  __ ld1(v11.V8B(), v12.V8B(), bad_memory);
+  __ ld1(v27.V8B(), v28.V8B(), bad_memory);
+  __ ld1(v31.V8B(), bad_memory);
+  __ ld1(v10.V8B(), bad_memory);
+  __ ld1(v28.V8B(), bad_memory);
+  __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
+  __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+  __ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), bad_memory);
+  __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+  __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+  __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), bad_memory);
+  __ ld1(v4.V8H(), v5.V8H(), bad_memory);
+  __ ld1(v21.V8H(), v22.V8H(), bad_memory);
+  __ ld1(v4.V8H(), v5.V8H(), bad_memory);
+  __ ld1(v9.V8H(), bad_memory);
+  __ ld1(v27.V8H(), bad_memory);
+  __ ld1(v26.V8H(), bad_memory);
+  __ ld1(v19.B(), 1, bad_memory);
+  __ ld1(v12.B(), 3, bad_memory);
+  __ ld1(v27.B(), 12, bad_memory);
+  __ ld1(v10.D(), 1, bad_memory);
+  __ ld1(v26.D(), 1, bad_memory);
+  __ ld1(v7.D(), 1, bad_memory);
+  __ ld1(v19.H(), 5, bad_memory);
+  __ ld1(v10.H(), 1, bad_memory);
+  __ ld1(v5.H(), 4, bad_memory);
+  __ ld1(v21.S(), 2, bad_memory);
+  __ ld1(v13.S(), 2, bad_memory);
+  __ ld1(v1.S(), 2, bad_memory);
+  __ ld1r(v2.V16B(), bad_memory);
+  __ ld1r(v2.V16B(), bad_memory);
+  __ ld1r(v22.V16B(), bad_memory);
+  __ ld1r(v25.V1D(), bad_memory);
+  __ ld1r(v9.V1D(), bad_memory);
+  __ ld1r(v23.V1D(), bad_memory);
+  __ ld1r(v19.V2D(), bad_memory);
+  __ ld1r(v21.V2D(), bad_memory);
+  __ ld1r(v30.V2D(), bad_memory);
+  __ ld1r(v24.V2S(), bad_memory);
+  __ ld1r(v26.V2S(), bad_memory);
+  __ ld1r(v28.V2S(), bad_memory);
+  __ ld1r(v19.V4H(), bad_memory);
+  __ ld1r(v1.V4H(), bad_memory);
+  __ ld1r(v21.V4H(), bad_memory);
+  __ ld1r(v15.V4S(), bad_memory);
+  __ ld1r(v21.V4S(), bad_memory);
+  __ ld1r(v23.V4S(), bad_memory);
+  __ ld1r(v26.V8B(), bad_memory);
+  __ ld1r(v14.V8B(), bad_memory);
+  __ ld1r(v19.V8B(), bad_memory);
+  __ ld1r(v13.V8H(), bad_memory);
+  __ ld1r(v30.V8H(), bad_memory);
+  __ ld1r(v27.V8H(), bad_memory);
+  __ ld2(v21.V16B(), v22.V16B(), bad_memory);
+  __ ld2(v21.V16B(), v22.V16B(), bad_memory);
+  __ ld2(v12.V16B(), v13.V16B(), bad_memory);
+  __ ld2(v14.V2D(), v15.V2D(), bad_memory);
+  __ ld2(v0.V2D(), v1.V2D(), bad_memory);
+  __ ld2(v12.V2D(), v13.V2D(), bad_memory);
+  __ ld2(v27.V2S(), v28.V2S(), bad_memory);
+  __ ld2(v2.V2S(), v3.V2S(), bad_memory);
+  __ ld2(v12.V2S(), v13.V2S(), bad_memory);
+  __ ld2(v9.V4H(), v10.V4H(), bad_memory);
+  __ ld2(v23.V4H(), v24.V4H(), bad_memory);
+  __ ld2(v1.V4H(), v2.V4H(), bad_memory);
+  __ ld2(v20.V4S(), v21.V4S(), bad_memory);
+  __ ld2(v10.V4S(), v11.V4S(), bad_memory);
+  __ ld2(v24.V4S(), v25.V4S(), bad_memory);
+  __ ld2(v17.V8B(), v18.V8B(), bad_memory);
+  __ ld2(v13.V8B(), v14.V8B(), bad_memory);
+  __ ld2(v7.V8B(), v8.V8B(), bad_memory);
+  __ ld2(v30.V8H(), v31.V8H(), bad_memory);
+  __ ld2(v4.V8H(), v5.V8H(), bad_memory);
+  __ ld2(v13.V8H(), v14.V8H(), bad_memory);
+  __ ld2(v5.B(), v6.B(), 12, bad_memory);
+  __ ld2(v16.B(), v17.B(), 7, bad_memory);
+  __ ld2(v29.B(), v30.B(), 2, bad_memory);
+  __ ld2(v11.D(), v12.D(), 1, bad_memory);
+  __ ld2(v26.D(), v27.D(), 0, bad_memory);
+  __ ld2(v25.D(), v26.D(), 0, bad_memory);
+  __ ld2(v18.H(), v19.H(), 7, bad_memory);
+  __ ld2(v17.H(), v18.H(), 5, bad_memory);
+  __ ld2(v30.H(), v31.H(), 2, bad_memory);
+  __ ld2(v29.S(), v30.S(), 3, bad_memory);
+  __ ld2(v28.S(), v29.S(), 0, bad_memory);
+  __ ld2(v6.S(), v7.S(), 1, bad_memory);
+  __ ld2r(v26.V16B(), v27.V16B(), bad_memory);
+  __ ld2r(v21.V16B(), v22.V16B(), bad_memory);
+  __ ld2r(v5.V16B(), v6.V16B(), bad_memory);
+  __ ld2r(v26.V1D(), v27.V1D(), bad_memory);
+  __ ld2r(v14.V1D(), v15.V1D(), bad_memory);
+  __ ld2r(v23.V1D(), v24.V1D(), bad_memory);
+  __ ld2r(v11.V2D(), v12.V2D(), bad_memory);
+  __ ld2r(v29.V2D(), v30.V2D(), bad_memory);
+  __ ld2r(v15.V2D(), v16.V2D(), bad_memory);
+  __ ld2r(v26.V2S(), v27.V2S(), bad_memory);
+  __ ld2r(v22.V2S(), v23.V2S(), bad_memory);
+  __ ld2r(v2.V2S(), v3.V2S(), bad_memory);
+  __ ld2r(v2.V4H(), v3.V4H(), bad_memory);
+  __ ld2r(v9.V4H(), v10.V4H(), bad_memory);
+  __ ld2r(v6.V4H(), v7.V4H(), bad_memory);
+  __ ld2r(v7.V4S(), v8.V4S(), bad_memory);
+  __ ld2r(v19.V4S(), v20.V4S(), bad_memory);
+  __ ld2r(v21.V4S(), v22.V4S(), bad_memory);
+  __ ld2r(v26.V8B(), v27.V8B(), bad_memory);
+  __ ld2r(v20.V8B(), v21.V8B(), bad_memory);
+  __ ld2r(v11.V8B(), v12.V8B(), bad_memory);
+  __ ld2r(v12.V8H(), v13.V8H(), bad_memory);
+  __ ld2r(v6.V8H(), v7.V8H(), bad_memory);
+  __ ld2r(v25.V8H(), v26.V8H(), bad_memory);
+  __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
+  __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
+  __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
+  __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), bad_memory);
+  __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
+  __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), bad_memory);
+  __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
+  __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), bad_memory);
+  __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), bad_memory);
+  __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), bad_memory);
+  __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), bad_memory);
+  __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+  __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
+  __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
+  __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), bad_memory);
+  __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
+  __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), bad_memory);
+  __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
+  __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), bad_memory);
+  __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), bad_memory);
+  __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), bad_memory);
+  __ ld3(v21.B(), v22.B(), v23.B(), 11, bad_memory);
+  __ ld3(v5.B(), v6.B(), v7.B(), 9, bad_memory);
+  __ ld3(v23.B(), v24.B(), v25.B(), 0, bad_memory);
+  __ ld3(v16.D(), v17.D(), v18.D(), 0, bad_memory);
+  __ ld3(v30.D(), v31.D(), v0.D(), 0, bad_memory);
+  __ ld3(v28.D(), v29.D(), v30.D(), 1, bad_memory);
+  __ ld3(v13.H(), v14.H(), v15.H(), 2, bad_memory);
+  __ ld3(v22.H(), v23.H(), v24.H(), 7, bad_memory);
+  __ ld3(v14.H(), v15.H(), v16.H(), 3, bad_memory);
+  __ ld3(v22.S(), v23.S(), v24.S(), 3, bad_memory);
+  __ ld3(v30.S(), v31.S(), v0.S(), 2, bad_memory);
+  __ ld3(v12.S(), v13.S(), v14.S(), 1, bad_memory);
+  __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+  __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+  __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
+  __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), bad_memory);
+  __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), bad_memory);
+  __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), bad_memory);
+  __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+  __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
+  __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), bad_memory);
+  __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), bad_memory);
+  __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
+  __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+  __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), bad_memory);
+  __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), bad_memory);
+  __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+  __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+  __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), bad_memory);
+  __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+  __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
+  __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
+  __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+  __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
+  __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
+  __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), bad_memory);
+  __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), bad_memory);
+  __ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
+  __ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
+  __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), bad_memory);
+  __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
+  __ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), bad_memory);
+  __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
+  __ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+  __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), bad_memory);
+  __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
+  __ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), bad_memory);
+  __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
+  __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), bad_memory);
+  __ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
+  __ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+  __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
+  __ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+  __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
+  __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+  __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+  __ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
+  __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, bad_memory);
+  __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, bad_memory);
+  __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, bad_memory);
+  __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, bad_memory);
+  __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, bad_memory);
+  __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, bad_memory);
+  __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, bad_memory);
+  __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, bad_memory);
+  __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, bad_memory);
+  __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, bad_memory);
+  __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, bad_memory);
+  __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, bad_memory);
+  __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), bad_memory);
+  __ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), bad_memory);
+  __ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), bad_memory);
+  __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), bad_memory);
+  __ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
+  __ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), bad_memory);
+  __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
+  __ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+  __ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+  __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
+  __ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), bad_memory);
+  __ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), bad_memory);
+  __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), bad_memory);
+  __ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), bad_memory);
+  __ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), bad_memory);
+  __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), bad_memory);
+  __ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+  __ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
+  __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), bad_memory);
+  __ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+  __ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
+  __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
+  __ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+  __ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), bad_memory);
+
+  __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+  __ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), bad_memory);
+  __ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
+  __ st1(v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
+  __ st1(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
+  __ st1(v9.V16B(), v10.V16B(), v11.V16B(), bad_memory);
+  __ st1(v7.V16B(), v8.V16B(), bad_memory);
+  __ st1(v26.V16B(), v27.V16B(), bad_memory);
+  __ st1(v22.V16B(), v23.V16B(), bad_memory);
+  __ st1(v23.V16B(), bad_memory);
+  __ st1(v28.V16B(), bad_memory);
+  __ st1(v2.V16B(), bad_memory);
+  __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), bad_memory);
+  __ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), bad_memory);
+  __ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), bad_memory);
+  __ st1(v16.V1D(), v17.V1D(), v18.V1D(), bad_memory);
+  __ st1(v3.V1D(), v4.V1D(), v5.V1D(), bad_memory);
+  __ st1(v14.V1D(), v15.V1D(), v16.V1D(), bad_memory);
+  __ st1(v18.V1D(), v19.V1D(), bad_memory);
+  __ st1(v5.V1D(), v6.V1D(), bad_memory);
+  __ st1(v2.V1D(), v3.V1D(), bad_memory);
+  __ st1(v4.V1D(), bad_memory);
+  __ st1(v27.V1D(), bad_memory);
+  __ st1(v23.V1D(), bad_memory);
+  __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), bad_memory);
+  __ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
+  __ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+  __ st1(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+  __ st1(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+  __ st1(v22.V2D(), v23.V2D(), v24.V2D(), bad_memory);
+  __ st1(v21.V2D(), v22.V2D(), bad_memory);
+  __ st1(v6.V2D(), v7.V2D(), bad_memory);
+  __ st1(v27.V2D(), v28.V2D(), bad_memory);
+  __ st1(v21.V2D(), bad_memory);
+  __ st1(v29.V2D(), bad_memory);
+  __ st1(v20.V2D(), bad_memory);
+  __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+  __ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
+  __ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
+  __ st1(v2.V2S(), v3.V2S(), v4.V2S(), bad_memory);
+  __ st1(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+  __ st1(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
+  __ st1(v28.V2S(), v29.V2S(), bad_memory);
+  __ st1(v29.V2S(), v30.V2S(), bad_memory);
+  __ st1(v23.V2S(), v24.V2S(), bad_memory);
+  __ st1(v6.V2S(), bad_memory);
+  __ st1(v11.V2S(), bad_memory);
+  __ st1(v17.V2S(), bad_memory);
+  __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+  __ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
+  __ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), bad_memory);
+  __ st1(v11.V4H(), v12.V4H(), v13.V4H(), bad_memory);
+  __ st1(v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
+  __ st1(v12.V4H(), v13.V4H(), v14.V4H(), bad_memory);
+  __ st1(v13.V4H(), v14.V4H(), bad_memory);
+  __ st1(v15.V4H(), v16.V4H(), bad_memory);
+  __ st1(v21.V4H(), v22.V4H(), bad_memory);
+  __ st1(v16.V4H(), bad_memory);
+  __ st1(v8.V4H(), bad_memory);
+  __ st1(v30.V4H(), bad_memory);
+  __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), bad_memory);
+  __ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+  __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
+  __ st1(v31.V4S(), v0.V4S(), v1.V4S(), bad_memory);
+  __ st1(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+  __ st1(v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
+  __ st1(v17.V4S(), v18.V4S(), bad_memory);
+  __ st1(v31.V4S(), v0.V4S(), bad_memory);
+  __ st1(v1.V4S(), v2.V4S(), bad_memory);
+  __ st1(v26.V4S(), bad_memory);
+  __ st1(v15.V4S(), bad_memory);
+  __ st1(v13.V4S(), bad_memory);
+  __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+  __ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), bad_memory);
+  __ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
+  __ st1(v19.V8B(), v20.V8B(), v21.V8B(), bad_memory);
+  __ st1(v31.V8B(), v0.V8B(), v1.V8B(), bad_memory);
+  __ st1(v9.V8B(), v10.V8B(), v11.V8B(), bad_memory);
+  __ st1(v12.V8B(), v13.V8B(), bad_memory);
+  __ st1(v2.V8B(), v3.V8B(), bad_memory);
+  __ st1(v0.V8B(), v1.V8B(), bad_memory);
+  __ st1(v16.V8B(), bad_memory);
+  __ st1(v25.V8B(), bad_memory);
+  __ st1(v31.V8B(), bad_memory);
+  __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), bad_memory);
+  __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), bad_memory);
+  __ st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), bad_memory);
+  __ st1(v10.V8H(), v11.V8H(), v12.V8H(), bad_memory);
+  __ st1(v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
+  __ st1(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
+  __ st1(v26.V8H(), v27.V8H(), bad_memory);
+  __ st1(v24.V8H(), v25.V8H(), bad_memory);
+  __ st1(v17.V8H(), v18.V8H(), bad_memory);
+  __ st1(v29.V8H(), bad_memory);
+  __ st1(v19.V8H(), bad_memory);
+  __ st1(v23.V8H(), bad_memory);
+  __ st1(v19.B(), 15, bad_memory);
+  __ st1(v25.B(), 9, bad_memory);
+  __ st1(v4.B(), 8, bad_memory);
+  __ st1(v13.D(), 0, bad_memory);
+  __ st1(v30.D(), 0, bad_memory);
+  __ st1(v3.D(), 0, bad_memory);
+  __ st1(v22.H(), 0, bad_memory);
+  __ st1(v31.H(), 7, bad_memory);
+  __ st1(v23.H(), 3, bad_memory);
+  __ st1(v0.S(), 0, bad_memory);
+  __ st1(v11.S(), 3, bad_memory);
+  __ st1(v24.S(), 3, bad_memory);
+  __ st2(v7.V16B(), v8.V16B(), bad_memory);
+  __ st2(v5.V16B(), v6.V16B(), bad_memory);
+  __ st2(v18.V16B(), v19.V16B(), bad_memory);
+  __ st2(v14.V2D(), v15.V2D(), bad_memory);
+  __ st2(v7.V2D(), v8.V2D(), bad_memory);
+  __ st2(v24.V2D(), v25.V2D(), bad_memory);
+  __ st2(v22.V2S(), v23.V2S(), bad_memory);
+  __ st2(v4.V2S(), v5.V2S(), bad_memory);
+  __ st2(v2.V2S(), v3.V2S(), bad_memory);
+  __ st2(v23.V4H(), v24.V4H(), bad_memory);
+  __ st2(v8.V4H(), v9.V4H(), bad_memory);
+  __ st2(v7.V4H(), v8.V4H(), bad_memory);
+  __ st2(v17.V4S(), v18.V4S(), bad_memory);
+  __ st2(v6.V4S(), v7.V4S(), bad_memory);
+  __ st2(v26.V4S(), v27.V4S(), bad_memory);
+  __ st2(v31.V8B(), v0.V8B(), bad_memory);
+  __ st2(v0.V8B(), v1.V8B(), bad_memory);
+  __ st2(v21.V8B(), v22.V8B(), bad_memory);
+  __ st2(v7.V8H(), v8.V8H(), bad_memory);
+  __ st2(v22.V8H(), v23.V8H(), bad_memory);
+  __ st2(v4.V8H(), v5.V8H(), bad_memory);
+  __ st2(v8.B(), v9.B(), 15, bad_memory);
+  __ st2(v8.B(), v9.B(), 15, bad_memory);
+  __ st2(v7.B(), v8.B(), 4, bad_memory);
+  __ st2(v25.D(), v26.D(), 0, bad_memory);
+  __ st2(v17.D(), v18.D(), 1, bad_memory);
+  __ st2(v3.D(), v4.D(), 1, bad_memory);
+  __ st2(v4.H(), v5.H(), 3, bad_memory);
+  __ st2(v0.H(), v1.H(), 5, bad_memory);
+  __ st2(v22.H(), v23.H(), 2, bad_memory);
+  __ st2(v14.S(), v15.S(), 3, bad_memory);
+  __ st2(v23.S(), v24.S(), 3, bad_memory);
+  __ st2(v0.S(), v1.S(), 2, bad_memory);
+  __ st3(v26.V16B(), v27.V16B(), v28.V16B(), bad_memory);
+  __ st3(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
+  __ st3(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+  __ st3(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+  __ st3(v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
+  __ st3(v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
+  __ st3(v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
+  __ st3(v13.V2S(), v14.V2S(), v15.V2S(), bad_memory);
+  __ st3(v22.V2S(), v23.V2S(), v24.V2S(), bad_memory);
+  __ st3(v31.V4H(), v0.V4H(), v1.V4H(), bad_memory);
+  __ st3(v8.V4H(), v9.V4H(), v10.V4H(), bad_memory);
+  __ st3(v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
+  __ st3(v18.V4S(), v19.V4S(), v20.V4S(), bad_memory);
+  __ st3(v25.V4S(), v26.V4S(), v27.V4S(), bad_memory);
+  __ st3(v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
+  __ st3(v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+  __ st3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
+  __ st3(v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
+  __ st3(v8.V8H(), v9.V8H(), v10.V8H(), bad_memory);
+  __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
+  __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
+  __ st3(v31.B(), v0.B(), v1.B(), 10, bad_memory);
+  __ st3(v4.B(), v5.B(), v6.B(), 5, bad_memory);
+  __ st3(v5.B(), v6.B(), v7.B(), 1, bad_memory);
+  __ st3(v5.D(), v6.D(), v7.D(), 0, bad_memory);
+  __ st3(v6.D(), v7.D(), v8.D(), 0, bad_memory);
+  __ st3(v0.D(), v1.D(), v2.D(), 0, bad_memory);
+  __ st3(v31.H(), v0.H(), v1.H(), 2, bad_memory);
+  __ st3(v14.H(), v15.H(), v16.H(), 5, bad_memory);
+  __ st3(v21.H(), v22.H(), v23.H(), 6, bad_memory);
+  __ st3(v21.S(), v22.S(), v23.S(), 0, bad_memory);
+  __ st3(v11.S(), v12.S(), v13.S(), 1, bad_memory);
+  __ st3(v15.S(), v16.S(), v17.S(), 0, bad_memory);
+  __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), bad_memory);
+  __ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), bad_memory);
+  __ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
+  __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+  __ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
+  __ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
+  __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), bad_memory);
+  __ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
+  __ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+  __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), bad_memory);
+  __ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
+  __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
+  __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), bad_memory);
+  __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), bad_memory);
+  __ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
+  __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+  __ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), bad_memory);
+  __ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), bad_memory);
+  __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), bad_memory);
+  __ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), bad_memory);
+  __ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), bad_memory);
+  __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, bad_memory);
+  __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, bad_memory);
+  __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, bad_memory);
+  __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, bad_memory);
+  __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, bad_memory);
+  __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, bad_memory);
+  __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, bad_memory);
+  __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, bad_memory);
+  __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, bad_memory);
+  __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, bad_memory);
+  __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, bad_memory);
+  __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, bad_memory);
+
+  END_IMPLICIT_CHECK();
+  TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckSve) {
+  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                      CPUFeatures::kSVE2,
+                      CPUFeatures::kNEON);
+  START_IMPLICIT_CHECK();  // Presumably makes ip0 a faulting address; confirm.
+  // Emit SVE loads and stores that all use ip0 as their only base register.
+  SVEMemOperand bad_sve_memory = SVEMemOperand(ip0);
+
+  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+  // Simple, unpredicated Ldr/Str of predicate (p) and vector (z) registers.
+  __ Str(p12.VnD(), bad_sve_memory);
+  __ Str(p13.VnS(), bad_sve_memory);
+  __ Str(p14.VnH(), bad_sve_memory);
+  __ Str(p15.VnB(), bad_sve_memory);
+  __ Ldr(p8.VnD(), bad_sve_memory);
+  __ Ldr(p9.VnS(), bad_sve_memory);
+  __ Ldr(p10.VnH(), bad_sve_memory);
+  __ Ldr(p11.VnB(), bad_sve_memory);
+
+  __ Str(z0.VnD(), bad_sve_memory);
+  __ Str(z1.VnS(), bad_sve_memory);
+  __ Str(z2.VnH(), bad_sve_memory);
+  __ Str(z3.VnB(), bad_sve_memory);
+  __ Ldr(z20.VnD(), bad_sve_memory);
+  __ Ldr(z21.VnS(), bad_sve_memory);
+  __ Ldr(z22.VnH(), bad_sve_memory);
+  __ Ldr(z23.VnB(), bad_sve_memory);
+
+  // Structured accesses: predicated St1/Ld1, lane size matching the suffix.
+  __ St1b(z0.VnB(), p2, bad_sve_memory);
+  __ St1h(z1.VnH(), p1, bad_sve_memory);
+  __ St1w(z2.VnS(), p1, bad_sve_memory);
+  __ St1d(z3.VnD(), p2, bad_sve_memory);
+  __ Ld1b(z20.VnB(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1h(z21.VnH(), p2.Zeroing(), bad_sve_memory);
+  __ Ld1w(z22.VnS(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1d(z23.VnD(), p1.Zeroing(), bad_sve_memory);
+
+  // Structured, packed accesses: lanes wider than the b/h/w access suffix.
+  __ St1b(z2.VnH(), p1, bad_sve_memory);
+  __ St1b(z3.VnS(), p2, bad_sve_memory);
+  __ St1b(z4.VnD(), p2, bad_sve_memory);
+  __ St1h(z0.VnS(), p1, bad_sve_memory);
+  __ St1h(z1.VnD(), p1, bad_sve_memory);
+  __ St1w(z2.VnD(), p1, bad_sve_memory);
+  __ Ld1b(z20.VnH(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1b(z21.VnS(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1b(z22.VnD(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1h(z23.VnS(), p2.Zeroing(), bad_sve_memory);
+  __ Ld1h(z24.VnD(), p2.Zeroing(), bad_sve_memory);
+  __ Ld1w(z20.VnD(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1sb(z21.VnH(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1sb(z22.VnS(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1sb(z23.VnD(), p2.Zeroing(), bad_sve_memory);
+  __ Ld1sh(z24.VnS(), p2.Zeroing(), bad_sve_memory);
+  __ Ld1sh(z20.VnD(), p1.Zeroing(), bad_sve_memory);
+  __ Ld1sw(z21.VnD(), p1.Zeroing(), bad_sve_memory);
+
+  // Structured, interleaved accesses: St2-St4/Ld2-Ld4 on 2-4 registers.
+  __ St2b(z0.VnB(), z1.VnB(), p4, bad_sve_memory);
+  __ St2h(z1.VnH(), z2.VnH(), p4, bad_sve_memory);
+  __ St2w(z2.VnS(), z3.VnS(), p3, bad_sve_memory);
+  __ St2d(z3.VnD(), z4.VnD(), p4, bad_sve_memory);
+  __ Ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), bad_sve_memory);
+  __ Ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), bad_sve_memory);
+  __ Ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), bad_sve_memory);
+  __ Ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), bad_sve_memory);
+
+  __ St3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, bad_sve_memory);
+  __ St3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, bad_sve_memory);
+  __ St3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, bad_sve_memory);
+  __ St3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, bad_sve_memory);
+  __ Ld3b(z24.VnB(), z25.VnB(), z26.VnB(), p5.Zeroing(), bad_sve_memory);
+  __ Ld3h(z25.VnH(), z26.VnH(), z27.VnH(), p6.Zeroing(), bad_sve_memory);
+  __ Ld3w(z26.VnS(), z27.VnS(), z28.VnS(), p6.Zeroing(), bad_sve_memory);
+  __ Ld3d(z27.VnD(), z28.VnD(), z29.VnD(), p5.Zeroing(), bad_sve_memory);
+
+  __ St4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p4, bad_sve_memory);
+  __ St4h(z0.VnH(), z1.VnH(), z2.VnH(), z3.VnH(), p4, bad_sve_memory);
+  __ St4w(z1.VnS(), z2.VnS(), z3.VnS(), z4.VnS(), p3, bad_sve_memory);
+  __ St4d(z2.VnD(), z3.VnD(), z4.VnD(), z5.VnD(), p4, bad_sve_memory);
+  __ Ld4b(z25.VnB(),
+          z26.VnB(),
+          z27.VnB(),
+          z28.VnB(),
+          p5.Zeroing(),
+          bad_sve_memory);
+  __ Ld4h(z26.VnH(),
+          z27.VnH(),
+          z28.VnH(),
+          z29.VnH(),
+          p6.Zeroing(),
+          bad_sve_memory);
+  __ Ld4w(z27.VnS(),
+          z28.VnS(),
+          z29.VnS(),
+          z30.VnS(),
+          p6.Zeroing(),
+          bad_sve_memory);
+  __ Ld4d(z28.VnD(),
+          z29.VnD(),
+          z30.VnD(),
+          z31.VnD(),
+          p5.Zeroing(),
+          bad_sve_memory);
+
+  END_IMPLICIT_CHECK();
+  TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckAtomics) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kAtomics);
+  START_IMPLICIT_CHECK();  // Presumably where bad_memory is declared; confirm.
+  // Emit each atomic memory operation form with bad_memory as its address.
+  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+#define INST_LIST(OP)                 \
+  __ Ld##OP##b(w0, w0, bad_memory);   \
+  __ Ld##OP##ab(w0, w1, bad_memory);  \
+  __ Ld##OP##lb(w0, w2, bad_memory);  \
+  __ Ld##OP##alb(w0, w3, bad_memory); \
+  __ Ld##OP##h(w0, w0, bad_memory);   \
+  __ Ld##OP##ah(w0, w1, bad_memory);  \
+  __ Ld##OP##lh(w0, w2, bad_memory);  \
+  __ Ld##OP##alh(w0, w3, bad_memory); \
+  __ Ld##OP(w0, w0, bad_memory);      \
+  __ Ld##OP##a(w0, w1, bad_memory);   \
+  __ Ld##OP##l(w0, w2, bad_memory);   \
+  __ Ld##OP##al(w0, w3, bad_memory);  \
+  __ Ld##OP(x0, x0, bad_memory);      \
+  __ Ld##OP##a(x0, x1, bad_memory);   \
+  __ Ld##OP##l(x0, x2, bad_memory);   \
+  __ Ld##OP##al(x0, x3, bad_memory);  \
+  __ St##OP##b(w0, bad_memory);       \
+  __ St##OP##lb(w0, bad_memory);      \
+  __ St##OP##h(w0, bad_memory);       \
+  __ St##OP##lh(w0, bad_memory);      \
+  __ St##OP(w0, bad_memory);          \
+  __ St##OP##l(w0, bad_memory);       \
+  __ St##OP(x0, bad_memory);          \
+  __ St##OP##l(x0, bad_memory);
+  // Each INST_LIST(op) emits 24 instructions: 16 Ld<op> and 8 St<op> forms.
+  INST_LIST(add);
+  INST_LIST(set);
+  INST_LIST(eor);
+  INST_LIST(smin);
+  INST_LIST(smax);
+  INST_LIST(umin);
+  INST_LIST(umax);
+  INST_LIST(clr);
+
+#undef INST_LIST
+
+  END_IMPLICIT_CHECK();
+  TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckMops) {
+  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kMOPS);
+  START_IMPLICIT_CHECK();  // Presumably sets up the registers used; confirm.
+  // Emit the MOPS memory set and memory copy macro forms under test.
+  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+  __ Set(x15, ip1, ip0);
+  __ Setn(x15, ip1, ip0);
+  __ Setg(x15, ip1, ip0);
+  __ Setgn(x15, ip1, ip0);
+
+  __ Cpy(x15, ip0, ip1);
+  __ Cpyn(x15, ip0, ip1);
+  __ Cpyrn(x15, ip0, ip1);
+  __ Cpywn(x15, ip0, ip1);
+  __ Cpyf(x15, ip0, ip1);
+  __ Cpyfn(x15, ip0, ip1);
+  __ Cpyfrn(x15, ip0, ip1);
+  __ Cpyfwn(x15, ip0, ip1);
+
+  // The macro-assembler expands each instruction above into prologue, main and
+  // epilogue instructions, of which only the main instruction will fail. The
+  // code below sets x1 = 3 * x1 + 3, increasing the counter to account for
+  // those additional instructions and the following instructions.
+  __ Mov(x0, 3);
+  __ Mul(x1, x1, x0);
+  __ Add(x1, x1, x0);
+
+  END_IMPLICIT_CHECK();
+  TRY_RUN_IMPLICIT_CHECK();
+}
+#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
+
 #undef __
 #define __ masm->
 
@@ -5094,6 +6025,7 @@
                                                         3.0);
   VIXL_CHECK(res_double == 6.0);
 }
+
 #endif
 
 
diff --git a/test/aarch64/test-simulator-inputs-aarch64.h b/test/aarch64/test-simulator-inputs-aarch64.h
index d19a39e..620c164 100644
--- a/test/aarch64/test-simulator-inputs-aarch64.h
+++ b/test/aarch64/test-simulator-inputs-aarch64.h
@@ -37,7 +37,7 @@
 // This header should only be used by test/test-simulator-aarch64.cc, so it
 // doesn't need the usual header guard.
 #ifdef VIXL_AARCH64_TEST_SIMULATOR_INPUTS_AARCH64_H_
-#error This header should be inluded only once.
+#error This header should be included only once.
 #endif
 #define VIXL_AARCH64_TEST_SIMULATOR_INPUTS_AARCH64_H_
 
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index 58d9f48..6b5b958 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -26,12 +26,12 @@
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-assembler-aarch64.h"
 
 #define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
@@ -267,5 +267,1776 @@
   }
 }
 
+// The tests below cover Neon instructions. Because this form of test checks
+// the entire register state, the tests also require SVE features.
+
+TEST_SVE(neon_pmull) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kPmull1Q);
+  START();
+  // Emits 40 raw pmull/pmull2 encodings, then checks a machine-state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+    __ dci(0x4e20e000);  // pmull2 v0.8h, v0.16b, v0.16b
+    // vl128 state = 0x5eba4d4f
+    __ dci(0x4e20e228);  // pmull2 v8.8h, v17.16b, v0.16b
+    // vl128 state = 0x86bceb87
+    __ dci(0x4ee0e22a);  // pmull2 v10.1q, v17.2d, v0.2d
+    // vl128 state = 0x1332fe02
+    __ dci(0x0ee8e222);  // pmull v2.1q, v17.1d, v8.1d
+    // vl128 state = 0xd357dc7b
+    __ dci(0x4eece226);  // pmull2 v6.1q, v17.2d, v12.2d
+    // vl128 state = 0xdff409ad
+    __ dci(0x0eece276);  // pmull v22.1q, v19.1d, v12.1d
+    // vl128 state = 0xd8af1dc6
+    __ dci(0x0eede232);  // pmull v18.1q, v17.1d, v13.1d
+    // vl128 state = 0x41e6ed0e
+    __ dci(0x0efde216);  // pmull v22.1q, v16.1d, v29.1d
+    // vl128 state = 0x1f10365f
+    __ dci(0x0effe23e);  // pmull v30.1q, v17.1d, v31.1d
+    // vl128 state = 0x9779ece5
+    __ dci(0x0ee7e23f);  // pmull v31.1q, v17.1d, v7.1d
+    // vl128 state = 0x11fc8ce9
+    __ dci(0x0ee2e23e);  // pmull v30.1q, v17.1d, v2.1d
+    // vl128 state = 0x101d5a6f
+    __ dci(0x0ee2e23c);  // pmull v28.1q, v17.1d, v2.1d
+    // vl128 state = 0xcc4fe26e
+    __ dci(0x0eeae27d);  // pmull v29.1q, v19.1d, v10.1d
+    // vl128 state = 0xc84be9f4
+    __ dci(0x4eeae24d);  // pmull2 v13.1q, v18.2d, v10.2d
+    // vl128 state = 0x2fc540b4
+    __ dci(0x4eeae25d);  // pmull2 v29.1q, v18.2d, v10.2d
+    // vl128 state = 0x1b2d99cd
+    __ dci(0x4eeae2ed);  // pmull2 v13.1q, v23.2d, v10.2d
+    // vl128 state = 0x8a278b95
+    __ dci(0x4eeae2e9);  // pmull2 v9.1q, v23.2d, v10.2d
+    // vl128 state = 0x3359b4c8
+    __ dci(0x4efee2e8);  // pmull2 v8.1q, v23.2d, v30.2d
+    // vl128 state = 0x5c25ed31
+    __ dci(0x4effe3e0);  // pmull2 v0.1q, v31.2d, v31.2d
+    // vl128 state = 0x28ff67d1
+    __ dci(0x4eefe3d0);  // pmull2 v16.1q, v30.2d, v15.2d
+    // vl128 state = 0x1543436d
+    __ dci(0x4ee7e2d1);  // pmull2 v17.1q, v22.2d, v7.2d
+    // vl128 state = 0x71b8bc90
+    __ dci(0x4eefe3d5);  // pmull2 v21.1q, v30.2d, v15.2d
+    // vl128 state = 0x3d35ca02
+    __ dci(0x4eefe314);  // pmull2 v20.1q, v24.2d, v15.2d
+    // vl128 state = 0x40e8fade
+    __ dci(0x4eefe310);  // pmull2 v16.1q, v24.2d, v15.2d
+    // vl128 state = 0xb8affb87
+    __ dci(0x4eefe300);  // pmull2 v0.1q, v24.2d, v15.2d
+    // vl128 state = 0x4824ee5c
+    __ dci(0x4eede350);  // pmull2 v16.1q, v26.2d, v13.2d
+    // vl128 state = 0x39202868
+    __ dci(0x4ee7e354);  // pmull2 v20.1q, v26.2d, v7.2d
+    // vl128 state = 0xc8fde340
+    __ dci(0x4e27e356);  // pmull2 v22.8h, v26.16b, v7.16b
+    // vl128 state = 0x0f02316b
+    __ dci(0x4e37e15e);  // pmull2 v30.8h, v10.16b, v23.16b
+    // vl128 state = 0xced4f8bd
+    __ dci(0x4e33e05f);  // pmull2 v31.8h, v2.16b, v19.16b
+    // vl128 state = 0x0c76bdb3
+    __ dci(0x0e23e05e);  // pmull v30.8h, v2.8b, v3.8b
+    // vl128 state = 0x0e36962b
+    __ dci(0x4e23e25f);  // pmull2 v31.8h, v18.16b, v3.16b
+    // vl128 state = 0x11a8dcc3
+    __ dci(0x4e23e25b);  // pmull2 v27.8h, v18.16b, v3.16b
+    // vl128 state = 0xf01bfe16
+    __ dci(0x4e23e259);  // pmull2 v25.8h, v18.16b, v3.16b
+    // vl128 state = 0xea351afe
+    __ dci(0x4e22e2c9);  // pmull2 v9.8h, v22.16b, v2.16b
+    // vl128 state = 0x16e933ef
+    __ dci(0x4e3ae2c8);  // pmull2 v8.8h, v22.16b, v26.16b
+    // vl128 state = 0x02528a2a
+    __ dci(0x4e32e249);  // pmull2 v9.8h, v18.16b, v18.16b
+    // vl128 state = 0xe7e20633
+    __ dci(0x4e36e20d);  // pmull2 v13.8h, v16.16b, v22.16b
+    // vl128 state = 0x6f231732
+    __ dci(0x4e36e205);  // pmull2 v5.8h, v16.16b, v22.16b
+    // vl128 state = 0x423eb7ea
+    __ dci(0x4e22e20d);  // pmull2 v13.8h, v16.16b, v2.16b
+    // vl128 state = 0xfc0d1c14
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0xfc0d1c14,  // same value as the last "vl128 state" note above
+        0x4cb040a3,
+        0x4b913ebe,
+        0xfa35b836,
+        0x78745d20,
+        0x6666b09a,
+        0xee2868f4,
+        0x1936a795,
+        0x1025244a,
+        0xe8551950,
+        0xae73af02,
+        0x0fdd5fc7,
+        0x22e9827b,
+        0x384ce1ac,
+        0xc833cbeb,
+        0x255baab5,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha1_2reg) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA1);
+  START();
+  // Emits 30 raw sha1h/sha1su1 encodings, then checks a machine-state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x5e280800);  // sha1h s0, s0
+    // vl128 state = 0xc388d4f8
+    __ dci(0x5e280a28);  // sha1h s8, s17
+    // vl128 state = 0x5c88b904
+    __ dci(0x5e280a2a);  // sha1h s10, s17
+    // vl128 state = 0x6f63c596
+    __ dci(0x5e281aae);  // sha1su1 v14.4s, v21.4s
+    // vl128 state = 0x85e1119d
+    __ dci(0x5e281abe);  // sha1su1 v30.4s, v21.4s
+    // vl128 state = 0x9b814260
+    __ dci(0x5e281a0e);  // sha1su1 v14.4s, v16.4s
+    // vl128 state = 0x8ccca0ab
+    __ dci(0x5e281a0a);  // sha1su1 v10.4s, v16.4s
+    // vl128 state = 0x42262836
+    __ dci(0x5e281acb);  // sha1su1 v11.4s, v22.4s
+    // vl128 state = 0xabcde33d
+    __ dci(0x5e281acf);  // sha1su1 v15.4s, v22.4s
+    // vl128 state = 0xdf44e7be
+    __ dci(0x5e281adf);  // sha1su1 v31.4s, v22.4s
+    // vl128 state = 0x48c332a3
+    __ dci(0x5e280a9d);  // sha1h s29, s20
+    // vl128 state = 0x56bafe13
+    __ dci(0x5e28188d);  // sha1su1 v13.4s, v4.4s
+    // vl128 state = 0x218eb351
+    __ dci(0x5e2808cf);  // sha1h s15, s6
+    // vl128 state = 0xc1720d9f
+    __ dci(0x5e2808cb);  // sha1h s11, s6
+    // vl128 state = 0x67119e1c
+    __ dci(0x5e2808c9);  // sha1h s9, s6
+    // vl128 state = 0x31f69637
+    __ dci(0x5e2808c1);  // sha1h s1, s6
+    // vl128 state = 0x214a25ff
+    __ dci(0x5e280871);  // sha1h s17, s3
+    // vl128 state = 0xa5e88b55
+    __ dci(0x5e280815);  // sha1h s21, s0
+    // vl128 state = 0xc8c91e29
+    __ dci(0x5e28185d);  // sha1su1 v29.4s, v2.4s
+    // vl128 state = 0x5582c6a8
+    __ dci(0x5e28185f);  // sha1su1 v31.4s, v2.4s
+    // vl128 state = 0xd3288a61
+    __ dci(0x5e28087e);  // sha1h s30, s3
+    // vl128 state = 0x350b39c2
+    __ dci(0x5e28093f);  // sha1h s31, s9
+    // vl128 state = 0xbdc1ac98
+    __ dci(0x5e28093b);  // sha1h s27, s9
+    // vl128 state = 0x62f828bf
+    __ dci(0x5e28092b);  // sha1h s11, s9
+    // vl128 state = 0xc8f2f671
+    __ dci(0x5e2819bb);  // sha1su1 v27.4s, v13.4s
+    // vl128 state = 0x24ec8c34
+    __ dci(0x5e281b93);  // sha1su1 v19.4s, v28.4s
+    // vl128 state = 0x71e188de
+    __ dci(0x5e281b97);  // sha1su1 v23.4s, v28.4s
+    // vl128 state = 0x22490375
+    __ dci(0x5e281b95);  // sha1su1 v21.4s, v28.4s
+    // vl128 state = 0x016b70d1
+    __ dci(0x5e281b51);  // sha1su1 v17.4s, v26.4s
+    // vl128 state = 0xa6252086
+    __ dci(0x5e2819d3);  // sha1su1 v19.4s, v14.4s
+    // vl128 state = 0x78683885
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0x78683885,  // same value as the last "vl128 state" note above
+        0x59574c2a,
+        0x449978bf,
+        0x0ddab775,
+        0x1a043ef3,
+        0xf501e2e7,
+        0xa219e725,
+        0xf17f57c8,
+        0x4ccdbf99,
+        0x419d4fc3,
+        0x7302571d,
+        0xd6bee170,
+        0x7d81c301,
+        0xbaa7d729,
+        0xf33f0bc4,
+        0xff8b070a,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha1_3reg) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA1);
+  START();
+  // Emits 30 raw sha1c/sha1p/sha1m/sha1su0 encodings, then checks a hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x5e1f02bd);  // sha1c q29, s21, v31.4s
+    // vl128 state = 0xec2a37ad
+    __ dci(0x5e0810af);  // sha1p q15, s5, v8.4s
+    // vl128 state = 0x3fe9252a
+    __ dci(0x5e122227);  // sha1m q7, s17, v18.4s
+    // vl128 state = 0x4465789e
+    __ dci(0x5e0b039d);  // sha1c q29, s28, v11.4s
+    // vl128 state = 0x2186488a
+    __ dci(0x5e1a03e9);  // sha1c q9, s31, v26.4s
+    // vl128 state = 0x9eddf8e3
+    __ dci(0x5e0c138c);  // sha1p q12, s28, v12.4s
+    // vl128 state = 0x0ca7cd3d
+    __ dci(0x5e1f1316);  // sha1p q22, s24, v31.4s
+    // vl128 state = 0xb80a61c0
+    __ dci(0x5e052204);  // sha1m q4, s16, v5.4s
+    // vl128 state = 0x941821ca
+    __ dci(0x5e0a00d6);  // sha1c q22, s6, v10.4s
+    // vl128 state = 0x5e71ccae
+    __ dci(0x5e0e032e);  // sha1c q14, s25, v14.4s
+    // vl128 state = 0x7ed4486a
+    __ dci(0x5e1d1098);  // sha1p q24, s4, v29.4s
+    // vl128 state = 0x0978a637
+    __ dci(0x5e0400d9);  // sha1c q25, s6, v4.4s
+    // vl128 state = 0x34c8609e
+    __ dci(0x5e1a330e);  // sha1su0 v14.4s, v24.4s, v26.4s
+    // vl128 state = 0xcb078fad
+    __ dci(0x5e1e30f5);  // sha1su0 v21.4s, v7.4s, v30.4s
+    // vl128 state = 0x885200be
+    __ dci(0x5e1e32e1);  // sha1su0 v1.4s, v23.4s, v30.4s
+    // vl128 state = 0xabc6a188
+    __ dci(0x5e0733d3);  // sha1su0 v19.4s, v30.4s, v7.4s
+    // vl128 state = 0x37a4fe6f
+    __ dci(0x5e0b22e6);  // sha1m q6, s23, v11.4s
+    // vl128 state = 0x68b788d2
+    __ dci(0x5e011210);  // sha1p q16, s16, v1.4s
+    // vl128 state = 0x6b36b092
+    __ dci(0x5e1702e1);  // sha1c q1, s23, v23.4s
+    // vl128 state = 0x74ef56f5
+    __ dci(0x5e1e30f6);  // sha1su0 v22.4s, v7.4s, v30.4s
+    // vl128 state = 0x5a150dfd
+    __ dci(0x5e1b3348);  // sha1su0 v8.4s, v26.4s, v27.4s
+    // vl128 state = 0xe0a45d9c
+    __ dci(0x5e0a3041);  // sha1su0 v1.4s, v2.4s, v10.4s
+    // vl128 state = 0x6ba02d02
+    __ dci(0x5e17119a);  // sha1p q26, s12, v23.4s
+    // vl128 state = 0x3bf511fc
+    __ dci(0x5e0b32c7);  // sha1su0 v7.4s, v22.4s, v11.4s
+    // vl128 state = 0xf5c513b6
+    __ dci(0x5e063016);  // sha1su0 v22.4s, v0.4s, v6.4s
+    // vl128 state = 0x3eb44b28
+    __ dci(0x5e05323c);  // sha1su0 v28.4s, v17.4s, v5.4s
+    // vl128 state = 0x7c2d3adf
+    __ dci(0x5e1d132a);  // sha1p q10, s25, v29.4s
+    // vl128 state = 0x2b0963c4
+    __ dci(0x5e13003c);  // sha1c q28, s1, v19.4s
+    // vl128 state = 0x4a582d00
+    __ dci(0x5e13322c);  // sha1su0 v12.4s, v17.4s, v19.4s
+    // vl128 state = 0x7bb2cc8c
+    __ dci(0x5e032330);  // sha1m q16, s25, v3.4s
+    // vl128 state = 0x2a8b4c0d
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0x2a8b4c0d,  // same value as the last "vl128 state" note above
+        0x114e25bb,
+        0x4f035af9,
+        0x23db7966,
+        0x3d106b42,
+        0x62651fcf,
+        0x44c20879,
+        0xadf71d73,
+        0xe6858f82,
+        0x93a74ae5,
+        0xc270310e,
+        0x3d07058c,
+        0x69f83d0e,
+        0x28c5813b,
+        0xbb9de2c1,
+        0xe06b94cd,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha2h) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA2);
+  START();
+  // Emits 30 raw sha256h/sha256h2 encodings, then checks a state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x5e0152a2);  // sha256h2 q2, q21, v1.4s
+    // vl128 state = 0x6bda8984
+    __ dci(0x5e1552b2);  // sha256h2 q18, q21, v21.4s
+    // vl128 state = 0xe985c68a
+    __ dci(0x5e055293);  // sha256h2 q19, q20, v5.4s
+    // vl128 state = 0xab18a98b
+    __ dci(0x5e055297);  // sha256h2 q23, q20, v5.4s
+    // vl128 state = 0x896bad28
+    __ dci(0x5e0752a7);  // sha256h2 q7, q21, v7.4s
+    // vl128 state = 0x4e00ba08
+    __ dci(0x5e175223);  // sha256h2 q3, q17, v23.4s
+    // vl128 state = 0x380f3893
+    __ dci(0x5e1f5262);  // sha256h2 q2, q19, v31.4s
+    // vl128 state = 0xb431122d
+    __ dci(0x5e1f5272);  // sha256h2 q18, q19, v31.4s
+    // vl128 state = 0x18140047
+    __ dci(0x5e1e4262);  // sha256h q2, q19, v30.4s
+    // vl128 state = 0x721779be
+    __ dci(0x5e164363);  // sha256h q3, q27, v22.4s
+    // vl128 state = 0x383ad878
+    __ dci(0x5e175361);  // sha256h2 q1, q27, v23.4s
+    // vl128 state = 0xd985bd85
+    __ dci(0x5e115360);  // sha256h2 q0, q27, v17.4s
+    // vl128 state = 0xfa5e77f3
+    __ dci(0x5e135270);  // sha256h2 q16, q19, v19.4s
+    // vl128 state = 0x4fc1f5cc
+    __ dci(0x5e195260);  // sha256h2 q0, q19, v25.4s
+    // vl128 state = 0x89435952
+    __ dci(0x5e1952c4);  // sha256h2 q4, q22, v25.4s
+    // vl128 state = 0x93c60c86
+    __ dci(0x5e1a52c6);  // sha256h2 q6, q22, v26.4s
+    // vl128 state = 0xedc42105
+    __ dci(0x5e1a52c4);  // sha256h2 q4, q22, v26.4s
+    // vl128 state = 0xd5d638a8
+    __ dci(0x5e1a4285);  // sha256h q5, q20, v26.4s
+    // vl128 state = 0x9f9da446
+    __ dci(0x5e1a428d);  // sha256h q13, q20, v26.4s
+    // vl128 state = 0x87d49cfb
+    __ dci(0x5e1b42cf);  // sha256h q15, q22, v27.4s
+    // vl128 state = 0xa6802b10
+    __ dci(0x5e1b43ed);  // sha256h q13, q31, v27.4s
+    // vl128 state = 0x2e346937
+    __ dci(0x5e0b436f);  // sha256h q15, q27, v11.4s
+    // vl128 state = 0x1005f372
+    __ dci(0x5e03433f);  // sha256h q31, q25, v3.4s
+    // vl128 state = 0xd908918c
+    __ dci(0x5e13532f);  // sha256h2 q15, q25, v19.4s
+    // vl128 state = 0x31c73fe0
+    __ dci(0x5e01533f);  // sha256h2 q31, q25, v1.4s
+    // vl128 state = 0x84e35a20
+    __ dci(0x5e03523d);  // sha256h2 q29, q17, v3.4s
+    // vl128 state = 0x40da34aa
+    __ dci(0x5e0b527c);  // sha256h2 q28, q19, v11.4s
+    // vl128 state = 0x506a21d9
+    __ dci(0x5e0f5238);  // sha256h2 q24, q17, v15.4s
+    // vl128 state = 0x6a67f033
+    __ dci(0x5e0d5210);  // sha256h2 q16, q16, v13.4s
+    // vl128 state = 0x317e084c
+    __ dci(0x5e0d5214);  // sha256h2 q20, q16, v13.4s
+    // vl128 state = 0xdd0eb379
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0xdd0eb379,  // same value as the last "vl128 state" note above
+        0x15384d69,
+        0x32bbc73a,
+        0xc5879e77,
+        0x9241294d,
+        0xfc01bad8,
+        0xf5e79af5,
+        0xee66e696,
+        0x535158e8,
+        0x09cfa8b6,
+        0x8cd83eae,
+        0x93ff18b0,
+        0x561444e4,
+        0xa6249eea,
+        0x830e4c73,
+        0xb516eaae,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha2su0) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA2);
+  START();
+  // Emits 30 raw sha256su0 encodings, then checks a machine-state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x5e2828e3);  // sha256su0 v3.4s, v7.4s
+    // vl128 state = 0xbc7a7764
+    __ dci(0x5e282be1);  // sha256su0 v1.4s, v31.4s
+    // vl128 state = 0x6138a856
+    __ dci(0x5e282be9);  // sha256su0 v9.4s, v31.4s
+    // vl128 state = 0x49c6be17
+    __ dci(0x5e282beb);  // sha256su0 v11.4s, v31.4s
+    // vl128 state = 0xca658743
+    __ dci(0x5e2829bb);  // sha256su0 v27.4s, v13.4s
+    // vl128 state = 0x1bf1d233
+    __ dci(0x5e2829ba);  // sha256su0 v26.4s, v13.4s
+    // vl128 state = 0xafb0c6ae
+    __ dci(0x5e2829aa);  // sha256su0 v10.4s, v13.4s
+    // vl128 state = 0x2182e90d
+    __ dci(0x5e282b2e);  // sha256su0 v14.4s, v25.4s
+    // vl128 state = 0x401d297d
+    __ dci(0x5e282aaf);  // sha256su0 v15.4s, v21.4s
+    // vl128 state = 0x6c01fefa
+    __ dci(0x5e282aad);  // sha256su0 v13.4s, v21.4s
+    // vl128 state = 0x0f4c191d
+    __ dci(0x5e282a7d);  // sha256su0 v29.4s, v19.4s
+    // vl128 state = 0xcf26aa1b
+    __ dci(0x5e282ad9);  // sha256su0 v25.4s, v22.4s
+    // vl128 state = 0xae04081e
+    __ dci(0x5e282ac9);  // sha256su0 v9.4s, v22.4s
+    // vl128 state = 0x08149009
+    __ dci(0x5e282acb);  // sha256su0 v11.4s, v22.4s
+    // vl128 state = 0xa691e487
+    __ dci(0x5e282ac3);  // sha256su0 v3.4s, v22.4s
+    // vl128 state = 0xd728e1b5
+    __ dci(0x5e282ac7);  // sha256su0 v7.4s, v22.4s
+    // vl128 state = 0x120fac30
+    __ dci(0x5e282ac5);  // sha256su0 v5.4s, v22.4s
+    // vl128 state = 0x88086f82
+    __ dci(0x5e282ac4);  // sha256su0 v4.4s, v22.4s
+    // vl128 state = 0x625160b7
+    __ dci(0x5e282a65);  // sha256su0 v5.4s, v19.4s
+    // vl128 state = 0x308feecd
+    __ dci(0x5e282a6d);  // sha256su0 v13.4s, v19.4s
+    // vl128 state = 0x65f03097
+    __ dci(0x5e282a65);  // sha256su0 v5.4s, v19.4s
+    // vl128 state = 0x44d9fbb6
+    __ dci(0x5e282a67);  // sha256su0 v7.4s, v19.4s
+    // vl128 state = 0x694fe04a
+    __ dci(0x5e282a17);  // sha256su0 v23.4s, v16.4s
+    // vl128 state = 0x3d5c139b
+    __ dci(0x5e282a13);  // sha256su0 v19.4s, v16.4s
+    // vl128 state = 0x922f40a5
+    __ dci(0x5e282b3b);  // sha256su0 v27.4s, v25.4s
+    // vl128 state = 0x4f9c34f2
+    __ dci(0x5e282ab9);  // sha256su0 v25.4s, v21.4s
+    // vl128 state = 0x18a4f581
+    __ dci(0x5e282ab1);  // sha256su0 v17.4s, v21.4s
+    // vl128 state = 0x69da3844
+    __ dci(0x5e282ab9);  // sha256su0 v25.4s, v21.4s
+    // vl128 state = 0x57f8ce0b
+    __ dci(0x5e282a1d);  // sha256su0 v29.4s, v16.4s
+    // vl128 state = 0xafa03001
+    __ dci(0x5e282ad5);  // sha256su0 v21.4s, v22.4s
+    // vl128 state = 0x029b78a8
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0x029b78a8,  // same value as the last "vl128 state" note above
+        0x479a8911,
+        0x6bdbdb48,
+        0x5ef3718b,
+        0x695ce173,
+        0x586543d0,
+        0xd00a22be,
+        0xe63a91b9,
+        0x42bb89a2,
+        0xea48ee79,
+        0x9788ac35,
+        0x1e8599a3,
+        0xd0d2d6ee,
+        0xfe7aaaf7,
+        0x77da6831,
+        0xb93fb875,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha2su1) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA2);
+  START();
+  // Emits 30 raw sha256su1 encodings, then checks a machine-state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x5e1e6146);  // sha256su1 v6.4s, v10.4s, v30.4s
+    // vl128 state = 0x3bbf7782
+    __ dci(0x5e0f6144);  // sha256su1 v4.4s, v10.4s, v15.4s
+    // vl128 state = 0xf8c83149
+    __ dci(0x5e0e6174);  // sha256su1 v20.4s, v11.4s, v14.4s
+    // vl128 state = 0x3b8c353b
+    __ dci(0x5e0e6170);  // sha256su1 v16.4s, v11.4s, v14.4s
+    // vl128 state = 0x1041e30e
+    __ dci(0x5e0a6131);  // sha256su1 v17.4s, v9.4s, v10.4s
+    // vl128 state = 0xe4d81cd2
+    __ dci(0x5e0a6135);  // sha256su1 v21.4s, v9.4s, v10.4s
+    // vl128 state = 0x24869db3
+    __ dci(0x5e0a6131);  // sha256su1 v17.4s, v9.4s, v10.4s
+    // vl128 state = 0xfb093436
+    __ dci(0x5e0a6199);  // sha256su1 v25.4s, v12.4s, v10.4s
+    // vl128 state = 0x0c7939ba
+    __ dci(0x5e0e639b);  // sha256su1 v27.4s, v28.4s, v14.4s
+    // vl128 state = 0xa7e5c40a
+    __ dci(0x5e0663ab);  // sha256su1 v11.4s, v29.4s, v6.4s
+    // vl128 state = 0xc4ae571c
+    __ dci(0x5e06619b);  // sha256su1 v27.4s, v12.4s, v6.4s
+    // vl128 state = 0xf84ef221
+    __ dci(0x5e066199);  // sha256su1 v25.4s, v12.4s, v6.4s
+    // vl128 state = 0x24f98d3c
+    __ dci(0x5e0e6118);  // sha256su1 v24.4s, v8.4s, v14.4s
+    // vl128 state = 0xcdb43a3b
+    __ dci(0x5e0f601a);  // sha256su1 v26.4s, v0.4s, v15.4s
+    // vl128 state = 0x85fd37e9
+    __ dci(0x5e096012);  // sha256su1 v18.4s, v0.4s, v9.4s
+    // vl128 state = 0xabccd3f6
+    __ dci(0x5e0c601a);  // sha256su1 v26.4s, v0.4s, v12.4s
+    // vl128 state = 0x8c0232e5
+    __ dci(0x5e1c602a);  // sha256su1 v10.4s, v1.4s, v28.4s
+    // vl128 state = 0xcdcf37ba
+    __ dci(0x5e1e622e);  // sha256su1 v14.4s, v17.4s, v30.4s
+    // vl128 state = 0x25129c9a
+    __ dci(0x5e1e623e);  // sha256su1 v30.4s, v17.4s, v30.4s
+    // vl128 state = 0xd0a281b7
+    __ dci(0x5e1e630e);  // sha256su1 v14.4s, v24.4s, v30.4s
+    // vl128 state = 0x3ed92f18
+    __ dci(0x5e1f639e);  // sha256su1 v30.4s, v28.4s, v31.4s
+    // vl128 state = 0xda1056b9
+    __ dci(0x5e0f629f);  // sha256su1 v31.4s, v20.4s, v15.4s
+    // vl128 state = 0x367274fa
+    __ dci(0x5e0f63bd);  // sha256su1 v29.4s, v29.4s, v15.4s
+    // vl128 state = 0x46a79748
+    __ dci(0x5e0f63b5);  // sha256su1 v21.4s, v29.4s, v15.4s
+    // vl128 state = 0xdc427315
+    __ dci(0x5e0b63f7);  // sha256su1 v23.4s, v31.4s, v11.4s
+    // vl128 state = 0x91547f41
+    __ dci(0x5e0263e7);  // sha256su1 v7.4s, v31.4s, v2.4s
+    // vl128 state = 0x1c233ffa
+    __ dci(0x5e0062f7);  // sha256su1 v23.4s, v23.4s, v0.4s
+    // vl128 state = 0x8c2948a1
+    __ dci(0x5e1062c7);  // sha256su1 v7.4s, v22.4s, v16.4s
+    // vl128 state = 0x8b72f498
+    __ dci(0x5e1062c6);  // sha256su1 v6.4s, v22.4s, v16.4s
+    // vl128 state = 0x43d27746
+    __ dci(0x5e1063ee);  // sha256su1 v14.4s, v31.4s, v16.4s
+    // vl128 state = 0xa864e589
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0xa864e589,  // same value as the last "vl128 state" note above
+        0xc588dfe0,
+        0x171add38,
+        0x884ca9db,
+        0x5f47fb6a,
+        0x0bd024c5,
+        0xa6921cce,
+        0x01dc8899,
+        0x0f5b4b19,
+        0x948260c1,
+        0x4d4faafe,
+        0x76ee7ff7,
+        0xd9a56156,
+        0x63c8e138,
+        0xe687f7c3,
+        0x51785434,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha3) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA3);
+  START();
+  // Emits 60 raw rax1/bcax/eor3/xar encodings, then checks a state hash.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 60 * kInstructionSize);
+    __ dci(0xce608c00);  // rax1 v0.2d, v0.2d, v0.2d
+    // vl128 state = 0x960c2b9f
+    __ dci(0xce608e28);  // rax1 v8.2d, v17.2d, v0.2d
+    // vl128 state = 0x89ea3f7b
+    __ dci(0xce618e6c);  // rax1 v12.2d, v19.2d, v1.2d
+    // vl128 state = 0xa7801384
+    __ dci(0xce718e48);  // rax1 v8.2d, v18.2d, v17.2d
+    // vl128 state = 0x4477d70d
+    __ dci(0xce738e60);  // rax1 v0.2d, v19.2d, v19.2d
+    // vl128 state = 0xdee66854
+    __ dci(0xce6b8e61);  // rax1 v1.2d, v19.2d, v11.2d
+    // vl128 state = 0x2e383dc2
+    __ dci(0xce6e8e60);  // rax1 v0.2d, v19.2d, v14.2d
+    // vl128 state = 0xa022bb6d
+    __ dci(0xce6e8e62);  // rax1 v2.2d, v19.2d, v14.2d
+    // vl128 state = 0x923f5d32
+    __ dci(0xce668e23);  // rax1 v3.2d, v17.2d, v6.2d
+    // vl128 state = 0xc2c6ca00
+    __ dci(0xce260e33);  // bcax v19.16b, v17.16b, v6.16b, v3.16b
+    // vl128 state = 0x517e85e9
+    __ dci(0xce260e23);  // bcax v3.16b, v17.16b, v6.16b, v3.16b
+    // vl128 state = 0xbcf4c332
+    __ dci(0xce260e93);  // bcax v19.16b, v20.16b, v6.16b, v3.16b
+    // vl128 state = 0x5d9d51ef
+    __ dci(0xce260a11);  // bcax v17.16b, v16.16b, v6.16b, v2.16b
+    // vl128 state = 0x69ce0099
+    __ dci(0xce260a15);  // bcax v21.16b, v16.16b, v6.16b, v2.16b
+    // vl128 state = 0x9a2cdc9f
+    __ dci(0xce244a11);  // bcax v17.16b, v16.16b, v4.16b, v18.16b
+    // vl128 state = 0x27eeff29
+    __ dci(0xce304a10);  // bcax v16.16b, v16.16b, v16.16b, v18.16b
+    // vl128 state = 0x6d586875
+    __ dci(0xce314b18);  // bcax v24.16b, v24.16b, v17.16b, v18.16b
+    // vl128 state = 0xe38b6054
+    __ dci(0xce214b28);  // bcax v8.16b, v25.16b, v1.16b, v18.16b
+    // vl128 state = 0x27a3f5f6
+    __ dci(0xce294f38);  // bcax v24.16b, v25.16b, v9.16b, v19.16b
+    // vl128 state = 0x7d7ffa9b
+    __ dci(0xce214e39);  // bcax v25.16b, v17.16b, v1.16b, v19.16b
+    // vl128 state = 0x936374f0
+    __ dci(0xce216a3d);  // bcax v29.16b, v17.16b, v1.16b, v26.16b
+    // vl128 state = 0x1c5136d5
+    __ dci(0xce296b39);  // bcax v25.16b, v25.16b, v9.16b, v26.16b
+    // vl128 state = 0x75cd7131
+    __ dci(0xce216338);  // bcax v24.16b, v25.16b, v1.16b, v24.16b
+    // vl128 state = 0xcc747626
+    __ dci(0xce2163f9);  // bcax v25.16b, v31.16b, v1.16b, v24.16b
+    // vl128 state = 0x9409c8bc
+    __ dci(0xce2043f1);  // bcax v17.16b, v31.16b, v0.16b, v16.16b
+    // vl128 state = 0x8db3a0c8
+    __ dci(0xce2043f5);  // bcax v21.16b, v31.16b, v0.16b, v16.16b
+    // vl128 state = 0xa55f8d7d
+    __ dci(0xce2043e5);  // bcax v5.16b, v31.16b, v0.16b, v16.16b
+    // vl128 state = 0xe1960c7a
+    __ dci(0xce224be7);  // bcax v7.16b, v31.16b, v2.16b, v18.16b
+    // vl128 state = 0xc9599bde
+    __ dci(0xce204bb7);  // bcax v23.16b, v29.16b, v0.16b, v18.16b
+    // vl128 state = 0x7176d08d
+    __ dci(0xce004b9f);  // eor3 v31.16b, v28.16b, v0.16b, v18.16b
+    // vl128 state = 0x10620821
+    __ dci(0xce000baf);  // eor3 v15.16b, v29.16b, v0.16b, v2.16b
+    // vl128 state = 0x0aba0288
+    __ dci(0xce0a0bab);  // eor3 v11.16b, v29.16b, v10.16b, v2.16b
+    // vl128 state = 0xe6517156
+    __ dci(0xce0e1baf);  // eor3 v15.16b, v29.16b, v14.16b, v6.16b
+    // vl128 state = 0x6b7021fb
+    __ dci(0xce0e3fa7);  // eor3 v7.16b, v29.16b, v14.16b, v15.16b
+    // vl128 state = 0x05761b1f
+    __ dci(0xce0e2fe5);  // eor3 v5.16b, v31.16b, v14.16b, v11.16b
+    // vl128 state = 0xe01822c6
+    __ dci(0xce2e2fc7);  // bcax v7.16b, v30.16b, v14.16b, v11.16b
+    // vl128 state = 0xdc6444d7
+    __ dci(0xce3e2dcf);  // bcax v15.16b, v14.16b, v30.16b, v11.16b
+    // vl128 state = 0xa5ecad2e
+    __ dci(0xce3e3fdf);  // bcax v31.16b, v30.16b, v30.16b, v15.16b
+    // vl128 state = 0x2124dc42
+    __ dci(0xce3a3ede);  // bcax v30.16b, v22.16b, v26.16b, v15.16b
+    // vl128 state = 0x57f77204
+    __ dci(0xce3a2e9c);  // bcax v28.16b, v20.16b, v26.16b, v11.16b
+    // vl128 state = 0x6e8d303d
+    __ dci(0xce3a2294);  // bcax v20.16b, v20.16b, v26.16b, v8.16b
+    // vl128 state = 0xdb53d42c
+    __ dci(0xce38029c);  // bcax v28.16b, v20.16b, v24.16b, v0.16b
+    // vl128 state = 0x258d49b8
+    __ dci(0xce38088c);  // bcax v12.16b, v4.16b, v24.16b, v2.16b
+    // vl128 state = 0xe751a348
+    __ dci(0xce28008e);  // bcax v14.16b, v4.16b, v8.16b, v0.16b
+    // vl128 state = 0x8ce0aa1a
+    __ dci(0xce28008a);  // bcax v10.16b, v4.16b, v8.16b, v0.16b
+    // vl128 state = 0x1fdf89a5
+    __ dci(0xce280088);  // bcax v8.16b, v4.16b, v8.16b, v0.16b
+    // vl128 state = 0xcc51f5e1
+    __ dci(0xce2a1089);  // bcax v9.16b, v4.16b, v10.16b, v4.16b
+    // vl128 state = 0xdaf766b0
+    __ dci(0xce0b1081);  // eor3 v1.16b, v4.16b, v11.16b, v4.16b
+    // vl128 state = 0x2da7deb5
+    __ dci(0xce0a1011);  // eor3 v17.16b, v0.16b, v10.16b, v4.16b
+    // vl128 state = 0xcc86f5d4
+    __ dci(0xce121010);  // eor3 v16.16b, v0.16b, v18.16b, v4.16b
+    // vl128 state = 0xfb722105
+    __ dci(0xce921118);  // xar v24.2d, v8.2d, v18.2d, #4
+    // vl128 state = 0x9a7752e3
+    __ dci(0xce9a1199);  // xar v25.2d, v12.2d, v26.2d, #4
+    // vl128 state = 0x83a251c2
+    __ dci(0xce9e11dd);  // xar v29.2d, v14.2d, v30.2d, #4
+    // vl128 state = 0x1e31c9d5
+    __ dci(0xce9e915c);  // xar v28.2d, v10.2d, v30.2d, #36
+    // vl128 state = 0x0e421d73
+    __ dci(0xce1e115d);  // eor3 v29.16b, v10.16b, v30.16b, v4.16b
+    // vl128 state = 0xb5a8c677
+    __ dci(0xce3e515c);  // bcax v28.16b, v10.16b, v30.16b, v20.16b
+    // vl128 state = 0x21587300
+    __ dci(0xce3e5154);  // bcax v20.16b, v10.16b, v30.16b, v20.16b
+    // vl128 state = 0x9459c629
+    __ dci(0xce3e1056);  // bcax v22.16b, v2.16b, v30.16b, v4.16b
+    // vl128 state = 0xdb02263a
+    __ dci(0xce2a105e);  // bcax v30.16b, v2.16b, v10.16b, v4.16b
+    // vl128 state = 0xc9d210aa
+    __ dci(0xce3a5056);  // bcax v22.16b, v2.16b, v26.16b, v20.16b
+    // vl128 state = 0x4cc56293
+  }
+  // The generated code loads `state` (presumably the hash result) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // indexed by Q-sized lane count - 1
+        0x4cc56293,  // same value as the last "vl128 state" note above
+        0xee8bac03,
+        0xc1253ac9,
+        0x9fe5aa0f,
+        0x43df27f4,
+        0x19f03be6,
+        0xd26c928b,
+        0x7b9da4c4,
+        0xe13149a7,
+        0x9fa11ed9,
+        0xe02cc4dd,
+        0x7848dfe7,
+        0x5ed1726f,
+        0x983e0123,
+        0x34166240,
+        0xc4ee172f,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha512) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA512);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0xce6583cc);  // sha512h q12, q30, v5.2d
+    // vl128 state = 0xecc5733a
+    __ dci(0xce6586c8);  // sha512h2 q8, q22, v5.2d
+    // vl128 state = 0xe05f2087
+    __ dci(0xce7586e0);  // sha512h2 q0, q23, v21.2d
+    // vl128 state = 0x1925555b
+    __ dci(0xce7187e8);  // sha512h2 q8, q31, v17.2d
+    // vl128 state = 0x891dba65
+    __ dci(0xce7586ec);  // sha512h2 q12, q23, v21.2d
+    // vl128 state = 0xdfbe3239
+    __ dci(0xce7580fc);  // sha512h q28, q7, v21.2d
+    // vl128 state = 0xba49dbc1
+    __ dci(0xce7580f4);  // sha512h q20, q7, v21.2d
+    // vl128 state = 0x3ad11a23
+    __ dci(0xce6780f6);  // sha512h q22, q7, v7.2d
+    // vl128 state = 0xcf9e1803
+    __ dci(0xce6780f7);  // sha512h q23, q7, v7.2d
+    // vl128 state = 0xe2baee15
+    __ dci(0xce6785e7);  // sha512h2 q7, q15, v7.2d
+    // vl128 state = 0x900a337c
+    __ dci(0xce6f8565);  // sha512h2 q5, q11, v15.2d
+    // vl128 state = 0xc6e5d7eb
+    __ dci(0xce6f8424);  // sha512h2 q4, q1, v15.2d
+    // vl128 state = 0xcbcb6ac1
+    __ dci(0xce6b84a6);  // sha512h2 q6, q5, v11.2d
+    // vl128 state = 0xa3c1a679
+    __ dci(0xce7b848e);  // sha512h2 q14, q4, v27.2d
+    // vl128 state = 0x47c4e54d
+    __ dci(0xce7d849e);  // sha512h2 q30, q4, v29.2d
+    // vl128 state = 0x9f519a29
+    __ dci(0xce7f859c);  // sha512h2 q28, q12, v31.2d
+    // vl128 state = 0xa4433415
+    __ dci(0xce778494);  // sha512h2 q20, q4, v23.2d
+    // vl128 state = 0xf03a69ec
+    __ dci(0xce778484);  // sha512h2 q4, q4, v23.2d
+    // vl128 state = 0x2c728333
+    __ dci(0xce77850c);  // sha512h2 q12, q8, v23.2d
+    // vl128 state = 0xaedc423e
+    __ dci(0xce77815c);  // sha512h q28, q10, v23.2d
+    // vl128 state = 0xea9346ea
+    __ dci(0xce7381cc);  // sha512h q12, q14, v19.2d
+    // vl128 state = 0x05ad87d1
+    __ dci(0xce7a81dc);  // sha512h q28, q14, v26.2d
+    // vl128 state = 0x9b1cd7b3
+    __ dci(0xce7285d4);  // sha512h2 q20, q14, v18.2d
+    // vl128 state = 0x154201ac
+    __ dci(0xce7280d6);  // sha512h q22, q6, v18.2d
+    // vl128 state = 0xd8640492
+    __ dci(0xce7a81d4);  // sha512h q20, q14, v26.2d
+    // vl128 state = 0x908eb258
+    __ dci(0xce7281f0);  // sha512h q16, q15, v18.2d
+    // vl128 state = 0x0067f162
+    __ dci(0xce728572);  // sha512h2 q18, q11, v18.2d
+    // vl128 state = 0xca9bc751
+    __ dci(0xce728422);  // sha512h2 q2, q1, v18.2d
+    // vl128 state = 0x06b7318d
+    __ dci(0xce738412);  // sha512h2 q18, q0, v19.2d
+    // vl128 state = 0xad019588
+    __ dci(0xce718016);  // sha512h q22, q0, v17.2d
+    // vl128 state = 0x55a29e9b
+    __ dci(0xce718834);  // sha512su1 v20.2d, v1.2d, v17.2d
+    // vl128 state = 0x953a9c7a
+    __ dci(0xce738876);  // sha512su1 v22.2d, v3.2d, v19.2d
+    // vl128 state = 0x4f194c71
+    __ dci(0xce638826);  // sha512su1 v6.2d, v1.2d, v3.2d
+    // vl128 state = 0x08e50d47
+    __ dci(0xce6b886e);  // sha512su1 v14.2d, v3.2d, v11.2d
+    // vl128 state = 0x4bdfb870
+    __ dci(0xce6b88de);  // sha512su1 v30.2d, v6.2d, v11.2d
+    // vl128 state = 0xbcf4b6c5
+    __ dci(0xce7f88df);  // sha512su1 v31.2d, v6.2d, v31.2d
+    // vl128 state = 0x916dede1
+    __ dci(0xce6f8acf);  // sha512su1 v15.2d, v22.2d, v15.2d
+    // vl128 state = 0x3b776003
+    __ dci(0xce6d8bcb);  // sha512su1 v11.2d, v30.2d, v13.2d
+    // vl128 state = 0x5d5cb7d9
+    __ dci(0xce6d83ea);  // sha512h q10, q31, v13.2d
+    // vl128 state = 0x18df9e46
+    __ dci(0xce6d8328);  // sha512h q8, q25, v13.2d
+    // vl128 state = 0xde5807d0
+    __ dci(0xce6583b8);  // sha512h q24, q29, v5.2d
+    // vl128 state = 0x861020e7
+    __ dci(0xce6d83f9);  // sha512h q25, q31, v13.2d
+    // vl128 state = 0x39d960f4
+    __ dci(0xce6d8b78);  // sha512su1 v24.2d, v27.2d, v13.2d
+    // vl128 state = 0x3afc2b5c
+    __ dci(0xce6c8968);  // sha512su1 v8.2d, v11.2d, v12.2d
+    // vl128 state = 0x74d44114
+    __ dci(0xce6c8b49);  // sha512su1 v9.2d, v26.2d, v12.2d
+    // vl128 state = 0x72e6b5cd
+    __ dci(0xce6c8b39);  // sha512su1 v25.2d, v25.2d, v12.2d
+    // vl128 state = 0x6aaa4658
+    __ dci(0xce6c8b9d);  // sha512su1 v29.2d, v28.2d, v12.2d
+    // vl128 state = 0x7c076c9b
+    __ dci(0xce648b0d);  // sha512su1 v13.2d, v24.2d, v4.2d
+    // vl128 state = 0x1082519d
+    __ dci(0xce648385);  // sha512h q5, q28, v4.2d
+    // vl128 state = 0x9ed9d190
+    __ dci(0xce648715);  // sha512h2 q21, q24, v4.2d
+    // vl128 state = 0xaace5a02
+  }
+
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0xaace5a02,
+        0x912905de,
+        0xc62c0756,
+        0xac6646d5,
+        0xd3c2e6af,
+        0x029ae35f,
+        0xf5e83b54,
+        0x49f8d50c,
+        0xc5175320,
+        0xb51c8ebd,
+        0x2dc184b0,
+        0x01e01875,
+        0x28df0d5a,
+        0x01d2fff2,
+        0x5f5f5909,
+        0x6aead9d8,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sha512su0) {  // State-hash test for sha512su0 encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSHA512);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 30 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0xcec083f6);  // sha512su0 v22.2d, v31.2d
+    // vl128 state = 0xf7a54f2b
+    __ dci(0xcec083e6);  // sha512su0 v6.2d, v31.2d
+    // vl128 state = 0x919c170d
+    __ dci(0xcec08347);  // sha512su0 v7.2d, v26.2d
+    // vl128 state = 0x8a1800d6
+    __ dci(0xcec082c6);  // sha512su0 v6.2d, v22.2d
+    // vl128 state = 0x353aa8bf
+    __ dci(0xcec082c4);  // sha512su0 v4.2d, v22.2d
+    // vl128 state = 0x87d75b6c
+    __ dci(0xcec082c0);  // sha512su0 v0.2d, v22.2d
+    // vl128 state = 0xf2ee6974
+    __ dci(0xcec082c1);  // sha512su0 v1.2d, v22.2d
+    // vl128 state = 0xf2ec1e17
+    __ dci(0xcec082c0);  // sha512su0 v0.2d, v22.2d
+    // vl128 state = 0x1bcca060
+    __ dci(0xcec082c4);  // sha512su0 v4.2d, v22.2d
+    // vl128 state = 0x67773394
+    __ dci(0xcec082c5);  // sha512su0 v5.2d, v22.2d
+    // vl128 state = 0xbb344c8d
+    __ dci(0xcec083e1);  // sha512su0 v1.2d, v31.2d
+    // vl128 state = 0x595e2eb0
+    __ dci(0xcec081a5);  // sha512su0 v5.2d, v13.2d
+    // vl128 state = 0x7d7f4e15
+    __ dci(0xcec081a7);  // sha512su0 v7.2d, v13.2d
+    // vl128 state = 0xba4b1bc6
+    __ dci(0xcec081a3);  // sha512su0 v3.2d, v13.2d
+    // vl128 state = 0x2c56ee6e
+    __ dci(0xcec083f3);  // sha512su0 v19.2d, v31.2d
+    // vl128 state = 0xefe9b855
+    __ dci(0xcec08397);  // sha512su0 v23.2d, v28.2d
+    // vl128 state = 0x6f0d20ba
+    __ dci(0xcec08396);  // sha512su0 v22.2d, v28.2d
+    // vl128 state = 0x9be77fdb
+    __ dci(0xcec081b7);  // sha512su0 v23.2d, v13.2d
+    // vl128 state = 0x5d981c55
+    __ dci(0xcec080ff);  // sha512su0 v31.2d, v7.2d
+    // vl128 state = 0x9126079f
+    __ dci(0xcec080fd);  // sha512su0 v29.2d, v7.2d
+    // vl128 state = 0x3199dc9e
+    __ dci(0xcec081dc);  // sha512su0 v28.2d, v14.2d
+    // vl128 state = 0x20fb48d7
+    __ dci(0xcec081cc);  // sha512su0 v12.2d, v14.2d
+    // vl128 state = 0x4ae6221a
+    __ dci(0xcec08088);  // sha512su0 v8.2d, v4.2d
+    // vl128 state = 0x17e8b62d
+    __ dci(0xcec0808a);  // sha512su0 v10.2d, v4.2d
+    // vl128 state = 0x90d73468
+    __ dci(0xcec0809a);  // sha512su0 v26.2d, v4.2d
+    // vl128 state = 0x1f02f97f
+    __ dci(0xcec081de);  // sha512su0 v30.2d, v14.2d
+    // vl128 state = 0xe5ef3e67
+    __ dci(0xcec081bf);  // sha512su0 v31.2d, v13.2d
+    // vl128 state = 0xd1bcc363
+    __ dci(0xcec081bb);  // sha512su0 v27.2d, v13.2d
+    // vl128 state = 0x8bcfab58
+    __ dci(0xcec08033);  // sha512su0 v19.2d, v1.2d
+    // vl128 state = 0x93fb8bad
+    __ dci(0xcec080fb);  // sha512su0 v27.2d, v7.2d
+    // vl128 state = 0x3598e921
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x3598e921,
+        0x7e3ee16c,
+        0x4856987c,
+        0x193bda79,
+        0x84154d6f,
+        0x861f1795,
+        0xb74d39b3,
+        0x9653d8b3,
+        0x6690a066,
+        0x00a29b51,
+        0xb2c795ce,
+        0xcbd03b05,
+        0x9fb2aaec,
+        0x0216b732,
+        0x96eb6864,
+        0x4024f5c7,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_aes) {  // State-hash test for aesd/aese encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kAES);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 30 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x4e285a86);  // aesd v6.16b, v20.16b
+    // vl128 state = 0x801bfc08
+    __ dci(0x4e2858ae);  // aesd v14.16b, v5.16b
+    // vl128 state = 0xbd83a757
+    __ dci(0x4e2858ac);  // aesd v12.16b, v5.16b
+    // vl128 state = 0x9fb1dc6b
+    __ dci(0x4e2858ae);  // aesd v14.16b, v5.16b
+    // vl128 state = 0xfa1fa7e4
+    __ dci(0x4e28482a);  // aese v10.16b, v1.16b
+    // vl128 state = 0xecfcfe2d
+    __ dci(0x4e28483a);  // aese v26.16b, v1.16b
+    // vl128 state = 0x05e22f07
+    __ dci(0x4e28488a);  // aese v10.16b, v4.16b
+    // vl128 state = 0xdd53df5f
+    __ dci(0x4e28488e);  // aese v14.16b, v4.16b
+    // vl128 state = 0x9d2ac50f
+    __ dci(0x4e28484f);  // aese v15.16b, v2.16b
+    // vl128 state = 0xf45146ab
+    __ dci(0x4e28484b);  // aese v11.16b, v2.16b
+    // vl128 state = 0xf1260a7c
+    __ dci(0x4e28485b);  // aese v27.16b, v2.16b
+    // vl128 state = 0x3a0844da
+    __ dci(0x4e285819);  // aesd v25.16b, v0.16b
+    // vl128 state = 0xaca89993
+    __ dci(0x4e284a09);  // aese v9.16b, v16.16b
+    // vl128 state = 0xef4e9a5f
+    __ dci(0x4e285a4b);  // aesd v11.16b, v18.16b
+    // vl128 state = 0x209a44bc
+    __ dci(0x4e285a4f);  // aesd v15.16b, v18.16b
+    // vl128 state = 0xc6d2d718
+    __ dci(0x4e285a4d);  // aesd v13.16b, v18.16b
+    // vl128 state = 0x1aceef8f
+    __ dci(0x4e285a45);  // aesd v5.16b, v18.16b
+    // vl128 state = 0x7ed056c6
+    __ dci(0x4e285af5);  // aesd v21.16b, v23.16b
+    // vl128 state = 0x429ed71e
+    __ dci(0x4e285a91);  // aesd v17.16b, v20.16b
+    // vl128 state = 0xd7a1f687
+    __ dci(0x4e284ad9);  // aese v25.16b, v22.16b
+    // vl128 state = 0x8fa44574
+    __ dci(0x4e284adb);  // aese v27.16b, v22.16b
+    // vl128 state = 0xd2792169
+    __ dci(0x4e285afa);  // aesd v26.16b, v23.16b
+    // vl128 state = 0xe502f095
+    __ dci(0x4e285bbb);  // aesd v27.16b, v29.16b
+    // vl128 state = 0x0e3d3238
+    __ dci(0x4e285bbf);  // aesd v31.16b, v29.16b
+    // vl128 state = 0x0ad06592
+    __ dci(0x4e285baf);  // aesd v15.16b, v29.16b
+    // vl128 state = 0xb94f3c19
+    __ dci(0x4e284b3f);  // aese v31.16b, v25.16b
+    // vl128 state = 0xf31a0da1
+    __ dci(0x4e284917);  // aese v23.16b, v8.16b
+    // vl128 state = 0x7d2d7811
+    __ dci(0x4e284913);  // aese v19.16b, v8.16b
+    // vl128 state = 0x41b7b854
+    __ dci(0x4e284911);  // aese v17.16b, v8.16b
+    // vl128 state = 0x60600536
+    __ dci(0x4e2849d5);  // aese v21.16b, v14.16b
+    // vl128 state = 0x3e0cc74f
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x3e0cc74f,
+        0x7f17ba2e,
+        0xd59f8e91,
+        0x9f15a51b,
+        0x11d92e66,
+        0xcd53d015,
+        0xbc652785,
+        0x6974fa54,
+        0x953d342e,
+        0xf1aa56b3,
+        0xde8ca1d3,
+        0xba408b82,
+        0x48094fa4,
+        0xb757bcf1,
+        0x2cc5be58,
+        0x6e7a0f58,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_aesmc) {  // State-hash test for aesmc/aesimc encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kAES);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 30 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x4e287800);  // aesimc v0.16b, v0.16b
+    // vl128 state = 0x03554749
+    __ dci(0x4e287a28);  // aesimc v8.16b, v17.16b
+    // vl128 state = 0x59d5fedd
+    __ dci(0x4e287a2a);  // aesimc v10.16b, v17.16b
+    // vl128 state = 0xcda29514
+    __ dci(0x4e286aae);  // aesmc v14.16b, v21.16b
+    // vl128 state = 0xae8f019a
+    __ dci(0x4e286abe);  // aesmc v30.16b, v21.16b
+    // vl128 state = 0x7b04c6c0
+    __ dci(0x4e286a0e);  // aesmc v14.16b, v16.16b
+    // vl128 state = 0xaf6c5ce6
+    __ dci(0x4e286a0a);  // aesmc v10.16b, v16.16b
+    // vl128 state = 0xf1d7fd2b
+    __ dci(0x4e286acb);  // aesmc v11.16b, v22.16b
+    // vl128 state = 0x5d693c63
+    __ dci(0x4e286acf);  // aesmc v15.16b, v22.16b
+    // vl128 state = 0xec8971ad
+    __ dci(0x4e286adf);  // aesmc v31.16b, v22.16b
+    // vl128 state = 0x6389b200
+    __ dci(0x4e287a9d);  // aesimc v29.16b, v20.16b
+    // vl128 state = 0xd69341fb
+    __ dci(0x4e28688d);  // aesmc v13.16b, v4.16b
+    // vl128 state = 0x6344af95
+    __ dci(0x4e2878cf);  // aesimc v15.16b, v6.16b
+    // vl128 state = 0x5c58dfac
+    __ dci(0x4e2878cb);  // aesimc v11.16b, v6.16b
+    // vl128 state = 0x7dc9cf34
+    __ dci(0x4e2878c9);  // aesimc v9.16b, v6.16b
+    // vl128 state = 0xff4b3544
+    __ dci(0x4e2878c1);  // aesimc v1.16b, v6.16b
+    // vl128 state = 0xd1937de2
+    __ dci(0x4e287871);  // aesimc v17.16b, v3.16b
+    // vl128 state = 0x7cabd208
+    __ dci(0x4e287815);  // aesimc v21.16b, v0.16b
+    // vl128 state = 0xbc06df94
+    __ dci(0x4e28685d);  // aesmc v29.16b, v2.16b
+    // vl128 state = 0xfc4478bb
+    __ dci(0x4e28685f);  // aesmc v31.16b, v2.16b
+    // vl128 state = 0x0c72c200
+    __ dci(0x4e28787e);  // aesimc v30.16b, v3.16b
+    // vl128 state = 0xdd822b9d
+    __ dci(0x4e28793f);  // aesimc v31.16b, v9.16b
+    // vl128 state = 0x1397dcc6
+    __ dci(0x4e28793b);  // aesimc v27.16b, v9.16b
+    // vl128 state = 0x43f3abd6
+    __ dci(0x4e28792b);  // aesimc v11.16b, v9.16b
+    // vl128 state = 0xeb8ca365
+    __ dci(0x4e2869bb);  // aesmc v27.16b, v13.16b
+    // vl128 state = 0x0a957f4f
+    __ dci(0x4e286b93);  // aesmc v19.16b, v28.16b
+    // vl128 state = 0xbc5da8bd
+    __ dci(0x4e286b97);  // aesmc v23.16b, v28.16b
+    // vl128 state = 0xc49343cc
+    __ dci(0x4e286b95);  // aesmc v21.16b, v28.16b
+    // vl128 state = 0x8c80c144
+    __ dci(0x4e286b51);  // aesmc v17.16b, v26.16b
+    // vl128 state = 0xeda3255d
+    __ dci(0x4e2869d3);  // aesmc v19.16b, v14.16b
+    // vl128 state = 0x8db8a9d0
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x8db8a9d0,
+        0xb13d8e1e,
+        0x9f33ca70,
+        0x38f7ef7a,
+        0x65352b29,
+        0xc4257260,
+        0xf49587c2,
+        0xb3f61256,
+        0x8ef4a534,
+        0x6e061aa9,
+        0x7270527d,
+        0x3e1f82f9,
+        0x1fe79e60,
+        0x985cab68,
+        0xe77b4484,
+        0xe3817f4e,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm3) {  // State-hash test for sm3ss1 encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM3);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 10 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 10 * kInstructionSize);
+    __ dci(0xce591017);  // sm3ss1 v23.4s, v0.4s, v25.4s, v4.4s
+    // vl128 state = 0xad4bba0a
+    __ dci(0xce49121f);  // sm3ss1 v31.4s, v16.4s, v9.4s, v4.4s
+    // vl128 state = 0x84adef21
+    __ dci(0xce49121e);  // sm3ss1 v30.4s, v16.4s, v9.4s, v4.4s
+    // vl128 state = 0xccfd7e5a
+    __ dci(0xce49301a);  // sm3ss1 v26.4s, v0.4s, v9.4s, v12.4s
+    // vl128 state = 0x60833cc7
+    __ dci(0xce49720a);  // sm3ss1 v10.4s, v16.4s, v9.4s, v28.4s
+    // vl128 state = 0x03f03263
+    __ dci(0xce58721a);  // sm3ss1 v26.4s, v16.4s, v24.4s, v28.4s
+    // vl128 state = 0x31845f40
+    __ dci(0xce58702a);  // sm3ss1 v10.4s, v1.4s, v24.4s, v28.4s
+    // vl128 state = 0x54c64f70
+    __ dci(0xce58753a);  // sm3ss1 v26.4s, v9.4s, v24.4s, v29.4s
+    // vl128 state = 0x3d5cb04f
+    __ dci(0xce507518);  // sm3ss1 v24.4s, v8.4s, v16.4s, v29.4s
+    // vl128 state = 0xe02de221
+    __ dci(0xce406519);  // sm3ss1 v25.4s, v8.4s, v0.4s, v25.4s
+    // vl128 state = 0x73d36ae8
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x73d36ae8,
+        0xcbcda2db,
+        0x6ee9ad3d,
+        0xa6857a16,
+        0xa238ec05,
+        0x1bc82d1d,
+        0xe4530773,
+        0xfb0d092e,
+        0xe62aff0a,
+        0xf56a593f,
+        0x3967d590,
+        0xebcd14a0,
+        0xa7bedcb8,
+        0x867fa43c,
+        0x1679eab5,
+        0x0a836861,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm3partw12) {  // State-hash test for sm3partw1/sm3partw2.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM3);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 30 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0xce70c201);  // sm3partw1 v1.4s, v16.4s, v16.4s
+    // vl128 state = 0x6f2069a6
+    __ dci(0xce72c303);  // sm3partw1 v3.4s, v24.4s, v18.4s
+    // vl128 state = 0x986fa56c
+    __ dci(0xce76c381);  // sm3partw1 v1.4s, v28.4s, v22.4s
+    // vl128 state = 0x5dbd953c
+    __ dci(0xce7ec3b1);  // sm3partw1 v17.4s, v29.4s, v30.4s
+    // vl128 state = 0xc72ccca5
+    __ dci(0xce7ac1b5);  // sm3partw1 v21.4s, v13.4s, v26.4s
+    // vl128 state = 0x33cdfd6a
+    __ dci(0xce7ac1b7);  // sm3partw1 v23.4s, v13.4s, v26.4s
+    // vl128 state = 0x4303e945
+    __ dci(0xce7ac1bf);  // sm3partw1 v31.4s, v13.4s, v26.4s
+    // vl128 state = 0x56acac84
+    __ dci(0xce78c1fd);  // sm3partw1 v29.4s, v15.4s, v24.4s
+    // vl128 state = 0x5e2a2793
+    __ dci(0xce78c5df);  // sm3partw2 v31.4s, v14.4s, v24.4s
+    // vl128 state = 0xf7c457f3
+    __ dci(0xce70c55d);  // sm3partw2 v29.4s, v10.4s, v16.4s
+    // vl128 state = 0xfa3557ac
+    __ dci(0xce60c159);  // sm3partw1 v25.4s, v10.4s, v0.4s
+    // vl128 state = 0xb3ae6830
+    __ dci(0xce62c55b);  // sm3partw2 v27.4s, v10.4s, v2.4s
+    // vl128 state = 0xa7747c70
+    __ dci(0xce66c753);  // sm3partw2 v19.4s, v26.4s, v6.4s
+    // vl128 state = 0xb55f5895
+    __ dci(0xce67c551);  // sm3partw2 v17.4s, v10.4s, v7.4s
+    // vl128 state = 0x519b1342
+    __ dci(0xce65c750);  // sm3partw2 v16.4s, v26.4s, v5.4s
+    // vl128 state = 0xc4e6e4b9
+    __ dci(0xce61c718);  // sm3partw2 v24.4s, v24.4s, v1.4s
+    // vl128 state = 0x127c483c
+    __ dci(0xce61c71c);  // sm3partw2 v28.4s, v24.4s, v1.4s
+    // vl128 state = 0x92783ecc
+    __ dci(0xce6dc714);  // sm3partw2 v20.4s, v24.4s, v13.4s
+    // vl128 state = 0xe11e87d3
+    __ dci(0xce65c756);  // sm3partw2 v22.4s, v26.4s, v5.4s
+    // vl128 state = 0x8b6878d0
+    __ dci(0xce65c5d2);  // sm3partw2 v18.4s, v14.4s, v5.4s
+    // vl128 state = 0xf2fb1e86
+    __ dci(0xce64c550);  // sm3partw2 v16.4s, v10.4s, v4.4s
+    // vl128 state = 0x73ad3b0f
+    __ dci(0xce66c578);  // sm3partw2 v24.4s, v11.4s, v6.4s
+    // vl128 state = 0x7e03900d
+    __ dci(0xce76c55c);  // sm3partw2 v28.4s, v10.4s, v22.4s
+    // vl128 state = 0x1d0b5df6
+    __ dci(0xce76c54c);  // sm3partw2 v12.4s, v10.4s, v22.4s
+    // vl128 state = 0x1a3d7a77
+    __ dci(0xce7ec448);  // sm3partw2 v8.4s, v2.4s, v30.4s
+    // vl128 state = 0x3ed2e4bd
+    __ dci(0xce6ec409);  // sm3partw2 v9.4s, v0.4s, v14.4s
+    // vl128 state = 0x826dd348
+    __ dci(0xce6ec52b);  // sm3partw2 v11.4s, v9.4s, v14.4s
+    // vl128 state = 0x3ff5e482
+    __ dci(0xce66c72f);  // sm3partw2 v15.4s, v25.4s, v6.4s
+    // vl128 state = 0x6fd24cd4
+    __ dci(0xce65c73f);  // sm3partw2 v31.4s, v25.4s, v5.4s
+    // vl128 state = 0xd51ac474
+    __ dci(0xce67c77b);  // sm3partw2 v27.4s, v27.4s, v7.4s
+    // vl128 state = 0x720d7419
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x720d7419,
+        0x31445e06,
+        0xd2aee240,
+        0x45a27e4b,
+        0xd6c46f08,
+        0xcaed7f9e,
+        0x734820c7,
+        0x377e1f38,
+        0x12e03585,
+        0x1b9cbe63,
+        0x1d58d49a,
+        0xc160a9dc,
+        0x22c2fe25,
+        0x86b7af0f,
+        0xfeae7bf5,
+        0xf8dfcc40,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm3tt1) {  // State-hash test for sm3tt1a/sm3tt1b encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM3);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 20 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0xce53a363);  // sm3tt1a v3.4s, v27.4s, v19.s[2]
+    // vl128 state = 0xaaa8c715
+    __ dci(0xce58a7a7);  // sm3tt1b v7.4s, v29.4s, v24.s[2]
+    // vl128 state = 0xb99a301d
+    __ dci(0xce5eb2b7);  // sm3tt1a v23.4s, v21.4s, v30.s[3]
+    // vl128 state = 0xe8dabe99
+    __ dci(0xce43b6ce);  // sm3tt1b v14.4s, v22.4s, v3.s[3]
+    // vl128 state = 0xaa498ae5
+    __ dci(0xce448027);  // sm3tt1a v7.4s, v1.4s, v4.s[0]
+    // vl128 state = 0x32093547
+    __ dci(0xce4286d8);  // sm3tt1b v24.4s, v22.4s, v2.s[0]
+    // vl128 state = 0xe03e3a81
+    __ dci(0xce44a0f3);  // sm3tt1a v19.4s, v7.4s, v4.s[2]
+    // vl128 state = 0xcb555b4a
+    __ dci(0xce418233);  // sm3tt1a v19.4s, v17.4s, v1.s[0]
+    // vl128 state = 0x751e4f7d
+    __ dci(0xce58a49f);  // sm3tt1b v31.4s, v4.4s, v24.s[2]
+    // vl128 state = 0xcaff7580
+    __ dci(0xce548326);  // sm3tt1a v6.4s, v25.4s, v20.s[0]
+    // vl128 state = 0xc4308a78
+    __ dci(0xce548124);  // sm3tt1a v4.4s, v9.4s, v20.s[0]
+    // vl128 state = 0x1f1bfdfb
+    __ dci(0xce5fb282);  // sm3tt1a v2.4s, v20.4s, v31.s[3]
+    // vl128 state = 0xa632c0b2
+    __ dci(0xce549573);  // sm3tt1b v19.4s, v11.4s, v20.s[1]
+    // vl128 state = 0x7fb7c2d3
+    __ dci(0xce4387ae);  // sm3tt1b v14.4s, v29.4s, v3.s[0]
+    // vl128 state = 0xe8d4c534
+    __ dci(0xce5094eb);  // sm3tt1b v11.4s, v7.4s, v16.s[1]
+    // vl128 state = 0xf34a4fbc
+    __ dci(0xce51b59f);  // sm3tt1b v31.4s, v12.4s, v17.s[3]
+    // vl128 state = 0x98e388e9
+    __ dci(0xce50a7bf);  // sm3tt1b v31.4s, v29.4s, v16.s[2]
+    // vl128 state = 0x7cd7a6ac
+    __ dci(0xce5ca52e);  // sm3tt1b v14.4s, v9.4s, v28.s[2]
+    // vl128 state = 0xce9410c5
+    __ dci(0xce5aa741);  // sm3tt1b v1.4s, v26.4s, v26.s[2]
+    // vl128 state = 0xd83fbd58
+    __ dci(0xce5e94da);  // sm3tt1b v26.4s, v6.4s, v30.s[1]
+    // vl128 state = 0xc6055fe3
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0xc6055fe3,
+        0xa2c33f98,
+        0x1cc9a227,
+        0xf29eb254,
+        0xd1739d6e,
+        0x1c4fff34,
+        0x0c182795,
+        0x96e46836,
+        0x43d010c9,
+        0xd7c4f94c,
+        0x78c387f2,
+        0x4319fef3,
+        0x72407eef,
+        0xa77d3869,
+        0x3c81c49a,
+        0x68cc20ef,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm3tt2) {  // State-hash test for sm3tt2a/sm3tt2b encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM3);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 20 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0xce439d42);  // sm3tt2b v2.4s, v10.4s, v3.s[1]
+    // vl128 state = 0x388642cc
+    __ dci(0xce42b89d);  // sm3tt2a v29.4s, v4.4s, v2.s[3]
+    // vl128 state = 0x66f4e60a
+    __ dci(0xce4da95d);  // sm3tt2a v29.4s, v10.4s, v13.s[2]
+    // vl128 state = 0x95d4651d
+    __ dci(0xce49b926);  // sm3tt2a v6.4s, v9.4s, v9.s[3]
+    // vl128 state = 0x826919fe
+    __ dci(0xce5cae33);  // sm3tt2b v19.4s, v17.4s, v28.s[2]
+    // vl128 state = 0xb5cfefb0
+    __ dci(0xce478959);  // sm3tt2a v25.4s, v10.4s, v7.s[0]
+    // vl128 state = 0xfe17b730
+    __ dci(0xce549cc2);  // sm3tt2b v2.4s, v6.4s, v20.s[1]
+    // vl128 state = 0x769a0d76
+    __ dci(0xce4c9f90);  // sm3tt2b v16.4s, v28.4s, v12.s[1]
+    // vl128 state = 0x8f633b95
+    __ dci(0xce508d49);  // sm3tt2b v9.4s, v10.4s, v16.s[0]
+    // vl128 state = 0x5eab6daa
+    __ dci(0xce59ad79);  // sm3tt2b v25.4s, v11.4s, v25.s[2]
+    // vl128 state = 0xfb197616
+    __ dci(0xce458fd6);  // sm3tt2b v22.4s, v30.4s, v5.s[0]
+    // vl128 state = 0x875ff29d
+    __ dci(0xce4ab92c);  // sm3tt2a v12.4s, v9.4s, v10.s[3]
+    // vl128 state = 0xad159c01
+    __ dci(0xce598a1c);  // sm3tt2a v28.4s, v16.4s, v25.s[0]
+    // vl128 state = 0x3da313e4
+    __ dci(0xce43989f);  // sm3tt2a v31.4s, v4.4s, v3.s[1]
+    // vl128 state = 0xc0a54179
+    __ dci(0xce459c8a);  // sm3tt2b v10.4s, v4.4s, v5.s[1]
+    // vl128 state = 0x4739cdbf
+    __ dci(0xce539959);  // sm3tt2a v25.4s, v10.4s, v19.s[1]
+    // vl128 state = 0xd85f84ab
+    __ dci(0xce429be1);  // sm3tt2a v1.4s, v31.4s, v2.s[1]
+    // vl128 state = 0x85b5871c
+    __ dci(0xce5d9fe3);  // sm3tt2b v3.4s, v31.4s, v29.s[1]
+    // vl128 state = 0x2be5bd95
+    __ dci(0xce4ebe16);  // sm3tt2b v22.4s, v16.4s, v14.s[3]
+    // vl128 state = 0x2f8146e9
+    __ dci(0xce599a63);  // sm3tt2a v3.4s, v19.4s, v25.s[1]
+    // vl128 state = 0xa6e513e2
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0xa6e513e2,
+        0x6bf4ae47,
+        0x74e074db,
+        0xae1a57e0,
+        0x0db67f09,
+        0x85332e49,
+        0xc40d6565,
+        0x07ed81aa,
+        0xfa0e10bb,
+        0x9addadfa,
+        0xa9cea561,
+        0xa481e17b,
+        0x7c2be34e,
+        0xd4cf493f,
+        0x8b30cc5e,
+        0xe44416d3,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm4e) {  // State-hash test for sm4e encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM4);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 20 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0xcec08400);  // sm4e v0.4s, v0.4s
+    // vl128 state = 0xa687bacc
+    __ dci(0xcec08628);  // sm4e v8.4s, v17.4s
+    // vl128 state = 0xf174e346
+    __ dci(0xcec0862a);  // sm4e v10.4s, v17.4s
+    // vl128 state = 0xab88f8ca
+    __ dci(0xcec08628);  // sm4e v8.4s, v17.4s
+    // vl128 state = 0x000d3840
+    __ dci(0xcec08638);  // sm4e v24.4s, v17.4s
+    // vl128 state = 0xd980ddc2
+    __ dci(0xcec08688);  // sm4e v8.4s, v20.4s
+    // vl128 state = 0xd501f2c2
+    __ dci(0xcec0868c);  // sm4e v12.4s, v20.4s
+    // vl128 state = 0x699d6b6f
+    __ dci(0xcec0864d);  // sm4e v13.4s, v18.4s
+    // vl128 state = 0x67baf406
+    __ dci(0xcec08649);  // sm4e v9.4s, v18.4s
+    // vl128 state = 0x178b048e
+    __ dci(0xcec08659);  // sm4e v25.4s, v18.4s
+    // vl128 state = 0x552a70d9
+    __ dci(0xcec0865d);  // sm4e v29.4s, v18.4s
+    // vl128 state = 0x3be534d1
+    __ dci(0xcec0865f);  // sm4e v31.4s, v18.4s
+    // vl128 state = 0x396fdf70
+    __ dci(0xcec08657);  // sm4e v23.4s, v18.4s
+    // vl128 state = 0x836c474b
+    __ dci(0xcec086e7);  // sm4e v7.4s, v23.4s
+    // vl128 state = 0x71aebad7
+    __ dci(0xcec08683);  // sm4e v3.4s, v20.4s
+    // vl128 state = 0xadfd515c
+    __ dci(0xcec08681);  // sm4e v1.4s, v20.4s
+    // vl128 state = 0xf1465ab4
+    __ dci(0xcec087c0);  // sm4e v0.4s, v30.4s
+    // vl128 state = 0x8555b40f
+    __ dci(0xcec087c4);  // sm4e v4.4s, v30.4s
+    // vl128 state = 0x2cb3f99f
+    __ dci(0xcec087d4);  // sm4e v20.4s, v30.4s
+    // vl128 state = 0x733336fd
+    __ dci(0xcec085fc);  // sm4e v28.4s, v15.4s
+    // vl128 state = 0x11b138f9
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x11b138f9,
+        0x5993c196,
+        0xb9eef6b5,
+        0xf96d88cf,
+        0x8e92bd49,
+        0x04d27185,
+        0x8833f291,
+        0x77933d5b,
+        0x135500cc,
+        0xe5ca977f,
+        0x3e4536af,
+        0xb169aa9d,
+        0xe0b4425b,
+        0x35c1f76e,
+        0x54e3448a,
+        0x4dbf0c92,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(neon_sm4ekey) {  // State-hash test for sm4ekey encodings.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSM4);
+  START();
+  // Begin from the initial machine state whose hash is noted below.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+  // Exactly 20 raw encodings follow, matching the scope's declared size.
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0xce6fc9d4);  // sm4ekey v20.4s, v14.4s, v15.4s
+    // vl128 state = 0x4bb7b396
+    __ dci(0xce6bc8d5);  // sm4ekey v21.4s, v6.4s, v11.4s
+    // vl128 state = 0xf4354b26
+    __ dci(0xce6bc8c5);  // sm4ekey v5.4s, v6.4s, v11.4s
+    // vl128 state = 0x0a331378
+    __ dci(0xce6bc8cd);  // sm4ekey v13.4s, v6.4s, v11.4s
+    // vl128 state = 0x7ed4c2a7
+    __ dci(0xce6fc8e5);  // sm4ekey v5.4s, v7.4s, v15.4s
+    // vl128 state = 0x38a433fd
+    __ dci(0xce6fc8e4);  // sm4ekey v4.4s, v7.4s, v15.4s
+    // vl128 state = 0xc1ad0d76
+    __ dci(0xce6bcaec);  // sm4ekey v12.4s, v23.4s, v11.4s
+    // vl128 state = 0x81660ce3
+    __ dci(0xce6bcae8);  // sm4ekey v8.4s, v23.4s, v11.4s
+    // vl128 state = 0x79f3e5c1
+    __ dci(0xce7bcaaa);  // sm4ekey v10.4s, v21.4s, v27.4s
+    // vl128 state = 0x231e0a79
+    __ dci(0xce72caa8);  // sm4ekey v8.4s, v21.4s, v18.4s
+    // vl128 state = 0xd931c858
+    __ dci(0xce7ac8aa);  // sm4ekey v10.4s, v5.4s, v26.4s
+    // vl128 state = 0x2476ef6a
+    __ dci(0xce7bc888);  // sm4ekey v8.4s, v4.4s, v27.4s
+    // vl128 state = 0xd4a9ac83
+    __ dci(0xce7bc889);  // sm4ekey v9.4s, v4.4s, v27.4s
+    // vl128 state = 0x149fd9b3
+    __ dci(0xce7bc9cd);  // sm4ekey v13.4s, v14.4s, v27.4s
+    // vl128 state = 0xece67fce
+    __ dci(0xce79cbc5);  // sm4ekey v5.4s, v30.4s, v25.4s
+    // vl128 state = 0xccb45863
+    __ dci(0xce71cac4);  // sm4ekey v4.4s, v22.4s, v17.4s
+    // vl128 state = 0xafb23c9d
+    __ dci(0xce71c8e0);  // sm4ekey v0.4s, v7.4s, v17.4s
+    // vl128 state = 0x5c808694
+    __ dci(0xce71c882);  // sm4ekey v2.4s, v4.4s, v17.4s
+    // vl128 state = 0x6cea5132
+    __ dci(0xce73c803);  // sm4ekey v3.4s, v0.4s, v19.4s
+    // vl128 state = 0x67e316db
+    __ dci(0xce71c847);  // sm4ekey v7.4s, v2.4s, v17.4s
+    // vl128 state = 0x317aafac
+  }
+  // Load 'state' (presumably filled by ComputeMachineStateHash) into w0.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+  // expected_hashes below: index = Q-sized lane count - 1, so [0] is vl128.
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {
+        0x317aafac,
+        0xbacd34de,
+        0x3e92f0b2,
+        0x3043dbe3,
+        0x6dda4d17,
+        0x6e59ba0d,
+        0xa29887cf,
+        0x3bee1f56,
+        0xacd43191,
+        0x97ab7ada,
+        0x39ebcf53,
+        0xea7b411e,
+        0xd8e1efe9,
+        0x2b99fc57,
+        0xf5f62e02,
+        0xd50621d1,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc
index 0a4c6d1..621754d 100644
--- a/test/aarch64/test-simulator-sve2-aarch64.cc
+++ b/test/aarch64/test-simulator-sve2-aarch64.cc
@@ -24,24 +24,23 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <sys/mman.h>
-#include <unistd.h>
-
 #include <cfloat>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <functional>
+#include <sys/mman.h>
+#include <unistd.h>
 
 #include "test-runner.h"
 #include "test-utils.h"
-#include "aarch64/test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "aarch64/test-utils-aarch64.h"
 #include "test-assembler-aarch64.h"
 
 #define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
@@ -9118,5 +9117,130 @@
   }
 }
 
+TEST_SVE(sve2_pmull128) {  // Hash test for pmullb/pmullt with .q destinations.
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32,
+                          CPUFeatures::kSVEPmull128);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 40 * kInstructionSize);  // 40 dci() below.
+    __ dci(0x45006800);  // pmullb z0.q, z0.d, z0.d
+    // vl128 state = 0x4107ca0c
+    __ dci(0x45006a28);  // pmullb z8.q, z17.d, z0.d
+    // vl128 state = 0xa87d231a
+    __ dci(0x45016a6c);  // pmullb z12.q, z19.d, z1.d
+    // vl128 state = 0xc547fcf6
+    __ dci(0x45116e68);  // pmullt z8.q, z19.d, z17.d
+    // vl128 state = 0x6a01d521
+    __ dci(0x45106a69);  // pmullb z9.q, z19.d, z16.d
+    // vl128 state = 0x64a7ba8a
+    __ dci(0x45006a4d);  // pmullb z13.q, z18.d, z0.d
+    // vl128 state = 0xe59e3f8e
+    __ dci(0x45086e5d);  // pmullt z29.q, z18.d, z8.d
+    // vl128 state = 0xbfbb9316
+    __ dci(0x450a6e75);  // pmullt z21.q, z19.d, z10.d
+    // vl128 state = 0x29f6a4c7
+    __ dci(0x45126e74);  // pmullt z20.q, z19.d, z18.d
+    // vl128 state = 0x4ced9406
+    __ dci(0x45176e75);  // pmullt z21.q, z19.d, z23.d
+    // vl128 state = 0xd09e5676
+    __ dci(0x45176e77);  // pmullt z23.q, z19.d, z23.d
+    // vl128 state = 0x568c0e25
+    __ dci(0x45176e75);  // pmullt z21.q, z19.d, z23.d
+    // vl128 state = 0xb2f13c36
+    __ dci(0x45176b71);  // pmullb z17.q, z27.d, z23.d
+    // vl128 state = 0x160bec4f
+    __ dci(0x451f6b30);  // pmullb z16.q, z25.d, z31.d
+    // vl128 state = 0x2d7e7f49
+    __ dci(0x451f6b20);  // pmullb z0.q, z25.d, z31.d
+    // vl128 state = 0x113d828b
+    __ dci(0x451f6b90);  // pmullb z16.q, z28.d, z31.d
+    // vl128 state = 0xb8b3b3d9
+    __ dci(0x451f6f12);  // pmullt z18.q, z24.d, z31.d
+    // vl128 state = 0x277aacb8
+    __ dci(0x451f6f16);  // pmullt z22.q, z24.d, z31.d
+    // vl128 state = 0xef79c8da
+    __ dci(0x450b6f17);  // pmullt z23.q, z24.d, z11.d
+    // vl128 state = 0x1dc19104
+    __ dci(0x450a6e1f);  // pmullt z31.q, z16.d, z10.d
+    // vl128 state = 0x3ccb4ea8
+    __ dci(0x451a6e2f);  // pmullt z15.q, z17.d, z26.d
+    // vl128 state = 0x14e13481
+    __ dci(0x45126a3f);  // pmullb z31.q, z17.d, z18.d
+    // vl128 state = 0x4e6502f9
+    __ dci(0x451a6b3e);  // pmullb z30.q, z25.d, z26.d
+    // vl128 state = 0xf6f18478
+    __ dci(0x45126a3a);  // pmullb z26.q, z17.d, z18.d
+    // vl128 state = 0xdd4f14fb
+    __ dci(0x45126afb);  // pmullb z27.q, z23.d, z18.d
+    // vl128 state = 0xcbf3bee2
+    __ dci(0x45126aff);  // pmullb z31.q, z23.d, z18.d
+    // vl128 state = 0x627bec09
+    __ dci(0x45126aef);  // pmullb z15.q, z23.d, z18.d
+    // vl128 state = 0xf5de1fa9
+    __ dci(0x45106abf);  // pmullb z31.q, z21.d, z16.d
+    // vl128 state = 0x44bb6385
+    __ dci(0x451a6abb);  // pmullb z27.q, z21.d, z26.d
+    // vl128 state = 0x5c5fa224
+    __ dci(0x450a68b3);  // pmullb z19.q, z5.d, z10.d
+    // vl128 state = 0x28b6085c
+    __ dci(0x450e69b2);  // pmullb z18.q, z13.d, z14.d
+    // vl128 state = 0x450898d6
+    __ dci(0x450e69b6);  // pmullb z22.q, z13.d, z14.d
+    // vl128 state = 0x79d7911b
+    __ dci(0x450e69b4);  // pmullb z20.q, z13.d, z14.d
+    // vl128 state = 0x98bf6939
+    __ dci(0x450f6924);  // pmullb z4.q, z9.d, z15.d
+    // vl128 state = 0xb8a1bbc7
+    __ dci(0x45176925);  // pmullb z5.q, z9.d, z23.d
+    // vl128 state = 0x631b41c8
+    __ dci(0x451f69a4);  // pmullb z4.q, z13.d, z31.d
+    // vl128 state = 0x617fc272
+    __ dci(0x451b69e0);  // pmullb z0.q, z15.d, z27.d
+    // vl128 state = 0x77780ac1
+    __ dci(0x451b69e8);  // pmullb z8.q, z15.d, z27.d
+    // vl128 state = 0xce5ae18f
+    __ dci(0x450f69e0);  // pmullb z0.q, z15.d, z15.d
+    // vl128 state = 0xa037371a
+    __ dci(0x450b6be8);  // pmullb z8.q, z31.d, z11.d
+    // vl128 state = 0xb59be233
+  }
+
+  uint32_t state;  // NOTE(review): presumably receives the computed hash.
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));  // w0 = value stored in `state`.
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    uint32_t expected_hashes[] = {  // Index: (Q-sized lane count) - 1.
+        0xb59be233,  // Same value as the final `vl128 state` comment above.
+        0x32430624,
+        0x5cc3ec66,
+        0xecfdffe7,
+        0x6d77a270,
+        0xa0d604f2,
+        0x2178aa11,
+        0xabdcbeaa,
+        0xab3b974f,
+        0x11a874f5,
+        0xf2eb6131,
+        0x6d311c6c,
+        0xd4e99b72,
+        0x5177ce8e,
+        0x32aa02f0,
+        0x681ef977,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/test/aarch64/test-trace-aarch64.cc b/test/aarch64/test-trace-aarch64.cc
index 27e71d5..d48f806 100644
--- a/test/aarch64/test-trace-aarch64.cc
+++ b/test/aarch64/test-trace-aarch64.cc
@@ -29,17 +29,16 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
-
 #include <fstream>
 #include <regex>
 
 #include "test-runner.h"
-#include "test-utils-aarch64.h"
 
 #include "aarch64/cpu-aarch64.h"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
 #include "aarch64/simulator-aarch64.h"
+#include "test-utils-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -371,6 +370,16 @@
   __ uxtw(w8, w9);
   __ uxtw(x10, x11);
 
+  // Regression tests.
+  __ stp(x10, xzr, MemOperand(sp, -16, PreIndex));
+  __ ldp(x10, xzr, MemOperand(sp, 16, PostIndex));
+  __ str(xzr, MemOperand(sp, -16, PreIndex));
+  __ ldrsb(xzr, MemOperand(sp, 16, PostIndex));
+  __ str(xzr, MemOperand(sp, -16, PreIndex));
+  __ ldrsh(xzr, MemOperand(sp, 16, PostIndex));
+  __ str(xzr, MemOperand(sp, -16, PreIndex));
+  __ ldrsw(xzr, MemOperand(sp, 16, PostIndex));
+
   // Branch tests.
   {
     Label end;
diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc
index 76e7eae..c23f4e8 100644
--- a/test/aarch64/test-utils-aarch64.cc
+++ b/test/aarch64/test-utils-aarch64.cc
@@ -24,11 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "test-utils-aarch64.h"
+
 #include <cmath>
 #include <queue>
 
 #include "test-runner.h"
-#include "test-utils-aarch64.h"
 
 #include "../test/aarch64/test-simulator-inputs-aarch64.h"
 #include "aarch64/cpu-aarch64.h"
@@ -88,6 +89,34 @@
 }
 
 
+// Check `result` against each value in `reference_list`. kExpectEqual passes
+// if any entry matches; kExpectNotEqual passes only if no entry matches. On
+// failure, the reference list and the value found are printed.
+bool Equal64(std::vector<uint64_t> reference_list,
+             const RegisterDump*,
+             uint64_t result,
+             ExpectedResult option) {
+  bool found = false;
+  for (uint64_t reference : reference_list) found |= (result == reference);
+  switch (option) {
+    case kExpectEqual:
+      if (found) return true;
+      printf("Expected a result in (\n");
+      break;
+    case kExpectNotEqual:
+      // A match is a failure here, so it must reach the `return false` below.
+      if (!found) return true;
+      printf("Expected a result not in (\n");
+      break;
+  }
+  for (uint64_t reference : reference_list) {
+    printf("  0x%016" PRIx64 ",\n", reference);
+  }
+  printf(")\t Found 0x%016" PRIx64 "\n", result);
+  return false;
+}
+
+
 bool Equal128(QRegisterValue expected,
               const RegisterDump*,
               QRegisterValue result) {
@@ -199,6 +228,16 @@
 }
 
 
+bool Equal64(std::vector<uint64_t> reference_list,
+             const RegisterDump* core,
+             const Register& reg,
+             ExpectedResult option) {
+  VIXL_ASSERT(reg.Is64Bits());  // `reg` must be a 64-bit register.
+  const uint64_t dumped_value = core->xreg(reg.GetCode());
+  return Equal64(reference_list, core, dumped_value, option);  // Delegate.
+}
+
+
 bool NotEqual64(uint64_t reference,
                 const RegisterDump* core,
                 const Register& reg) {
@@ -780,7 +819,7 @@
 }
 
 // Note that the function assumes p0, p1, p2 and p3 are set to all true in b-,
-// h-, s- and d-lane sizes respectively, and p4, p5 are clobberred as a temp
+// h-, s- and d-lane sizes respectively, and p4, p5 are clobbered as a temp
 // predicate.
 template <typename T, size_t N>
 void SetFpData(MacroAssembler* masm,
diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h
index b1c2898..40a5aa5 100644
--- a/test/aarch64/test-utils-aarch64.h
+++ b/test/aarch64/test-utils-aarch64.h
@@ -345,6 +345,10 @@
              const RegisterDump*,
              uint64_t result,
              ExpectedResult option = kExpectEqual);
+bool Equal64(std::vector<uint64_t> reference_list,
+             const RegisterDump*,
+             uint64_t result,
+             ExpectedResult option = kExpectEqual);
 bool Equal128(QRegisterValue expected,
               const RegisterDump*,
               QRegisterValue result);
@@ -358,6 +362,10 @@
              const RegisterDump* core,
              const Register& reg,
              ExpectedResult option = kExpectEqual);
+bool Equal64(std::vector<uint64_t> reference_list,
+             const RegisterDump* core,
+             const Register& reg,
+             ExpectedResult option = kExpectEqual);
 bool Equal64(uint64_t expected,
              const RegisterDump* core,
              const VRegister& vreg);
@@ -501,7 +509,7 @@
                                int reg_count,
                                RegList allowed);
 
-// Ovewrite the contents of the specified registers. This enables tests to
+// Overwrite the contents of the specified registers. This enables tests to
 // check that register contents are written in cases where it's likely that the
 // correct outcome could already be stored in the register.
 //
@@ -604,18 +612,20 @@
 // vector length.
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
-#define TEST_SVE_INNER(type, name)                            \
-  void Test##name(Test* config);                              \
-  Test* test_##name##_list[] =                                \
-      {Test::MakeSVETest(128,                                 \
-                         "AARCH64_" type "_" #name "_vl128",  \
-                         &Test##name),                        \
-       Test::MakeSVETest(384,                                 \
-                         "AARCH64_" type "_" #name "_vl384",  \
-                         &Test##name),                        \
-       Test::MakeSVETest(2048,                                \
-                         "AARCH64_" type "_" #name "_vl2048", \
-                         &Test##name)};                       \
+#define TEST_SVE_INNER(type, name)                                          \
+  void Test##name(Test* config);                                            \
+  Test* test_##name##_list[] = {Test::MakeSVETest(128,                      \
+                                                  "AARCH64_" type "_" #name \
+                                                  "_vl128",                 \
+                                                  &Test##name),             \
+                                Test::MakeSVETest(384,                      \
+                                                  "AARCH64_" type "_" #name \
+                                                  "_vl384",                 \
+                                                  &Test##name),             \
+                                Test::MakeSVETest(2048,                     \
+                                                  "AARCH64_" type "_" #name \
+                                                  "_vl2048",                \
+                                                  &Test##name)};            \
   void Test##name(Test* config)
 
 #define SVE_SETUP_WITH_FEATURES(...) \
diff --git a/test/test-api.cc b/test/test-api.cc
index b30230d..dfb61af 100644
--- a/test/test-api.cc
+++ b/test/test-api.cc
@@ -29,9 +29,8 @@
 #include <sstream>
 #include <vector>
 
-#include "test-runner.h"
-
 #include "cpu-features.h"
+#include "test-runner.h"
 #include "utils-vixl.h"
 
 #if __cplusplus >= 201103L
diff --git a/test/test-invalset.cc b/test/test-invalset.cc
index ac53a04..548f67e 100644
--- a/test/test-invalset.cc
+++ b/test/test-invalset.cc
@@ -397,5 +397,27 @@
 #endif
 }
 
+TEST(move) {
+  TestSet original;
+
+  original.insert(Obj(-123, 456));
+  original.insert(Obj(2718, 2871828));
+
+  TestSet moved_small(std::move(original));
+  VIXL_CHECK(original.empty());  // The moved-from set is expected to be empty.
+  VIXL_CHECK(moved_small.size() == 2);
+  VIXL_CHECK(moved_small.GetMinElement() == Obj(-123, 456));
+
+  // Repeat the move after inserting 4 * kNPreallocatedElements more elements.
+  for (unsigned i = 0; i < 4 * kNPreallocatedElements; i++) {
+    moved_small.insert(Obj(i, -1));
+  }
+
+  TestSet moved_large(std::move(moved_small));
+  VIXL_CHECK(moved_small.empty());
+  VIXL_CHECK(moved_large.size() == 2 + 4 * kNPreallocatedElements);
+  VIXL_CHECK(moved_large.GetMinElement() == Obj(-123, 456));
+}
+
 
 }  // namespace vixl
diff --git a/test/test-pool-manager.cc b/test/test-pool-manager.cc
index df2f32b..194154b 100644
--- a/test/test-pool-manager.cc
+++ b/test/test-pool-manager.cc
@@ -297,7 +297,7 @@
                object.max_location_));
   }
 }
-}
+}  // namespace vixl
 
 // Basic test - checks that emitting a very simple pool works.
 TEST(Basic) {
@@ -343,14 +343,15 @@
                                                   int32_t min_offset,
                                                   int32_t max_offset,
                                                   int alignment) {
-  IF_VERBOSE(printf(
-      "About to add a new reference to object %d with min location = %d, max "
-      "location = %d, alignment = %d, size = %d\n",
-      id,
-      min_offset + pc,
-      max_offset + pc,
-      alignment,
-      size));
+  IF_VERBOSE(
+      printf("About to add a new reference to object %d with min location = "
+             "%d, max "
+             "location = %d, alignment = %d, size = %d\n",
+             id,
+             min_offset + pc,
+             max_offset + pc,
+             alignment,
+             size));
   return new ForwardReference<int32_t>(pc,
                                        size,
                                        min_offset + pc,
@@ -375,7 +376,7 @@
   }
 
   int32_t pc = 0;
-  for (int i = 0; !objects.empty(); ++i) {
+  while (!objects.empty()) {
     IF_VERBOSE(printf("PC = 0x%x (%d)\n", pc, pc));
     int32_t pc_increment = RandomPCIncrement();
     IF_VERBOSE(printf("Attempting to increment PC by %d\n", pc_increment));
@@ -450,7 +451,7 @@
   }
 
   int32_t pc = 0;
-  for (int i = 0; !objects.empty(); ++i) {
+  while (!objects.empty()) {
     IF_VERBOSE(printf("PC = 0x%x (%d)\n", pc, pc));
 
     int32_t pc_increment = RandomPCIncrement();
@@ -797,7 +798,7 @@
 
 
   // Increment PC to close to the checkpoint of the pools minus a known
-  // thershold.
+  // threshold.
   const int kBigObjectSize = 1024;
   TestPoolManager test(&pool_manager);
   pc = test.GetPoolCheckpoint() - kBigObjectSize;
diff --git a/test/test-runner.cc b/test/test-runner.cc
index 092a4ed..093c5be 100644
--- a/test/test-runner.cc
+++ b/test/test-runner.cc
@@ -24,12 +24,12 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "test-runner.h"
+
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 
-#include "test-runner.h"
-
 // Initialize the list as empty.
 vixl::Test* vixl::Test::first_ = NULL;
 vixl::Test* vixl::Test::last_ = NULL;
diff --git a/test/test-runner.h b/test/test-runner.h
index bb72ce6..f0c11ad 100644
--- a/test/test-runner.h
+++ b/test/test-runner.h
@@ -28,6 +28,7 @@
 #define TEST_TEST_H_
 
 #include "utils-vixl.h"
+
 #include "aarch64/instructions-aarch64.h"
 
 namespace vixl {
diff --git a/test/test-trace-reference/log-all b/test/test-trace-reference/log-all
index 9e9904a..948a13f 100644
--- a/test/test-trace-reference/log-all
+++ b/test/test-trace-reference/log-all
@@ -1034,6 +1034,35 @@
 #             w8:         0x00000001
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
 #            x10: 0x0000000000000000
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 # Branch to 0x~~~~~~~~~~~~~~~~.
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
diff --git a/test/test-trace-reference/log-all-colour b/test/test-trace-reference/log-all-colour
index bf5ec20..31a0790 100644
--- a/test/test-trace-reference/log-all-colour
+++ b/test/test-trace-reference/log-all-colour
@@ -1034,6 +1034,35 @@
 #             w8:         0x00000001
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
 #            x10: 0x0000000000000000
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+#             sp: 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 # Branch to 0x~~~~~~~~~~~~~~~~.
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
diff --git a/test/test-trace-reference/log-branch b/test/test-trace-reference/log-branch
index 0491d50..f1096e6 100644
--- a/test/test-trace-reference/log-branch
+++ b/test/test-trace-reference/log-branch
@@ -44,6 +44,11 @@
 #        x3<7:0>:               0xff -> 0x~~~~~~~~~~~~~~~~
 #       x4<15:0>:             0x0001 -> 0x~~~~~~~~~~~~~~~~
 #       x5<15:0>:             0x8500 -> 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
 #     v21<127:0>: 0x000000000000000000000000ffff8007
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x07 -> 0x~~~~~~~~~~~~~~~~
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x80 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-branch-colour b/test/test-trace-reference/log-branch-colour
index 7caf1a4..8a43844 100644
--- a/test/test-trace-reference/log-branch-colour
+++ b/test/test-trace-reference/log-branch-colour
@@ -44,6 +44,11 @@
 #        x3<7:0>:               0xff -> 0x~~~~~~~~~~~~~~~~
 #       x4<15:0>:             0x0001 -> 0x~~~~~~~~~~~~~~~~
 #       x5<15:0>:             0x8500 -> 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
 #     v21<127:0>: 0x000000000000000000000000ffff8007
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x07 -> 0x~~~~~~~~~~~~~~~~
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x80 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-cpufeatures b/test/test-trace-reference/log-cpufeatures
index 795d358..fd727e6 100644
--- a/test/test-trace-reference/log-cpufeatures
+++ b/test/test-trace-reference/log-cpufeatures
@@ -314,6 +314,14 @@
 0x~~~~~~~~~~~~~~~~  d3403ce6		uxth x6, w7
 0x~~~~~~~~~~~~~~~~  53007d28		lsr w8, w9, #0
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
 0x~~~~~~~~~~~~~~~~  54000061		b.ne #+0xc (addr 0x~~~~~~~~~~~~~~~~)
@@ -451,7 +459,7 @@
 0x~~~~~~~~~~~~~~~~  1e7e79d3		fminnm d19, d14, d30                    // Needs: FP
 0x~~~~~~~~~~~~~~~~  1e217820		fminnm s0, s1, s1                       // Needs: FP
 0x~~~~~~~~~~~~~~~~  1e6040cd		fmov d13, d6                            // Needs: FP
-0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            // Needs: FP
+0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            // Needs: FP, NEON
 0x~~~~~~~~~~~~~~~~  1e709008		fmov d8, #0x84 (-2.5000)                // Needs: FP
 0x~~~~~~~~~~~~~~~~  1e204065		fmov s5, s3                             // Needs: FP
 0x~~~~~~~~~~~~~~~~  1e270299		fmov s25, w20                           // Needs: FP
diff --git a/test/test-trace-reference/log-cpufeatures-colour b/test/test-trace-reference/log-cpufeatures-colour
index 170f34d..94c9c95 100644
--- a/test/test-trace-reference/log-cpufeatures-colour
+++ b/test/test-trace-reference/log-cpufeatures-colour
@@ -314,6 +314,14 @@
 0x~~~~~~~~~~~~~~~~  d3403ce6		uxth x6, w7
 0x~~~~~~~~~~~~~~~~  53007d28		lsr w8, w9, #0
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
 0x~~~~~~~~~~~~~~~~  54000061		b.ne #+0xc (addr 0x~~~~~~~~~~~~~~~~)
@@ -451,7 +459,7 @@
 0x~~~~~~~~~~~~~~~~  1e7e79d3		fminnm d19, d14, d30                    FP
 0x~~~~~~~~~~~~~~~~  1e217820		fminnm s0, s1, s1                       FP
 0x~~~~~~~~~~~~~~~~  1e6040cd		fmov d13, d6                            FP
-0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            FP
+0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            FP, NEON
 0x~~~~~~~~~~~~~~~~  1e709008		fmov d8, #0x84 (-2.5000)                FP
 0x~~~~~~~~~~~~~~~~  1e204065		fmov s5, s3                             FP
 0x~~~~~~~~~~~~~~~~  1e270299		fmov s25, w20                           FP
diff --git a/test/test-trace-reference/log-cpufeatures-custom b/test/test-trace-reference/log-cpufeatures-custom
index 3975ec9..b46c23e 100644
--- a/test/test-trace-reference/log-cpufeatures-custom
+++ b/test/test-trace-reference/log-cpufeatures-custom
@@ -314,6 +314,14 @@
 0x~~~~~~~~~~~~~~~~  d3403ce6		uxth x6, w7
 0x~~~~~~~~~~~~~~~~  53007d28		lsr w8, w9, #0
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
 0x~~~~~~~~~~~~~~~~  54000061		b.ne #+0xc (addr 0x~~~~~~~~~~~~~~~~)
@@ -451,7 +459,7 @@
 0x~~~~~~~~~~~~~~~~  1e7e79d3		fminnm d19, d14, d30                    ### {FP} ###
 0x~~~~~~~~~~~~~~~~  1e217820		fminnm s0, s1, s1                       ### {FP} ###
 0x~~~~~~~~~~~~~~~~  1e6040cd		fmov d13, d6                            ### {FP} ###
-0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            ### {FP} ###
+0x~~~~~~~~~~~~~~~~  9e670222		fmov d2, x17                            ### {FP, NEON} ###
 0x~~~~~~~~~~~~~~~~  1e709008		fmov d8, #0x84 (-2.5000)                ### {FP} ###
 0x~~~~~~~~~~~~~~~~  1e204065		fmov s5, s3                             ### {FP} ###
 0x~~~~~~~~~~~~~~~~  1e270299		fmov s25, w20                           ### {FP} ###
diff --git a/test/test-trace-reference/log-disasm b/test/test-trace-reference/log-disasm
index 53f8f01..7705af1 100644
--- a/test/test-trace-reference/log-disasm
+++ b/test/test-trace-reference/log-disasm
@@ -314,6 +314,14 @@
 0x~~~~~~~~~~~~~~~~  d3403ce6		uxth x6, w7
 0x~~~~~~~~~~~~~~~~  53007d28		lsr w8, w9, #0
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
 0x~~~~~~~~~~~~~~~~  54000061		b.ne #+0xc (addr 0x~~~~~~~~~~~~~~~~)
diff --git a/test/test-trace-reference/log-disasm-colour b/test/test-trace-reference/log-disasm-colour
index 53f8f01..7705af1 100644
--- a/test/test-trace-reference/log-disasm-colour
+++ b/test/test-trace-reference/log-disasm-colour
@@ -314,6 +314,14 @@
 0x~~~~~~~~~~~~~~~~  d3403ce6		uxth x6, w7
 0x~~~~~~~~~~~~~~~~  53007d28		lsr w8, w9, #0
 0x~~~~~~~~~~~~~~~~  d3407d6a		ubfx x10, x11, #0, #32
+0x~~~~~~~~~~~~~~~~  a9bf7fea		stp x10, xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  a8c17fea		ldp x10, xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  388107ff		ldrsb xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  788107ff		ldrsh xzr, [sp], #16
+0x~~~~~~~~~~~~~~~~  f81f0fff		str xzr, [sp, #-16]!
+0x~~~~~~~~~~~~~~~~  b88107ff		ldrsw xzr, [sp], #16
 0x~~~~~~~~~~~~~~~~  14000001		b #+0x4 (addr 0x~~~~~~~~~~~~~~~~)
 0x~~~~~~~~~~~~~~~~  eb030063		subs x3, x3, x3
 0x~~~~~~~~~~~~~~~~  54000061		b.ne #+0xc (addr 0x~~~~~~~~~~~~~~~~)
diff --git a/test/test-trace-reference/log-regs b/test/test-trace-reference/log-regs
index c350f52..10c4402 100644
--- a/test/test-trace-reference/log-regs
+++ b/test/test-trace-reference/log-regs
@@ -393,6 +393,22 @@
 #             x6: 0x000000000000ffff
 #             w8:         0x00000001
 #            x10: 0x0000000000000000
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 #             x3: 0x0000000000000000
 #            w18:         0x00000000
 #            w29:         0x00000000
diff --git a/test/test-trace-reference/log-regs-colour b/test/test-trace-reference/log-regs-colour
index 5583998..4e877d7 100644
--- a/test/test-trace-reference/log-regs-colour
+++ b/test/test-trace-reference/log-regs-colour
@@ -393,6 +393,22 @@
 #             x6: 0x000000000000ffff
 #             w8:         0x00000001
 #            x10: 0x0000000000000000
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 #             x3: 0x0000000000000000
 #            w18:         0x00000000
 #            w29:         0x00000000
diff --git a/test/test-trace-reference/log-state b/test/test-trace-reference/log-state
index 0b5069d..419e479 100644
--- a/test/test-trace-reference/log-state
+++ b/test/test-trace-reference/log-state
@@ -672,6 +672,22 @@
 #             x6: 0x000000000000ffff
 #             w8:         0x00000001
 #            x10: 0x0000000000000000
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 # NZCV: N:0 Z:1 C:1 V:0
 #             x3: 0x0000000000000000
 # NZCV: N:1 Z:0 C:0 V:0
diff --git a/test/test-trace-reference/log-state-colour b/test/test-trace-reference/log-state-colour
index 3234c7a..85acd79 100644
--- a/test/test-trace-reference/log-state-colour
+++ b/test/test-trace-reference/log-state-colour
@@ -672,6 +672,22 @@
 #             x6: 0x000000000000ffff
 #             w8:         0x00000001
 #            x10: 0x0000000000000000
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            x10: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
+#            xzr: 0x0000000000000000
+#                                  ╙─ 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#             sp: 0x~~~~~~~~~~~~~~~~
 # NZCV: N:0 Z:1 C:1 V:0
 #             x3: 0x0000000000000000
 # NZCV: N:1 Z:0 C:0 V:0
diff --git a/test/test-trace-reference/log-write b/test/test-trace-reference/log-write
index 0491d50..f1096e6 100644
--- a/test/test-trace-reference/log-write
+++ b/test/test-trace-reference/log-write
@@ -44,6 +44,11 @@
 #        x3<7:0>:               0xff -> 0x~~~~~~~~~~~~~~~~
 #       x4<15:0>:             0x0001 -> 0x~~~~~~~~~~~~~~~~
 #       x5<15:0>:             0x8500 -> 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
 #     v21<127:0>: 0x000000000000000000000000ffff8007
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x07 -> 0x~~~~~~~~~~~~~~~~
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x80 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-write-colour b/test/test-trace-reference/log-write-colour
index 7caf1a4..8a43844 100644
--- a/test/test-trace-reference/log-write-colour
+++ b/test/test-trace-reference/log-write-colour
@@ -44,6 +44,11 @@
 #        x3<7:0>:               0xff -> 0x~~~~~~~~~~~~~~~~
 #       x4<15:0>:             0x0001 -> 0x~~~~~~~~~~~~~~~~
 #       x5<15:0>:             0x8500 -> 0x~~~~~~~~~~~~~~~~
+#      x10<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#      xzr<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
 #     v21<127:0>: 0x000000000000000000000000ffff8007
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x07 -> 0x~~~~~~~~~~~~~~~~
 #                    ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x80 -> 0x~~~~~~~~~~~~~~~~
diff --git a/tools/clang_format.py b/tools/clang_format.py
index a0d000c..a9c53c9 100755
--- a/tools/clang_format.py
+++ b/tools/clang_format.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2016, VIXL authors
 # All rights reserved.
@@ -33,17 +33,17 @@
 import subprocess
 import sys
 import tempfile
+import shutil
 
 from threaded_tests import Test, TestQueue
 import printer
 import util
 
-CLANG_FORMAT_VERSION_MAJOR = 4
-CLANG_FORMAT_VERSION_MINOR = 0
+CLANG_TOOL_SUPPORTED_VERSIONS = range(11, 16)
 
-DEFAULT_CLANG_FORMAT = \
-    'clang-format-{}.{}'.format(CLANG_FORMAT_VERSION_MAJOR,
-                                CLANG_FORMAT_VERSION_MINOR)
+DEFAULT_CLANG_FORMAT = 'clang-format'
+
+CLANG_TOOL_VERSION_MATCH = r"(clang-format|LLVM) version ([\d]+)\.[\d]+\.[\d]+.*$"
 
 is_output_redirected = not sys.stdout.isatty()
 
@@ -69,21 +69,37 @@
                       as it thinks useful.''')
   return parser.parse_args()
 
-
-def ClangFormatIsAvailable(clang_format):
-  if not util.IsCommandAvailable(clang_format):
+def is_supported(tool):
+  if not shutil.which(tool):
     return False
-  cmd = '%s -version' % clang_format
-  rc, version = util.getstatusoutput(cmd)
+
+  cmd = '%s -version' % tool
+
+  try:
+    rc, version = util.getstatusoutput(cmd)
+  except OSError:
+    return False
+
   if rc != 0:
       util.abort("Failed to execute %s: %s" % (cmd, version))
-  m = re.search("^clang-format version (\d)\.(\d)\.\d.*$",
-                version.decode(), re.M)
+  m = re.search(CLANG_TOOL_VERSION_MATCH, version, re.MULTILINE)
   if not m:
-      util.abort("Failed to get clang-format's version: %s" % version)
-  major, minor = m.groups()
-  return int(major) == CLANG_FORMAT_VERSION_MAJOR and \
-      int(minor) == CLANG_FORMAT_VERSION_MINOR
+      util.abort("Failed to get clang tool version: %s" % version)
+  _, major = m.groups()
+
+  if int(major) in CLANG_TOOL_SUPPORTED_VERSIONS:
+    return True
+
+  return False
+
+def detect_clang_tool(tool):
+  supported_tools = [tool] + [tool + '-' + str(ver) for ver in CLANG_TOOL_SUPPORTED_VERSIONS]
+  for tool in supported_tools:
+    if is_supported(tool):
+        return tool
+
+  return None
+
 
 def RunTest(test):
   filename = test.args['filename']
@@ -102,6 +118,7 @@
 
   cmd_diff = ['diff', '--unified', filename, temp_file_name]
   cmd_diff_string = '$ ' + ' '.join(cmd_diff)
+
   p_diff = subprocess.Popen(cmd_diff,
                             stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
 
@@ -111,15 +128,15 @@
             stdin = p_diff.stdout,
             stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
     out, unused = p_colordiff.communicate()
+    rc += p_colordiff.returncode
   else:
     out, unused = p_diff.communicate()
+    rc += p_diff.returncode
 
-  rc += p_diff.wait()
 
   if in_place:
       cmd_format = [clang_format, '-i', filename]
-      p_format = subprocess.Popen(cmd_format,
-                                  stdout=temp_file, stderr=subprocess.STDOUT)
+      subprocess.run(cmd_format, stdout=temp_file, stderr=subprocess.STDOUT)
 
   if rc != 0:
     with Test.n_tests_failed.get_lock(): Test.n_tests_failed.value += 1
@@ -143,7 +160,8 @@
     printer.Print('Incorrectly formatted file: ' + filename + '\n' + \
                   cmd_format_string + '\n' + \
                   cmd_diff_string + '\n' + \
-                  out, has_lock = True)
+                  out.decode(), has_lock = True)
+
   printer.__print_lock__.release()
 
   os.remove(temp_file_name)
@@ -151,12 +169,12 @@
 # Returns the total number of files incorrectly formatted.
 def ClangFormatFiles(files, clang_format, in_place = False, jobs = 1,
                      progress_prefix = ''):
-  if not ClangFormatIsAvailable(clang_format):
-    error_message = "`{}` version {}.{} not found. Please ensure it " \
-                    "is installed, in your PATH and the correct version." \
-                    .format(clang_format,
-                            CLANG_FORMAT_VERSION_MAJOR,
-                            CLANG_FORMAT_VERSION_MINOR)
+
+  clang_format = detect_clang_tool("clang-format")
+
+  if not clang_format:
+    error_message = "clang-format not found. Please ensure it " \
+                    "is installed, in your PATH and the correct version."
     print(printer.COLOUR_RED + error_message + printer.NO_COLOUR)
     return -1
 
@@ -176,7 +194,7 @@
 if __name__ == '__main__':
   # Parse the arguments.
   args = BuildOptions()
-  files = args.files or util.get_source_files()
+  files = args.files or util.get_source_files(exclude_dirs=['.*', '*/traces/*', '*/aarch32/*'])
 
   rc = ClangFormatFiles(files, clang_format = args.clang_format,
                         in_place = args.in_place, jobs = args.jobs)
diff --git a/tools/clang_tidy.py b/tools/clang_tidy.py
index 8607547..4297a2b 100755
--- a/tools/clang_tidy.py
+++ b/tools/clang_tidy.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2019, VIXL authors
 # All rights reserved.
@@ -30,19 +30,15 @@
 import os
 import re
 import sys
+import subprocess
 
-from clang_format import CLANG_FORMAT_VERSION_MAJOR, CLANG_FORMAT_VERSION_MINOR
+from clang_format import detect_clang_tool
 from threaded_tests import Test, TestQueue
 import config
 import printer
 import util
 
-CLANG_TIDY_VERSION_MAJOR = CLANG_FORMAT_VERSION_MAJOR
-CLANG_TIDY_VERSION_MINOR = CLANG_FORMAT_VERSION_MINOR
-
-DEFAULT_CLANG_TIDY = \
-    'clang-tidy-{}.{}'.format(CLANG_TIDY_VERSION_MAJOR,
-                              CLANG_TIDY_VERSION_MINOR)
+DEFAULT_CLANG_TIDY = 'clang-tidy'
 
 def BuildOptions():
   parser = argparse.ArgumentParser(
@@ -60,20 +56,6 @@
                       help='Path to clang-tidy.')
   return parser.parse_args()
 
-def ClangTidyIsAvailable(clang_tidy):
-  if not util.IsCommandAvailable(clang_tidy):
-    return False
-  cmd = '%s -version' % clang_tidy
-  rc, version = util.getstatusoutput(cmd)
-  if rc != 0:
-    util.abort("Failed to execute %s: %s" % (cmd, version))
-  m = re.search("LLVM version (\d)\.(\d)\.\d.*$", version.decode(), re.M)
-  if not m:
-    util.abort("Failed to get clang-tidy's version: %s" % version)
-  major, minor = m.groups()
-  return int(major) == CLANG_TIDY_VERSION_MAJOR and \
-    int(minor) == CLANG_TIDY_VERSION_MINOR
-
 def FilterClangTidyLines(lines):
   out = []
   print_context = False
@@ -93,7 +75,7 @@
   return "\n".join(out)
 
 def FilterFiles(list_files):
-  return list(filter(lambda x: x.endswith('.cc'), list_files))
+  return [x for x in list_files if x.endswith('.cc')]
 
 def RunTest(test):
   cmd = " ".join(test.args['command'])
@@ -133,12 +115,12 @@
   printer.__print_lock__.release()
 
 def ClangTidyFiles(files, clang_tidy, jobs = 1, progress_prefix = ''):
-  if not ClangTidyIsAvailable(clang_tidy):
-    error_message = "`{}` version {}.{} not found. Please ensure it " \
-                    "is installed, in your PATH and the correct version." \
-                    .format(clang_tidy,
-                            CLANG_TIDY_VERSION_MAJOR,
-                            CLANG_TIDY_VERSION_MINOR)
+
+  clang_tidy = detect_clang_tool("clang-tidy")
+
+  if not clang_tidy:
+    error_message = "clang-tidy not found. Please ensure it " \
+                    "is installed, in your PATH and the correct version."
     print(printer.COLOUR_RED + error_message + printer.NO_COLOUR)
     return -1
 
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
index f19570a..d787f6f 100644
--- a/tools/code_coverage.log
+++ b/tools/code_coverage.log
@@ -6,5 +6,30 @@
 1642688881 82.94% 97.51% 95.27%
 1646150629 82.94% 97.51% 95.36%
 1647535694 82.93% 97.52% 95.36%
+1650549095 82.93% 97.52% 95.33%
 1651138061 82.94% 97.52% 95.36%
 1653484786 82.79% 97.46% 95.51%
+1657272256 83.03% 97.50% 95.35%
+1657620989 82.93% 97.52% 95.33%
+1660224011 82.79% 97.51% 95.50%
+1663161852 82.79% 97.51% 95.50%
+1666104118 82.79% 97.51% 95.50%
+1668785529 82.75% 97.44% 95.40%
+1669202345 82.79% 97.51% 95.51%
+1673432155 82.79% 97.51% 95.51%
+1677171445 82.78% 97.56% 94.81%
+1681814646 82.90% 97.57% 94.87%
+1686666000 82.90% 97.57% 94.87%
+1693487542 82.91% 97.57% 94.87%
+1694008240 82.72% 97.50% 94.95%
+1697036303 82.87% 97.56% 94.76%
+1698228274 82.93% 97.68% 94.90%
+1698330215 82.92% 97.57% 94.88%
+1702052331 82.89% 97.59% 94.77%
+1706691191 82.87% 97.59% 94.74%
+1707395574 82.89% 97.59% 94.77%
+1715261843 82.84% 97.60% 94.69%
+1718190785 82.85% 97.60% 94.70%
+1722595938 82.94% 97.78% 94.72%
+1728570468 82.94% 97.78% 94.71%
+1736874659 82.94% 97.63% 94.78%
diff --git a/tools/code_coverage.sh b/tools/code_coverage.sh
index 5525bb0..28ce407 100755
--- a/tools/code_coverage.sh
+++ b/tools/code_coverage.sh
@@ -38,7 +38,7 @@
 export CXX=clang++
 export LLVM_PROFILE_FILE=$(mktemp)
 PROFDATA=$(mktemp)
-BUILDDIR="obj/target_a64/mode_debug/symbols_on/compiler_clang++/std_c++14/simulator_aarch64/negative_testing_off/code_buffer_allocator_mmap"
+BUILDDIR="obj/target_a64/mode_debug/symbols_on/compiler_clang++/std_c++17/simulator_aarch64/negative_testing_off/code_buffer_allocator_mmap"
 RUNNER="$BUILDDIR/test/test-runner"
 
 # Build with code coverage instrumentation enabled.
diff --git a/tools/config.py b/tools/config.py
index f178d4e..627e16a 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -44,7 +44,7 @@
 # The full list of available build modes.
 build_options_modes = ['debug', 'release']
 # The list of C++ standard to test for. The first value is used as the default.
-tested_cpp_standards = ['c++14']
+tested_cpp_standards = ['c++17']
 # The list of compilers tested.
 tested_compilers = ['clang++', 'g++']
 # The list of target arch/isa options to test with. Do not list 'all' as an
diff --git a/tools/generate_simulator_traces.py b/tools/generate_simulator_traces.py
index 3e25b0d..66c49c3 100755
--- a/tools/generate_simulator_traces.py
+++ b/tools/generate_simulator_traces.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2015, VIXL authors
 # All rights reserved.
@@ -155,7 +155,7 @@
     master_trace_f.write('\n\n')
 
     # Find the AArch64 simulator tests.
-    tests = sorted(filter(lambda t: 'AARCH64_SIM_' in t, test_list.split()),
+    tests = sorted([t for t in test_list.split() if 'AARCH64_SIM_' in t],
                    key=lambda t: GetAArch64Filename(t))
 
     for test in tests:
@@ -164,7 +164,7 @@
       trace_filename = GetAArch64Filename(test_name)
       if not args.filter or re.compile(args.filter).search(test):
         # Run each test.
-        print 'Generating trace for ' + test;
+        print('Generating trace for ' + test);
         cmd = ' '.join([args.runner, '--generate_test_trace', test])
         status, output = util.getstatusoutput(cmd)
         if status != 0: util.abort('Failed to run ' + cmd + '.')
@@ -207,7 +207,7 @@
 
     for test in tests:
       # Run each test.
-      print 'Generating trace for ' + test;
+      print('Generating trace for ' + test);
       # Strip out 'AARCH32_' to get the name of the test.
       test_name = test[len('AARCH32_'):]
 
@@ -234,4 +234,4 @@
           '\n' + "#endif  // VIXL_" + test_name.upper() + "_H_" + '\n')
       trace_f.close()
 
-  print 'Trace generation COMPLETE'
+  print('Trace generation COMPLETE')
diff --git a/tools/generate_test_trace_a64_reference.py b/tools/generate_test_trace_a64_reference.py
index cef06fb..d42594d 100755
--- a/tools/generate_test_trace_a64_reference.py
+++ b/tools/generate_test_trace_a64_reference.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2016, VIXL authors
 # All rights reserved.
@@ -55,7 +55,7 @@
   # Find the trace tests.
   status, output = util.getstatusoutput(args.runner + ' --list')
   if status != 0: util.abort('Failed to list all tests')
-  tests = filter(lambda t: 'TRACE_' in t, output.split())
+  tests = [t for t in output.split() if 'TRACE_' in t]
   tests.sort()
 
   if not os.path.exists(args.outdir):
@@ -63,7 +63,7 @@
 
   for test in tests:
     # Run each test.
-    print 'Generating trace for ' + test;
+    print('Generating trace for ' + test);
     cmd = ' '.join([args.runner, '--generate_test_trace', test])
     status, output = util.getstatusoutput(cmd)
     if status != 0: util.abort('Failed to run ' + cmd + '.')
diff --git a/tools/git.py b/tools/git.py
deleted file mode 100644
index a133a48..0000000
--- a/tools/git.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2014, VIXL authors
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-#   * Redistributions of source code must retain the above copyright notice,
-#     this list of conditions and the following disclaimer.
-#   * Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#   * Neither the name of ARM Limited nor the names of its contributors may be
-#     used to endorse or promote products derived from this software without
-#     specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
-# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import re
-import util
-import os.path
-from pipes import quote
-
-def is_git_repository_root(path):
-  command = 'git -C ' + quote(path) + ' rev-parse --show-toplevel'
-  status, toplevel = util.getstatusoutput(command)
-  if status != 0: return False
-  return os.path.samefile(toplevel, path)
-
-def get_tracked_files():
-  command = 'git ls-tree HEAD -r --full-tree --name-only'
-
-  status, tracked = util.getstatusoutput(command)
-  if status != 0: util.abort('Failed to list tracked files.')
-
-  return tracked
-
-
-# Get untracked files in src/, test/, and tools/.
-def get_untracked_files():
-  status, output = util.getstatusoutput('git status -s')
-  if status != 0: util.abort('Failed to get git status.')
-
-  untracked_regexp = re.compile('\?\?.*(src/|test/|tools/).*(.cc$|.h$)')
-  files_in_watched_folder = lambda n: untracked_regexp.search(n) != None
-  untracked_files = filter(files_in_watched_folder, output.split('\n'))
-
-  return untracked_files
diff --git a/tools/known_test_failures.py b/tools/known_test_failures.py
index 262d6e6..791568b 100644
--- a/tools/known_test_failures.py
+++ b/tools/known_test_failures.py
@@ -76,7 +76,7 @@
     'AARCH64_SIM_frsqrts_D'
   }
 
-  filtered_list = filter(lambda x: x not in known_valgrind_test_failures, tests)
+  filtered_list = [x for x in tests if x not in known_valgrind_test_failures]
   return (filtered_list, len(tests) - len(filtered_list), reason)
 
 def FilterKnownTestFailures(tests, **env):
diff --git a/tools/lint.py b/tools/lint.py
index d4c9f65..f67799b 100755
--- a/tools/lint.py
+++ b/tools/lint.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2015, VIXL authors
 # All rights reserved.
@@ -38,7 +38,6 @@
 import sys
 
 import config
-import git
 import printer
 import util
 
@@ -92,7 +91,7 @@
   printer.Print(outerr)
 
   # Find the number of errors in this file.
-  res = re.search('Total errors found: (\d+)', outerr)
+  res = re.search(r'Total errors found: (\d+)', outerr)
   if res:
     n_errors_str = res.string[res.start(1):res.end(1)]
     n_errors = int(n_errors_str)
@@ -143,12 +142,12 @@
     return -1
 
   # Filter out directories.
-  files = filter(os.path.isfile, files)
+  files = list(filter(os.path.isfile, files))
 
   # Filter out files for which we have a cached correct result.
   if cached_results is not None and len(cached_results) != 0:
     n_input_files = len(files)
-    files = filter(lambda f: ShouldLint(f, cached_results), files)
+    files = [f for f in files if ShouldLint(f, cached_results)]
     n_skipped_files = n_input_files - len(files)
     if n_skipped_files != 0:
       printer.Print(
@@ -171,7 +170,7 @@
     pool.terminate()
     sys.exit(1)
 
-  n_errors = sum(map(lambda (filename, errors): errors, results))
+  n_errors = sum([filename_errors[1] for filename_errors in results])
 
   if cached_results is not None:
     for filename, errors in results:
@@ -193,7 +192,7 @@
     return retcode == 0
 
 
-CPP_EXT_REGEXP = re.compile('\.(cc|h)$')
+CPP_EXT_REGEXP = re.compile(r'\.(cc|h)$')
 def IsLinterInput(filename):
   # lint all C++ files.
   return CPP_EXT_REGEXP.search(filename) != None
@@ -223,7 +222,7 @@
     return \
       fnmatch.fnmatch(f, os.path.join(relative_aarch32_traces_path, '*.h')) or \
       fnmatch.fnmatch(f, os.path.join(relative_aarch64_traces_path, '*.h'))
-  return filter(lambda f: not IsTraceHeader(f), files)
+  return [f for f in files if not IsTraceHeader(f)]
 
 
 def RunLinter(files, jobs=1, progress_prefix='', cached=True):
diff --git a/tools/printer.py b/tools/printer.py
index 609da7f..57e22e3 100644
--- a/tools/printer.py
+++ b/tools/printer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2014, VIXL authors
 # All rights reserved.
diff --git a/tools/test.py b/tools/test.py
index 75c700d..9eb859f 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2015, VIXL authors
 # All rights reserved.
@@ -28,7 +28,6 @@
 
 import argparse
 import fcntl
-import git
 import itertools
 import multiprocessing
 import os
@@ -220,7 +219,7 @@
   t_current = t_start
   t_last_indication = t_start
   t_current = t_start
-  process_output = ''
+  process_output = b''
 
   # Keep looping as long as the process is running.
   while p.poll() is None:
@@ -238,9 +237,9 @@
       try:
         line = os.read(p.stdout.fileno(), 1024)
       except OSError:
-        line = ''
+        line = b''
         break
-      if line == '': break
+      if line == b'': break
       process_output += line
 
   # The process has exited. Don't forget to retrieve the rest of its output.
@@ -253,24 +252,23 @@
     printer.Print(printer.COLOUR_GREEN + printable_command + printer.NO_COLOUR)
   else:
     printer.Print(printer.COLOUR_RED + printable_command + printer.NO_COLOUR)
-    printer.Print(process_output)
+    printer.Print(process_output.decode())
   return rc
 
 
 def RunLinter(jobs):
-  return lint.RunLinter(map(lambda x: join(dir_root, x),
-                        util.get_source_files()),
+  return lint.RunLinter([join(dir_root, x) for x in util.get_source_files()],
                         jobs = args.jobs, progress_prefix = 'cpp lint: ')
 
 
 def RunClangFormat(clang_path, jobs):
-  return clang_format.ClangFormatFiles(util.get_source_files(),
+  return clang_format.ClangFormatFiles(util.get_source_files(exclude_dirs=['.*', '*/traces/*', '*/aarch32/*']),
                                        clang_path,
                                        jobs = jobs,
                                        progress_prefix = 'clang-format: ')
 
 def RunClangTidy(clang_path, jobs):
-  return clang_tidy.ClangTidyFiles(util.get_source_files(),
+  return clang_tidy.ClangTidyFiles(util.get_source_files(exclude_dirs=['.*', '*/traces/*', '*/aarch32/*']),
                                    clang_path,
                                    jobs = jobs,
                                    progress_prefix = 'clang-tidy: ')
diff --git a/tools/test_generator/data_types.py b/tools/test_generator/data_types.py
index f51f2bb..40d2bf3 100644
--- a/tools/test_generator/data_types.py
+++ b/tools/test_generator/data_types.py
@@ -456,7 +456,7 @@
 
   def Prologue(self):
     # When clearing or setting the `Q` bit, we need to make sure the `NZCV`
-    # flags are not overriden. Therefore we use two scratch registers that we
+    # flags are not overridden. Therefore we use two scratch registers that we
     # push on the stack first to allow the instruction to use them as operands.
     code = """{{
           UseScratchRegisterScope temp_registers(&masm);
diff --git a/tools/test_generator/generator.py b/tools/test_generator/generator.py
index 37afee7..baa970c 100644
--- a/tools/test_generator/generator.py
+++ b/tools/test_generator/generator.py
@@ -83,7 +83,7 @@
 
 class InputList(object):
   """
-  Convevience class representing a list of input objects.
+  Convenience class representing a list of input objects.
 
   This class is an iterator over input objects.
 
@@ -113,7 +113,7 @@
   Attributes:
     name            Name of the test case, it is used to name the array to
                     produce.
-    seed            Seed value to use for reproducable random generation.
+    seed            Seed value to use for reproducible random generation.
     operand_names   List of operand names this test case covers.
     input_names     List of input names this test case covers.
     operand_filter  Python expression as a string to filter out operands.
@@ -320,7 +320,7 @@
 
     # A simulator test cannot easily make use of the PC and SP registers.
     if self.test_type == "simulator":
-      # We need to explicitely create our own deep copy the operands before we
+      # We need to explicitly create our own deep copy of the operands before we
       # can modify them.
       self.operands = deepcopy(operands)
       self.operands.ExcludeVariants("Register", ["r13", "r15"])
diff --git a/tools/test_generator/parser.py b/tools/test_generator/parser.py
index af042e2..65b76df 100644
--- a/tools/test_generator/parser.py
+++ b/tools/test_generator/parser.py
@@ -165,7 +165,7 @@
   Parse the instruction description into a
   (`generator.OperandList`, `generator.InputList`) tuple.
 
-  Example for an instruction that takes a condidition code, two registers and an
+  Example for an instruction that takes a condition code, two registers and an
   immediate as operand. It will also need inputs for the registers, as well as
   NZCV flags.
   ~~~
@@ -400,12 +400,12 @@
   """
   # Strip the ".json" extension
   stripped_basename = os.path.splitext(os.path.basename(filename))[0]
-  # The ISA is the last element in the filename, seperated with "-".
+  # The ISA is the last element in the filename, separated with "-".
   if stripped_basename.endswith(('-a32', '-t32')):
     isa = [stripped_basename[-3:]]
     test_name = stripped_basename[:-4]
   else:
-    # If the ISA is ommitted, support both.
+    # If the ISA is omitted, support both.
     isa = ["a32", "t32"]
     test_name = stripped_basename
 
diff --git a/tools/test_runner.py b/tools/test_runner.py
index 3a3f459..d4459ef 100644
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
@@ -41,7 +41,7 @@
 
   tests = output.split()
   for f in filters:
-    tests = filter(re.compile(f).search, tests)
+    tests = list(filter(re.compile(f).search, tests))
 
   return tests
 
@@ -51,6 +51,7 @@
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
   p_out, p_err = p.communicate()
+  p_out = p_out.decode()
   rc = p.poll()
 
   if rc == 0:
diff --git a/tools/thread_pool.py b/tools/thread_pool.py
index 60abafa..c8fd10e 100644
--- a/tools/thread_pool.py
+++ b/tools/thread_pool.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 
 # Copyright 2019, VIXL authors
 # All rights reserved.
diff --git a/tools/threaded_tests.py b/tools/threaded_tests.py
index 0b83db1..0383c4f 100644
--- a/tools/threaded_tests.py
+++ b/tools/threaded_tests.py
@@ -42,79 +42,79 @@
   n_tests_passed = multiprocessing.Value('i', 0)
   n_tests_failed = multiprocessing.Value('i', 0)
   n_tests_skipped = multiprocessing.Value('i', 0)
-  manager = multiprocessing.Manager()
 
   def __init__(self, name, shared, **kwargs):
-      self.name = name
-      self.shared = shared
-      self.args = kwargs
+    self.name = name
+    self.shared = shared
+    self.args = kwargs
 
 class TestQueue(object):
   def __init__(self, prefix = ''):
     self.progress_prefix = prefix
     self.queue = []
-    self.tests_skipped = Test.manager.dict()
+    self.tests_skipped = None
     self.n_known_failures = 0
     self.known_failures = collections.Counter()
 
   def AddKnownFailures(self, reason, n_tests):
-      self.n_known_failures += n_tests
-      self.known_failures[reason] += n_tests
+    self.n_known_failures += n_tests
+    self.known_failures[reason] += n_tests
 
   def AddTest(self, name, **kwargs):
     self.queue.append(Test(name, self, **kwargs))
 
   # Run the specified tests.
   def Run(self, jobs, verbose, run_function):
-    def InitGlobals():
-      # Initialisation.
-      self.start_time = time.time()
-      self.n_tests = len(self.queue)
-      if self.n_tests == 0:
-        printer.Print('No tests to run.')
-        return False
-      Test.n_tests_passed.value = 0
-      Test.n_tests_failed.value = 0
-      Test.n_tests_skipped.value = 0
-      self.tests_skipped.clear()
-      return True
+    with multiprocessing.Manager() as manager:
+      def InitGlobals():
+        # Initialisation.
+        self.start_time = time.time()
+        self.n_tests = len(self.queue)
+        if self.n_tests == 0:
+          printer.Print('No tests to run.')
+          return False
+        Test.n_tests_passed.value = 0
+        Test.n_tests_failed.value = 0
+        Test.n_tests_skipped.value = 0
+        self.tests_skipped = manager.dict()
+        return True
 
-    thread_pool.Multithread(run_function, self.queue, jobs, InitGlobals)
+      thread_pool.Multithread(run_function, self.queue, jobs, InitGlobals)
 
-    printer.UpdateProgress(self.start_time,
-                           Test.n_tests_passed.value,
-                           Test.n_tests_failed.value,
-                           self.n_tests,
-                           Test.n_tests_skipped.value,
-                           self.n_known_failures,
-                           '== Done ==',
-                           prevent_next_overwrite = True,
-                           prefix = self.progress_prefix)
-    n_tests_features = 0
-    features = set()
-    for reason, n_tests in self.tests_skipped.items():
-      m = re.match(REGEXP_MISSING_FEATURES, reason)
-      if m:
-        if verbose:
-          printer.Print("%d tests skipped because the following features are not "
-                        "available '%s'" % (n_tests, m.group(1)))
+      printer.UpdateProgress(self.start_time,
+                             Test.n_tests_passed.value,
+                             Test.n_tests_failed.value,
+                             self.n_tests,
+                             Test.n_tests_skipped.value,
+                             self.n_known_failures,
+                             '== Done ==',
+                             prevent_next_overwrite = True,
+                             prefix = self.progress_prefix)
+      n_tests_features = 0
+      features = set()
+      for reason, n_tests in self.tests_skipped.items():
+        m = re.match(REGEXP_MISSING_FEATURES, reason)
+        if m:
+          if verbose:
+            printer.Print("%d tests skipped because the following features are "
+                          "not available '%s'" % (n_tests, m.group(1)))
+          else:
+            n_tests_features += n_tests
+            features.update(m.group(1).split(', '))
         else:
-          n_tests_features += n_tests
-          features.update(m.group(1).split(', '))
-      else:
-        printer.Print("%d tests skipped because '%s'" % (n_tests, reason))
+          printer.Print("%d tests skipped because '%s'" % (n_tests, reason))
 
-    n_tests_other = 0
-    if n_tests_features > 0 :
-      printer.Print("%d tests skipped because the CPU does not support "
-                    "the following features: '%s'" %
-                    (n_tests_features, ", ".join(features)))
+      n_tests_other = 0
+      if n_tests_features > 0 :
+        printer.Print("%d tests skipped because the CPU does not support "
+                      "the following features: '%s'" %
+                      (n_tests_features, ", ".join(features)))
 
-    for reason, n_tests in self.known_failures.items():
-        printer.Print("%d tests skipped because '%s'" % (n_tests, reason))
+      for reason, n_tests in self.known_failures.items():
+          printer.Print("%d tests skipped because '%s'" % (n_tests, reason))
 
-    # Empty the queue now that the tests have been run.
-    self.queue = []
-    # `0` indicates success
-    return Test.n_tests_failed.value
+      # Empty the queue now that the tests have been run.
+      self.queue = []
+      # `0` indicates success
+      return Test.n_tests_failed.value
 
diff --git a/tools/util.py b/tools/util.py
index 9152584..240c697 100644
--- a/tools/util.py
+++ b/tools/util.py
@@ -37,8 +37,8 @@
 
 
 def ListCCFilesWithoutExt(path):
-  return map(lambda x : os.path.splitext(os.path.basename(x))[0],
-             glob.glob(os.path.join(path, '*.cc')))
+  src_files = glob.glob(os.path.join(path, '*.cc'))
+  return [os.path.splitext(os.path.basename(x))[0] for x in src_files]
 
 
 def abort(message):
@@ -46,14 +46,8 @@
   sys.exit(1)
 
 
-# Emulate python3 subprocess.getstatusoutput.
 def getstatusoutput(command):
-  try:
-    args = shlex.split(command)
-    output = subprocess.check_output(args, stderr=subprocess.STDOUT)
-    return 0, output.rstrip('\n')
-  except subprocess.CalledProcessError as e:
-    return e.returncode, e.output.rstrip('\n')
+  return subprocess.getstatusoutput(command)
 
 
 def IsCommandAvailable(command):
@@ -77,10 +71,10 @@
   return os.path.relpath(os.path.realpath(path), start)
 
 # Query the compiler about its preprocessor directives and return all of them as
-# a dictionnary.
+# a dictionary.
 def GetCompilerDirectives(env):
   args = [env['compiler']]
-  # Pass the CXXFLAGS varables to the compile, in case we've used "-m32" to
+  # Pass the CXXFLAGS variables to the compiler, in case we've used "-m32" to
   # compile for i386.
   if env['CXXFLAGS']:
     args.append(str(env['CXXFLAGS']))
@@ -95,7 +89,7 @@
     match.group(1): match.group(2)
     for match in [
       # Capture macro name.
-      re.search('^#define (\S+?) (.+)$', macro)
+      re.search(r'^#define (\S+?) (.+)$', macro)
       for macro in out.split('\n')
     ]
     # Filter out non-matches.
@@ -116,7 +110,7 @@
   elif "__aarch64__" in directives:
     return "aarch64"
   else:
-    raise Exception("Unsupported archtecture")
+    raise Exception("Unsupported architecture")
 
 # Class representing the compiler toolchain and version.
 class CompilerInformation(object):
@@ -189,7 +183,7 @@
   # "{compiler}-{major}.{minor}". The comparison is done using the provided
   # `operator` argument.
   def CompareVersion(self, operator, description):
-    match = re.search('^(\S+)-(.*?)$', description)
+    match = re.search(r'^(\S+)-(.*?)$', description)
     if not match:
       raise Exception("A version number is required when comparing compilers")
     compiler, version = match.group(1), match.group(2)
diff --git a/tools/verify_assembler_traces.py b/tools/verify_assembler_traces.py
index d1d29db..d78d17c 100755
--- a/tools/verify_assembler_traces.py
+++ b/tools/verify_assembler_traces.py
@@ -31,7 +31,7 @@
 
 This script will find all files in `test/aarch32/traces/` with names starting
 will `assembler`, and check them against `llvm-mc`. It checks our assembler is
-correct by looking up what instruction we meant to asssemble, assemble it with
+correct by looking up what instruction we meant to assemble, assemble it with
 `llvm` and check the result is bit identical to what our assembler generated.
 
 You may run the script with no arguments from VIXL's top-level directory as long
@@ -136,7 +136,7 @@
   """
   Take an string representing an instruction and convert it to assembly syntax
   for LLVM. VIXL's test generation framework will print instruction
-  representations as a space seperated list. The first element is the mnemonic
+  representations as a space separated list. The first element is the mnemonic
   and the following elements are operands.
   """
 
@@ -304,12 +304,12 @@
     ]
 
   # Our test generator framework uses mnemonics starting with a capital letters.
-  # We need everythin to be lower case for LLVM.
+  # We need everything to be lower case for LLVM.
   vixl_instruction = vixl_instruction.lower()
 
   llvm_instruction = []
 
-  # VIXL may have generated more than one instruction seperated by ';'
+  # VIXL may have generated more than one instruction separated by ';'
   # (an IT instruction for example).
   for instruction in vixl_instruction.split(';'):
     # Strip out extra white spaces.
@@ -401,7 +401,7 @@
   # due to IT instructions preceding every instruction under test. VIXL's
   # assembly reference files will contain a single array of 4 bytes encoding
   # both the IT and the following instruction. While LLVM will have decoded them
-  # into two seperate 2 bytes arrays.
+  # into two separate 2-byte arrays.
   if len(llvm_encodings) == 2 * len(vixl_encodings):
     llvm_encodings = [
         llvm_encodings[i * 2] + llvm_encodings[(i * 2) + 1]