// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "test-utils-aarch64.h"
#include <cmath>
#include <queue>
#include "test-runner.h"
#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#define __ masm->
namespace vixl {
namespace aarch64 {
// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);
// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);
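// The Equal* helpers below compare an expected value against a value read
// back from a RegisterDump (or supplied directly), printing a diagnostic when
// the check fails. They are typically wrapped by the test framework's
// ASSERT_EQUAL_*-style macros.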
bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
if (result != expected) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
expected,
result);
}
return expected == result;
}
bool Equal64(uint64_t reference,
const RegisterDump*,
uint64_t result,
ExpectedResult option) {
switch (option) {
case kExpectEqual:
if (result != reference) {
printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
reference,
result);
}
break;
case kExpectNotEqual:
if (result == reference) {
printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
}
break;
}
return reference == result;
}
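// As above, but check `result` against a list of acceptable reference values.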
bool Equal64(std::vector<uint64_t> reference_list,
const RegisterDump*,
uint64_t result,
ExpectedResult option) {
switch (option) {
case kExpectEqual:
for (uint64_t reference : reference_list) {
if (result == reference) return true;
}
printf("Expected a result in (\n");
break;
case kExpectNotEqual: {
bool found = false;
for (uint64_t reference : reference_list) {
if (result == reference) {
printf("Expected a result not in (\n");
found = true;
break;
}
}
// If the result was not in the list, the check passes; otherwise fall
// through to print the reference list and report failure.
if (!found) return true;
break;
}
}
for (uint64_t reference : reference_list) {
printf(" 0x%016" PRIx64 ",\n", reference);
}
printf(")\t Found 0x%016" PRIx64 "\n", result);
return false;
}
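// Compare a full 128-bit Q register value; on mismatch, print both 64-bit
// halves of the expected and found values.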
bool Equal128(QRegisterValue expected,
const RegisterDump*,
QRegisterValue result) {
if (!expected.Equals(result)) {
printf("Expected 0x%016" PRIx64 "%016" PRIx64
"\t "
"Found 0x%016" PRIx64 "%016" PRIx64 "\n",
expected.GetLane<uint64_t>(1),
expected.GetLane<uint64_t>(0),
result.GetLane<uint64_t>(1),
result.GetLane<uint64_t>(0));
}
return expected.Equals(result);
}
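// The EqualFP* helpers compare raw bit patterns rather than using ==, so that
// NaN payloads and the sign of zero are checked exactly.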
bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
uint16_t e_rawbits = Float16ToRawbits(expected);
uint16_t r_rawbits = Float16ToRawbits(result);
if (e_rawbits == r_rawbits) {
return true;
} else {
if (IsNaN(expected) || IsZero(expected)) {
printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
e_rawbits,
r_rawbits);
} else {
printf("Expected %.6f (16 bit): (0x%04" PRIx16
")\t "
"Found %.6f (0x%04" PRIx16 ")\n",
FPToFloat(expected, kIgnoreDefaultNaN),
e_rawbits,
FPToFloat(result, kIgnoreDefaultNaN),
r_rawbits);
}
return false;
}
}
bool EqualFP32(float expected, const RegisterDump*, float result) {
if (FloatToRawbits(expected) == FloatToRawbits(result)) {
return true;
} else {
if (IsNaN(expected) || (expected == 0.0)) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
FloatToRawbits(expected),
FloatToRawbits(result));
} else {
printf("Expected %.9f (0x%08" PRIx32
")\t "
"Found %.9f (0x%08" PRIx32 ")\n",
expected,
FloatToRawbits(expected),
result,
FloatToRawbits(result));
}
return false;
}
}
bool EqualFP64(double expected, const RegisterDump*, double result) {
if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
return true;
}
if (IsNaN(expected) || (expected == 0.0)) {
printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
DoubleToRawbits(expected),
DoubleToRawbits(result));
} else {
printf("Expected %.17f (0x%016" PRIx64
")\t "
"Found %.17f (0x%016" PRIx64 ")\n",
expected,
DoubleToRawbits(expected),
result,
DoubleToRawbits(result));
}
return false;
}
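// The overloads below read the value under test from a RegisterDump. Where a
// narrower view is checked (W, S or H registers), they also verify that the
// upper bits of the corresponding X or D register were cleared.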
bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
VIXL_ASSERT(reg.Is32Bits());
// Retrieve the corresponding X register so we can check that the upper part
// was properly cleared.
int64_t result_x = core->xreg(reg.GetCode());
if ((result_x & 0xffffffff00000000) != 0) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
expected,
result_x);
return false;
}
uint32_t result_w = core->wreg(reg.GetCode());
return Equal32(expected, core, result_w);
}
bool Equal64(uint64_t reference,
const RegisterDump* core,
const Register& reg,
ExpectedResult option) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return Equal64(reference, core, result, option);
}
bool Equal64(std::vector<uint64_t> reference_list,
const RegisterDump* core,
const Register& reg,
ExpectedResult option) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return Equal64(reference_list, core, result, option);
}
bool NotEqual64(uint64_t reference,
const RegisterDump* core,
const Register& reg) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return NotEqual64(reference, core, result);
}
bool Equal128(uint64_t expected_h,
uint64_t expected_l,
const RegisterDump* core,
const VRegister& vreg) {
VIXL_ASSERT(vreg.Is128Bits());
QRegisterValue expected;
expected.SetLane(0, expected_l);
expected.SetLane(1, expected_h);
QRegisterValue result = core->qreg(vreg.GetCode());
return Equal128(expected, core, result);
}
bool EqualFP16(Float16 expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is16Bits());
// Retrieve the corresponding D register so we can check that the upper part
// was properly cleared.
uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
if ((result_64 & 0xffffffffffff0000) != 0) {
printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
Float16ToRawbits(expected),
FPToFloat(expected, kIgnoreDefaultNaN),
result_64);
return false;
}
return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}
bool EqualFP32(float expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is32Bits());
// Retrieve the corresponding D register so we can check that the upper part
// was properly cleared.
uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
if ((result_64 & 0xffffffff00000000) != 0) {
printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
FloatToRawbits(expected),
expected,
result_64);
return false;
}
return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}
bool EqualFP64(double expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is64Bits());
return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}
bool Equal64(const Register& reg0,
const RegisterDump* core,
const Register& reg1,
ExpectedResult option) {
VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
int64_t reference = core->xreg(reg0.GetCode());
int64_t result = core->xreg(reg1.GetCode());
return Equal64(reference, core, result, option);
}
bool NotEqual64(const Register& reg0,
const RegisterDump* core,
const Register& reg1) {
VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
int64_t expected = core->xreg(reg0.GetCode());
int64_t result = core->xreg(reg1.GetCode());
return NotEqual64(expected, core, result);
}
bool Equal64(uint64_t expected,
const RegisterDump* core,
const VRegister& vreg) {
VIXL_ASSERT(vreg.Is64Bits());
uint64_t result = core->dreg_bits(vreg.GetCode());
return Equal64(expected, core, result);
}
static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }
static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }
static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }
static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }
bool EqualNzcv(uint32_t expected, uint32_t result) {
VIXL_ASSERT((expected & ~NZCVFlag) == 0);
VIXL_ASSERT((result & ~NZCVFlag) == 0);
if (result != expected) {
printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
FlagN(expected),
FlagZ(expected),
FlagC(expected),
FlagV(expected),
FlagN(result),
FlagZ(result),
FlagC(result),
FlagV(result));
return false;
}
return true;
}
bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (a->xreg(i) != b->xreg(i)) {
printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
i,
a->xreg(i),
b->xreg(i));
return false;
}
}
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
uint64_t a_bits = a->dreg_bits(i);
uint64_t b_bits = b->dreg_bits(i);
if (a_bits != b_bits) {
printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
i,
a_bits,
b_bits);
return false;
}
}
return true;
}
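// Check a single lane of an SVE Z register (and, in the overload below, of a
// P register) against `expected`.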
bool EqualSVELane(uint64_t expected,
const RegisterDump* core,
const ZRegister& reg,
int lane) {
unsigned lane_size = reg.GetLaneSizeInBits();
// For convenience in the tests, we allow negative values to be passed into
// `expected`, but truncate them to an appropriately-sized unsigned value for
// the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
// value is truncated from 0xffffffffffffffff to 0xff before the comparison.
VIXL_ASSERT(IsUintN(lane_size, expected) ||
IsIntN(lane_size, RawbitsToInt64(expected)));
expected &= GetUintMask(lane_size);
uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
if (expected != result) {
unsigned lane_size_in_hex_chars = lane_size / 4;
std::string reg_name = reg.GetArchitecturalName();
printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
reg_name.c_str(),
lane,
lane_size_in_hex_chars,
expected,
lane_size_in_hex_chars,
result);
return false;
}
return true;
}
bool EqualSVELane(uint64_t expected,
const RegisterDump* core,
const PRegister& reg,
int lane) {
VIXL_ASSERT(reg.HasLaneSize());
VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
expected &= GetUintMask(p_bits_per_lane);
uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
if (expected != result) {
unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
std::string reg_name = reg.GetArchitecturalName();
printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
reg_name.c_str(),
lane,
lane_size_in_hex_chars,
expected,
lane_size_in_hex_chars,
result);
return false;
}
return true;
}
struct EqualMemoryChunk {
typedef uint64_t RawChunk;
uintptr_t address;
RawChunk expected;
RawChunk result;
bool IsEqual() const { return expected == result; }
};
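// Compare two buffers 64 bits at a time, printing a side-by-side dump with a
// little context around each mismatch. `zero_offset` marks the position
// within the buffers that should be reported as offset zero, so addresses
// before it are annotated with negative offsets.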
bool EqualMemory(const void* expected,
const void* result,
size_t size_in_bytes,
size_t zero_offset) {
if (memcmp(expected, result, size_in_bytes) == 0) return true;
// Read 64-bit chunks, and print them side-by-side if they don't match.
// Remember the last few chunks, even if they matched, so we can print some
// context. We don't want to print the whole buffer, because it could be huge.
static const size_t kContextLines = 1;
std::queue<EqualMemoryChunk> context;
static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);
// This assumption keeps the logic simple, and is acceptable for our tests.
VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);
const char* expected_it = reinterpret_cast<const char*>(expected);
const char* result_it = reinterpret_cast<const char*>(result);
// This is the first error, so print a header row.
printf(" Address (of result) Expected Result\n");
// Always print some context at the start of the buffer.
uintptr_t print_context_to =
reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
EqualMemoryChunk chunk;
chunk.address = reinterpret_cast<uintptr_t>(result_it);
memcpy(&chunk.expected, expected_it, kChunkSize);
memcpy(&chunk.result, result_it, kChunkSize);
while (context.size() > kContextLines) context.pop();
context.push(chunk);
// Print context after an error, and at the end of the buffer.
if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
if (chunk.address > print_context_to) {
// We aren't currently printing context, so separate this context from
// the previous block.
printf("...\n");
}
print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
}
// Print context (including the current line).
while (!context.empty() && (context.front().address < print_context_to)) {
uintptr_t address = context.front().address;
uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
bool is_negative = (offset < zero_offset);
printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
" 0x%016" PRIx64 "\n",
address,
(is_negative ? '-' : '+'),
(is_negative ? (zero_offset - offset) : (offset - zero_offset)),
context.front().expected,
context.front().result);
context.pop();
}
expected_it += kChunkSize;
result_it += kChunkSize;
}
return false;
}
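// Fill the w, x and r arrays (where provided) with up to `reg_count` registers
// drawn from the `allowed` mask: r[i] is sized by `reg_size`, while x[i] and
// w[i] are the X and W views of the same register code. Returns the list of
// register codes that were used.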
RegList PopulateRegisterArray(Register* w,
Register* x,
Register* r,
int reg_size,
int reg_count,
RegList allowed) {
RegList list = 0;
int i = 0;
for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
if (((UINT64_C(1) << n) & allowed) != 0) {
// Only assign allowed registers.
if (r) {
r[i] = Register(n, reg_size);
}
if (x) {
x[i] = Register(n, kXRegSize);
}
if (w) {
w[i] = Register(n, kWRegSize);
}
list |= (UINT64_C(1) << n);
i++;
}
}
// Check that we got enough registers.
VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);
return list;
}
RegList PopulateVRegisterArray(VRegister* s,
VRegister* d,
VRegister* v,
int reg_size,
int reg_count,
RegList allowed) {
RegList list = 0;
int i = 0;
for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
if (((UINT64_C(1) << n) & allowed) != 0) {
// Only assign allowed registers.
if (v) {
v[i] = VRegister(n, reg_size);
}
if (d) {
d[i] = VRegister(n, kDRegSize);
}
if (s) {
s[i] = VRegister(n, kSRegSize);
}
list |= (UINT64_C(1) << n);
i++;
}
}
// Check that we got enough registers.
VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);
return list;
}
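// Overwrite each X register in `reg_list` with `value`. The literal is
// materialised once and then copied register-to-register; the zero register
// is skipped.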
void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
Register first = NoReg;
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (reg_list & (UINT64_C(1) << i)) {
Register xn(i, kXRegSize);
// We should never write into sp here.
VIXL_ASSERT(!xn.Is(sp));
if (!xn.IsZero()) {
if (!first.IsValid()) {
// This is the first register we've hit, so construct the literal.
__ Mov(xn, value);
first = xn;
} else {
// We've already loaded the literal, so re-use the value already
// loaded into the first register we hit.
__ Mov(xn, first);
}
}
}
}
}
void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
VRegister first = NoVReg;
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
if (reg_list & (UINT64_C(1) << i)) {
VRegister dn(i, kDRegSize);
if (!first.IsValid()) {
// This is the first register we've hit, so construct the literal.
__ Fmov(dn, value);
first = dn;
} else {
// We've already loaded the literal, so re-use the value already loaded
// into the first register we hit.
__ Fmov(dn, first);
}
}
}
}
void Clobber(MacroAssembler* masm, CPURegList reg_list) {
if (reg_list.GetType() == CPURegister::kRegister) {
// This will always clobber X registers.
Clobber(masm, reg_list.GetList());
} else if (reg_list.GetType() == CPURegister::kVRegister) {
// This will always clobber D registers.
ClobberFP(masm, reg_list.GetList());
} else {
VIXL_UNIMPLEMENTED();
}
}
// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
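// Store each register of type T to the dump area at `dump_base + offset`,
// packed at the register's maximum size. This variant is used for the SVE Z
// and P registers.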
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
Register dump_base,
int offset) {
UseScratchRegisterScope temps(masm);
Register dump = temps.AcquireX();
__ Add(dump, dump_base, offset);
for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
T reg(i);
__ Str(reg, SVEMemOperand(dump));
__ Add(dump, dump, reg.GetMaxSizeInBytes());
}
}
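// As above, but for fixed-size register views (X, W, Q, D, S and H), storing
// `reg_size_in_bytes` bytes per register.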
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
Register dump_base,
int offset,
int reg_size_in_bytes) {
UseScratchRegisterScope temps(masm);
Register dump = temps.AcquireX();
__ Add(dump, dump_base, offset);
for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
T reg(i, reg_size_in_bytes * kBitsPerByte);
__ Str(reg, MemOperand(dump));
__ Add(dump, dump, reg_size_in_bytes);
}
}
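// Dump the architectural state (core registers, NEON/FP registers, SVE
// registers where available, sp and NZCV) into the dump_ structure. The
// scratch registers needed by the dump code itself are preserved on the stack
// first and written into the dump afterwards.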
void RegisterDump::Dump(MacroAssembler* masm) {
VIXL_ASSERT(__ StackPointer().Is(sp));
dump_cpu_features_ = *masm->GetCPUFeatures();
// We need some scratch registers, but we also need to dump them, so we have
// to control exactly which registers are used, and dump them separately.
CPURegList scratch_registers(x0, x1, x2, x3);
UseScratchRegisterScope temps(masm);
temps.ExcludeAll();
__ PushCPURegList(scratch_registers);
temps.Include(scratch_registers);
Register dump_base = temps.AcquireX();
Register tmp = temps.AcquireX();
// Offsets into the dump_ structure.
const int x_offset = offsetof(dump_t, x_);
const int w_offset = offsetof(dump_t, w_);
const int d_offset = offsetof(dump_t, d_);
const int s_offset = offsetof(dump_t, s_);
const int h_offset = offsetof(dump_t, h_);
const int q_offset = offsetof(dump_t, q_);
const int z_offset = offsetof(dump_t, z_);
const int p_offset = offsetof(dump_t, p_);
const int sp_offset = offsetof(dump_t, sp_);
const int wsp_offset = offsetof(dump_t, wsp_);
const int flags_offset = offsetof(dump_t, flags_);
const int vl_offset = offsetof(dump_t, vl_);
// Load the address where we will dump the state.
__ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));
// Dump the stack pointer (sp and wsp).
// The stack pointer cannot be stored directly; it needs to be moved into
// another register first. Also, we pushed four X registers, so we need to
// compensate here.
__ Add(tmp, sp, 4 * kXRegSizeInBytes);
__ Str(tmp, MemOperand(dump_base, sp_offset));
__ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
__ Str(tmp.W(), MemOperand(dump_base, wsp_offset));
// Dump core registers.
DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);
// Dump NEON and FP registers.
DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);
// Dump SVE registers.
if (CPUHas(CPUFeatures::kSVE)) {
DumpRegisters<ZRegister>(masm, dump_base, z_offset);
DumpRegisters<PRegister>(masm, dump_base, p_offset);
// Record the vector length.
__ Rdvl(tmp, kBitsPerByte);
__ Str(tmp, MemOperand(dump_base, vl_offset));
}
// Dump the flags.
__ Mrs(tmp, NZCV);
__ Str(tmp, MemOperand(dump_base, flags_offset));
// To dump the values we used as scratch registers, we need a new scratch
// register. We can use any of the already dumped registers since we can
// easily restore them.
Register dump2_base = x10;
VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));
VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));
// Ensure that we don't try to use the scratch registers again.
temps.ExcludeAll();
// Don't lose the dump_ address.
__ Mov(dump2_base, dump_base);
__ PopCPURegList(scratch_registers);
while (!scratch_registers.IsEmpty()) {
CPURegister reg = scratch_registers.PopLowestIndex();
Register x = reg.X();
Register w = reg.W();
unsigned code = reg.GetCode();
__ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
__ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
}
// Finally, restore dump2_base.
__ Ldr(dump2_base,
MemOperand(dump2_base,
x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));
completed_ = true;
}
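// Return the raw bits of the signalling NaN constant appropriate for a
// register of `size_in_bits` (H, S or D).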
uint64_t GetSignallingNan(int size_in_bits) {
switch (size_in_bits) {
case kHRegSize:
return Float16ToRawbits(kFP16SignallingNaN);
case kSRegSize:
return FloatToRawbits(kFP32SignallingNaN);
case kDRegSize:
return DoubleToRawbits(kFP64SignallingNaN);
default:
VIXL_UNIMPLEMENTED();
return 0;
}
}
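// Return true if the current test environment can run code that requires
// `required` CPU features. The Simulator can run anything; on real hardware
// the available features are inferred from the OS. If `queried_can_run` is
// provided, the "missing features" message is only printed on the first
// query.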
bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
bool log_if_missing = true;
if (queried_can_run != NULL) {
log_if_missing = !*queried_can_run;
*queried_can_run = true;
}
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// The Simulator can run any test that VIXL can assemble.
USE(required);
USE(log_if_missing);
return true;
#else
CPUFeatures cpu = CPUFeatures::InferFromOS();
// If InferFromOS fails, assume that basic features are present.
if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));
if (cpu.Has(required)) return true;
if (log_if_missing) {
CPUFeatures missing = required.Without(cpu);
// Note: This message needs to match REGEXP_MISSING_FEATURES from
// tools/threaded_test.py.
std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
std::cout << "This test requires the following features to run its "
"generated code on this CPU: "
<< required << "\n";
}
return false;
#endif
}
// Note that the function assumes p0, p1, p2 and p3 are set to all true in b-,
// h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered as
// temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
int esize,
const T (&values)[N],
uint64_t lcg_mult) {
uint64_t a = 0;
uint64_t b = lcg_mult;
// Clear p5, then use Zip1 (below, per element size) to build p4, the
// predicate that selects which element slots to populate for this
// floating-point type.
__ Pfalse(p5.VnB());
switch (esize) {
case kHRegSize:
a = Float16ToRawbits(Float16(1.5));
// Pick a convenient multiplier that stays within the normal range of
// half-precision floating point.
b = Float16ToRawbits(Float16(lcg_mult % 1024));
// Step 1: Set fp16 numbers in the (so far uninitialised) registers.
// p4< 15:0>: 0b0101010101010101
// z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
__ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
break;
case kSRegSize:
a = FloatToRawbits(1.5);
b = FloatToRawbits(lcg_mult);
// Step 2: Overlay fp32 numbers on the fp16 values already in the registers.
// p4< 15:0>: 0b0000000100000001
// z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
__ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
break;
case kDRegSize:
a = DoubleToRawbits(1.5);
b = DoubleToRawbits(lcg_mult);
// Step 3: Overlay fp64 numbers on the fp16 and fp32 values already in the
// registers.
// p4< 15:0>: 0b0000000000000001
// z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
__ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
break;
default:
VIXL_UNIMPLEMENTED();
break;
}
__ Dup(z30.WithLaneSize(esize), a);
__ Dup(z31.WithLaneSize(esize), b);
for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
// As floating point operations on random values have a tendency to
// converge on special-case numbers like NaNs, use normal floating point
// values as the seed instead.
InsrHelper(masm, z0.WithLaneSize(esize), values);
}
__ Fmla(z0.WithLaneSize(esize),
p4.Merging(),
z30.WithLaneSize(esize),
z0.WithLaneSize(esize),
z31.WithLaneSize(esize),
FastNaNPropagation);
for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
__ Fmla(ZRegister(i).WithLaneSize(esize),
p4.Merging(),
z30.WithLaneSize(esize),
ZRegister(i - 1).WithLaneSize(esize),
z31.WithLaneSize(esize),
FastNaNPropagation);
}
__ Fmul(z31.WithLaneSize(esize),
p4.Merging(),
z31.WithLaneSize(esize),
z30.WithLaneSize(esize),
FastNaNPropagation);
__ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}
// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
// Initialise each Z register to a mixture of fp16/32/64 values following this
// pattern:
// z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the
// register.
//
// For example:
// z{code}<2047:1920>: 0x{< fp64 >< fp32 ><fp16><fp16>}
// ...
// z{code}< 127: 0>: 0x{< fp64 >< fp32 ><fp16><fp16>}
//
// To produce the desired mixture, each stage of the initialisation has to be
// called in the following order.
SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}
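// Set x0-x30, z0-z31 and p0-p15 to pseudo-random but reproducible data, using
// a linear congruential generator for the core registers and either integer
// or floating point patterns for the vector registers, depending on
// `input_set`.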
void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
USE(input_set);
uint64_t lcg_mult = 6364136223846793005;
// Set x0 - x30 to pseudo-random data.
__ Mov(x29, 1); // LCG increment.
__ Mov(x30, lcg_mult);
__ Mov(x0, 42); // LCG seed.
__ Cmn(x0, 0); // Clear NZCV flags for later.
__ Madd(x0, x0, x30, x29); // First pseudo-random number.
// Registers 1 - 29.
for (unsigned i = 1; i < 30; i++) {
__ Madd(XRegister(i), XRegister(i - 1), x30, x29);
}
__ Mul(x30, x29, x30);
__ Add(x30, x30, 1);
// Set first four predicate registers to true for increasing lane sizes.
__ Ptrue(p0.VnB());
__ Ptrue(p1.VnH());
__ Ptrue(p2.VnS());
__ Ptrue(p3.VnD());
// Set z0 - z31 to pseudo-random data.
if (input_set == kIntInputSet) {
__ Dup(z30.VnD(), 1);
__ Dup(z31.VnD(), lcg_mult);
__ Index(z0.VnB(), -16, 13); // LCG seeds.
__ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
__ Mla(ZRegister(i).VnD(),
p0.Merging(),
z30.VnD(),
ZRegister(i - 1).VnD(),
z31.VnD());
}
__ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
__ Add(z31.VnD(), z31.VnD(), 1);
} else {
VIXL_ASSERT(input_set == kFpInputSet);
InitialiseRegisterFp(masm, lcg_mult);
}
// Set remaining predicate registers based on earlier pseudo-random data.
for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
__ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
}
for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
__ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
__ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
__ Mov(PRegister(i), p0);
}
__ Ptrue(p0.VnB());
// At this point, only sp and a few status registers are undefined. These
// must be ignored when computing the state hash.
}
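// Compute a CRC32-based hash of the machine state established by
// SetInitialMachineState (x0-x30, NZCV, and all Z and P register bits) and
// store the 32-bit result at `dst`.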
void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
// Use explicit registers, to avoid hash order varying if
// UseScratchRegisterScope changes.
UseScratchRegisterScope temps(masm);
temps.ExcludeAll();
Register t0 = w0;
Register t1 = x1;
// Compute hash of x0 - x30.
__ Push(t0.X(), t1);
__ Crc32x(t0, wzr, t0.X());
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (i == xzr.GetCode()) continue; // Skip sp.
if (t0.Is(WRegister(i))) continue; // Skip t0, as it's already hashed.
__ Crc32x(t0, t0, XRegister(i));
}
// Hash the status flags.
__ Mrs(t1, NZCV);
__ Crc32x(t0, t0, t1);
// Acquire another temp, as integer registers have been hashed already.
__ Push(x30, xzr);
Register t2 = x30;
// Compute hash of all bits in z0 - z31. This implies different hashes are
// produced for machines of different vector length.
for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
__ Rdvl(t2, 1);
__ Lsr(t2, t2, 4);
Label vl_loop;
__ Bind(&vl_loop);
__ Umov(t1, VRegister(i).V2D(), 0);
__ Crc32x(t0, t0, t1);
__ Umov(t1, VRegister(i).V2D(), 1);
__ Crc32x(t0, t0, t1);
__ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
__ Sub(t2, t2, 1);
__ Cbnz(t2, &vl_loop);
}
// Hash predicate registers. For simplicity, this writes the predicate
// registers to a zero-initialised area of stack of the maximum size required
// for P registers. It then computes a hash of that entire stack area.
unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;
// Zero claimed stack area.
for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
__ Push(xzr, xzr);
}
// Store all P registers to the stack.
__ Mov(t1, sp);
for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
__ Str(PRegister(i), SVEMemOperand(t1));
__ Add(t1, t1, kPRegMaxSizeInBytes);
}
// Hash the entire stack area.
for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
__ Pop(t1, t2);
__ Crc32x(t0, t0, t1);
__ Crc32x(t0, t0, t2);
}
__ Mov(t1, reinterpret_cast<uint64_t>(dst));
__ Str(t0, MemOperand(t1));
__ Pop(xzr, x30);
__ Pop(t1, t0.X());
}
} // namespace aarch64
} // namespace vixl