// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "test-utils-aarch64.h"
#include <cmath>
#include <queue>
#include "test-runner.h"
#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#define __ masm->
namespace vixl {
namespace aarch64 {
// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);
// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);
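// The Equal* helpers below compare an expected value against a value read
// back from a RegisterDump (or supplied directly), printing a diagnostic when
// the check fails. They are typically wrapped by the test framework's
// ASSERT_EQUAL_*-style macros.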
bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
if (result != expected) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
expected,
result);
}
return expected == result;
}
bool Equal64(uint64_t reference,
const RegisterDump*,
uint64_t result,
ExpectedResult option) {
switch (option) {
case kExpectEqual:
if (result != reference) {
printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
reference,
result);
}
break;
case kExpectNotEqual:
if (result == reference) {
printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
}
break;
}
return reference == result;
}
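// As above, but check `result` against a list of acceptable reference values.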
bool Equal64(std::vector<uint64_t> reference_list,
const RegisterDump*,
uint64_t result,
ExpectedResult option) {
switch (option) {
case kExpectEqual:
for (uint64_t reference : reference_list) {
if (result == reference) return true;
}
printf("Expected a result in (\n");
break;
case kExpectNotEqual: {
bool found = false;
for (uint64_t reference : reference_list) {
if (result == reference) {
printf("Expected a result not in (\n");
found = true;
break;
}
}
// If the result was not in the list, the check passes; otherwise fall
// through to print the reference list and report failure.
if (!found) return true;
break;
}
}
for (uint64_t reference : reference_list) {
printf(" 0x%016" PRIx64 ",\n", reference);
}
printf(")\t Found 0x%016" PRIx64 "\n", result);
return false;
}
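// Compare a full 128-bit Q register value; on mismatch, print both 64-bit
// halves of the expected and found values.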
bool Equal128(QRegisterValue expected,
const RegisterDump*,
QRegisterValue result) {
if (!expected.Equals(result)) {
printf("Expected 0x%016" PRIx64 "%016" PRIx64
"\t "
"Found 0x%016" PRIx64 "%016" PRIx64 "\n",
expected.GetLane<uint64_t>(1),
expected.GetLane<uint64_t>(0),
result.GetLane<uint64_t>(1),
result.GetLane<uint64_t>(0));
}
return expected.Equals(result);
}
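// The EqualFP* helpers compare raw bit patterns rather than using ==, so that
// NaN payloads and the sign of zero are checked exactly.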
bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
uint16_t e_rawbits = Float16ToRawbits(expected);
uint16_t r_rawbits = Float16ToRawbits(result);
if (e_rawbits == r_rawbits) {
return true;
} else {
if (IsNaN(expected) || IsZero(expected)) {
printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
e_rawbits,
r_rawbits);
} else {
printf("Expected %.6f (16 bit): (0x%04" PRIx16
")\t "
"Found %.6f (0x%04" PRIx16 ")\n",
FPToFloat(expected, kIgnoreDefaultNaN),
e_rawbits,
FPToFloat(result, kIgnoreDefaultNaN),
r_rawbits);
}
return false;
}
}
bool EqualFP32(float expected, const RegisterDump*, float result) {
if (FloatToRawbits(expected) == FloatToRawbits(result)) {
return true;
} else {
if (IsNaN(expected) || (expected == 0.0)) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
FloatToRawbits(expected),
FloatToRawbits(result));
} else {
printf("Expected %.9f (0x%08" PRIx32
")\t "
"Found %.9f (0x%08" PRIx32 ")\n",
expected,
FloatToRawbits(expected),
result,
FloatToRawbits(result));
}
return false;
}
}
bool EqualFP64(double expected, const RegisterDump*, double result) {
if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
return true;
}
if (IsNaN(expected) || (expected == 0.0)) {
printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
DoubleToRawbits(expected),
DoubleToRawbits(result));
} else {
printf("Expected %.17f (0x%016" PRIx64
")\t "
"Found %.17f (0x%016" PRIx64 ")\n",
expected,
DoubleToRawbits(expected),
result,
DoubleToRawbits(result));
}
return false;
}
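// The overloads below read the value under test from a RegisterDump. Where a
// narrower view is checked (W, S or H registers), they also verify that the
// upper bits of the corresponding X or D register were cleared.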
bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
VIXL_ASSERT(reg.Is32Bits());
// Retrieve the corresponding X register so we can check that the upper part
// was properly cleared.
int64_t result_x = core->xreg(reg.GetCode());
if ((result_x & 0xffffffff00000000) != 0) {
printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
expected,
result_x);
return false;
}
uint32_t result_w = core->wreg(reg.GetCode());
return Equal32(expected, core, result_w);
}
bool Equal64(uint64_t reference,
const RegisterDump* core,
const Register& reg,
ExpectedResult option) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return Equal64(reference, core, result, option);
}
bool Equal64(std::vector<uint64_t> reference_list,
const RegisterDump* core,
const Register& reg,
ExpectedResult option) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return Equal64(reference_list, core, result, option);
}
bool NotEqual64(uint64_t reference,
const RegisterDump* core,
const Register& reg) {
VIXL_ASSERT(reg.Is64Bits());
uint64_t result = core->xreg(reg.GetCode());
return NotEqual64(reference, core, result);
}
bool Equal128(uint64_t expected_h,
uint64_t expected_l,
const RegisterDump* core,
const VRegister& vreg) {
VIXL_ASSERT(vreg.Is128Bits());
QRegisterValue expected;
expected.SetLane(0, expected_l);
expected.SetLane(1, expected_h);
QRegisterValue result = core->qreg(vreg.GetCode());
return Equal128(expected, core, result);
}
bool EqualFP16(Float16 expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is16Bits());
// Retrieve the corresponding D register so we can check that the upper part
// was properly cleared.
uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
if ((result_64 & 0xffffffffffff0000) != 0) {
printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
Float16ToRawbits(expected),
FPToFloat(expected, kIgnoreDefaultNaN),
result_64);
return false;
}
return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}
bool EqualFP32(float expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is32Bits());
// Retrieve the corresponding D register so we can check that the upper part
// was properly cleared.
uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
if ((result_64 & 0xffffffff00000000) != 0) {
printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
FloatToRawbits(expected),
expected,
result_64);
return false;
}
return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}
bool EqualFP64(double expected,
const RegisterDump* core,
const VRegister& fpreg) {
VIXL_ASSERT(fpreg.Is64Bits());
return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}
bool Equal64(const Register& reg0,
const RegisterDump* core,
const Register& reg1,
ExpectedResult option) {
VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
int64_t reference = core->xreg(reg0.GetCode());
int64_t result = core->xreg(reg1.GetCode());
return Equal64(reference, core, result, option);
}
bool NotEqual64(const Register& reg0,
const RegisterDump* core,
const Register& reg1) {
VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
int64_t expected = core->xreg(reg0.GetCode());
int64_t result = core->xreg(reg1.GetCode());
return NotEqual64(expected, core, result);
}
bool Equal64(uint64_t expected,
const RegisterDump* core,
const VRegister& vreg) {
VIXL_ASSERT(vreg.Is64Bits());
uint64_t result = core->dreg_bits(vreg.GetCode());
return Equal64(expected, core, result);
}
static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }
static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }
static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }
static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }
bool EqualNzcv(uint32_t expected, uint32_t result) {
VIXL_ASSERT((expected & ~NZCVFlag) == 0);
VIXL_ASSERT((result & ~NZCVFlag) == 0);
if (result != expected) {
printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
FlagN(expected),
FlagZ(expected),
FlagC(expected),
FlagV(expected),
FlagN(result),
FlagZ(result),
FlagC(result),
FlagV(result));
return false;
}
return true;
}
bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (a->xreg(i) != b->xreg(i)) {
printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
i,
a->xreg(i),
b->xreg(i));
return false;
}
}
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
uint64_t a_bits = a->dreg_bits(i);
uint64_t b_bits = b->dreg_bits(i);
if (a_bits != b_bits) {
printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
i,
a_bits,
b_bits);
return false;
}
}
return true;
}
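// Check a single lane of an SVE Z register (and, in the overload below, of a
// P register) against `expected`.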
bool EqualSVELane(uint64_t expected,
const RegisterDump* core,
const ZRegister& reg,
int lane) {
unsigned lane_size = reg.GetLaneSizeInBits();
// For convenience in the tests, we allow negative values to be passed into
// `expected`, but truncate them to an appropriately-sized unsigned value for
// the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
// value is truncated from 0xffffffffffffffff to 0xff before the comparison.
VIXL_ASSERT(IsUintN(lane_size, expected) ||
IsIntN(lane_size, RawbitsToInt64(expected)));
expected &= GetUintMask(lane_size);
uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
if (expected != result) {
unsigned lane_size_in_hex_chars = lane_size / 4;
std::string reg_name = reg.GetArchitecturalName();
printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
reg_name.c_str(),
lane,
lane_size_in_hex_chars,
expected,
lane_size_in_hex_chars,
result);
return false;
}
return true;
}
bool EqualSVELane(uint64_t expected,
const RegisterDump* core,
const PRegister& reg,
int lane) {
VIXL_ASSERT(reg.HasLaneSize());
VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
expected &= GetUintMask(p_bits_per_lane);
uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
if (expected != result) {
unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
std::string reg_name = reg.GetArchitecturalName();
printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
reg_name.c_str(),
lane,
lane_size_in_hex_chars,
expected,
lane_size_in_hex_chars,
result);
return false;
}
return true;
}
struct EqualMemoryChunk {
typedef uint64_t RawChunk;
uintptr_t address;
RawChunk expected;
RawChunk result;
bool IsEqual() const { return expected == result; }
};
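// Compare two buffers 64 bits at a time, printing a side-by-side dump with a
// little context around each mismatch. `zero_offset` marks the position
// within the buffers that should be reported as offset zero, so addresses
// before it are annotated with negative offsets.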
bool EqualMemory(const void* expected,
const void* result,
size_t size_in_bytes,
size_t zero_offset) {
if (memcmp(expected, result, size_in_bytes) == 0) return true;
// Read 64-bit chunks, and print them side-by-side if they don't match.
// Remember the last few chunks, even if they matched, so we can print some
// context. We don't want to print the whole buffer, because it could be huge.
static const size_t kContextLines = 1;
std::queue<EqualMemoryChunk> context;
static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);
// This assumption keeps the logic simple, and is acceptable for our tests.
VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);
const char* expected_it = reinterpret_cast<const char*>(expected);
const char* result_it = reinterpret_cast<const char*>(result);
// This is the first error, so print a header row.
printf(" Address (of result) Expected Result\n");
// Always print some context at the start of the buffer.
uintptr_t print_context_to =
reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
EqualMemoryChunk chunk;
chunk.address = reinterpret_cast<uintptr_t>(result_it);
memcpy(&chunk.expected, expected_it, kChunkSize);
memcpy(&chunk.result, result_it, kChunkSize);
while (context.size() > kContextLines) context.pop();
context.push(chunk);
// Print context after an error, and at the end of the buffer.
if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
if (chunk.address > print_context_to) {
// We aren't currently printing context, so separate this context from
// the previous block.
printf("...\n");
}
print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
}
// Print context (including the current line).
while (!context.empty() && (context.front().address < print_context_to)) {
uintptr_t address = context.front().address;
uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
bool is_negative = (offset < zero_offset);
printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
" 0x%016" PRIx64 "\n",
address,
(is_negative ? '-' : '+'),
(is_negative ? (zero_offset - offset) : (offset - zero_offset)),
context.front().expected,
context.front().result);
context.pop();
}
expected_it += kChunkSize;
result_it += kChunkSize;
}
return false;
}
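// Fill the w, x and r arrays (where provided) with up to `reg_count` registers
// drawn from the `allowed` mask: r[i] is sized by `reg_size`, while x[i] and
// w[i] are the X and W views of the same register code. Returns the list of
// register codes that were used.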
RegList PopulateRegisterArray(Register* w,
Register* x,
Register* r,
int reg_size,
int reg_count,
RegList allowed) {
RegList list = 0;
int i = 0;
for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
if (((UINT64_C(1) << n) & allowed) != 0) {
// Only assign allowed registers.
if (r) {
r[i] = Register(n, reg_size);
}
if (x) {
x[i] = Register(n, kXRegSize);
}
if (w) {
w[i] = Register(n, kWRegSize);
}
list |= (UINT64_C(1) << n);
i++;
}
}
// Check that we got enough registers.
VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);
return list;
}
RegList PopulateVRegisterArray(VRegister* s,
VRegister* d,
VRegister* v,
int reg_size,
int reg_count,
RegList allowed) {
RegList list = 0;
int i = 0;
for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
if (((UINT64_C(1) << n) & allowed) != 0) {
// Only assign allowed registers.
if (v) {
v[i] = VRegister(n, reg_size);
}
if (d) {
d[i] = VRegister(n, kDRegSize);
}
if (s) {
s[i] = VRegister(n, kSRegSize);
}
list |= (UINT64_C(1) << n);
i++;
}
}
// Check that we got enough registers.
VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);
return list;
}
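// Overwrite each X register in `reg_list` with `value`. The literal is
// materialised once and then copied register-to-register; the zero register
// is skipped.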
void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
Register first = NoReg;
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (reg_list & (UINT64_C(1) << i)) {
Register xn(i, kXRegSize);
// We should never write into sp here.
VIXL_ASSERT(!xn.Is(sp));
if (!xn.IsZero()) {
if (!first.IsValid()) {
// This is the first register we've hit, so construct the literal.
__ Mov(xn, value);
first = xn;
} else {
// We've already loaded the literal, so re-use the value already
// loaded into the first register we hit.
__ Mov(xn, first);
}
}
}
}
}
void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
VRegister first = NoVReg;
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
if (reg_list & (UINT64_C(1) << i)) {
VRegister dn(i, kDRegSize);
if (!first.IsValid()) {
// This is the first register we've hit, so construct the literal.
__ Fmov(dn, value);
first = dn;
} else {
// We've already loaded the literal, so re-use the value already loaded
// into the first register we hit.
__ Fmov(dn, first);
}
}
}
}
void Clobber(MacroAssembler* masm, CPURegList reg_list) {
if (reg_list.GetType() == CPURegister::kRegister) {
// This will always clobber X registers.
Clobber(masm, reg_list.GetList());
} else if (reg_list.GetType() == CPURegister::kVRegister) {
// This will always clobber D registers.
ClobberFP(masm, reg_list.GetList());
} else {
VIXL_UNIMPLEMENTED();
}
}
// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
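// Store each register of type T to the dump area at `dump_base + offset`,
// packed at the register's maximum size. This variant is used for the SVE Z
// and P registers.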
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
Register dump_base,
int offset) {
UseScratchRegisterScope temps(masm);
Register dump = temps.AcquireX();
__ Add(dump, dump_base, offset);
for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
T reg(i);
__ Str(reg, SVEMemOperand(dump));
__ Add(dump, dump, reg.GetMaxSizeInBytes());
}
}
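// As above, but for fixed-size register views (X, W, Q, D, S and H), storing
// `reg_size_in_bytes` bytes per register.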
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
Register dump_base,
int offset,
int reg_size_in_bytes) {
UseScratchRegisterScope temps(masm);
Register dump = temps.AcquireX();
__ Add(dump, dump_base, offset);
for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
T reg(i, reg_size_in_bytes * kBitsPerByte);
__ Str(reg, MemOperand(dump));
__ Add(dump, dump, reg_size_in_bytes);
}
}
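// Dump the architectural state (core registers, NEON/FP registers, SVE
// registers where available, sp and NZCV) into the dump_ structure. The
// scratch registers needed by the dump code itself are preserved on the stack
// first and written into the dump afterwards.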
void RegisterDump::Dump(MacroAssembler* masm) {
VIXL_ASSERT(__ StackPointer().Is(sp));
dump_cpu_features_ = *masm->GetCPUFeatures();
// We need some scratch registers, but we also need to dump them, so we have
// to control exactly which registers are used, and dump them separately.
CPURegList scratch_registers(x0, x1, x2, x3);
UseScratchRegisterScope temps(masm);
temps.ExcludeAll();
__ PushCPURegList(scratch_registers);
temps.Include(scratch_registers);
Register dump_base = temps.AcquireX();
Register tmp = temps.AcquireX();
// Offsets into the dump_ structure.
const int x_offset = offsetof(dump_t, x_);
const int w_offset = offsetof(dump_t, w_);
const int d_offset = offsetof(dump_t, d_);
const int s_offset = offsetof(dump_t, s_);
const int h_offset = offsetof(dump_t, h_);
const int q_offset = offsetof(dump_t, q_);
const int z_offset = offsetof(dump_t, z_);
const int p_offset = offsetof(dump_t, p_);
const int sp_offset = offsetof(dump_t, sp_);
const int wsp_offset = offsetof(dump_t, wsp_);
const int flags_offset = offsetof(dump_t, flags_);
const int vl_offset = offsetof(dump_t, vl_);
// Load the address where we will dump the state.
__ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));
// Dump the stack pointer (sp and wsp).
// The stack pointer cannot be stored directly; it needs to be moved into
// another register first. Also, we pushed four X registers, so we need to
// compensate here.
__ Add(tmp, sp, 4 * kXRegSizeInBytes);
__ Str(tmp, MemOperand(dump_base, sp_offset));
__ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
__ Str(tmp.W(), MemOperand(dump_base, wsp_offset));
// Dump core registers.
DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);
// Dump NEON and FP registers.
DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);
// Dump SVE registers.
if (CPUHas(CPUFeatures::kSVE)) {
DumpRegisters<ZRegister>(masm, dump_base, z_offset);
DumpRegisters<PRegister>(masm, dump_base, p_offset);
// Record the vector length.
__ Rdvl(tmp, kBitsPerByte);
__ Str(tmp, MemOperand(dump_base, vl_offset));
}
// Dump the flags.
__ Mrs(tmp, NZCV);
__ Str(tmp, MemOperand(dump_base, flags_offset));
// To dump the values we used as scratch registers, we need a new scratch
// register. We can use any of the already dumped registers since we can
// easily restore them.
Register dump2_base = x10;
VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));
VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));
// Ensure that we don't try to use the scratch registers again.
temps.ExcludeAll();
// Don't lose the dump_ address.
__ Mov(dump2_base, dump_base);
__ PopCPURegList(scratch_registers);
while (!scratch_registers.IsEmpty()) {
CPURegister reg = scratch_registers.PopLowestIndex();
Register x = reg.X();
Register w = reg.W();
unsigned code = reg.GetCode();
__ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
__ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
}
// Finally, restore dump2_base.
__ Ldr(dump2_base,
MemOperand(dump2_base,
x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));
completed_ = true;
}
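// Return the raw bits of the signalling NaN constant appropriate for a
// register of `size_in_bits` (H, S or D).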
uint64_t GetSignallingNan(int size_in_bits) {
switch (size_in_bits) {
case kHRegSize:
return Float16ToRawbits(kFP16SignallingNaN);
case kSRegSize:
return FloatToRawbits(kFP32SignallingNaN);
case kDRegSize:
return DoubleToRawbits(kFP64SignallingNaN);
default:
VIXL_UNIMPLEMENTED();
return 0;
}
}
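// Return true if the current test environment can run code that requires
// `required` CPU features. The Simulator can run anything; on real hardware
// the available features are inferred from the OS. If `queried_can_run` is
// provided, the "missing features" message is only printed on the first
// query.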
bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
bool log_if_missing = true;
if (queried_can_run != NULL) {
log_if_missing = !*queried_can_run;
*queried_can_run = true;
}
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// The Simulator can run any test that VIXL can assemble.
USE(required);
USE(log_if_missing);
return true;
#else
CPUFeatures cpu = CPUFeatures::InferFromOS();
// If InferFromOS fails, assume that basic features are present.
if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));
if (cpu.Has(required)) return true;
if (log_if_missing) {
CPUFeatures missing = required.Without(cpu);
// Note: This message needs to match REGEXP_MISSING_FEATURES from
// tools/threaded_test.py.
std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
std::cout << "This test requires the following features to run its "
"generated code on this CPU: "
<< required << "\n";
}
return false;
#endif
}
// Note that the function assumes p0, p1, p2 and p3 are set to all true in b-,
// h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered as
// temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
int esize,
const T (&values)[N],
uint64_t lcg_mult) {
uint64_t a = 0;
uint64_t b = lcg_mult;
// Clear p5, then use Zip1 (below, per element size) to build p4, the
// predicate that selects which element slots to populate for this
// floating-point type.
__ Pfalse(p5.VnB());
switch (esize) {
case kHRegSize:
a = Float16ToRawbits(Float16(1.5));
// Pick a convenient multiplier that stays within the normal range of
// half-precision floating point.
b = Float16ToRawbits(Float16(lcg_mult % 1024));
// Step 1: Set fp16 numbers in the (so far uninitialised) registers.
// p4< 15:0>: 0b0101010101010101
// z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
__ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
break;
case kSRegSize:
a = FloatToRawbits(1.5);
b = FloatToRawbits(lcg_mult);
// Step 2: Overlay fp32 numbers on the fp16 values already in the registers.
// p4< 15:0>: 0b0000000100000001
// z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
__ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
break;
case kDRegSize:
a = DoubleToRawbits(1.5);
b = DoubleToRawbits(lcg_mult);
// Step 3: Overlay fp64 numbers on the fp16 and fp32 values already in the
// registers.
// p4< 15:0>: 0b0000000000000001
// z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
__ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
break;
default:
VIXL_UNIMPLEMENTED();
break;
}
__ Dup(z30.WithLaneSize(esize), a);
__ Dup(z31.WithLaneSize(esize), b);
for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
// As floating point operations on random values have a tendency to
// converge on special-case numbers like NaNs, use normal floating point
// values as the seed instead.
InsrHelper(masm, z0.WithLaneSize(esize), values);
}
__ Fmla(z0.WithLaneSize(esize),
p4.Merging(),
z30.WithLaneSize(esize),
z0.WithLaneSize(esize),
z31.WithLaneSize(esize),
FastNaNPropagation);
for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
__ Fmla(ZRegister(i).WithLaneSize(esize),
p4.Merging(),
z30.WithLaneSize(esize),
ZRegister(i - 1).WithLaneSize(esize),
z31.WithLaneSize(esize),
FastNaNPropagation);
}
__ Fmul(z31.WithLaneSize(esize),
p4.Merging(),
z31.WithLaneSize(esize),
z30.WithLaneSize(esize),
FastNaNPropagation);
__ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}
// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
// Initialise each Z register to a mixture of fp16/32/64 values following this
// pattern:
// z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the
// register.
//
// For example:
// z{code}<2047:1920>: 0x{< fp64 >< fp32 ><fp16><fp16>}
// ...
// z{code}< 127: 0>: 0x{< fp64 >< fp32 ><fp16><fp16>}
//
// To produce the desired mixture, each stage of the initialisation has to be
// called in the following order.
SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}
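// Set x0-x30, z0-z31 and p0-p15 to pseudo-random but reproducible data, using
// a linear congruential generator for the core registers and either integer
// or floating point patterns for the vector registers, depending on
// `input_set`.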
void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
USE(input_set);
uint64_t lcg_mult = 6364136223846793005;
// Set x0 - x30 to pseudo-random data.
__ Mov(x29, 1); // LCG increment.
__ Mov(x30, lcg_mult);
__ Mov(x0, 42); // LCG seed.
__ Cmn(x0, 0); // Clear NZCV flags for later.
__ Madd(x0, x0, x30, x29); // First pseudo-random number.
// Registers 1 - 29.
for (unsigned i = 1; i < 30; i++) {
__ Madd(XRegister(i), XRegister(i - 1), x30, x29);
}
__ Mul(x30, x29, x30);
__ Add(x30, x30, 1);
// Set first four predicate registers to true for increasing lane sizes.
__ Ptrue(p0.VnB());
__ Ptrue(p1.VnH());
__ Ptrue(p2.VnS());
__ Ptrue(p3.VnD());
// Set z0 - z31 to pseudo-random data.
if (input_set == kIntInputSet) {
__ Dup(z30.VnD(), 1);
__ Dup(z31.VnD(), lcg_mult);
__ Index(z0.VnB(), -16, 13); // LCG seeds.
__ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
__ Mla(ZRegister(i).VnD(),
p0.Merging(),
z30.VnD(),
ZRegister(i - 1).VnD(),
z31.VnD());
}
__ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
__ Add(z31.VnD(), z31.VnD(), 1);
} else {
VIXL_ASSERT(input_set == kFpInputSet);
InitialiseRegisterFp(masm, lcg_mult);
}
// Set remaining predicate registers based on earlier pseudo-random data.
for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
__ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
}
for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
__ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
__ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
__ Mov(PRegister(i), p0);
}
__ Ptrue(p0.VnB());
// At this point, only sp and a few status registers are undefined. These
// must be ignored when computing the state hash.
}
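// Compute a CRC32-based hash of the machine state established by
// SetInitialMachineState (x0-x30, NZCV, and all Z and P register bits) and
// store the 32-bit result at `dst`.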
void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
// Use explicit registers, to avoid hash order varying if
// UseScratchRegisterScope changes.
UseScratchRegisterScope temps(masm);
temps.ExcludeAll();
Register t0 = w0;
Register t1 = x1;
// Compute hash of x0 - x30.
__ Push(t0.X(), t1);
__ Crc32x(t0, wzr, t0.X());
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
if (i == xzr.GetCode()) continue; // Skip sp.
if (t0.Is(WRegister(i))) continue; // Skip t0, as it's already hashed.
__ Crc32x(t0, t0, XRegister(i));
}
// Hash the status flags.
__ Mrs(t1, NZCV);
__ Crc32x(t0, t0, t1);
// Acquire another temp, as integer registers have been hashed already.
__ Push(x30, xzr);
Register t2 = x30;
// Compute hash of all bits in z0 - z31. This implies different hashes are
// produced for machines of different vector length.
for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
__ Rdvl(t2, 1);
__ Lsr(t2, t2, 4);
Label vl_loop;
__ Bind(&vl_loop);
__ Umov(t1, VRegister(i).V2D(), 0);
__ Crc32x(t0, t0, t1);
__ Umov(t1, VRegister(i).V2D(), 1);
__ Crc32x(t0, t0, t1);
__ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
__ Sub(t2, t2, 1);
__ Cbnz(t2, &vl_loop);
}
// Hash predicate registers. For simplicity, this writes the predicate
// registers to a zero-initialised area of stack of the maximum size required
// for P registers. It then computes a hash of that entire stack area.
unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;
// Zero claimed stack area.
for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
__ Push(xzr, xzr);
}
// Store all P registers to the stack.
__ Mov(t1, sp);
for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
__ Str(PRegister(i), SVEMemOperand(t1));
__ Add(t1, t1, kPRegMaxSizeInBytes);
}
// Hash the entire stack area.
for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
__ Pop(t1, t2);
__ Crc32x(t0, t0, t1);
__ Crc32x(t0, t0, t2);
}
__ Mov(t1, reinterpret_cast<uint64_t>(dst));
__ Str(t0, MemOperand(t1));
__ Pop(xzr, x30);
__ Pop(t1, t0.X());
}
} // namespace aarch64
} // namespace vixl