| // Copyright 2017, VIXL authors |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // * Neither the name of ARM Limited nor the names of its contributors may |
| // be used to endorse or promote products derived from this software |
| // without specific prior written permission. |
| // |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
| // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "aarch32/macro-assembler-aarch32.h" |
| |
| #define STRINGIFY(x) #x |
| #define TOSTRING(x) STRINGIFY(x) |
| |
| #define CONTEXT_SCOPE \ |
| ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__)) |
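// Used by the delegates below to record the source location ("<file>:<line>")
// at which a macro instruction had to be expanded.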
| |
| namespace vixl { |
| namespace aarch32 { |
| |
| ExactAssemblyScopeWithoutPoolsCheck::ExactAssemblyScopeWithoutPoolsCheck( |
| MacroAssembler* masm, size_t size, SizePolicy size_policy) |
| : ExactAssemblyScope(masm, |
| size, |
| size_policy, |
| ExactAssemblyScope::kIgnorePools) {} |
| |
| void UseScratchRegisterScope::Open(MacroAssembler* masm) { |
| VIXL_ASSERT(masm_ == NULL); |
| VIXL_ASSERT(masm != NULL); |
| masm_ = masm; |
| |
| old_available_ = masm_->GetScratchRegisterList()->GetList(); |
| old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList(); |
| |
| parent_ = masm->GetCurrentScratchRegisterScope(); |
| masm->SetCurrentScratchRegisterScope(this); |
| } |
| |
| |
| void UseScratchRegisterScope::Close() { |
| if (masm_ != NULL) { |
| // Ensure that scopes nest perfectly, and do not outlive their parents. |
| // This is a run-time check because the order of destruction of objects in |
| // the _same_ scope is implementation-defined, and is likely to change in |
| // optimised builds. |
| VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this); |
| masm_->SetCurrentScratchRegisterScope(parent_); |
| |
| masm_->GetScratchRegisterList()->SetList(old_available_); |
| masm_->GetScratchVRegisterList()->SetList(old_available_vfp_); |
| |
| masm_ = NULL; |
| } |
| } |
| |
| |
| bool UseScratchRegisterScope::IsAvailable(const Register& reg) const { |
| VIXL_ASSERT(masm_ != NULL); |
| VIXL_ASSERT(reg.IsValid()); |
| return masm_->GetScratchRegisterList()->Includes(reg); |
| } |
| |
| |
| bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const { |
| VIXL_ASSERT(masm_ != NULL); |
| VIXL_ASSERT(reg.IsValid()); |
| return masm_->GetScratchVRegisterList()->IncludesAllOf(reg); |
| } |
| |
| |
| Register UseScratchRegisterScope::Acquire() { |
| VIXL_ASSERT(masm_ != NULL); |
| Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister(); |
| VIXL_CHECK(reg.IsValid()); |
| masm_->GetScratchRegisterList()->Remove(reg); |
| return reg; |
| } |
| |
| |
| VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) { |
| switch (size_in_bits) { |
| case kSRegSizeInBits: |
| return AcquireS(); |
| case kDRegSizeInBits: |
| return AcquireD(); |
| case kQRegSizeInBits: |
| return AcquireQ(); |
| default: |
| VIXL_UNREACHABLE(); |
| return NoVReg; |
| } |
| } |
| |
| |
| QRegister UseScratchRegisterScope::AcquireQ() { |
| VIXL_ASSERT(masm_ != NULL); |
| QRegister reg = |
| masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister(); |
| VIXL_CHECK(reg.IsValid()); |
| masm_->GetScratchVRegisterList()->Remove(reg); |
| return reg; |
| } |
| |
| |
| DRegister UseScratchRegisterScope::AcquireD() { |
| VIXL_ASSERT(masm_ != NULL); |
| DRegister reg = |
| masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister(); |
| VIXL_CHECK(reg.IsValid()); |
| masm_->GetScratchVRegisterList()->Remove(reg); |
| return reg; |
| } |
| |
| |
| SRegister UseScratchRegisterScope::AcquireS() { |
| VIXL_ASSERT(masm_ != NULL); |
| SRegister reg = |
| masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister(); |
| VIXL_CHECK(reg.IsValid()); |
| masm_->GetScratchVRegisterList()->Remove(reg); |
| return reg; |
| } |
| |
| |
| void UseScratchRegisterScope::Release(const Register& reg) { |
| VIXL_ASSERT(masm_ != NULL); |
| VIXL_ASSERT(reg.IsValid()); |
| VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg)); |
| masm_->GetScratchRegisterList()->Combine(reg); |
| } |
| |
| |
| void UseScratchRegisterScope::Release(const VRegister& reg) { |
| VIXL_ASSERT(masm_ != NULL); |
| VIXL_ASSERT(reg.IsValid()); |
| VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg)); |
| masm_->GetScratchVRegisterList()->Combine(reg); |
| } |
| |
| |
| void UseScratchRegisterScope::Include(const RegisterList& list) { |
| VIXL_ASSERT(masm_ != NULL); |
| RegisterList excluded_registers(sp, lr, pc); |
| uint32_t mask = list.GetList() & ~excluded_registers.GetList(); |
| RegisterList* available = masm_->GetScratchRegisterList(); |
| available->SetList(available->GetList() | mask); |
| } |
| |
| |
| void UseScratchRegisterScope::Include(const VRegisterList& list) { |
| VIXL_ASSERT(masm_ != NULL); |
| VRegisterList* available = masm_->GetScratchVRegisterList(); |
| available->SetList(available->GetList() | list.GetList()); |
| } |
| |
| |
| void UseScratchRegisterScope::Exclude(const RegisterList& list) { |
| VIXL_ASSERT(masm_ != NULL); |
| RegisterList* available = masm_->GetScratchRegisterList(); |
| available->SetList(available->GetList() & ~list.GetList()); |
| } |
| |
| |
| void UseScratchRegisterScope::Exclude(const VRegisterList& list) { |
| VIXL_ASSERT(masm_ != NULL); |
| VRegisterList* available = masm_->GetScratchVRegisterList(); |
| available->SetList(available->GetList() & ~list.GetList()); |
| } |
| |
| |
| void UseScratchRegisterScope::Exclude(const Operand& operand) { |
| if (operand.IsImmediateShiftedRegister()) { |
| Exclude(operand.GetBaseRegister()); |
| } else if (operand.IsRegisterShiftedRegister()) { |
| Exclude(operand.GetBaseRegister(), operand.GetShiftRegister()); |
| } else { |
| VIXL_ASSERT(operand.IsImmediate()); |
| } |
| } |
| |
| |
| void UseScratchRegisterScope::ExcludeAll() { |
| VIXL_ASSERT(masm_ != NULL); |
| masm_->GetScratchRegisterList()->SetList(0); |
| masm_->GetScratchVRegisterList()->SetList(0); |
| } |
| |
| |
| void MacroAssembler::EnsureEmitPoolsFor(size_t size_arg) { |
| // We skip the check when the pools are blocked. |
| if (ArePoolsBlocked()) return; |
| |
| VIXL_ASSERT(IsUint32(size_arg)); |
| uint32_t size = static_cast<uint32_t>(size_arg); |
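// If emitting 'size' more bytes would take a pooled object (a literal or a
// veneer) out of range, emit the pools now, before the instruction.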
| |
| if (pool_manager_.MustEmit(GetCursorOffset(), size)) { |
| int32_t new_pc = pool_manager_.Emit(this, GetCursorOffset(), size); |
| VIXL_ASSERT(new_pc == GetCursorOffset()); |
| USE(new_pc); |
| } |
| } |
| |
| |
| void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond, |
| Register tmp, |
| uint32_t imm) { |
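// Try the cheapest sequences first: a single mov for values that fit in 16
// bits, then a single mvn when the inverted value is encodable, and finally
// fall back to a mov/movt pair.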
| if (IsUintN(16, imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| mov(cond, tmp, imm & 0xffff); |
| return; |
| } |
| if (IsUsingT32()) { |
| if (ImmediateT32::IsImmediateT32(~imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| mvn(cond, tmp, ~imm); |
| return; |
| } |
| } else { |
| if (ImmediateA32::IsImmediateA32(~imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| mvn(cond, tmp, ~imm); |
| return; |
| } |
| } |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| mov(cond, tmp, imm & 0xffff); |
| movt(cond, tmp, imm >> 16); |
| } |
| |
| |
| MemOperand MacroAssembler::MemOperandComputationHelper( |
| Condition cond, |
| Register scratch, |
| Register base, |
| uint32_t offset, |
| uint32_t extra_offset_mask) { |
| VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch)); |
| VIXL_ASSERT(!AliasesAvailableScratchRegister(base)); |
| VIXL_ASSERT(allow_macro_instructions_); |
| VIXL_ASSERT(OutsideITBlock()); |
| |
| // Check for the simple pass-through case. |
| if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset); |
| |
| MacroEmissionCheckScope guard(this); |
| ITScope it_scope(this, &cond, guard); |
| |
| uint32_t load_store_offset = offset & extra_offset_mask; |
| uint32_t add_offset = offset & ~extra_offset_mask; |
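// load_store_offset is the part of the offset that the load or store can
// encode directly; add_offset is the remainder, which must first be added to
// the base register.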
| if ((add_offset != 0) && (IsModifiedImmediate(offset) || |
| IsModifiedImmediate(UnsignedNegate(offset)))) { |
| load_store_offset = 0; |
| add_offset = offset; |
| } |
| |
| if (base.IsPC()) { |
| // Special handling for PC bases. We must read the PC in the first |
| // instruction (and only in that instruction), and we must also take care to |
| // keep the same address calculation as loads and stores. For T32, that |
| // means using something like ADR, which uses AlignDown(PC, 4). |
| |
| // We don't handle positive offsets from PC because the intention is not |
| // clear; does the user expect the offset from the current |
| // GetCursorOffset(), or to allow a certain amount of space after the |
| // instruction? |
| VIXL_ASSERT((offset & 0x80000000) != 0); |
| if (IsUsingT32()) { |
| // T32: make the first instruction "SUB (immediate, from PC)" -- an alias |
| // of ADR -- to get behaviour like loads and stores. This ADR can handle |
| // at least as much offset as the load_store_offset so it can replace it. |
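// For example (purely illustrative), with offset == -0x100 and a 0xfff
// extra_offset_mask, this path emits "sub scratch, pc, #0x100" and returns
// MemOperand(scratch, 0).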
| |
| uint32_t sub_pc_offset = UnsignedNegate(offset) & 0xfff; |
| load_store_offset = (offset + sub_pc_offset) & extra_offset_mask; |
| add_offset = (offset + sub_pc_offset) & ~extra_offset_mask; |
| |
| ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes); |
| sub(cond, scratch, base, sub_pc_offset); |
| |
| if (add_offset == 0) return MemOperand(scratch, load_store_offset); |
| |
| // The rest of the offset can be generated in the usual way. |
| base = scratch; |
| } |
| // A32 can use any SUB instruction, so we don't have to do anything special |
| // here except to ensure that we read the PC first. |
| } |
| |
| add(cond, scratch, base, add_offset); |
| return MemOperand(scratch, load_store_offset); |
| } |
| |
| |
| uint32_t MacroAssembler::GetOffsetMask(InstructionType type, |
| AddrMode addrmode) { |
| switch (type) { |
| case kLdr: |
| case kLdrb: |
| case kStr: |
| case kStrb: |
| if (IsUsingA32() || (addrmode == Offset)) { |
| return 0xfff; |
| } else { |
| return 0xff; |
| } |
| case kLdrsb: |
| case kLdrh: |
| case kLdrsh: |
| case kStrh: |
| if (IsUsingT32() && (addrmode == Offset)) { |
| return 0xfff; |
| } else { |
| return 0xff; |
| } |
| case kVldr: |
| case kVstr: |
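// VLDR/VSTR encode an 8-bit immediate scaled by four, hence 0x3fc.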
| return 0x3fc; |
| case kLdrd: |
| case kStrd: |
| if (IsUsingA32()) { |
| return 0xff; |
| } else { |
| return 0x3fc; |
| } |
| default: |
| VIXL_UNREACHABLE(); |
| return 0; |
| } |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRRRR( |
| const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRRRD( |
| const char* format, uint32_t a, uint32_t b, uint32_t c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRRDR( |
| const char* format, uint32_t a, uint32_t b, double c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRRDD( |
| const char* format, uint32_t a, uint32_t b, double c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRDRR( |
| const char* format, uint32_t a, double b, uint32_t c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRDRD( |
| const char* format, uint32_t a, double b, uint32_t c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRDDR( |
| const char* format, uint32_t a, double b, double c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineRDDD( |
| const char* format, uint32_t a, double b, double c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDRRR( |
| const char* format, double a, uint32_t b, uint32_t c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDRRD( |
| const char* format, double a, uint32_t b, uint32_t c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDRDR( |
| const char* format, double a, uint32_t b, double c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDRDD( |
| const char* format, double a, uint32_t b, double c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDDRR( |
| const char* format, double a, double b, uint32_t c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDDRD( |
| const char* format, double a, double b, uint32_t c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDDDR( |
| const char* format, double a, double b, double c, uint32_t d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| HARDFLOAT void PrintfTrampolineDDDD( |
| const char* format, double a, double b, double c, double d) { |
| printf(format, a, b, c, d); |
| } |
| |
| |
| void MacroAssembler::Printf(const char* format, |
| CPURegister reg1, |
| CPURegister reg2, |
| CPURegister reg3, |
| CPURegister reg4) { |
| // Exclude all registers from the available scratch registers, so |
| // that we are able to use ip below. |
| // TODO: Refactor this function to use UseScratchRegisterScope |
| // for temporary registers below. |
| UseScratchRegisterScope scratch(this); |
| scratch.ExcludeAll(); |
| if (generate_simulator_code_) { |
| PushRegister(reg4); |
| PushRegister(reg3); |
| PushRegister(reg2); |
| PushRegister(reg1); |
| Push(RegisterList(r0, r1)); |
| StringLiteral* format_literal = |
| new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool); |
| Adr(r0, format_literal); |
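// Pack the type of each argument into r1, four bits per argument.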
| uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) | |
| (reg2.GetType() << 4) | reg1.GetType(); |
| Mov(r1, args); |
| Hvc(kPrintfCode); |
| Pop(RegisterList(r0, r1)); |
| int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() + |
| reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes(); |
| Drop(size); |
| } else { |
| // Generate on a native platform => 32 bit environment. |
| // Preserve core registers r0-r3, r12, r14 |
| const uint32_t saved_registers_mask = |
| kCallerSavedRegistersMask | (1 << r5.GetCode()); |
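// r5 is saved as well: it is used below to hold the stack alignment
// adjustment.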
| Push(RegisterList(saved_registers_mask)); |
| // Push VFP registers. |
| Vpush(Untyped64, DRegisterList(d0, 8)); |
| if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16)); |
// Find a register which has been saved and which doesn't need to be
// printed.
| RegisterList available_registers(kCallerSavedRegistersMask); |
| if (reg1.GetType() == CPURegister::kRRegister) { |
| available_registers.Remove(Register(reg1.GetCode())); |
| } |
| if (reg2.GetType() == CPURegister::kRRegister) { |
| available_registers.Remove(Register(reg2.GetCode())); |
| } |
| if (reg3.GetType() == CPURegister::kRRegister) { |
| available_registers.Remove(Register(reg3.GetCode())); |
| } |
| if (reg4.GetType() == CPURegister::kRRegister) { |
| available_registers.Remove(Register(reg4.GetCode())); |
| } |
| Register tmp = available_registers.GetFirstAvailableRegister(); |
| VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister); |
| // Push the flags. |
| Mrs(tmp, APSR); |
| Push(tmp); |
| Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR); |
| Push(tmp); |
| // Push the registers to print on the stack. |
| PushRegister(reg4); |
| PushRegister(reg3); |
| PushRegister(reg2); |
| PushRegister(reg1); |
| int core_count = 1; |
| int vfp_count = 0; |
| uint32_t printf_type = 0; |
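// printf_type gets one bit per printed argument: bit n is set when the
// (n+1)th argument is passed as a double. It is used below to pick the
// trampoline whose 'R'/'D' suffix matches the argument pattern.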
// Pop the registers to print and store them into r1-r3 and/or d0-d3.
// Reg4 may stay on the stack if all the registers to print are core
// registers.
| PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type); |
| PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type); |
| PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type); |
| PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type); |
| // Ensure that the stack is aligned on 8 bytes. |
| And(r5, sp, 0x7); |
| if (core_count == 5) { |
// One 32 bit argument (reg4) has been left on the stack => align the stack
// before the argument.
| Pop(r0); |
| Sub(sp, sp, r5); |
| Push(r0); |
| } else { |
| Sub(sp, sp, r5); |
| } |
| // Select the right trampoline depending on the arguments. |
| uintptr_t address; |
| switch (printf_type) { |
| case 0: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR); |
| break; |
| case 1: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR); |
| break; |
| case 2: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR); |
| break; |
| case 3: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR); |
| break; |
| case 4: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR); |
| break; |
| case 5: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR); |
| break; |
| case 6: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR); |
| break; |
| case 7: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR); |
| break; |
| case 8: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD); |
| break; |
| case 9: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD); |
| break; |
| case 10: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD); |
| break; |
| case 11: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD); |
| break; |
| case 12: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD); |
| break; |
| case 13: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD); |
| break; |
| case 14: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD); |
| break; |
| case 15: |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD); |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR); |
| break; |
| } |
| StringLiteral* format_literal = |
| new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool); |
| Adr(r0, format_literal); |
| Mov(ip, Operand::From(address)); |
| Blx(ip); |
| // If register reg4 was left on the stack => skip it. |
| if (core_count == 5) Drop(kRegSizeInBytes); |
| // Restore the stack as it was before alignment. |
| Add(sp, sp, r5); |
| // Restore the flags. |
| Pop(tmp); |
| Vmsr(FPSCR, tmp); |
| Pop(tmp); |
| Msr(APSR_nzcvqg, tmp); |
| // Restore the registers. |
| if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16)); |
| Vpop(Untyped64, DRegisterList(d0, 8)); |
| Pop(RegisterList(saved_registers_mask)); |
| } |
| } |
| |
| |
| void MacroAssembler::PushRegister(CPURegister reg) { |
| switch (reg.GetType()) { |
| case CPURegister::kNoRegister: |
| break; |
| case CPURegister::kRRegister: |
| Push(Register(reg.GetCode())); |
| break; |
| case CPURegister::kSRegister: |
| Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode()))); |
| break; |
| case CPURegister::kDRegister: |
| Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode()))); |
| break; |
| case CPURegister::kQRegister: |
| VIXL_UNIMPLEMENTED(); |
| break; |
| } |
| } |
| |
| |
| void MacroAssembler::PreparePrintfArgument(CPURegister reg, |
| int* core_count, |
| int* vfp_count, |
| uint32_t* printf_type) { |
| switch (reg.GetType()) { |
| case CPURegister::kNoRegister: |
| break; |
| case CPURegister::kRRegister: |
| VIXL_ASSERT(*core_count <= 4); |
| if (*core_count < 4) Pop(Register(*core_count)); |
| *core_count += 1; |
| break; |
| case CPURegister::kSRegister: |
| VIXL_ASSERT(*vfp_count < 4); |
| *printf_type |= 1 << (*core_count + *vfp_count - 1); |
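// Single-precision values are converted to double, matching the default
// argument promotions of a C variadic call like printf.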
| Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2))); |
| Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2)); |
| *vfp_count += 1; |
| break; |
| case CPURegister::kDRegister: |
| VIXL_ASSERT(*vfp_count < 4); |
| *printf_type |= 1 << (*core_count + *vfp_count - 1); |
| Vpop(Untyped64, DRegisterList(DRegister(*vfp_count))); |
| *vfp_count += 1; |
| break; |
| case CPURegister::kQRegister: |
| VIXL_UNIMPLEMENTED(); |
| break; |
| } |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondROp instruction, |
| Condition cond, |
| Register rn, |
| const Operand& operand) { |
| VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) || |
| (type == kUxtb16)); |
| |
| if (type == kMovt) { |
| VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.\n"); |
| } |
| |
| // This delegate only supports teq with immediates. |
| CONTEXT_SCOPE; |
| if ((type == kTeq) && operand.IsImmediate()) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate()); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| teq(cond, rn, scratch); |
| return; |
| } |
| Assembler::Delegate(type, instruction, cond, rn, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondSizeROp instruction, |
| Condition cond, |
| EncodingSize size, |
| Register rn, |
| const Operand& operand) { |
| CONTEXT_SCOPE; |
| VIXL_ASSERT(size.IsBest()); |
| VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) || |
| (type == kMovs) || (type == kMvn) || (type == kMvns) || |
| (type == kSxtb) || (type == kSxth) || (type == kTst) || |
| (type == kUxtb) || (type == kUxth)); |
| if (IsUsingT32() && operand.IsRegisterShiftedRegister()) { |
VIXL_ASSERT((type != kMov) && (type != kMovs));
| InstructionCondRROp shiftop = NULL; |
| switch (operand.GetShift().GetType()) { |
| case LSL: |
| shiftop = &Assembler::lsl; |
| break; |
| case LSR: |
| shiftop = &Assembler::lsr; |
| break; |
| case ASR: |
| shiftop = &Assembler::asr; |
| break; |
| case RRX: |
| // A RegisterShiftedRegister operand cannot have a shift of type RRX. |
| VIXL_UNREACHABLE(); |
| break; |
| case ROR: |
| shiftop = &Assembler::ror; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| if (shiftop != NULL) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| (this->*shiftop)(cond, |
| scratch, |
| operand.GetBaseRegister(), |
| operand.GetShiftRegister()); |
| (this->*instruction)(cond, size, rn, scratch); |
| return; |
| } |
| } |
| if (operand.IsImmediate()) { |
| uint32_t imm = operand.GetImmediate(); |
| switch (type) { |
| case kMov: |
| case kMovs: |
| if (!rn.IsPC()) { |
| // Immediate is too large, but not using PC, so handle with mov{t}. |
| HandleOutOfBoundsImmediate(cond, rn, imm); |
| if (type == kMovs) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| tst(cond, rn, rn); |
| } |
| return; |
| } else if (type == kMov) { |
| VIXL_ASSERT(IsUsingA32() || cond.Is(al)); |
| // Immediate is too large and using PC, so handle using a temporary |
| // register. |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| HandleOutOfBoundsImmediate(al, scratch, imm); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| bx(cond, scratch); |
| return; |
| } |
| break; |
| case kCmn: |
| case kCmp: |
| if (IsUsingA32() || !rn.IsPC()) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| HandleOutOfBoundsImmediate(cond, scratch, imm); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rn, scratch); |
| return; |
| } |
| break; |
| case kMvn: |
| case kMvns: |
| if (!rn.IsPC()) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| HandleOutOfBoundsImmediate(cond, scratch, imm); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rn, scratch); |
| return; |
| } |
| break; |
| case kTst: |
| if (IsUsingA32() || !rn.IsPC()) { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| HandleOutOfBoundsImmediate(cond, scratch, imm); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rn, scratch); |
| return; |
| } |
| break; |
default: // kSxtb, kSxth, kUxtb, kUxth
| break; |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, size, rn, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondRROp instruction, |
| Condition cond, |
| Register rd, |
| Register rn, |
| const Operand& operand) { |
| if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) || |
| (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) || |
| (type == kPkhbt) || (type == kPkhtb)) { |
| UnimplementedDelegate(type); |
| return; |
| } |
| |
| // This delegate only handles the following instructions. |
| VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) || |
| (type == kRscs)); |
| CONTEXT_SCOPE; |
| |
| // T32 does not support register shifted register operands, emulate it. |
| if (IsUsingT32() && operand.IsRegisterShiftedRegister()) { |
| InstructionCondRROp shiftop = NULL; |
| switch (operand.GetShift().GetType()) { |
| case LSL: |
| shiftop = &Assembler::lsl; |
| break; |
| case LSR: |
| shiftop = &Assembler::lsr; |
| break; |
| case ASR: |
| shiftop = &Assembler::asr; |
| break; |
| case RRX: |
| // A RegisterShiftedRegister operand cannot have a shift of type RRX. |
| VIXL_UNREACHABLE(); |
| break; |
| case ROR: |
| shiftop = &Assembler::ror; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| if (shiftop != NULL) { |
| UseScratchRegisterScope temps(this); |
| Register rm = operand.GetBaseRegister(); |
| Register rs = operand.GetShiftRegister(); |
| // Try to use rd as a scratch register. We can do this if it aliases rs or |
| // rm (because we read them in the first instruction), but not rn. |
| if (!rd.Is(rn)) temps.Include(rd); |
| Register scratch = temps.Acquire(); |
| // TODO: The scope length was measured empirically. We should analyse the |
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| (this->*shiftop)(cond, scratch, rm, rs); |
| (this->*instruction)(cond, rd, rn, scratch); |
| return; |
| } |
| } |
| |
// T32 does not have an Rsc instruction, so negate the lhs input and turn it
// into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
| // adc rd, rn, operand <-> rsc rd, NOT(rn), operand |
| if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) { |
// The RegisterShiftedRegister case should have been handled above.
| VIXL_ASSERT(!operand.IsRegisterShiftedRegister()); |
| UseScratchRegisterScope temps(this); |
| // Try to use rd as a scratch register. We can do this if it aliases rn |
| // (because we read it in the first instruction), but not rm. |
| temps.Include(rd); |
| temps.Exclude(operand); |
| Register negated_rn = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| mvn(cond, negated_rn, rn); |
| } |
| if (type == kRsc) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| adc(cond, rd, negated_rn, operand); |
| return; |
| } |
| // TODO: We shouldn't have to specify how much space the next instruction |
| // needs. |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| adcs(cond, rd, negated_rn, operand); |
| return; |
| } |
| |
| if (operand.IsImmediate()) { |
| // If the immediate can be encoded when inverted, turn Orn into Orr. |
// Otherwise rely on HandleOutOfBoundsImmediate to materialise the immediate
// in a scratch register.
| int32_t imm = operand.GetSignedImmediate(); |
| if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| switch (type) { |
| case kOrn: |
| orr(cond, rd, rn, ~imm); |
| return; |
| case kOrns: |
| orrs(cond, rd, rn, ~imm); |
| return; |
| default: |
| VIXL_UNREACHABLE(); |
| break; |
| } |
| } |
| } |
| |
// A32 does not have an Orn instruction, so negate the rhs input and turn it
// into an Orr.
| if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) { |
| // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable |
| // mvn r0, r2 |
| // orr r0, r1, r0 |
| Register scratch; |
| UseScratchRegisterScope temps(this); |
// Try to use rd as a scratch register. We can do this if it aliases a
// register read by the operand (because the operand is read by the first
// instruction), but not rn.
| if (!rd.Is(rn)) temps.Include(rd); |
| scratch = temps.Acquire(); |
| { |
| // TODO: We shouldn't have to specify how much space the next instruction |
| // needs. |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| mvn(cond, scratch, operand); |
| } |
| if (type == kOrns) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| orrs(cond, rd, rn, scratch); |
| return; |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| orr(cond, rd, rn, scratch); |
| return; |
| } |
| |
| if (operand.IsImmediate()) { |
| UseScratchRegisterScope temps(this); |
| // Allow using the destination as a scratch register if possible. |
| if (!rd.Is(rn)) temps.Include(rd); |
| Register scratch = temps.Acquire(); |
| int32_t imm = operand.GetSignedImmediate(); |
| HandleOutOfBoundsImmediate(cond, scratch, imm); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, rd, rn, scratch); |
| return; |
| } |
| Assembler::Delegate(type, instruction, cond, rd, rn, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondSizeRL instruction, |
| Condition cond, |
| EncodingSize size, |
| Register rd, |
| Location* location) { |
| VIXL_ASSERT((type == kLdr) || (type == kAdr)); |
| |
| CONTEXT_SCOPE; |
| VIXL_ASSERT(size.IsBest()); |
| |
| if ((type == kLdr) && location->IsBound()) { |
| CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes); |
| UseScratchRegisterScope temps(this); |
| temps.Include(rd); |
| uint32_t mask = GetOffsetMask(type, Offset); |
| ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), location, mask)); |
| return; |
| } |
| |
| Assembler::Delegate(type, instruction, cond, size, rd, location); |
| } |
| |
| |
| bool MacroAssembler::GenerateSplitInstruction( |
| InstructionCondSizeRROp instruction, |
| Condition cond, |
| Register rd, |
| Register rn, |
| uint32_t imm, |
| uint32_t mask) { |
| uint32_t high = imm & ~mask; |
| if (!IsModifiedImmediate(high) && !rn.IsPC()) return false; |
// If high is a modified immediate, we can perform the operation with only
// two instructions.
// Otherwise, if rn is PC, we still use this pattern to avoid moving PC into
// a temporary register, even though the second call below may itself expand
// into up to three instructions.
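// For example (T32, illustrative), "Add(r0, r1, 0x12345)" with a 0xfff mask
// splits into "add r0, r1, #0x345" followed by "add r0, r0, #0x12000".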
| uint32_t low = imm & mask; |
| CodeBufferCheckScope scope(this, |
| (rn.IsPC() ? 4 : 2) * kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, Best, rd, rn, low); |
| (this->*instruction)(cond, Best, rd, rd, high); |
| return true; |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondSizeRROp instruction, |
| Condition cond, |
| EncodingSize size, |
| Register rd, |
| Register rn, |
| const Operand& operand) { |
| VIXL_ASSERT( |
| (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) || |
| (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) || |
| (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) || |
| (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) || |
| (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) || |
| (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) || |
| (type == kSub) || (type == kSubs)); |
| |
| CONTEXT_SCOPE; |
| VIXL_ASSERT(size.IsBest()); |
| if (IsUsingT32() && operand.IsRegisterShiftedRegister()) { |
| InstructionCondRROp shiftop = NULL; |
| switch (operand.GetShift().GetType()) { |
| case LSL: |
| shiftop = &Assembler::lsl; |
| break; |
| case LSR: |
| shiftop = &Assembler::lsr; |
| break; |
| case ASR: |
| shiftop = &Assembler::asr; |
| break; |
| case RRX: |
| // A RegisterShiftedRegister operand cannot have a shift of type RRX. |
| VIXL_UNREACHABLE(); |
| break; |
| case ROR: |
| shiftop = &Assembler::ror; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| if (shiftop != NULL) { |
| UseScratchRegisterScope temps(this); |
| Register rm = operand.GetBaseRegister(); |
| Register rs = operand.GetShiftRegister(); |
| // Try to use rd as a scratch register. We can do this if it aliases rs or |
| // rm (because we read them in the first instruction), but not rn. |
| if (!rd.Is(rn)) temps.Include(rd); |
| Register scratch = temps.Acquire(); |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| (this->*shiftop)(cond, scratch, rm, rs); |
| (this->*instruction)(cond, size, rd, rn, scratch); |
| return; |
| } |
| } |
| if (operand.IsImmediate()) { |
| int32_t imm = operand.GetSignedImmediate(); |
| if (ImmediateT32::IsImmediateT32(~imm)) { |
| if (IsUsingT32()) { |
| switch (type) { |
| case kOrr: |
| orn(cond, rd, rn, ~imm); |
| return; |
| case kOrrs: |
| orns(cond, rd, rn, ~imm); |
| return; |
| default: |
| break; |
| } |
| } |
| } |
| if (imm < 0) { |
| InstructionCondSizeRROp asmcb = NULL; |
| // Add and sub are equivalent using an arithmetic negation: |
| // add rd, rn, #imm <-> sub rd, rn, - #imm |
| // Add and sub with carry are equivalent using a bitwise NOT: |
| // adc rd, rn, #imm <-> sbc rd, rn, NOT #imm |
| switch (type) { |
| case kAdd: |
| asmcb = &Assembler::sub; |
| imm = -imm; |
| break; |
| case kAdds: |
| asmcb = &Assembler::subs; |
| imm = -imm; |
| break; |
| case kSub: |
| asmcb = &Assembler::add; |
| imm = -imm; |
| break; |
| case kSubs: |
| asmcb = &Assembler::adds; |
| imm = -imm; |
| break; |
| case kAdc: |
| asmcb = &Assembler::sbc; |
| imm = ~imm; |
| break; |
| case kAdcs: |
| asmcb = &Assembler::sbcs; |
| imm = ~imm; |
| break; |
| case kSbc: |
| asmcb = &Assembler::adc; |
| imm = ~imm; |
| break; |
| case kSbcs: |
| asmcb = &Assembler::adcs; |
| imm = ~imm; |
| break; |
| default: |
| break; |
| } |
| if (asmcb != NULL) { |
| CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes); |
| (this->*asmcb)(cond, size, rd, rn, Operand(imm)); |
| return; |
| } |
| } |
| |
| // When rn is PC, only handle negative offsets. The correct way to handle |
| // positive offsets isn't clear; does the user want the offset from the |
| // start of the macro, or from the end (to allow a certain amount of space)? |
| // When type is Add or Sub, imm is always positive (imm < 0 has just been |
| // handled and imm == 0 would have been generated without the need of a |
| // delegate). Therefore, only add to PC is forbidden here. |
| if ((((type == kAdd) && !rn.IsPC()) || (type == kSub)) && |
| (IsUsingA32() || (!rd.IsPC() && !rn.IsPC()))) { |
| VIXL_ASSERT(imm > 0); |
| // Try to break the constant into two modified immediates. |
| // For T32 also try to break the constant into one imm12 and one modified |
// immediate. Count the trailing zeroes and round the count down to an even
// value.
| int trailing_zeroes = CountTrailingZeros(imm) & ~1u; |
| uint32_t mask = ((trailing_zeroes < 4) && IsUsingT32()) |
| ? 0xfff |
| : (0xff << trailing_zeroes); |
| if (GenerateSplitInstruction(instruction, cond, rd, rn, imm, mask)) { |
| return; |
| } |
| InstructionCondSizeRROp asmcb = NULL; |
| switch (type) { |
| case kAdd: |
| asmcb = &Assembler::sub; |
| break; |
| case kSub: |
| asmcb = &Assembler::add; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| if (GenerateSplitInstruction(asmcb, cond, rd, rn, -imm, mask)) { |
| return; |
| } |
| } |
| |
| UseScratchRegisterScope temps(this); |
| // Allow using the destination as a scratch register if possible. |
| if (!rd.Is(rn)) temps.Include(rd); |
| if (rn.IsPC()) { |
| // If we're reading the PC, we need to do it in the first instruction, |
| // otherwise we'll read the wrong value. We rely on this to handle the |
| // long-range PC-relative MemOperands which can result from user-managed |
| // literals. |
| |
| // Only handle negative offsets. The correct way to handle positive |
| // offsets isn't clear; does the user want the offset from the start of |
| // the macro, or from the end (to allow a certain amount of space)? |
| bool offset_is_negative_or_zero = (imm <= 0); |
| switch (type) { |
| case kAdd: |
| case kAdds: |
| offset_is_negative_or_zero = (imm <= 0); |
| break; |
| case kSub: |
| case kSubs: |
| offset_is_negative_or_zero = (imm >= 0); |
| break; |
| case kAdc: |
| case kAdcs: |
| offset_is_negative_or_zero = (imm < 0); |
| break; |
| case kSbc: |
| case kSbcs: |
| offset_is_negative_or_zero = (imm > 0); |
| break; |
| default: |
| break; |
| } |
| if (offset_is_negative_or_zero) { |
| { |
| rn = temps.Acquire(); |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| mov(cond, rn, pc); |
| } |
| // Recurse rather than falling through, to try to get the immediate into |
| // a single instruction. |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rd, rn, operand); |
| return; |
| } |
| } else { |
| Register scratch = temps.Acquire(); |
| // TODO: The scope length was measured empirically. We should analyse the |
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, operand.GetImmediate()); |
| (this->*instruction)(cond, size, rd, rn, scratch); |
| return; |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, size, rd, rn, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionRL instruction, |
| Register rn, |
| Location* location) { |
| VIXL_ASSERT((type == kCbz) || (type == kCbnz)); |
| |
| CONTEXT_SCOPE; |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| if (IsUsingA32()) { |
| if (type == kCbz) { |
| VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n"); |
| } else { |
| VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n"); |
| } |
| } else if (rn.IsLow()) { |
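// cbz/cbnz can only branch forwards over a small range; emulate the general
// case with a cbnz/cbz over an unconditional branch to the target.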
| switch (type) { |
| case kCbnz: { |
| Label done; |
| cbz(rn, &done); |
| b(location); |
| Bind(&done); |
| return; |
| } |
| case kCbz: { |
| Label done; |
| cbnz(rn, &done); |
| b(location); |
| Bind(&done); |
| return; |
| } |
| default: |
| break; |
| } |
| } |
| Assembler::Delegate(type, instruction, rn, location); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondSizeL instruction, |
| Condition cond, |
| EncodingSize size, |
| Location* location) { |
| VIXL_ASSERT(type == kB); |
| |
| CONTEXT_SCOPE; |
| |
| // Apply veneer to increase range of backwards conditional branches. |
| // This replaces: |
| // label: |
| // <instructions> |
| // bcond label ; T3 |
| // With: |
| // label: |
| // <instructions> |
| // binvcond skip ; T1 |
| // b label ; T4 |
| // skip: |
| Location::Offset offset = location->GetLocation() - |
| (GetCursorOffset() + GetArchitectureStatePCOffset()); |
| if (IsUsingT32() && location->IsBound() && ((offset & 0x1) == 0) && |
| !cond.Is(al) && cond.IsNotNever()) { |
| // Bound locations must be earlier in the code. |
| VIXL_ASSERT(offset < 0); |
| |
| // The offset must be within range of a T4 branch, accounting for the |
| // conditional branch (T1) we emit first, in order to jump over it. |
| offset -= k16BitT32InstructionSizeInBytes; |
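// -16777216 bytes (-2^24) is the backwards reach of the 32-bit T32 branch
// (encoding T4) emitted below.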
| if (offset >= -16777216) { |
| CodeBufferCheckScope scope(this, k16BitT32InstructionSizeInBytes + |
| k32BitT32InstructionSizeInBytes); |
| Label skip; |
| b(cond.Negate(), Narrow, &skip); |
| b(location); |
| Bind(&skip); |
| return; |
| } else { |
| VIXL_ABORT_WITH_MSG("Conditional branch too far for veneer.\n"); |
| } |
| } |
| |
| Assembler::Delegate(type, instruction, cond, size, location); |
| } |
| |
| |
| template <typename T> |
| static inline bool IsI64BitPattern(T imm) { |
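// Returns true if every byte of 'imm' is either 0x00 or 0xff, so that the
// replicated value can be encoded as a "vmov.i64" byte-mask immediate.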
| for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) { |
| if (((imm & mask) != mask) && ((imm & mask) != 0)) return false; |
| } |
| return true; |
| } |
| |
| |
| template <typename T> |
| static inline bool IsI8BitPattern(T imm) { |
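// Returns true if 'imm' is a single byte value repeated in every byte, so
// that it can be encoded as a "vmov.i8" immediate.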
| uint8_t imm8 = imm & 0xff; |
| for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) { |
| imm >>= 8; |
| if ((imm & 0xff) != imm8) return false; |
| } |
| return true; |
| } |
| |
| |
| static inline bool CanBeInverted(uint32_t imm32) { |
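// Returns true if imm32 matches one of the bit patterns shown below, i.e. if
// ~imm32 can be encoded as a "vmvn.i32" immediate.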
| uint32_t fill8 = 0; |
| |
| if ((imm32 & 0xffffff00) == 0xffffff00) { |
| // 11111111 11111111 11111111 abcdefgh |
| return true; |
| } |
| if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) { |
| fill8 = imm32 & 0xff; |
| imm32 >>= 8; |
| if ((imm32 >> 8) == 0xffff) { |
| // 11111111 11111111 abcdefgh 00000000 |
| // or 11111111 11111111 abcdefgh 11111111 |
| return true; |
| } |
| if ((imm32 & 0xff) == fill8) { |
| imm32 >>= 8; |
| if ((imm32 >> 8) == 0xff) { |
| // 11111111 abcdefgh 00000000 00000000 |
| // or 11111111 abcdefgh 11111111 11111111 |
| return true; |
| } |
| if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) { |
| // abcdefgh 11111111 11111111 11111111 |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| |
| template <typename RES, typename T> |
| static inline RES replicate(T imm) { |
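// Replicates 'imm' into every T-sized chunk of a RES value, for example
// replicate<uint64_t>(UINT32_C(0xff0000ff)) == UINT64_C(0xff0000ffff0000ff).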
| VIXL_ASSERT((sizeof(RES) > sizeof(T)) && |
| (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES))); |
| RES res = imm; |
| for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) { |
| res = (res << (sizeof(T) * 8)) | imm; |
| } |
| return res; |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtSSop instruction, |
| Condition cond, |
| DataType dt, |
| SRegister rd, |
| const SOperand& operand) { |
| CONTEXT_SCOPE; |
| if (type == kVmov) { |
| if (operand.IsImmediate() && dt.Is(F32)) { |
| const NeonImmediate& neon_imm = operand.GetNeonImmediate(); |
| if (neon_imm.CanConvert<float>()) { |
| // movw ip, imm16 |
// movt ip, imm16
| // vmov s0, ip |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| float f = neon_imm.GetImmediate<float>(); |
// TODO: The scope length was measured empirically. We should analyse the
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, FloatToRawbits(f)); |
| vmov(cond, rd, scratch); |
| return; |
| } |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, dt, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtDDop instruction, |
| Condition cond, |
| DataType dt, |
| DRegister rd, |
| const DOperand& operand) { |
| CONTEXT_SCOPE; |
| if (type == kVmov) { |
| if (operand.IsImmediate()) { |
| const NeonImmediate& neon_imm = operand.GetNeonImmediate(); |
| switch (dt.GetValue()) { |
| case I32: |
| if (neon_imm.CanConvert<uint32_t>()) { |
| uint32_t imm = neon_imm.GetImmediate<uint32_t>(); |
| // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| // vmov.i32 d0, 0xff0000ff will translate into |
| // vmov.i64 d0, 0xff0000ffff0000ff |
| if (IsI64BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I64, rd, replicate<uint64_t>(imm)); |
| return; |
| } |
| // vmov.i32 d0, 0xffab0000 will translate into |
| // vmvn.i32 d0, 0x0054ffff |
| if (cond.Is(al) && CanBeInverted(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmvn(I32, rd, ~imm); |
| return; |
| } |
| } |
| break; |
| case I16: |
| if (neon_imm.CanConvert<uint16_t>()) { |
| uint16_t imm = neon_imm.GetImmediate<uint16_t>(); |
| // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| } |
| break; |
| case I64: |
| if (neon_imm.CanConvert<uint64_t>()) { |
| uint64_t imm = neon_imm.GetImmediate<uint64_t>(); |
| // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| // mov ip, lo(imm64) |
| // vdup d0, ip |
// vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
| // preserved |
| { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
// TODO: The scope length was measured empirically. We should analyse the
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, |
| 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff)); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vdup(cond, Untyped32, rd, scratch); |
| } |
| // mov ip, hi(imm64) |
| // vmov d0[1], ip |
| { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
// TODO: The scope length was measured empirically. We should analyse the
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, |
| 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, static_cast<uint32_t>(imm >> 32)); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch); |
| } |
| return; |
| } |
| break; |
| default: |
| break; |
| } |
| VIXL_ASSERT(!dt.Is(I8)); // I8 cases should have been handled already. |
| if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) { |
| // mov ip, imm32 |
// vdup.16/32 d0, ip
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, neon_imm.GetImmediate<uint32_t>()); |
| } |
| DataTypeValue vdup_dt = Untyped32; |
| switch (dt.GetValue()) { |
| case I16: |
| vdup_dt = Untyped16; |
| break; |
| case I32: |
| vdup_dt = Untyped32; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vdup(cond, vdup_dt, rd, scratch); |
| return; |
| } |
| if (dt.Is(F32) && neon_imm.CanConvert<float>()) { |
| float f = neon_imm.GetImmediate<float>(); |
| // Punt to vmov.i32 |
| // TODO: The scope length was guessed based on the double case below. We |
// should analyse the worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| vmov(cond, I32, rd, FloatToRawbits(f)); |
| return; |
| } |
| if (dt.Is(F64) && neon_imm.CanConvert<double>()) { |
| // Punt to vmov.i64 |
| double d = neon_imm.GetImmediate<double>(); |
// TODO: The scope length was measured empirically. We should analyse the
// worst-case size and add targeted tests.
| CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes); |
| vmov(cond, I64, rd, DoubleToRawbits(d)); |
| return; |
| } |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, dt, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtQQop instruction, |
| Condition cond, |
| DataType dt, |
| QRegister rd, |
| const QOperand& operand) { |
| CONTEXT_SCOPE; |
| if (type == kVmov) { |
| if (operand.IsImmediate()) { |
| const NeonImmediate& neon_imm = operand.GetNeonImmediate(); |
| switch (dt.GetValue()) { |
| case I32: |
| if (neon_imm.CanConvert<uint32_t>()) { |
| uint32_t imm = neon_imm.GetImmediate<uint32_t>(); |
| // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| // vmov.i32 d0, 0xff0000ff will translate into |
| // vmov.i64 d0, 0xff0000ffff0000ff |
| if (IsI64BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I64, rd, replicate<uint64_t>(imm)); |
| return; |
| } |
| // vmov.i32 d0, 0xffab0000 will translate into |
| // vmvn.i32 d0, 0x0054ffff |
| if (CanBeInverted(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmvn(cond, I32, rd, ~imm); |
| return; |
| } |
| } |
| break; |
| case I16: |
| if (neon_imm.CanConvert<uint16_t>()) { |
| uint16_t imm = neon_imm.GetImmediate<uint16_t>(); |
| // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| } |
| break; |
| case I64: |
| if (neon_imm.CanConvert<uint64_t>()) { |
| uint64_t imm = neon_imm.GetImmediate<uint64_t>(); |
| // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff |
| if (IsI8BitPattern(imm)) { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, I8, rd, imm & 0xff); |
| return; |
| } |
| // mov ip, lo(imm64) |
| // vdup q0, ip |
// vdup is preferred to 'vmov d0[0]' as d0[1-3] don't need to be
| // preserved |
| { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, |
| 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff)); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vdup(cond, Untyped32, rd, scratch); |
| } |
| // mov ip, hi(imm64) |
| // vmov.i32 d0[1], ip |
| // vmov d1, d0 |
| { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, |
| 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, static_cast<uint32_t>(imm >> 32)); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, |
| Untyped32, |
| DRegisterLane(rd.GetLowDRegister(), 1), |
| scratch); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister()); |
| } |
| return; |
| } |
| break; |
| default: |
| break; |
| } |
| VIXL_ASSERT(!dt.Is(I8)); // I8 cases should have been handled already. |
| if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) { |
| // mov ip, imm32 |
// vdup.16/32 q0, ip
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, neon_imm.GetImmediate<uint32_t>()); |
| } |
| DataTypeValue vdup_dt = Untyped32; |
| switch (dt.GetValue()) { |
| case I16: |
| vdup_dt = Untyped16; |
| break; |
| case I32: |
| vdup_dt = Untyped32; |
| break; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| vdup(cond, vdup_dt, rd, scratch); |
| return; |
| } |
| if (dt.Is(F32) && neon_imm.CanConvert<float>()) { |
// Punt to vmov.i32
| float f = neon_imm.GetImmediate<float>(); |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| vmov(cond, I32, rd, FloatToRawbits(f)); |
| return; |
| } |
| if (dt.Is(F64) && neon_imm.CanConvert<double>()) { |
| // Use vmov to create the double in the low D register, then duplicate |
| // it into the high D register. |
| double d = neon_imm.GetImmediate<double>(); |
| CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes); |
| vmov(cond, F64, rd.GetLowDRegister(), d); |
| vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister()); |
| return; |
| } |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, dt, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondRL instruction, |
| Condition cond, |
| Register rt, |
| Location* location) { |
| VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) || |
| (type == kLdrsh)); |
| |
| CONTEXT_SCOPE; |
| |
| if (location->IsBound()) { |
| CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes); |
| UseScratchRegisterScope temps(this); |
| temps.Include(rt); |
| Register scratch = temps.Acquire(); |
| uint32_t mask = GetOffsetMask(type, Offset); |
| switch (type) { |
| case kLdrb: |
| ldrb(rt, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| case kLdrh: |
| ldrh(rt, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| case kLdrsb: |
| ldrsb(rt, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| case kLdrsh: |
| ldrsh(rt, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| default: |
| VIXL_UNREACHABLE(); |
| } |
| return; |
| } |
| |
| Assembler::Delegate(type, instruction, cond, rt, location); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondRRL instruction, |
| Condition cond, |
| Register rt, |
| Register rt2, |
| Location* location) { |
| VIXL_ASSERT(type == kLdrd); |
| |
| CONTEXT_SCOPE; |
| |
| if (location->IsBound()) { |
| CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes); |
| UseScratchRegisterScope temps(this); |
| temps.Include(rt, rt2); |
| Register scratch = temps.Acquire(); |
| uint32_t mask = GetOffsetMask(type, Offset); |
| ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| } |
| |
| Assembler::Delegate(type, instruction, cond, rt, rt2, location); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondSizeRMop instruction, |
| Condition cond, |
| EncodingSize size, |
| Register rd, |
| const MemOperand& operand) { |
| CONTEXT_SCOPE; |
| VIXL_ASSERT(size.IsBest()); |
| VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) || |
| (type == kLdrsb) || (type == kLdrsh) || (type == kStr) || |
| (type == kStrb) || (type == kStrh)); |
| if (operand.IsImmediate()) { |
| const Register& rn = operand.GetBaseRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| int32_t offset = operand.GetOffsetImmediate(); |
| uint32_t extra_offset_mask = GetOffsetMask(type, addrmode); |
| // Try to maximize the offset used by the MemOperand (load_store_offset). |
| // Add the part which can't be used by the MemOperand (add_offset). |
| uint32_t load_store_offset = offset & extra_offset_mask; |
| uint32_t add_offset = offset & ~extra_offset_mask; |
| if ((add_offset != 0) && |
| (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) { |
| load_store_offset = 0; |
| add_offset = offset; |
| } |
| switch (addrmode) { |
| case PreIndex: |
| // Avoid the unpredictable case 'str r0, [r0, imm]!' |
| if (!rn.Is(rd)) { |
| // Pre-Indexed case: |
| // ldr r0, [r1, 12345]! will translate into |
| // add r1, r1, 12345 |
| // ldr r0, [r1] |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, add_offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| size, |
| rd, |
| MemOperand(rn, load_store_offset, PreIndex)); |
| } |
| return; |
| } |
| break; |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
| // Allow using the destination as a scratch register if possible. |
| if ((type != kStr) && (type != kStrb) && (type != kStrh) && |
| !rd.Is(rn)) { |
| temps.Include(rd); |
| } |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // ldr r0, [r1, 12345] will translate into |
| // add r0, r1, 12345 |
| // ldr r0, [r0] |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, scratch, rn, add_offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| size, |
| rd, |
| MemOperand(scratch, load_store_offset)); |
| } |
| return; |
| } |
| case PostIndex: |
| // Avoid the unpredictable case 'ldr r0, [r0], imm' |
| if (!rn.Is(rd)) { |
| // Post-indexed case: |
// ldr r0, [r1], imm32 will translate into
| // ldr r0, [r1] |
// movw ip, imm32 & 0xffff
| // movt ip, imm32 >> 16 |
| // add r1, r1, ip |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| size, |
| rd, |
| MemOperand(rn, load_store_offset, PostIndex)); |
| } |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, add_offset); |
| } |
| return; |
| } |
| break; |
| } |
| } else if (operand.IsPlainRegister()) { |
| const Register& rn = operand.GetBaseRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| const Register& rm = operand.GetOffsetRegister(); |
| if (rm.IsPC()) { |
| VIXL_ABORT_WITH_MSG( |
| "The MacroAssembler does not convert loads and stores with a PC " |
| "offset register.\n"); |
| } |
| if (rn.IsPC()) { |
| if (addrmode == Offset) { |
| if (IsUsingT32()) { |
| VIXL_ABORT_WITH_MSG( |
| "The MacroAssembler does not convert loads and stores with a PC " |
| "base register for T32.\n"); |
| } |
| } else { |
| VIXL_ABORT_WITH_MSG( |
| "The MacroAssembler does not convert loads and stores with a PC " |
| "base register in pre-index or post-index mode.\n"); |
| } |
| } |
| switch (addrmode) { |
| case PreIndex: |
// Avoid the unpredictable case 'str r0, [r0, r1]!'
| if (!rn.Is(rd)) { |
| // Pre-Indexed case: |
| // ldr r0, [r1, r2]! will translate into |
| // add r1, r1, r2 |
| // ldr r0, [r1] |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, rn, rn, rm); |
| } else { |
| sub(cond, rn, rn, rm); |
| } |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rd, MemOperand(rn, Offset)); |
| } |
| return; |
| } |
| break; |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
| // Allow using the destination as a scratch register if this is not a |
| // store. |
| // Avoid using PC as a temporary as this has side-effects. |
| if ((type != kStr) && (type != kStrb) && (type != kStrh) && |
| !rd.IsPC()) { |
| temps.Include(rd); |
| } |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // ldr r0, [r1, r2] will translate into |
| // add r0, r1, r2 |
| // ldr r0, [r0] |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, scratch, rn, rm); |
| } else { |
| sub(cond, scratch, rn, rm); |
| } |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset)); |
| } |
| return; |
| } |
| case PostIndex: |
// Avoid the unpredictable case 'ldr r0, [r0], r1'
| if (!rn.Is(rd)) { |
| // Post-indexed case: |
// ldr r0, [r1], r2 will translate into
| // ldr r0, [r1] |
| // add r1, r1, r2 |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, size, rd, MemOperand(rn, Offset)); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, rn, rn, rm); |
| } else { |
| sub(cond, rn, rn, rm); |
| } |
| } |
| return; |
| } |
| break; |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, size, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondRRMop instruction, |
| Condition cond, |
| Register rt, |
| Register rt2, |
| const MemOperand& operand) { |
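// Exclusive and acquire/release variants are not rewritten by the
// MacroAssembler; report them as unimplemented.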
| if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) || |
| (type == kStlexb) || (type == kStlexh) || (type == kStrex) || |
| (type == kStrexb) || (type == kStrexh)) { |
| UnimplementedDelegate(type); |
| return; |
| } |
| |
| VIXL_ASSERT((type == kLdrd) || (type == kStrd)); |
| |
| CONTEXT_SCOPE; |
| |
| // TODO: Should we allow these cases? |
| if (IsUsingA32()) { |
| // The first register needs to be even. |
| if ((rt.GetCode() & 1) != 0) { |
| UnimplementedDelegate(type); |
| return; |
| } |
| // Registers need to be adjacent. |
| if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) { |
| UnimplementedDelegate(type); |
| return; |
| } |
// LDRD lr, pc, [...] is not allowed.
| if (rt.Is(lr)) { |
| UnimplementedDelegate(type); |
| return; |
| } |
| } |
| |
| if (operand.IsImmediate()) { |
| const Register& rn = operand.GetBaseRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| int32_t offset = operand.GetOffsetImmediate(); |
| uint32_t extra_offset_mask = GetOffsetMask(type, addrmode); |
| // Try to maximize the offset used by the MemOperand (load_store_offset). |
| // Add the part which can't be used by the MemOperand (add_offset). |
| uint32_t load_store_offset = offset & extra_offset_mask; |
| uint32_t add_offset = offset & ~extra_offset_mask; |
| if ((add_offset != 0) && |
| (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) { |
| load_store_offset = 0; |
| add_offset = offset; |
| } |
| switch (addrmode) { |
| case PreIndex: { |
// Allow using the destination registers as scratch registers if possible.
| UseScratchRegisterScope temps(this); |
| if (type == kLdrd) { |
| if (!rt.Is(rn)) temps.Include(rt); |
| if (!rt2.Is(rn)) temps.Include(rt2); |
| } |
| |
| // Pre-Indexed case: |
| // ldrd r0, r1, [r2, 12345]! will translate into |
| // add r2, 12345 |
| // ldrd r0, r1, [r2] |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, add_offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| rt, |
| rt2, |
| MemOperand(rn, load_store_offset, PreIndex)); |
| } |
| return; |
| } |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
// Allow using the destination registers as scratch registers if possible.
| if (type == kLdrd) { |
| if (!rt.Is(rn)) temps.Include(rt); |
| if (!rt2.Is(rn)) temps.Include(rt2); |
| } |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // ldrd r0, r1, [r2, 12345] will translate into |
| // add r0, r2, 12345 |
| // ldrd r0, r1, [r0] |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, scratch, rn, add_offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| rt, |
| rt2, |
| MemOperand(scratch, load_store_offset)); |
| } |
| return; |
| } |
| case PostIndex: |
| // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm' |
| if (!rn.Is(rt) && !rn.Is(rt2)) { |
| // Post-indexed case: |
| // ldrd r0, r1, [r2], imm32 will translate into |
| // ldrd r0, r1, [r2] |
// movw ip, imm32 & 0xffff
| // movt ip, imm32 >> 16 |
| // add r2, ip |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, |
| rt, |
| rt2, |
| MemOperand(rn, load_store_offset, PostIndex)); |
| } |
| { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, add_offset); |
| } |
| return; |
| } |
| break; |
| } |
| } |
| if (operand.IsPlainRegister()) { |
| const Register& rn = operand.GetBaseRegister(); |
| const Register& rm = operand.GetOffsetRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| switch (addrmode) { |
| case PreIndex: |
| // ldrd r0, r1, [r2, r3]! will translate into |
| // add r2, r3 |
| // ldrd r0, r1, [r2] |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, rn, rn, rm); |
| } else { |
| sub(cond, rn, rn, rm); |
| } |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset)); |
| } |
| return; |
| case PostIndex: |
| // ldrd r0, r1, [r2], r3 will translate into |
| // ldrd r0, r1, [r2] |
| // add r2, r3 |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset)); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, rn, rn, rm); |
| } else { |
| sub(cond, rn, rn, rm); |
| } |
| } |
| return; |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
// Allow using the destination registers as scratch registers if possible.
| if (type == kLdrd) { |
| if (!rt.Is(rn)) temps.Include(rt); |
| if (!rt2.Is(rn)) temps.Include(rt2); |
| } |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // ldrd r0, r1, [r2, r3] will translate into |
| // add r0, r2, r3 |
| // ldrd r0, r1, [r0] |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| if (operand.GetSign().IsPlus()) { |
| add(cond, scratch, rn, rm); |
| } else { |
| sub(cond, scratch, rn, rm); |
| } |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset)); |
| } |
| return; |
| } |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, rt, rt2, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtSMop instruction, |
| Condition cond, |
| DataType dt, |
| SRegister rd, |
| const MemOperand& operand) { |
| CONTEXT_SCOPE; |
| if (operand.IsImmediate()) { |
| const Register& rn = operand.GetBaseRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| int32_t offset = operand.GetOffsetImmediate(); |
| VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) || |
| ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0)); |
| if (rn.IsPC()) { |
| VIXL_ABORT_WITH_MSG( |
| "The MacroAssembler does not convert vldr or vstr with a PC base " |
| "register.\n"); |
| } |
| switch (addrmode) { |
| case PreIndex: |
| // Pre-Indexed case: |
| // vldr.32 s0, [r1, 12345]! will translate into |
| // add r1, 12345 |
| // vldr.32 s0, [r1] |
| if (offset != 0) { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset)); |
| } |
| return; |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // vldr.32 s0, [r1, 12345] will translate into |
| // add ip, r1, 12345 |
| // vldr.32 s0, [ip] |
| { |
| VIXL_ASSERT(offset != 0); |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, scratch, rn, offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset)); |
| } |
| return; |
| } |
| case PostIndex: |
| // Post-indexed case: |
| // vldr.32 s0, [r1], imm32 will translate into |
| // vldr.32 s0, [r1] |
// movw ip, imm32 & 0xffff
| // movt ip, imm32 >> 16 |
| // add r1, ip |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset)); |
| } |
| if (offset != 0) { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, offset); |
| } |
| return; |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, dt, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtDMop instruction, |
| Condition cond, |
| DataType dt, |
| DRegister rd, |
| const MemOperand& operand) { |
| CONTEXT_SCOPE; |
| if (operand.IsImmediate()) { |
| const Register& rn = operand.GetBaseRegister(); |
| AddrMode addrmode = operand.GetAddrMode(); |
| int32_t offset = operand.GetOffsetImmediate(); |
| VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) || |
| ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0)); |
| if (rn.IsPC()) { |
| VIXL_ABORT_WITH_MSG( |
| "The MacroAssembler does not convert vldr or vstr with a PC base " |
| "register.\n"); |
| } |
| switch (addrmode) { |
| case PreIndex: |
| // Pre-Indexed case: |
| // vldr.64 d0, [r1, 12345]! will translate into |
| // add r1, 12345 |
| // vldr.64 d0, [r1] |
| if (offset != 0) { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset)); |
| } |
| return; |
| case Offset: { |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| // Offset case: |
| // vldr.64 d0, [r1, 12345] will translate into |
| // add ip, r1, 12345 |
// vldr.64 d0, [ip]
| { |
| VIXL_ASSERT(offset != 0); |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, scratch, rn, offset); |
| } |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset)); |
| } |
| return; |
| } |
| case PostIndex: |
| // Post-indexed case: |
// vldr.64 d0, [r1], imm32 will translate into
| // vldr.64 d0, [r1] |
// movw ip, imm32 & 0xffff
| // movt ip, imm32 >> 16 |
| // add r1, ip |
| { |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset)); |
| } |
| if (offset != 0) { |
| CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes); |
| add(cond, rn, rn, offset); |
| } |
| return; |
| } |
| } |
| Assembler::Delegate(type, instruction, cond, dt, rd, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondMsrOp instruction, |
| Condition cond, |
| MaskedSpecialRegister spec_reg, |
| const Operand& operand) { |
| USE(type); |
| VIXL_ASSERT(type == kMsr); |
| if (operand.IsImmediate()) { |
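// The Assembler could not encode this immediate form directly; move the
// immediate into a scratch register and use the register form of msr
// instead.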
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| { |
| CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes); |
| mov(cond, scratch, operand); |
| } |
| CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes); |
| msr(cond, spec_reg, scratch); |
| return; |
| } |
| Assembler::Delegate(type, instruction, cond, spec_reg, operand); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtDL instruction, |
| Condition cond, |
| DataType dt, |
| DRegister rd, |
| Location* location) { |
| VIXL_ASSERT(type == kVldr); |
| |
| CONTEXT_SCOPE; |
| |
| if (location->IsBound()) { |
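// The literal is already bound: compute its address into a scratch register
// and perform a plain vldr from that address.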
| CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes); |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| uint32_t mask = GetOffsetMask(type, Offset); |
| vldr(dt, rd, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| } |
| |
| Assembler::Delegate(type, instruction, cond, dt, rd, location); |
| } |
| |
| |
| void MacroAssembler::Delegate(InstructionType type, |
| InstructionCondDtSL instruction, |
| Condition cond, |
| DataType dt, |
| SRegister rd, |
| Location* location) { |
| VIXL_ASSERT(type == kVldr); |
| |
| CONTEXT_SCOPE; |
| |
| if (location->IsBound()) { |
| CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes); |
| UseScratchRegisterScope temps(this); |
| Register scratch = temps.Acquire(); |
| uint32_t mask = GetOffsetMask(type, Offset); |
| vldr(dt, rd, MemOperandComputationHelper(cond, scratch, location, mask)); |
| return; |
| } |
| |
| Assembler::Delegate(type, instruction, cond, dt, rd, location); |
| } |
| |
| |
| #undef CONTEXT_SCOPE |
| #undef TOSTRING |
| #undef STRINGIFY |
| |
| // Start of generated code. |
| // End of generated code. |
| } // namespace aarch32 |
| } // namespace vixl |