/*
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020 Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "compiler/disassembler.hpp"
#include "immediate_aarch64.hpp"
#include "memory/resourceArea.hpp"
#include "metaprogramming/primitiveConversions.hpp"
#ifndef PRODUCT
const uintptr_t Assembler::asm_bp = 0x0000ffffac221240;
#endif
static float unpack(unsigned value);
short Assembler::SIMD_Size_in_bytes[] = {
// T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
8, 16, 8, 16, 8, 16, 8, 16, 16
};
Assembler::SIMD_Arrangement Assembler::_esize2arrangement_table[9][2] = {
// esize isQ:false isQ:true
/* 0 */ {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
/* 1 */ {T8B, T16B},
/* 2 */ {T4H, T8H},
/* 3 */ {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
/* 4 */ {T2S, T4S},
/* 5 */ {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
/* 6 */ {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
/* 7 */ {INVALID_ARRANGEMENT, INVALID_ARRANGEMENT},
/* 8 */ {T1D, T2D}
};
Assembler::SIMD_RegVariant Assembler::_esize2regvariant[9] = {
INVALID,
B,
H,
INVALID,
S,
INVALID,
INVALID,
INVALID,
D,
};
Assembler::SIMD_Arrangement Assembler::esize2arrangement(unsigned esize, bool isQ) {
guarantee(esize < ARRAY_SIZE(_esize2arrangement_table) &&
_esize2arrangement_table[esize][isQ] != INVALID_ARRANGEMENT, "unsupported element size");
return _esize2arrangement_table[esize][isQ];
}
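// Illustrative mapping (not from the original source), reading off the
// table above: the element size in bytes plus the Q bit selects an
// arrangement.
//   esize2arrangement(1, /*isQ=*/false) => T8B  (eight  8-bit lanes,  64-bit)
//   esize2arrangement(4, /*isQ=*/true)  => T4S  (four  32-bit lanes, 128-bit)
//   esize2arrangement(8, /*isQ=*/true)  => T2D  (two   64-bit lanes, 128-bit)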
Assembler::SIMD_RegVariant Assembler::elemBytes_to_regVariant(unsigned esize) {
guarantee(esize < ARRAY_SIZE(_esize2regvariant) && _esize2regvariant[esize] != INVALID,
"unsupported element size");
return _esize2regvariant[esize];
}
Assembler::SIMD_RegVariant Assembler::elemType_to_regVariant(BasicType bt) {
return elemBytes_to_regVariant(type2aelembytes(bt));
}
unsigned Assembler::regVariant_to_elemBits(Assembler::SIMD_RegVariant T) {
guarantee(T != Q, "Invalid register variant");
return 1 << (T + 3);
}
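// Worked example (illustrative, assuming the usual B=0, H=1, S=2, D=3
// enumeration): 1 << (T + 3) yields the element width in bits, so
//   regVariant_to_elemBits(B) => 8,  regVariant_to_elemBits(H) => 16,
//   regVariant_to_elemBits(S) => 32, regVariant_to_elemBits(D) => 64.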
void Assembler::emit_data64(jlong data,
relocInfo::relocType rtype,
int format) {
if (rtype == relocInfo::none) {
emit_int64(data);
} else {
emit_data64(data, Relocation::spec_simple(rtype), format);
}
}
void Assembler::emit_data64(jlong data,
RelocationHolder const& rspec,
int format) {
assert(inst_mark() != nullptr, "must be inside InstructionMark");
// Do not use AbstractAssembler::relocate, which is not intended for
// embedded words. Instead, relocate to the enclosing instruction.
code_section()->relocate(inst_mark(), rspec, format);
emit_int64(data);
}
extern "C" {
// Debugger helper: disassemble 'len' instructions starting at 'start';
// a negative 'len' disassembles the instructions preceding 'start'.
void das(uint64_t start, int len) {
ResourceMark rm;
len <<= 2; // convert a count of 4-byte instructions to a byte length
if (len < 0)
Disassembler::decode((address)start + len, (address)start);
else
Disassembler::decode((address)start, (address)start + len);
}
JNIEXPORT void das1(uintptr_t insn) {
das(insn, 1);
}
}
#define __ as->
void Address::lea(MacroAssembler *as, Register r) const {
switch(_mode) {
case base_plus_offset: {
if (offset() == 0 && base() == r) // it's a nop
break;
if (offset() > 0)
__ add(r, base(), offset());
else
__ sub(r, base(), -offset());
break;
}
case base_plus_offset_reg: {
__ add(r, base(), index(), ext().op(), MAX2(ext().shift(), 0));
break;
}
case literal: {
as->code_section()->relocate(as->inst_mark(), rspec());
if (rspec().type() == relocInfo::none)
__ mov(r, target());
else
__ movptr(r, (uint64_t)target());
break;
}
default:
ShouldNotReachHere();
}
}
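// Usage sketch (illustrative; 'masm' is a placeholder MacroAssembler*):
// lea materializes the effective address of an Address into a register.
//   Address(r1, 24).lea(masm, r0);  // emits: add r0, r1, #24
//   Address(r1, -8).lea(masm, r0);  // emits: sub r0, r1, #8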
#undef __
#define starti Instruction_aarch64 current_insn(this);
#define f current_insn.f
#define sf current_insn.sf
#define rf current_insn.rf
#define srf current_insn.srf
#define zrf current_insn.zrf
#define prf current_insn.prf
#define pgrf current_insn.pgrf
#define fixed current_insn.fixed
void Assembler::adr(Register Rd, address adr) {
intptr_t offset = adr - pc();
int offset_lo = offset & 3;
offset >>= 2;
starti;
f(0, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
rf(Rd, 0);
}
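// Worked example (illustrative): ADR encodes a 21-bit signed byte offset
// split as immhi:immlo, with immlo (the low 2 bits) in bits 30:29 and
// immhi in bits 23:5. For a target 12 bytes past pc(): offset = 12,
// offset_lo = 12 & 3 = 0, offset >> 2 = 3, so immlo = 0 and immhi = 3.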
void Assembler::_adrp(Register Rd, address adr) {
uint64_t pc_page = (uint64_t)pc() >> 12;
uint64_t adr_page = (uint64_t)adr >> 12;
intptr_t offset = adr_page - pc_page;
int offset_lo = offset & 3;
offset >>= 2;
starti;
f(1, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
zrf(Rd, 0);
}
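// Worked example (illustrative): ADRP works in 4KB pages, giving a reach
// of +/-4GB. If pc() lies in page 0x12345 and adr in page 0x12348, the
// page delta is 3: immlo = 3 & 3 = 3, immhi = 3 >> 2 = 0.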
// This encoding is similar (but not quite identical) to the encoding used
// by the literal ld/st instructions; see JDK-8324123.
// PRFM does not support writeback or pre-/post-indexing.
void Assembler::prfm(const Address &adr, prfop pfop) {
Address::mode mode = adr.getMode();
// PRFM does not support pre/post index
guarantee((mode != Address::pre) && (mode != Address::post), "prfm does not support pre/post indexing");
if (mode == Address::literal) {
starti;
f(0b11, 31, 30), f(0b011, 29, 27), f(0b000, 26, 24);
f(pfop, 4, 0);
int64_t offset = (adr.target() - pc()) >> 2;
sf(offset, 23, 5);
} else {
assert((mode == Address::base_plus_offset)
|| (mode == Address::base_plus_offset_reg), "must be base_plus_offset/base_plus_offset_reg");
ld_st2(as_Register(pfop), adr, 0b11, 0b10);
}
}
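// Usage sketch (illustrative; PLDL1KEEP is one of the standard AArch64
// prefetch operations): immediate- and register-offset forms go through
// ld_st2 above, while the literal form is PC-relative.
//   prfm(Address(r1, 64), PLDL1KEEP);  // emits: prfm PLDL1KEEP, [r1, #64]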
// An "all-purpose" add/subtract immediate, per ARM documentation:
// A "programmer-friendly" assembler may accept a negative immediate
// between -(2^24 -1) and -1 inclusive, causing it to convert a
// requested ADD operation to a SUB, or vice versa, and then encode
// the absolute value of the immediate as for uimm24.
void Assembler::add_sub_immediate(Instruction_aarch64 &current_insn,
Register Rd, Register Rn, unsigned uimm, int op,
int negated_op) {
bool sets_flags = op & 1; // the low bit of op is the S bit: the flag-setting (ADDS/SUBS) form
union {
unsigned u;
int imm;
};
u = uimm;
bool shift = false;
bool neg = imm < 0;
if (neg) {
imm = -imm;
op = negated_op;
}
assert(Rd != sp || imm % 16 == 0, "misaligned stack");
if (imm >= (1 << 11)
&& ((imm >> 12) << 12 == imm)) {
imm >>= 12;
shift = true;
}
f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10);
// add/subtract immediate ops with the S bit set treat r31 as zr;
// with S unset they use sp.
if (sets_flags)
zrf(Rd, 0);
else
srf(Rd, 0);
srf(Rn, 5);
}
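// Worked examples (illustrative): an immediate of 4096 (0x1000) has its
// low 12 bits clear, so it is encoded as imm = 1 with shift = 1
// (i.e. #1, LSL #12). An immediate of -16 flips op to negated_op, so a
// requested add Rd, Rn, #-16 is emitted as sub Rd, Rn, #16 (and vice
// versa).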
#undef f
#undef sf
#undef rf
#undef srf
#undef zrf
#undef prf
#undef pgrf
#undef fixed
#undef starti
#ifdef ASSERT
void Address::assert_is_literal() const {
assert(_mode == literal, "addressing mode is non-literal: %d", _mode);
}
void Address::assert_is_nonliteral() const {
assert(_mode != literal, "unexpected literal addressing mode");
assert(_mode != no_mode, "unexpected no_mode addressing mode");
}
#endif // ASSERT
static RelocationHolder address_relocation(address target, relocInfo::relocType rtype) {
switch (rtype) {
case relocInfo::oop_type:
case relocInfo::metadata_type:
// Oops are a special case. Normally they would have their own section,
// but in cases like icBuffer they are literals in the code stream for
// which we have no section. We use none so that we get a literal
// address, which is always patchable.
return RelocationHolder::none;
case relocInfo::external_word_type:
return external_word_Relocation::spec(target);
case relocInfo::internal_word_type:
return internal_word_Relocation::spec(target);
case relocInfo::opt_virtual_call_type:
return opt_virtual_call_Relocation::spec();
case relocInfo::static_call_type:
return static_call_Relocation::spec();
case relocInfo::runtime_call_type:
return runtime_call_Relocation::spec();
case relocInfo::poll_type:
case relocInfo::poll_return_type:
return Relocation::spec_simple(rtype);
case relocInfo::none:
return RelocationHolder::none;
default:
ShouldNotReachHere();
return RelocationHolder::none;
}
}
Address::Address(address target, relocInfo::relocType rtype) :
_mode(literal),
_literal(target, address_relocation(target, rtype))
{}
void Assembler::b(const Address &dest) {
code_section()->relocate(pc(), dest.rspec());
b(dest.target());
}
void Assembler::bl(const Address &dest) {
code_section()->relocate(pc(), dest.rspec());
bl(dest.target());
}
void Assembler::adr(Register r, const Address &dest) {
code_section()->relocate(pc(), dest.rspec());
adr(r, dest.target());
}
void Assembler::br(Condition cc, Label &L) {
if (L.is_bound()) {
br(cc, target(L));
} else {
L.add_patch_at(code(), locator());
br(cc, pc());
}
}
void Assembler::wrap_label(Label &L,
Assembler::uncond_branch_insn insn) {
if (L.is_bound()) {
(this->*insn)(target(L));
} else {
L.add_patch_at(code(), locator());
(this->*insn)(pc());
}
}
void Assembler::wrap_label(Register r, Label &L,
compare_and_branch_insn insn) {
if (L.is_bound()) {
(this->*insn)(r, target(L));
} else {
L.add_patch_at(code(), locator());
(this->*insn)(r, pc());
}
}
void Assembler::wrap_label(Register r, int bitpos, Label &L,
test_and_branch_insn insn) {
if (L.is_bound()) {
(this->*insn)(r, bitpos, target(L));
} else {
L.add_patch_at(code(), locator());
(this->*insn)(r, bitpos, pc());
}
}
void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) {
if (L.is_bound()) {
(this->*insn)(target(L), op);
} else {
L.add_patch_at(code(), locator());
(this->*insn)(pc(), op);
}
}
bool Assembler::operand_valid_for_add_sub_immediate(int64_t imm) {
return operand_valid_for_immediate_bits(imm, 12);
}
bool Assembler::operand_valid_for_sve_add_sub_immediate(int64_t imm) {
return operand_valid_for_immediate_bits(imm, 8);
}
bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
return encode_logical_immediate(is32, imm) != 0xffffffff;
}
// Check whether imm64 can be encoded as a movi immediate.
// Returns the shift amount, which can be {0, 8, 16, 24} for the B/H/S types.
// The D-type movi has no shift variant, so for T1D/T2D the return value is
// the encoded immediate itself.
// Returns -1 if imm64 cannot be encoded.
int Assembler::operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T) {
if (T == T1D || T == T2D) {
// To be encodable by movi, every byte of the 64-bit imm must be either
// all ones or all zeros, i.e. it has the form
// 'aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh',
// which is encoded as the 8-bit value "a:b:c:d:e:f:g:h".
uint64_t tmp = imm64;
uint64_t one_byte = 0;
for (int i = 0; i < 8; i++) {
one_byte = tmp & 0xffULL;
if (one_byte != 0xffULL && one_byte != 0) {
return -1; // cannot be encoded
}
tmp = tmp >> 8;
}
imm64 &= 0x0101010101010101ULL;
imm64 |= (imm64 >> 7);
imm64 |= (imm64 >> 14);
imm64 |= (imm64 >> 28);
return imm64 & 0xff;
}
uint32_t imm32 = imm64 & 0xffffffffULL;
if (T == T8B || T == T16B) { // 8-bit variant
if (0 == (imm32 & ~0xff)) return 0;
} else if (T == T4H || T == T8H) { // 16-bit variant
if (0 == (imm32 & ~0xff)) return 0;
if (0 == (imm32 & ~0xff00)) return 8;
} else if (T == T2S || T == T4S) { // 32-bit variant
if (0 == (imm32 & ~0xff)) return 0;
if (0 == (imm32 & ~0xff00)) return 8;
if (0 == (imm32 & ~0xff0000)) return 16;
if (0 == (imm32 & ~0xff000000)) return 24;
} else {
assert(false, "unsupported");
ShouldNotReachHere();
}
return -1;
}
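// Worked examples (illustrative):
//   T2D, imm64 = 0x00ff00ff00ff00ff: every byte is 0x00 or 0xff, so the
//   bytes compact to one bit each, giving "a:b:c:d:e:f:g:h" = 0b01010101
//   and a return value of 0x55.
//   T4H, imm64 = 0xab00: only bits 15:8 are set, so the return value is 8
//   (encode movi with #0xab, LSL #8); T4H with 0xab0000 returns -1.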
bool Assembler::operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm) {
return encode_sve_logical_immediate(elembits, imm) != 0xffffffff;
}
static uint64_t doubleTo64Bits(jdouble d) {
union {
jdouble double_value;
uint64_t double_bits;
};
double_value = d;
return double_bits;
}
bool Assembler::operand_valid_for_float_immediate(double imm) {
// If imm is all zero bits we can use ZR as the source of a
// floating-point value.
if (doubleTo64Bits(imm) == 0)
return true;
// Otherwise try to encode imm then convert the encoded value back
// and make sure it's the exact same bit pattern.
unsigned result = encoding_for_fp_immediate(imm);
return doubleTo64Bits(imm) == fp_immediate_for_encoding(result, true);
}
int AbstractAssembler::code_fill_byte() {
return 0;
}
// n.b. this is implemented in subclass MacroAssembler
void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); }
bool asm_util::operand_valid_for_immediate_bits(int64_t imm, unsigned nbits) {
guarantee(nbits == 8 || nbits == 12, "invalid nbits value");
uint64_t uimm = (uint64_t)uabs((jlong)imm);
if (uimm < (UCONST64(1) << nbits))
return true;
if (uimm < (UCONST64(1) << (2 * nbits))
&& ((uimm >> nbits) << nbits == uimm)) {
return true;
}
return false;
}
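// Worked example (illustrative), for nbits = 12: any |imm| below 4096 is
// valid, and so is any |imm| below 2^24 whose low 12 bits are all zero
// (encodable with LSL #12). Thus 0x1000 is valid but 0x1001 is not.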
// The routines below are called by the assembler; they encapsulate the
// encode and decode functions above.
uint32_t
asm_util::encode_logical_immediate(bool is32, uint64_t imm)
{
if (is32) {
/* Allow all zeros or all ones in top 32-bits, so that
constant expressions like ~1 are permitted. */
if (imm >> 32 != 0 && imm >> 32 != 0xffffffff)
return 0xffffffff;
/* Replicate the 32 lower bits to the 32 upper bits. */
imm &= 0xffffffff;
imm |= imm << 32;
}
return encoding_for_logical_immediate(imm);
}
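// Worked example (illustrative): with is32, imm = (uint64_t)(int64_t)~1 =
// 0xfffffffffffffffe has all ones in its top 32 bits, so it is accepted;
// the low word 0xfffffffe is replicated to 0xfffffffefffffffe before the
// bitmask-immediate encoder runs.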
uint32_t
asm_util::encode_sve_logical_immediate(unsigned elembits, uint64_t imm) {
guarantee(elembits == 8 || elembits == 16 ||
elembits == 32 || elembits == 64, "unsupported element size");
// Shift in two halves: a single shift by elembits would be undefined
// behaviour when elembits == 64.
uint64_t upper = UCONST64(-1) << (elembits/2) << (elembits/2);
/* Allow all zeros or all ones in top bits, so that
* constant expressions like ~1 are permitted. */
if ((imm & ~upper) != imm && (imm | upper) != imm)
return 0xffffffff;
// Replicate the immediate in different element sizes to 64 bits.
imm &= ~upper;
for (unsigned i = elembits; i < 64; i *= 2) {
imm |= (imm << i);
}
return encoding_for_logical_immediate(imm);
}
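// Worked example (illustrative): elembits = 16, imm = 0x00ff. The value
// fits in one 16-bit element, so it is replicated to 0x00ff00ff and then
// to 0x00ff00ff00ff00ff, and encoded as an ordinary 64-bit bitmask
// immediate.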
unsigned Assembler::pack(double value) {
float val = (float)value;
unsigned result = encoding_for_fp_immediate(val);
guarantee(unpack(result) == value,
"Invalid floating-point immediate operand");
return result;
}
// Packed operands for Floating-point Move (immediate)
static float unpack(unsigned value) {
unsigned ival = fp_immediate_for_encoding(value, 0);
return PrimitiveConversions::cast<float>(ival);
}
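// Worked examples (illustrative, assuming the standard AArch64 FMOV
// (immediate) format): the 8-bit immediate represents
// (-1)^s * (1 + m/16) * 2^e with m in [0, 15] and e in [-3, 4], i.e.
// magnitudes from 0.125 to 31.0. Sample encodings: 2.0 => 0x00,
// 1.0 => 0x70, 0.5 => 0x60, -1.0 => 0xf0.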
address Assembler::locate_next_instruction(address inst) {
return inst + Assembler::instruction_size;
}