| /* |
| * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| * |
| */ |
| |
| #ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP |
| #define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP |
| |
| // C2_MacroAssembler contains high-level macros for C2 |
| |
| private: |
| // Return true if the phase output is in the scratch emit size mode. |
| // Overrides a virtual function declared by a base class; both the base declaration |
| // and the definition live outside this header fragment. |
| virtual bool in_scratch_emit_size() override; |
| |
| // Private helper, presumably used by neon_reduce_logical (definition not visible here). |
| // opc selects the operation; sf presumably selects the operand width -- TODO confirm. |
| // Rd, Rn and Rm are general-purpose registers. kind and shift are optional and |
| // default to Assembler::LSL and 0 respectively. |
| void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm, |
| enum shift_kind kind = Assembler::LSL, unsigned shift = 0); |
| |
| public: |
| // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. |
| // See full description in macroAssembler_aarch64.cpp. |
| // Takes the object and box registers plus three registers named as temporaries. |
| void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3); |
| // Same leading parameters as fast_lock, but with two temporaries instead of three. |
| void fast_unlock(Register object, Register box, Register tmp, Register tmp2); |
| |
| // String comparison. str1/str2 and cnt1/cnt2 name the two inputs and their counts, |
| // and result names the output register. The remaining general-purpose, vector and |
| // predicate registers are named as temporaries. |
| // NOTE(review): ae presumably selects the argument encoding -- confirm against the |
| // definition. |
| void string_compare(Register str1, Register str2, |
| Register cnt1, Register cnt2, Register result, |
| Register tmp1, Register tmp2, FloatRegister vtmp1, |
| FloatRegister vtmp2, FloatRegister vtmp3, |
| PRegister pgtmp1, PRegister pgtmp2, int ae); |
| |
| // String index-of search. str1/str2 and cnt1/cnt2 name the two strings and their |
| // counts; tmp1..tmp6 are named as temporaries and result names the output register. |
| // NOTE(review): int_cnt1 looks like a count known at code-generation time and ae an |
| // argument-encoding selector -- confirm both against the definition. |
| void string_indexof(Register str1, Register str2, |
| Register cnt1, Register cnt2, |
| Register tmp1, Register tmp2, |
| Register tmp3, Register tmp4, |
| Register tmp5, Register tmp6, |
| int int_cnt1, Register result, int ae); |
| |
| // Index-of-char search: str1 and cnt1 name the string and its count, ch the |
| // character searched for, and result the output register. tmp1..tmp3 are named as |
| // temporaries. |
| void string_indexof_char(Register str1, Register cnt1, |
| Register ch, Register result, |
| Register tmp1, Register tmp2, Register tmp3); |
| |
| // Same parameter list as string_indexof_char. |
| // NOTE(review): the "L" in the name presumably marks the Latin-1 variant -- confirm. |
| void stringL_indexof_char(Register str1, Register cnt1, |
| Register ch, Register result, |
| Register tmp1, Register tmp2, Register tmp3); |
| |
| // SVE flavour of the index-of-char search (per the name). Takes two vector registers |
| // and two predicate registers named as temporaries. |
| // NOTE(review): isL presumably selects the Latin-1 case -- confirm. |
| void string_indexof_char_sve(Register str1, Register cnt1, |
| Register ch, Register result, |
| FloatRegister ztmp1, FloatRegister ztmp2, |
| PRegister pgtmp, PRegister ptmp, bool isL); |
| |
| // Compress the least significant bit of each byte into the rightmost bits of dst |
| // and clear the higher garbage bits. |
| // dst is the only operand, so it presumably serves as both input and output. |
| void bytemask_compress(Register dst); |
| |
| // Pack the lowest-numbered bit of each mask element in src into a long value |
| // in dst, at most the first 64 lane elements. |
| // bt is the element type and lane_cnt the lane count; vtmp1/vtmp2 are vector |
| // registers named as temporaries. |
| void sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt, |
| FloatRegister vtmp1, FloatRegister vtmp2); |
| |
| // Unpack the mask, a long value in src, into predicate register dst based on the |
| // corresponding data type. Note that dst can support at most 64 lanes. |
| // bt is the element type and lane_cnt the lane count; vtmp1/vtmp2 are vector |
| // registers named as temporaries. |
| void sve_vmask_fromlong(PRegister dst, Register src, BasicType bt, int lane_cnt, |
| FloatRegister vtmp1, FloatRegister vtmp2); |
| |
| // SIMD&FP comparison |
| // Two-source vector compare: src1 and src2 are compared under cond for element |
| // type bt, with dst as the destination vector register. |
| // NOTE(review): isQ presumably selects the 128-bit (Q) register form -- confirm. |
| void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, |
| FloatRegister src2, Condition cond, bool isQ); |
| |
| // Single-source form of neon_compare; judging by the name, src is compared |
| // against zero under cond. |
| void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src, |
| Condition cond, bool isQ); |
| |
| // SVE compare: pd is a predicate destination, and zn/zm are the vector operands |
| // compared under cond for element type bt. |
| // NOTE(review): pg is presumably the governing predicate -- confirm. |
| void sve_compare(PRegister pd, BasicType bt, PRegister pg, |
| FloatRegister zn, FloatRegister zm, Condition cond); |
| |
| // Judging by the name, writes to dst the position of the last true lane of the |
| // predicate src for element type bt -- TODO confirm, including the result for an |
| // all-false mask. ptmp is a predicate register named as a temporary. |
| void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp); |
| |
| // Vector cast |
| // NEON cast from element type src_bt to dst_bt (presumably a wider type, per the |
| // name). The vector length in bytes is given for the destination. |
| void neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes, |
| FloatRegister src, BasicType src_bt); |
| |
| // NEON cast from element type src_bt to dst_bt (presumably a narrower type, per |
| // the name). The vector length in bytes is given for the source. |
| void neon_vector_narrow(FloatRegister dst, BasicType dst_bt, |
| FloatRegister src, BasicType src_bt, unsigned src_vlen_in_bytes); |
| |
| // SVE cast between element sizes src_size and dst_size (presumably to a wider |
| // size, per the name). |
| void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size, |
| FloatRegister src, SIMD_RegVariant src_size); |
| |
| // SVE cast between element sizes src_size and dst_size (presumably to a narrower |
| // size, per the name). tmp is a vector register named as a temporary. |
| void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size, |
| FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp); |
| |
| // Predicate (mask) cast, presumably to a wider element size per the name. dst and |
| // src are predicate registers; the two uint arguments give the destination and |
| // source element lengths in bytes. |
| void sve_vmaskcast_extend(PRegister dst, PRegister src, |
| uint dst_element_length_in_bytes, uint src_element_length_in_bytes); |
| |
| // Predicate (mask) cast, presumably to a narrower element size per the name. dst |
| // and src are predicate registers and ptmp is a predicate register named as a |
| // temporary; the two uint arguments give the destination and source element |
| // lengths in bytes. |
| void sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp, |
| uint dst_element_length_in_bytes, uint src_element_length_in_bytes); |
| |
| // Vector reduction |
| // NEON add reduction for integral element type bt. Takes a scalar input isrc and a |
| // vector input vsrc of vector_length_in_bytes bytes, with the scalar destination |
| // dst. vtmp is a vector register named as a temporary. |
| // NOTE(review): presumably isrc is folded into the reduction of vsrc -- confirm. |
| void neon_reduce_add_integral(Register dst, BasicType bt, |
| Register isrc, FloatRegister vsrc, |
| unsigned vector_length_in_bytes, FloatRegister vtmp); |
| |
| // NEON multiply reduction for integral element type bt. Same parameters as |
| // neon_reduce_add_integral, but with two vector temporaries instead of one. |
| void neon_reduce_mul_integral(Register dst, BasicType bt, |
| Register isrc, FloatRegister vsrc, |
| unsigned vector_length_in_bytes, |
| FloatRegister vtmp1, FloatRegister vtmp2); |
| |
| // NEON multiply reduction for floating-point element type bt (per the name). Unlike |
| // the integral reductions, dst and the scalar input fsrc are FloatRegisters. |
| void neon_reduce_mul_fp(FloatRegister dst, BasicType bt, |
| FloatRegister fsrc, FloatRegister vsrc, |
| unsigned vector_length_in_bytes, FloatRegister vtmp); |
| |
| // NEON logical reduction; opc selects which operation is performed. Takes a scalar |
| // input isrc and a vector input vsrc of vector_length_in_bytes bytes, and no |
| // explicit temporaries. |
| void neon_reduce_logical(int opc, Register dst, BasicType bt, Register isrc, |
| FloatRegister vsrc, unsigned vector_length_in_bytes); |
| |
| // NEON min/max reduction for integral element type bt; opc presumably selects |
| // between min and max -- TODO confirm. vtmp is a vector register named as a |
| // temporary. |
| void neon_reduce_minmax_integral(int opc, Register dst, BasicType bt, |
| Register isrc, FloatRegister vsrc, |
| unsigned vector_length_in_bytes, FloatRegister vtmp); |
| |
| // SVE integral reduction; opc selects the operation. src1 is a scalar input and |
| // src2 a vector input; tmp is a vector register named as a temporary. |
| // NOTE(review): pg is presumably the governing predicate -- confirm. |
| void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1, |
| FloatRegister src2, PRegister pg, FloatRegister tmp); |
| |
| // Set elements of the dst predicate to true for lanes in the range of |
| // [0, lane_cnt), or to false otherwise. The input "lane_cnt" should be |
| // smaller than or equal to the supported max vector length of the basic |
| // type. Clobbers: rscratch1 and the rFlagsReg. |
| // lane_cnt is passed as an immediate (uint32_t), not in a register. |
| void sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t lane_cnt); |
| |
| // Extract a scalar element from an SVE vector at position 'idx'. |
| // The input elements in src are expected to be of integral type. |
| // idx is passed as an int rather than in a register; vtmp is a vector register |
| // named as a temporary. |
| void sve_extract_integral(Register dst, BasicType bt, FloatRegister src, |
| int idx, FloatRegister vtmp); |
| |
| // java.lang.Math::round intrinsics |
| // NEON version: tmp1..tmp3 are vector registers named as temporaries and T is the |
| // SIMD arrangement. |
| void vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1, |
| FloatRegister tmp2, FloatRegister tmp3, |
| SIMD_Arrangement T); |
| // SVE version: two vector temporaries plus a predicate temporary, and T is a |
| // SIMD_RegVariant rather than an arrangement. |
| void vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1, |
| FloatRegister tmp2, PRegister pgtmp, |
| SIMD_RegVariant T); |
| |
| // Pack active elements of src, under the control of mask, into the |
| // lowest-numbered elements of dst. Any remaining elements of dst will |
| // be filled with zero. |
| // Byte-element variant (per the name). vtmp1..vtmp4 are vector registers and |
| // ptmp/pgtmp predicate registers, all named as temporaries. |
| void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask, |
| FloatRegister vtmp1, FloatRegister vtmp2, |
| FloatRegister vtmp3, FloatRegister vtmp4, |
| PRegister ptmp, PRegister pgtmp); |
| |
| // Short-element variant (per the name); presumably the same contract as |
| // sve_compress_byte -- TODO confirm. Takes two vector temporaries and one predicate |
| // temporary. |
| void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask, |
| FloatRegister vtmp1, FloatRegister vtmp2, |
| PRegister pgtmp); |
| |
| // Judging by the name, reverses the bit order within each element of type bt from |
| // src into dst -- TODO confirm. isQ presumably selects the 128-bit register form. |
| void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ); |
| |
| // Judging by the name, reverses the byte order within each element of type bt from |
| // src into dst -- TODO confirm. isQ presumably selects the 128-bit register form. |
| void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ); |
| |
| // java.lang.Math::signum intrinsics |
| // NEON version; T is the SIMD arrangement. |
| // NOTE(review): zero and one are presumably vector registers preloaded with the |
| // constants 0.0 and 1.0 -- confirm against the caller. |
| void vector_signum_neon(FloatRegister dst, FloatRegister src, FloatRegister zero, |
| FloatRegister one, SIMD_Arrangement T); |
| |
| // SVE version; additionally takes a vector temporary and a predicate temporary, and |
| // T is a SIMD_RegVariant. |
| // NOTE(review): zero and one are presumably vector registers preloaded with the |
| // constants 0.0 and 1.0 -- confirm against the caller. |
| void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero, |
| FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T); |
| |
| #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP |