//! Lowering rules for X64.
// ISLE integration glue.
pub(super) mod isle;
use crate::ir::{types, ExternalName, Inst as IRInst, LibCall, Opcode, Type};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::Flags;
use smallvec::SmallVec;
use target_lexicon::Triple;
//=============================================================================
// Helpers for instruction lowering.
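/// Returns whether the given type is a scalar integer, boolean, or reference
/// type, i.e. one that fits in a single 64-bit general-purpose register.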
fn is_int_or_ref_ty(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
types::R32 => panic!("shouldn't have 32-bit refs on x64"),
_ => false,
}
}
/// Returns the instruction that produces the specified `input` if that
/// instruction has opcode `op`; otherwise returns `None`.
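///
/// For example (an illustrative sketch; `insn` and the operand index are
/// hypothetical), a lowering rule can check whether operand 0 of `insn` is
/// produced by an `iadd`:
///
/// ```text
/// if let Some(add) = matches_input(ctx, InsnInput { insn, input: 0 }, Opcode::Iadd) {
///     // `add` is the CLIF `iadd` instruction feeding operand 0 of `insn`.
/// }
/// ```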
// TODO investigate failures with checking against the result index.
fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<IRInst> {
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
inputs.inst.as_inst().and_then(|(src_inst, _)| {
let data = ctx.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
None
})
}
/// Emits instruction(s) to generate the given 64-bit constant value into a newly-allocated
/// temporary register, returning that register.
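///
/// As a small worked example of the masking below (values are illustrative):
/// for `ty = I8`, `from_bits = 8`, so `c = 0x1234` is truncated to its low
/// 8 bits:
///
/// ```text
/// let masked = 0x1234u64 & ((1u64 << 8) - 1); // == 0x34
/// ```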
fn generate_constant(ctx: &mut Lower<Inst>, ty: Type, c: u64) -> ValueRegs<Reg> {
let from_bits = ty_bits(ty);
let masked = if from_bits < 64 {
c & ((1u64 << from_bits) - 1)
} else {
c
};
let cst_copy = ctx.alloc_tmp(ty);
for inst in Inst::gen_constant(cst_copy, masked as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
ctx.emit(inst);
}
non_writable_value_regs(cst_copy)
}
/// Put the given input into possibly multiple registers, and mark it as used (side-effect).
fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
let ty = ctx.input_ty(spec.insn, spec.input);
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
if let Some(c) = input.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
generate_constant(ctx, ty, c)
} else {
ctx.put_input_in_regs(spec.insn, spec.input)
}
}
/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: &mut Lower<Inst>, spec: InsnInput) -> Reg {
put_input_in_regs(ctx, spec)
.only_reg()
.expect("Multi-register value not expected")
}
/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base source (as
/// an `InsnInput`) and the constant offset from that base at which to perform
/// the load.
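///
/// For example (illustrative; register choices are hypothetical), a 64-bit
/// load feeding an `iadd`:
///
/// ```text
/// v2 = load.i64 v1+16
/// v3 = iadd.i64 v0, v2
/// ```
///
/// can be merged into the consuming ALU instruction, yielding a single x64
/// instruction along the lines of `add rax, [rsi + 16]` instead of a separate
/// load followed by an add.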
fn is_mergeable_load(ctx: &mut Lower<Inst>, src_insn: IRInst) -> Option<(InsnInput, i32)> {
let insn_data = ctx.data(src_insn);
let inputs = ctx.num_inputs(src_insn);
if inputs != 1 {
return None;
}
let load_ty = ctx.output_ty(src_insn, 0);
if ty_bits(load_ty) < 32 {
// Narrower values are handled by ALU insts that are at least 32 bits
// wide, which is normally OK as we ignore the upper bits; but, if we
// generate, e.g., a direct-from-memory 32-bit add for a byte value and
// the byte is the last byte in a page, the extra data that we load is
// incorrectly accessed. So we only allow loads to merge for
// 32-bit-and-above widths.
return None;
}
// SIMD instructions can only be load-coalesced when the loaded value comes
// from an aligned address.
if load_ty.is_vector() && !insn_data.memflags().map_or(false, |f| f.aligned()) {
return None;
}
// Just testing the opcode is enough, because the width will always match if
// the type does (and the type should match if the CLIF is properly
// constructed).
if insn_data.opcode() == Opcode::Load {
let offset = insn_data
.load_store_offset()
.expect("load should have offset");
Some((
InsnInput {
insn: src_insn,
input: 0,
},
offset,
))
} else {
None
}
}
fn input_to_imm(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<u64> {
ctx.get_input_as_source_or_const(spec.insn, spec.input)
.constant
}
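/// Emits a call to the runtime library routine `libcall`: the values in
/// `inputs` are copied into the callee's argument locations, the call is
/// emitted, and the return values are copied back into `outputs`, using the
/// libcall calling convention for the given `triple`.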
fn emit_vm_call(
ctx: &mut Lower<Inst>,
flags: &Flags,
triple: &Triple,
libcall: LibCall,
inputs: &[Reg],
outputs: &[Writable<Reg>],
) -> CodegenResult<()> {
let extname = ExternalName::LibCall(libcall);
let dist = if flags.use_colocated_libcalls() {
RelocDistance::Near
} else {
RelocDistance::Far
};
// TODO avoid recreating signatures for every single Libcall function.
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
let sig = libcall.signature(call_conv);
let caller_conv = ctx.abi().call_conv(ctx.sigs());
if !ctx.sigs().have_abi_sig_for_signature(&sig) {
ctx.sigs_mut()
.make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
}
let mut abi =
X64Caller::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone())?;
abi.emit_stack_pre_adjust(ctx);
assert_eq!(inputs.len(), abi.num_args(ctx.sigs()));
for (i, input) in inputs.iter().enumerate() {
for inst in abi.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)) {
ctx.emit(inst);
}
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
for inst in abi.gen_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)) {
ctx.emit(inst);
}
}
abi.emit_stack_post_adjust(ctx);
Ok(())
}
/// Returns the shifted operand and the shift amount if the given input is a
/// left shift by a constant value less than or equal to 3. The goal is to
/// embed it within an address mode.
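///
/// For example (an illustrative sketch; values and registers are
/// hypothetical), a left shift by 3 can supply the `index * 8` component of
/// an address mode:
///
/// ```text
/// v2 = iconst.i64 3
/// v3 = ishl v1, v2
/// v4 = iadd v0, v3
/// v5 = load.i64 v4     ; can lower to roughly `mov rax, [rdi + rsi*8]`
/// ```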
fn matches_small_constant_shift(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<(InsnInput, u8)> {
matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
match input_to_imm(
ctx,
InsnInput {
insn: shift,
input: 1,
},
) {
Some(shift_amt) if shift_amt <= 3 => Some((
InsnInput {
insn: shift,
input: 0,
},
shift_amt as u8,
)),
_ => None,
}
})
}
/// Lowers an instruction to one of the x86 addressing modes.
///
/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
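///
/// The result has the general x64 form `base + index * 2^shift + offset`;
/// the `index`/`shift` components are used only when one of the add operands
/// is a small constant left shift (see `matches_small_constant_shift`).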
fn lower_to_amode(ctx: &mut Lower<Inst>, spec: InsnInput, offset: i32) -> Amode {
let flags = ctx
.memflags(spec.insn)
.expect("Instruction with amode should have memflags");
// We now have either an add whose operands we can try to fold into the
// address mode, or some other input that must go in a register; in both
// cases we also fold in the final offset.
if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
let add_inputs = &[
InsnInput {
insn: add,
input: 0,
},
InsnInput {
insn: add,
input: 1,
},
];
// TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
// aren't happening in the wasm case. We could do better, given some range analysis.
let (base, index, shift) = if let Some((shift_input, shift_amt)) =
matches_small_constant_shift(ctx, add_inputs[0])
{
(
put_input_in_reg(ctx, add_inputs[1]),
put_input_in_reg(ctx, shift_input),
shift_amt,
)
} else if let Some((shift_input, shift_amt)) =
matches_small_constant_shift(ctx, add_inputs[1])
{
(
put_input_in_reg(ctx, add_inputs[0]),
put_input_in_reg(ctx, shift_input),
shift_amt,
)
} else {
for i in 0..=1 {
// Try to pierce through uextend.
if let Some(uextend) = matches_input(
ctx,
InsnInput {
insn: add,
input: i,
},
Opcode::Uextend,
) {
if let Some(cst) = ctx.get_input_as_source_or_const(uextend, 0).constant {
// Zero the upper bits.
let input_size = ctx.input_ty(uextend, 0).bits() as u64;
let shift: u64 = 64 - input_size;
let uext_cst: u64 = (cst << shift) >> shift;
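// E.g. (illustrative), for a 32-bit source: shift = 32, so a constant
// stored as 0xffff_ffff_8000_0000 becomes 0x0000_0000_8000_0000.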
let final_offset = (offset as i64).wrapping_add(uext_cst as i64);
if low32_will_sign_extend_to_64(final_offset as u64) {
let base = put_input_in_reg(ctx, add_inputs[1 - i]);
return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
}
}
}
// If it's a constant, add it directly!
if let Some(cst) = ctx.get_input_as_source_or_const(add, i).constant {
let final_offset = (offset as i64).wrapping_add(cst as i64);
if low32_will_sign_extend_to_64(final_offset as u64) {
let base = put_input_in_reg(ctx, add_inputs[1 - i]);
return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
}
}
}
(
put_input_in_reg(ctx, add_inputs[0]),
put_input_in_reg(ctx, add_inputs[1]),
0,
)
};
return Amode::imm_reg_reg_shift(
offset as u32,
Gpr::new(base).unwrap(),
Gpr::new(index).unwrap(),
shift,
)
.with_flags(flags);
}
let input = put_input_in_reg(ctx, spec);
Amode::imm_reg(offset as u32, input).with_flags(flags)
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs(
ctx: &mut Lower<Inst>,
insn: IRInst,
flags: &Flags,
isa_flags: &x64_settings::Flags,
triple: &Triple,
) -> CodegenResult<()> {
let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
.map(|i| InsnOutput { insn, output: i })
.collect();
if let Ok(()) = isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) {
return Ok(());
}
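// If we get here, ISLE did not have a lowering rule for this instruction.
// The match below documents expected coverage: opcodes implemented in ISLE
// are unreachable, and the remaining arms flag unimplemented opcodes or ones
// that should have been removed before lowering.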
let op = ctx.data(insn).opcode();
match op {
Opcode::Iconst
| Opcode::Bconst
| Opcode::F32const
| Opcode::F64const
| Opcode::Null
| Opcode::Iadd
| Opcode::IaddIfcout
| Opcode::SaddSat
| Opcode::UaddSat
| Opcode::Isub
| Opcode::SsubSat
| Opcode::UsubSat
| Opcode::AvgRound
| Opcode::Band
| Opcode::Bor
| Opcode::Bxor
| Opcode::Imul
| Opcode::BandNot
| Opcode::Iabs
| Opcode::Imax
| Opcode::Umax
| Opcode::Imin
| Opcode::Umin
| Opcode::Bnot
| Opcode::Bitselect
| Opcode::Vselect
| Opcode::Ushr
| Opcode::Sshr
| Opcode::Ishl
| Opcode::Rotl
| Opcode::Rotr
| Opcode::Ineg
| Opcode::Trap
| Opcode::ResumableTrap
| Opcode::Clz
| Opcode::Ctz
| Opcode::Popcnt
| Opcode::Bitrev
| Opcode::IsNull
| Opcode::IsInvalid
| Opcode::Uextend
| Opcode::Sextend
| Opcode::Breduce
| Opcode::Bextend
| Opcode::Ireduce
| Opcode::Bint
| Opcode::Debugtrap
| Opcode::WideningPairwiseDotProductS
| Opcode::Fadd
| Opcode::Fsub
| Opcode::Fmul
| Opcode::Fdiv
| Opcode::Fmin
| Opcode::Fmax
| Opcode::FminPseudo
| Opcode::FmaxPseudo
| Opcode::Sqrt
| Opcode::Fpromote
| Opcode::FvpromoteLow
| Opcode::Fdemote
| Opcode::Fvdemote
| Opcode::Fma
| Opcode::Icmp
| Opcode::Fcmp
| Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
| Opcode::Uload16
| Opcode::Sload16
| Opcode::Uload32
| Opcode::Sload32
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2
| Opcode::Store
| Opcode::Istore8
| Opcode::Istore16
| Opcode::Istore32
| Opcode::AtomicRmw
| Opcode::AtomicCas
| Opcode::AtomicLoad
| Opcode::AtomicStore
| Opcode::Fence
| Opcode::FuncAddr
| Opcode::SymbolValue
| Opcode::Return
| Opcode::Call
| Opcode::CallIndirect
| Opcode::Trapif
| Opcode::Trapff
| Opcode::GetFramePointer
| Opcode::GetStackPointer
| Opcode::GetReturnAddress
| Opcode::Select
| Opcode::Selectif
| Opcode::SelectifSpectreGuard
| Opcode::FcvtFromSint
| Opcode::FcvtLowFromSint
| Opcode::FcvtFromUint
| Opcode::FcvtToUint
| Opcode::FcvtToSint
| Opcode::FcvtToUintSat
| Opcode::FcvtToSintSat
| Opcode::IaddPairwise
| Opcode::UwidenHigh
| Opcode::UwidenLow
| Opcode::SwidenHigh
| Opcode::SwidenLow
| Opcode::Snarrow
| Opcode::Unarrow
| Opcode::Bitcast
| Opcode::Fabs
| Opcode::Fneg
| Opcode::Fcopysign
| Opcode::Ceil
| Opcode::Floor
| Opcode::Nearest
| Opcode::Trunc
| Opcode::StackAddr
| Opcode::Udiv
| Opcode::Urem
| Opcode::Sdiv
| Opcode::Srem
| Opcode::Umulhi
| Opcode::Smulhi
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg
| Opcode::Vconst
| Opcode::RawBitcast
| Opcode::Insertlane
| Opcode::Shuffle
| Opcode::Swizzle
| Opcode::Extractlane
| Opcode::ScalarToVector
| Opcode::Splat
| Opcode::VanyTrue
| Opcode::VallTrue
| Opcode::VhighBits
| Opcode::Iconcat
| Opcode::Isplit
| Opcode::TlsValue
| Opcode::SqmulRoundSat
| Opcode::Uunarrow
| Opcode::Nop => {
let ty = if !outputs.is_empty() {
Some(ctx.output_ty(insn, 0))
} else {
None
};
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
)
}
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),
// Unimplemented opcodes below. These are not currently used by Wasm
// lowering or other known embeddings, but should either be supported or
// removed eventually.
Opcode::ExtractVector => {
unimplemented!("ExtractVector not supported");
}
Opcode::Cls => unimplemented!("Cls not supported"),
Opcode::BorNot | Opcode::BxorNot => {
unimplemented!("or-not / xor-not opcodes not implemented");
}
Opcode::Bmask => unimplemented!("Bmask not implemented"),
Opcode::Trueif | Opcode::Trueff => unimplemented!("trueif / trueff not implemented"),
Opcode::Vsplit | Opcode::Vconcat => {
unimplemented!("Vector split/concat ops not implemented.");
}
// Opcodes that should be removed by legalization. These should
// eventually be removed if/when we replace in-situ legalization with
// something better.
Opcode::Ifcmp | Opcode::Ffcmp => {
panic!("Should never reach ifcmp/ffcmp as isel root!");
}
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm
| Opcode::SdivImm
| Opcode::UremImm
| Opcode::SremImm
| Opcode::IrsubImm
| Opcode::IaddCin
| Opcode::IaddIfcin
| Opcode::IaddCout
| Opcode::IaddCarry
| Opcode::IaddIfcarry
| Opcode::IsubBin
| Opcode::IsubIfbin
| Opcode::IsubBout
| Opcode::IsubIfbout
| Opcode::IsubBorrow
| Opcode::IsubIfborrow
| Opcode::BandImm
| Opcode::BorImm
| Opcode::BxorImm
| Opcode::RotlImm
| Opcode::RotrImm
| Opcode::IshlImm
| Opcode::UshrImm
| Opcode::SshrImm
| Opcode::IcmpImm
| Opcode::IfcmpImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
Opcode::StackLoad
| Opcode::StackStore
| Opcode::DynamicStackStore
| Opcode::DynamicStackLoad => {
panic!("Direct stack memory access not supported; should have been legalized");
}
Opcode::GlobalValue => {
panic!("global_value should have been removed by legalization!");
}
Opcode::HeapAddr => {
panic!("heap_addr should have been removed by legalization!");
}
Opcode::TableAddr => {
panic!("table_addr should have been removed by legalization!");
}
Opcode::Copy => {
panic!("Unused opcode should not be encountered.");
}
Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
panic!("trapz / trapnz / resumable_trapnz should have been removed by legalization!");
}
Opcode::Jump
| Opcode::Brz
| Opcode::Brnz
| Opcode::BrIcmp
| Opcode::Brif
| Opcode::Brff
| Opcode::BrTable => {
panic!("Branch opcode reached non-branch lowering logic!");
}
}
}
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for X64Backend {
type MInst = Inst;
fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.x64_flags, &self.triple)
}
fn lower_branch_group(
&self,
ctx: &mut Lower<Inst>,
branches: &[IRInst],
targets: &[MachLabel],
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
// it may be an unconditional branch, a fallthrough, a return, or a
// trap. These conditions are verified by `is_ebb_basic()` during the
// verifier pass.
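// For example (illustrative), a block ending in
//   brnz v0, block2
//   jump block3
// arrives here as a two-element branch group: the conditional branch first,
// followed by the unconditional `jump`.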
assert!(branches.len() <= 2);
if branches.len() == 2 {
let op1 = ctx.data(branches[1]).opcode();
assert_eq!(op1, Opcode::Jump);
}
if let Ok(()) = isle::lower_branch(
ctx,
&self.triple,
&self.flags,
&self.x64_flags,
branches[0],
targets,
) {
return Ok(());
}
unreachable!(
"implemented in ISLE: branch = `{}`",
ctx.dfg().display_inst(branches[0]),
);
}
fn maybe_pinned_reg(&self) -> Option<Reg> {
Some(regs::pinned_reg())
}
}