blob: d211eb5feb512a555e3716e9352d1542a1cf57d7 [file] [log] [blame]
//! Defines `SimpleJITModule`.
use crate::{compiled_blob::CompiledBlob, memory::Memory};
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::settings::Configurable;
use cranelift_codegen::{self, ir, settings};
use cranelift_codegen::{
binemit::{self, Addend, CodeInfo, CodeOffset, Reloc, RelocSink, TrapSink},
CodegenError,
};
use cranelift_entity::SecondaryMap;
use cranelift_module::{
DataContext, DataDescription, DataId, FuncId, Init, Linkage, Module, ModuleCompiledFunction,
ModuleDeclarations, ModuleError, ModuleResult, RelocRecord,
};
use cranelift_native;
#[cfg(not(windows))]
use libc;
use log::info;
use std::collections::HashMap;
use std::convert::TryInto;
use std::ffi::CString;
use std::io::Write;
use std::ptr;
use target_lexicon::PointerWidth;
#[cfg(windows)]
use winapi;
/// Alignment passed to the code allocator for every function body.
const EXECUTABLE_DATA_ALIGNMENT: u64 = 0x10;
/// Alignment used for writable data objects whose description does not specify one.
const WRITABLE_DATA_ALIGNMENT: u64 = 0x8;
/// Alignment used for read-only data objects whose description does not specify one.
const READONLY_DATA_ALIGNMENT: u64 = 0x1;
/// A builder for `SimpleJITModule`.
///
/// Collects the target ISA, the host-provided symbol table and the libcall
/// naming function; `SimpleJITModule::new` moves all three into the module.
pub struct SimpleJITBuilder {
// Target the resulting module will generate code for.
isa: Box<dyn TargetIsa>,
// Symbols registered through `symbol`/`symbols`, keyed by name.
symbols: HashMap<String, *const u8>,
// Translates an `ir::LibCall` into the symbol name to look up for it.
libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
}
impl SimpleJITBuilder {
    /// Create a new `SimpleJITBuilder` targeting the host machine.
    ///
    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
    /// floating point instructions, and for stack probes. If you don't know what to use for this
    /// argument, use `cranelift_module::default_libcall_names()`.
    ///
    /// # Panics
    ///
    /// Panics if `cranelift_native` reports that the host machine is not supported.
    pub fn new(libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>) -> Self {
        let mut flag_builder = settings::builder();
        // On at least AArch64, "colocated" calls use shorter-range relocations,
        // which might not reach all definitions; we can't handle that here, so
        // we require long-range relocation types.
        flag_builder.set("use_colocated_libcalls", "false").unwrap();
        let isa_builder = cranelift_native::builder()
            .unwrap_or_else(|msg| panic!("host machine is not supported: {}", msg));
        let isa = isa_builder.finish(settings::Flags::new(flag_builder));
        Self::with_isa(isa, libcall_names)
    }

    /// Create a new `SimpleJITBuilder` with an arbitrary target. This is mainly
    /// useful for testing.
    ///
    /// SimpleJIT requires a `TargetIsa` configured for non-PIC; this is
    /// checked with a `debug_assert!`.
    ///
    /// To create a `SimpleJITBuilder` for native use, use the `new` constructor
    /// instead.
    ///
    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
    /// floating point instructions, and for stack probes. If you don't know what to use for this
    /// argument, use `cranelift_module::default_libcall_names()`.
    pub fn with_isa(
        isa: Box<dyn TargetIsa>,
        libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
    ) -> Self {
        debug_assert!(!isa.flags().is_pic(), "SimpleJIT requires non-PIC code");
        Self {
            isa,
            symbols: HashMap::new(),
            libcall_names,
        }
    }

    /// Define a symbol in the internal symbol table.
    ///
    /// The JIT will use the symbol table to resolve names that are declared,
    /// but not defined, in the module being compiled. A common example is
    /// external functions. With this method, functions and data can be exposed
    /// to the code being compiled which are defined by the host.
    ///
    /// If a symbol is defined more than once, the most recent definition will
    /// be retained.
    ///
    /// If the JIT fails to find a symbol in its internal table, it will fall
    /// back to a platform-specific search (this typically involves searching
    /// the current process for public symbols, followed by searching the
    /// platform's C runtime).
    ///
    /// Returns `&mut Self` so that calls can be chained:
    /// `builder.symbol("a", a).symbol("b", b)`.
    pub fn symbol<K>(&mut self, name: K, ptr: *const u8) -> &mut Self
    where
        K: Into<String>,
    {
        self.symbols.insert(name.into(), ptr);
        self
    }

    /// Define multiple symbols in the internal symbol table.
    ///
    /// Using this is equivalent to calling `symbol` on each element, in
    /// iteration order. Returns `&mut Self` so that calls can be chained.
    pub fn symbols<It, K>(&mut self, symbols: It) -> &mut Self
    where
        It: IntoIterator<Item = (K, *const u8)>,
        K: Into<String>,
    {
        for (name, ptr) in symbols {
            self.symbol(name, ptr);
        }
        self
    }
}
/// A `SimpleJITModule` implements `Module` and emits code and data into memory where it can be
/// directly called and accessed.
///
/// See the `SimpleJITBuilder` for a convenient way to construct `SimpleJITModule` instances.
pub struct SimpleJITModule {
// Target that functions are compiled for.
isa: Box<dyn TargetIsa>,
// Symbols supplied through the builder; consulted before the platform symbol lookup.
symbols: HashMap<String, *const u8>,
// Translates an `ir::LibCall` into the symbol name to look up for it.
libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
// Allocators backing code, read-only data and writable data.
memory: MemoryHandle,
// Every function and data object declared in this module.
declarations: ModuleDeclarations,
// Emitted blob for each function; `None` until the function has been defined.
compiled_functions: SecondaryMap<FuncId, Option<CompiledBlob>>,
// Emitted blob for each data object; `None` until the object has been defined.
compiled_data_objects: SecondaryMap<DataId, Option<CompiledBlob>>,
// Functions that are defined but whose relocations have not been applied yet.
functions_to_finalize: Vec<FuncId>,
// Data objects that are defined but whose relocations have not been applied yet.
data_objects_to_finalize: Vec<DataId>,
}
/// A handle to allow freeing memory allocated by the `Module`.
///
/// Groups the three allocators the module uses, one per kind of memory.
struct MemoryHandle {
// Holds function bodies; switched to readable+executable when definitions are finalized.
code: Memory,
// Holds non-writable data objects; switched to read-only when definitions are finalized.
readonly: Memory,
// Holds writable data objects.
writable: Memory,
}
impl SimpleJITModule {
/// Free memory allocated for code and data segments of compiled functions.
///
/// # Safety
///
/// Because this function invalidates any pointers retrived from the
/// corresponding module, it should only be used when none of the functions
/// from that module are currently executing and none of the `fn` pointers
/// are called afterwards.
pub unsafe fn free_memory(&mut self) {
self.memory.code.free_memory();
self.memory.readonly.free_memory();
self.memory.writable.free_memory();
}
fn lookup_symbol(&self, name: &str) -> Option<*const u8> {
self.symbols
.get(name)
.copied()
.or_else(|| lookup_with_dlsym(name))
}
fn get_definition(&self, name: &ir::ExternalName) -> *const u8 {
match *name {
ir::ExternalName::User { .. } => {
let (name, linkage) = if ModuleDeclarations::is_function(name) {
let func_id = FuncId::from_name(name);
match &self.compiled_functions[func_id] {
Some(compiled) => return compiled.ptr,
None => {
let decl = self.declarations.get_function_decl(func_id);
(&decl.name, decl.linkage)
}
}
} else {
let data_id = DataId::from_name(name);
match &self.compiled_data_objects[data_id] {
Some(compiled) => return compiled.ptr,
None => {
let decl = self.declarations.get_data_decl(data_id);
(&decl.name, decl.linkage)
}
}
};
if let Some(ptr) = self.lookup_symbol(&name) {
ptr
} else if linkage == Linkage::Preemptible {
0 as *const u8
} else {
panic!("can't resolve symbol {}", name);
}
}
ir::ExternalName::LibCall(ref libcall) => {
let sym = (self.libcall_names)(*libcall);
self.lookup_symbol(&sym)
.unwrap_or_else(|| panic!("can't resolve libcall {}", sym))
}
_ => panic!("invalid ExternalName {}", name),
}
}
/// Returns the address of a finalized function.
pub fn get_finalized_function(&self, func_id: FuncId) -> *const u8 {
let info = &self.compiled_functions[func_id];
debug_assert!(
!self.functions_to_finalize.iter().any(|x| *x == func_id),
"function not yet finalized"
);
info.as_ref()
.expect("function must be compiled before it can be finalized")
.ptr
}
/// Returns the address and size of a finalized data object.
pub fn get_finalized_data(&self, data_id: DataId) -> (*const u8, usize) {
let info = &self.compiled_data_objects[data_id];
debug_assert!(
!self.data_objects_to_finalize.iter().any(|x| *x == data_id),
"data object not yet finalized"
);
let compiled = info
.as_ref()
.expect("data object must be compiled before it can be finalized");
(compiled.ptr, compiled.size)
}
fn record_function_for_perf(&self, ptr: *mut u8, size: usize, name: &str) {
// The Linux perf tool supports JIT code via a /tmp/perf-$PID.map file,
// which contains memory regions and their associated names. If we
// are profiling with perf and saving binaries to PERF_BUILDID_DIR
// for post-profile analysis, write information about each function
// we define.
if cfg!(target_os = "linux") && ::std::env::var_os("PERF_BUILDID_DIR").is_some() {
let mut map_file = ::std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(format!("/tmp/perf-{}.map", ::std::process::id()))
.unwrap();
let _ = writeln!(map_file, "{:x} {:x} {}", ptr as usize, size, name);
}
}
/// Finalize all functions and data objects that are defined but not yet finalized.
/// All symbols referenced in their bodies that are declared as needing a definition
/// must be defined by this point.
///
/// Use `get_finalized_function` and `get_finalized_data` to obtain the final
/// artifacts.
pub fn finalize_definitions(&mut self) {
for func in std::mem::take(&mut self.functions_to_finalize) {
let decl = self.declarations.get_function_decl(func);
debug_assert!(decl.linkage.is_definable());
let func = self.compiled_functions[func]
.as_ref()
.expect("function must be compiled before it can be finalized");
func.perform_relocations(|name| self.get_definition(name));
}
for data in std::mem::take(&mut self.data_objects_to_finalize) {
let decl = self.declarations.get_data_decl(data);
debug_assert!(decl.linkage.is_definable());
let data = self.compiled_data_objects[data]
.as_ref()
.expect("data object must be compiled before it can be finalized");
data.perform_relocations(|name| self.get_definition(name));
}
// Now that we're done patching, prepare the memory for execution!
self.memory.readonly.set_readonly();
self.memory.code.set_readable_and_executable();
}
/// Create a new `SimpleJITModule`.
pub fn new(builder: SimpleJITBuilder) -> Self {
let memory = MemoryHandle {
code: Memory::new(),
readonly: Memory::new(),
writable: Memory::new(),
};
Self {
isa: builder.isa,
symbols: builder.symbols,
libcall_names: builder.libcall_names,
memory,
declarations: ModuleDeclarations::default(),
compiled_functions: SecondaryMap::new(),
compiled_data_objects: SecondaryMap::new(),
functions_to_finalize: Vec::new(),
data_objects_to_finalize: Vec::new(),
}
}
}
impl<'simple_jit_backend> Module for SimpleJITModule {
fn isa(&self) -> &dyn TargetIsa {
&*self.isa
}
fn declarations(&self) -> &ModuleDeclarations {
&self.declarations
}
fn declare_function(
&mut self,
name: &str,
linkage: Linkage,
signature: &ir::Signature,
) -> ModuleResult<FuncId> {
let (id, _decl) = self
.declarations
.declare_function(name, linkage, signature)?;
Ok(id)
}
fn declare_data(
&mut self,
name: &str,
linkage: Linkage,
writable: bool,
tls: bool,
) -> ModuleResult<DataId> {
assert!(!tls, "SimpleJIT doesn't yet support TLS");
let (id, _decl) = self
.declarations
.declare_data(name, linkage, writable, tls)?;
Ok(id)
}
fn define_function<TS>(
&mut self,
id: FuncId,
ctx: &mut cranelift_codegen::Context,
trap_sink: &mut TS,
) -> ModuleResult<ModuleCompiledFunction>
where
TS: TrapSink,
{
info!("defining function {}: {}", id, ctx.func.display(self.isa()));
let CodeInfo {
total_size: code_size,
..
} = ctx.compile(self.isa())?;
let decl = self.declarations.get_function_decl(id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
if !self.compiled_functions[id].is_none() {
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
}
let size = code_size as usize;
let ptr = self
.memory
.code
.allocate(size, EXECUTABLE_DATA_ALIGNMENT)
.expect("TODO: handle OOM etc.");
let mut reloc_sink = SimpleJITRelocSink::default();
let mut stack_map_sink = binemit::NullStackMapSink {};
unsafe {
ctx.emit_to_memory(
&*self.isa,
ptr,
&mut reloc_sink,
trap_sink,
&mut stack_map_sink,
)
};
self.record_function_for_perf(ptr, size, &decl.name);
self.compiled_functions[id] = Some(CompiledBlob {
ptr,
size,
relocs: reloc_sink.relocs,
});
self.functions_to_finalize.push(id);
Ok(ModuleCompiledFunction { size: code_size })
}
fn define_function_bytes(
&mut self,
id: FuncId,
bytes: &[u8],
relocs: &[RelocRecord],
) -> ModuleResult<ModuleCompiledFunction> {
info!("defining function {} with bytes", id);
let total_size: u32 = match bytes.len().try_into() {
Ok(total_size) => total_size,
_ => Err(CodegenError::CodeTooLarge)?,
};
let decl = self.declarations.get_function_decl(id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
if !self.compiled_functions[id].is_none() {
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
}
let size = bytes.len();
let ptr = self
.memory
.code
.allocate(size, EXECUTABLE_DATA_ALIGNMENT)
.expect("TODO: handle OOM etc.");
unsafe {
ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, size);
}
self.record_function_for_perf(ptr, size, &decl.name);
self.compiled_functions[id] = Some(CompiledBlob {
ptr,
size,
relocs: relocs.to_vec(),
});
self.functions_to_finalize.push(id);
Ok(ModuleCompiledFunction { size: total_size })
}
fn define_data(&mut self, id: DataId, data: &DataContext) -> ModuleResult<()> {
let decl = self.declarations.get_data_decl(id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
if !self.compiled_data_objects[id].is_none() {
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
}
assert!(!decl.tls, "SimpleJIT doesn't yet support TLS");
let &DataDescription {
ref init,
function_decls: _,
data_decls: _,
function_relocs: _,
data_relocs: _,
custom_segment_section: _,
align,
} = data.description();
let size = init.size();
let ptr = if decl.writable {
self.memory
.writable
.allocate(size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT))
.expect("TODO: handle OOM etc.")
} else {
self.memory
.readonly
.allocate(size, align.unwrap_or(READONLY_DATA_ALIGNMENT))
.expect("TODO: handle OOM etc.")
};
match *init {
Init::Uninitialized => {
panic!("data is not initialized yet");
}
Init::Zeros { .. } => {
unsafe { ptr::write_bytes(ptr, 0, size) };
}
Init::Bytes { ref contents } => {
let src = contents.as_ptr();
unsafe { ptr::copy_nonoverlapping(src, ptr, size) };
}
}
let pointer_reloc = match self.isa.triple().pointer_width().unwrap() {
PointerWidth::U16 => panic!(),
PointerWidth::U32 => Reloc::Abs4,
PointerWidth::U64 => Reloc::Abs8,
};
let relocs = data
.description()
.all_relocs(pointer_reloc)
.collect::<Vec<_>>();
self.compiled_data_objects[id] = Some(CompiledBlob { ptr, size, relocs });
self.data_objects_to_finalize.push(id);
Ok(())
}
}
/// Looks `name` up among the symbols visible to the current process using
/// `dlsym(RTLD_DEFAULT, ..)`.
///
/// Returns `None` when the symbol is not found, or when `name` contains an
/// interior NUL byte and therefore cannot be passed to `dlsym` at all.
#[cfg(not(windows))]
fn lookup_with_dlsym(name: &str) -> Option<*const u8> {
    // A name that cannot be turned into a C string cannot match any symbol;
    // report it as missing instead of panicking.
    let c_str = CString::new(name).ok()?;
    // SAFETY: `c_str` is a valid NUL-terminated string that outlives the call.
    let sym = unsafe { libc::dlsym(libc::RTLD_DEFAULT, c_str.as_ptr()) };
    if sym.is_null() {
        None
    } else {
        Some(sym as *const u8)
    }
}
/// Looks `name` up with `GetProcAddress`, first using a null module handle and
/// then in `msvcrt.dll`.
///
/// Returns `None` when neither lookup finds the symbol, or when `name`
/// contains an interior NUL byte and therefore cannot be passed to
/// `GetProcAddress` at all.
#[cfg(windows)]
fn lookup_with_dlsym(name: &str) -> Option<*const u8> {
    const MSVCRT_DLL: &[u8] = b"msvcrt.dll\0";

    // A name that cannot be turned into a C string cannot match any symbol;
    // report it as missing instead of panicking.
    let c_str = CString::new(name).ok()?;
    let c_str_ptr = c_str.as_ptr();

    // SAFETY: both strings passed to the Windows API are NUL-terminated and
    // outlive the calls.
    unsafe {
        let handles = [
            // try to find the searched symbol in the currently running executable
            ptr::null_mut(),
            // try to find the searched symbol in local c runtime
            winapi::um::libloaderapi::GetModuleHandleA(MSVCRT_DLL.as_ptr() as *const i8),
        ];

        for &handle in handles.iter() {
            let addr = winapi::um::libloaderapi::GetProcAddress(handle, c_str_ptr);
            if !addr.is_null() {
                return Some(addr as *const u8);
            }
        }
        None
    }
}
/// Relocation sink that records every external relocation reported while a
/// function is emitted, so it can be stored with the function's compiled blob.
#[derive(Default)]
struct SimpleJITRelocSink {
// Relocations in the order they were reported.
relocs: Vec<RelocRecord>,
}
impl RelocSink for SimpleJITRelocSink {
    /// Records a relocation against an external name; the source location is
    /// not kept.
    fn reloc_external(
        &mut self,
        offset: CodeOffset,
        _srcloc: ir::SourceLoc,
        reloc: Reloc,
        name: &ir::ExternalName,
        addend: Addend,
    ) {
        let record = RelocRecord {
            offset,
            reloc,
            name: name.clone(),
            addend,
        };
        self.relocs.push(record);
    }

    /// Accepts jump-table relocations of kind `X86PCRelRodata4` without
    /// recording them; panics on any other kind.
    fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) {
        // Not necessary to record this unless we are going to split apart code and its
        // jumptbl/rodata.
        if !matches!(reloc, Reloc::X86PCRelRodata4) {
            panic!("Unhandled reloc");
        }
    }

    /// Accepts constant-pool relocations of kind `X86PCRelRodata4` without
    /// recording them; panics on any other kind.
    fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) {
        // Not necessary to record this unless we are going to split apart code and its
        // jumptbl/rodata.
        if !matches!(reloc, Reloc::X86PCRelRodata4) {
            panic!("Unhandled reloc");
        }
    }
}