blob: 56008a1cf9dcca221ff7d17d843e32af7e4e2203 [file] [log] [blame]
//! Defines `ObjectModule`.
use anyhow::anyhow;
use cranelift_codegen::binemit::{
Addend, CodeInfo, CodeOffset, NullStackMapSink, Reloc, RelocSink, TrapSink,
};
use cranelift_codegen::entity::SecondaryMap;
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::{self, ir};
use cranelift_module::{
DataContext, DataDescription, DataId, FuncId, Init, Linkage, Module, ModuleCompiledFunction,
ModuleDeclarations, ModuleError, ModuleResult,
};
use log::info;
use object::write::{
Object, Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection,
};
use object::{
RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope,
};
use std::collections::HashMap;
use std::convert::TryInto;
use std::mem;
use target_lexicon::PointerWidth;
/// A builder for `ObjectModule`.
pub struct ObjectBuilder {
isa: Box<dyn TargetIsa>,
binary_format: object::BinaryFormat,
architecture: object::Architecture,
endian: object::Endianness,
name: Vec<u8>,
libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
function_alignment: u64,
per_function_section: bool,
}
impl ObjectBuilder {
/// Create a new `ObjectBuilder` using the given Cranelift target, that
/// can be passed to [`ObjectModule::new`].
///
/// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
/// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
/// floating point instructions, and for stack probes. If you don't know what to use for this
/// argument, use `cranelift_module::default_libcall_names()`.
pub fn new<V: Into<Vec<u8>>>(
isa: Box<dyn TargetIsa>,
name: V,
libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
) -> ModuleResult<Self> {
let binary_format = match isa.triple().binary_format {
target_lexicon::BinaryFormat::Elf => object::BinaryFormat::Elf,
target_lexicon::BinaryFormat::Coff => object::BinaryFormat::Coff,
target_lexicon::BinaryFormat::Macho => object::BinaryFormat::MachO,
target_lexicon::BinaryFormat::Wasm => {
return Err(ModuleError::Backend(anyhow!(
"binary format wasm is unsupported",
)))
}
target_lexicon::BinaryFormat::Unknown => {
return Err(ModuleError::Backend(anyhow!("binary format is unknown")))
}
other => {
return Err(ModuleError::Backend(anyhow!(
"binary format {} not recognized",
other
)))
}
};
let architecture = match isa.triple().architecture {
target_lexicon::Architecture::X86_32(_) => object::Architecture::I386,
target_lexicon::Architecture::X86_64 => object::Architecture::X86_64,
target_lexicon::Architecture::Arm(_) => object::Architecture::Arm,
target_lexicon::Architecture::Aarch64(_) => object::Architecture::Aarch64,
architecture => {
return Err(ModuleError::Backend(anyhow!(
"target architecture {:?} is unsupported",
architecture,
)))
}
};
let endian = match isa.triple().endianness().unwrap() {
target_lexicon::Endianness::Little => object::Endianness::Little,
target_lexicon::Endianness::Big => object::Endianness::Big,
};
Ok(Self {
isa,
binary_format,
architecture,
endian,
name: name.into(),
libcall_names,
function_alignment: 1,
per_function_section: false,
})
}
/// Set the alignment used for functions.
pub fn function_alignment(&mut self, alignment: u64) -> &mut Self {
self.function_alignment = alignment;
self
}
/// Set if every function should end up in their own section.
pub fn per_function_section(&mut self, per_function_section: bool) -> &mut Self {
self.per_function_section = per_function_section;
self
}
}
/// An `ObjectModule` implements `Module` and emits ".o" files using the `object` library.
///
/// See the `ObjectBuilder` for a convenient way to construct `ObjectModule` instances.
pub struct ObjectModule {
isa: Box<dyn TargetIsa>,
object: Object,
declarations: ModuleDeclarations,
functions: SecondaryMap<FuncId, Option<(SymbolId, bool)>>,
data_objects: SecondaryMap<DataId, Option<(SymbolId, bool)>>,
relocs: Vec<SymbolRelocs>,
libcalls: HashMap<ir::LibCall, SymbolId>,
libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
function_alignment: u64,
per_function_section: bool,
}
impl ObjectModule {
/// Create a new `ObjectModule` using the given Cranelift target.
pub fn new(builder: ObjectBuilder) -> Self {
let mut object = Object::new(builder.binary_format, builder.architecture, builder.endian);
object.add_file_symbol(builder.name);
Self {
isa: builder.isa,
object,
declarations: ModuleDeclarations::default(),
functions: SecondaryMap::new(),
data_objects: SecondaryMap::new(),
relocs: Vec::new(),
libcalls: HashMap::new(),
libcall_names: builder.libcall_names,
function_alignment: builder.function_alignment,
per_function_section: builder.per_function_section,
}
}
}
impl Module for ObjectModule {
fn isa(&self) -> &dyn TargetIsa {
&*self.isa
}
fn declarations(&self) -> &ModuleDeclarations {
&self.declarations
}
fn declare_function(
&mut self,
name: &str,
linkage: Linkage,
signature: &ir::Signature,
) -> ModuleResult<FuncId> {
let (id, decl) = self
.declarations
.declare_function(name, linkage, signature)?;
let (scope, weak) = translate_linkage(decl.linkage);
if let Some((function, _defined)) = self.functions[id] {
let symbol = self.object.symbol_mut(function);
symbol.scope = scope;
symbol.weak = weak;
} else {
let symbol_id = self.object.add_symbol(Symbol {
name: name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
scope,
weak,
section: SymbolSection::Undefined,
flags: SymbolFlags::None,
});
self.functions[id] = Some((symbol_id, false));
}
Ok(id)
}
fn declare_data(
&mut self,
name: &str,
linkage: Linkage,
writable: bool,
tls: bool,
) -> ModuleResult<DataId> {
let (id, decl) = self
.declarations
.declare_data(name, linkage, writable, tls)?;
let kind = if decl.tls {
SymbolKind::Tls
} else {
SymbolKind::Data
};
let (scope, weak) = translate_linkage(decl.linkage);
if let Some((data, _defined)) = self.data_objects[id] {
let symbol = self.object.symbol_mut(data);
symbol.kind = kind;
symbol.scope = scope;
symbol.weak = weak;
} else {
let symbol_id = self.object.add_symbol(Symbol {
name: name.as_bytes().to_vec(),
value: 0,
size: 0,
kind,
scope,
weak,
section: SymbolSection::Undefined,
flags: SymbolFlags::None,
});
self.data_objects[id] = Some((symbol_id, false));
}
Ok(id)
}
fn define_function<TS>(
&mut self,
func_id: FuncId,
ctx: &mut cranelift_codegen::Context,
trap_sink: &mut TS,
) -> ModuleResult<ModuleCompiledFunction>
where
TS: TrapSink,
{
info!(
"defining function {}: {}",
func_id,
ctx.func.display(self.isa())
);
let CodeInfo {
total_size: code_size,
..
} = ctx.compile(self.isa())?;
let decl = self.declarations.get_function_decl(func_id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
let &mut (symbol, ref mut defined) = self.functions[func_id].as_mut().unwrap();
if *defined {
return Err(ModuleError::DuplicateDefinition(decl.name.clone()));
}
*defined = true;
let mut code: Vec<u8> = vec![0; code_size as usize];
let mut reloc_sink = ObjectRelocSink::new(self.object.format());
let mut stack_map_sink = NullStackMapSink {};
unsafe {
ctx.emit_to_memory(
&*self.isa,
code.as_mut_ptr(),
&mut reloc_sink,
trap_sink,
&mut stack_map_sink,
)
};
let (section, offset) = if self.per_function_section {
let symbol_name = self.object.symbol(symbol).name.clone();
let (section, offset) = self.object.add_subsection(
StandardSection::Text,
&symbol_name,
&code,
self.function_alignment,
);
self.object.symbol_mut(symbol).section = SymbolSection::Section(section);
self.object.symbol_mut(symbol).value = offset;
(section, offset)
} else {
let section = self.object.section_id(StandardSection::Text);
let offset =
self.object
.add_symbol_data(symbol, section, &code, self.function_alignment);
(section, offset)
};
if !reloc_sink.relocs.is_empty() {
self.relocs.push(SymbolRelocs {
section,
offset,
relocs: reloc_sink.relocs,
});
}
Ok(ModuleCompiledFunction { size: code_size })
}
fn define_function_bytes(
&mut self,
func_id: FuncId,
bytes: &[u8],
) -> ModuleResult<ModuleCompiledFunction> {
info!("defining function {} with bytes", func_id);
let decl = self.declarations.get_function_decl(func_id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
let total_size: u32 = match bytes.len().try_into() {
Ok(total_size) => total_size,
_ => Err(ModuleError::FunctionTooLarge(decl.name.clone()))?,
};
let &mut (symbol, ref mut defined) = self.functions[func_id].as_mut().unwrap();
if *defined {
return Err(ModuleError::DuplicateDefinition(decl.name.clone()));
}
*defined = true;
if self.per_function_section {
let symbol_name = self.object.symbol(symbol).name.clone();
let (section, offset) = self.object.add_subsection(
StandardSection::Text,
&symbol_name,
bytes,
self.function_alignment,
);
self.object.symbol_mut(symbol).section = SymbolSection::Section(section);
self.object.symbol_mut(symbol).value = offset;
} else {
let section = self.object.section_id(StandardSection::Text);
let _offset =
self.object
.add_symbol_data(symbol, section, bytes, self.function_alignment);
}
Ok(ModuleCompiledFunction { size: total_size })
}
fn define_data(&mut self, data_id: DataId, data_ctx: &DataContext) -> ModuleResult<()> {
let decl = self.declarations.get_data_decl(data_id);
if !decl.linkage.is_definable() {
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
}
let &mut (symbol, ref mut defined) = self.data_objects[data_id].as_mut().unwrap();
if *defined {
return Err(ModuleError::DuplicateDefinition(decl.name.clone()));
}
*defined = true;
let &DataDescription {
ref init,
ref function_decls,
ref data_decls,
ref function_relocs,
ref data_relocs,
ref custom_segment_section,
align,
} = data_ctx.description();
let reloc_size = match self.isa.triple().pointer_width().unwrap() {
PointerWidth::U16 => 16,
PointerWidth::U32 => 32,
PointerWidth::U64 => 64,
};
let mut relocs = Vec::new();
for &(offset, id) in function_relocs {
relocs.push(RelocRecord {
offset,
name: function_decls[id].clone(),
kind: RelocationKind::Absolute,
encoding: RelocationEncoding::Generic,
size: reloc_size,
addend: 0,
});
}
for &(offset, id, addend) in data_relocs {
relocs.push(RelocRecord {
offset,
name: data_decls[id].clone(),
kind: RelocationKind::Absolute,
encoding: RelocationEncoding::Generic,
size: reloc_size,
addend,
});
}
let section = if custom_segment_section.is_none() {
let section_kind = if let Init::Zeros { .. } = *init {
if decl.tls {
StandardSection::UninitializedTls
} else {
StandardSection::UninitializedData
}
} else if decl.tls {
StandardSection::Tls
} else if decl.writable {
StandardSection::Data
} else if relocs.is_empty() {
StandardSection::ReadOnlyData
} else {
StandardSection::ReadOnlyDataWithRel
};
self.object.section_id(section_kind)
} else {
if decl.tls {
return Err(cranelift_module::ModuleError::Backend(anyhow::anyhow!(
"Custom section not supported for TLS"
)));
}
let (seg, sec) = &custom_segment_section.as_ref().unwrap();
self.object.add_section(
seg.clone().into_bytes(),
sec.clone().into_bytes(),
if decl.writable {
SectionKind::Data
} else if relocs.is_empty() {
SectionKind::ReadOnlyData
} else {
SectionKind::Data
},
)
};
let align = align.unwrap_or(1);
let offset = match *init {
Init::Uninitialized => {
panic!("data is not initialized yet");
}
Init::Zeros { size } => self
.object
.add_symbol_bss(symbol, section, size as u64, align),
Init::Bytes { ref contents } => self
.object
.add_symbol_data(symbol, section, &contents, align),
};
if !relocs.is_empty() {
self.relocs.push(SymbolRelocs {
section,
offset,
relocs,
});
}
Ok(())
}
}
impl ObjectModule {
/// Finalize all relocations and output an object.
pub fn finish(mut self) -> ObjectProduct {
let symbol_relocs = mem::take(&mut self.relocs);
for symbol in symbol_relocs {
for &RelocRecord {
offset,
ref name,
kind,
encoding,
size,
addend,
} in &symbol.relocs
{
let target_symbol = self.get_symbol(name);
self.object
.add_relocation(
symbol.section,
Relocation {
offset: symbol.offset + u64::from(offset),
size,
kind,
encoding,
symbol: target_symbol,
addend,
},
)
.unwrap();
}
}
// Indicate that this object has a non-executable stack.
if self.object.format() == object::BinaryFormat::Elf {
self.object.add_section(
vec![],
".note.GNU-stack".as_bytes().to_vec(),
SectionKind::Linker,
);
}
ObjectProduct {
object: self.object,
functions: self.functions,
data_objects: self.data_objects,
}
}
/// This should only be called during finish because it creates
/// symbols for missing libcalls.
fn get_symbol(&mut self, name: &ir::ExternalName) -> SymbolId {
match *name {
ir::ExternalName::User { .. } => {
if self.declarations.is_function(name) {
let id = self.declarations.get_function_id(name);
self.functions[id].unwrap().0
} else {
let id = self.declarations.get_data_id(name);
self.data_objects[id].unwrap().0
}
}
ir::ExternalName::LibCall(ref libcall) => {
let name = (self.libcall_names)(*libcall);
if let Some(symbol) = self.object.symbol_id(name.as_bytes()) {
symbol
} else if let Some(symbol) = self.libcalls.get(libcall) {
*symbol
} else {
let symbol = self.object.add_symbol(Symbol {
name: name.as_bytes().to_vec(),
value: 0,
size: 0,
kind: SymbolKind::Text,
scope: SymbolScope::Unknown,
weak: false,
section: SymbolSection::Undefined,
flags: SymbolFlags::None,
});
self.libcalls.insert(*libcall, symbol);
symbol
}
}
_ => panic!("invalid ExternalName {}", name),
}
}
}
fn translate_linkage(linkage: Linkage) -> (SymbolScope, bool) {
let scope = match linkage {
Linkage::Import => SymbolScope::Unknown,
Linkage::Local => SymbolScope::Compilation,
Linkage::Hidden => SymbolScope::Linkage,
Linkage::Export | Linkage::Preemptible => SymbolScope::Dynamic,
};
// TODO: this matches rustc_codegen_cranelift, but may be wrong.
let weak = linkage == Linkage::Preemptible;
(scope, weak)
}
/// This is the output of `Module`'s
/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function.
/// It contains the generated `Object` and other information produced during
/// compilation.
pub struct ObjectProduct {
/// Object artifact with all functions and data from the module defined.
pub object: Object,
/// Symbol IDs for functions (both declared and defined).
pub functions: SecondaryMap<FuncId, Option<(SymbolId, bool)>>,
/// Symbol IDs for data objects (both declared and defined).
pub data_objects: SecondaryMap<DataId, Option<(SymbolId, bool)>>,
}
impl ObjectProduct {
/// Return the `SymbolId` for the given function.
#[inline]
pub fn function_symbol(&self, id: FuncId) -> SymbolId {
self.functions[id].unwrap().0
}
/// Return the `SymbolId` for the given data object.
#[inline]
pub fn data_symbol(&self, id: DataId) -> SymbolId {
self.data_objects[id].unwrap().0
}
/// Write the object bytes in memory.
#[inline]
pub fn emit(self) -> Result<Vec<u8>, object::write::Error> {
self.object.write()
}
}
#[derive(Clone)]
struct SymbolRelocs {
section: SectionId,
offset: u64,
relocs: Vec<RelocRecord>,
}
#[derive(Clone)]
struct RelocRecord {
offset: CodeOffset,
name: ir::ExternalName,
kind: RelocationKind,
encoding: RelocationEncoding,
size: u8,
addend: Addend,
}
struct ObjectRelocSink {
format: object::BinaryFormat,
relocs: Vec<RelocRecord>,
}
impl ObjectRelocSink {
fn new(format: object::BinaryFormat) -> Self {
Self {
format,
relocs: vec![],
}
}
}
impl RelocSink for ObjectRelocSink {
fn reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) {
unimplemented!();
}
fn reloc_external(
&mut self,
offset: CodeOffset,
_srcloc: ir::SourceLoc,
reloc: Reloc,
name: &ir::ExternalName,
mut addend: Addend,
) {
let (kind, encoding, size) = match reloc {
Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32),
Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64),
Reloc::X86PCRel4 => (RelocationKind::Relative, RelocationEncoding::Generic, 32),
Reloc::X86CallPCRel4 => (RelocationKind::Relative, RelocationEncoding::X86Branch, 32),
// TODO: Get Cranelift to tell us when we can use
// R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX.
Reloc::X86CallPLTRel4 => (
RelocationKind::PltRelative,
RelocationEncoding::X86Branch,
32,
),
Reloc::X86GOTPCRel4 => (RelocationKind::GotRelative, RelocationEncoding::Generic, 32),
Reloc::ElfX86_64TlsGd => {
assert_eq!(
self.format,
object::BinaryFormat::Elf,
"ElfX86_64TlsGd is not supported for this file format"
);
(
RelocationKind::Elf(object::elf::R_X86_64_TLSGD),
RelocationEncoding::Generic,
32,
)
}
Reloc::MachOX86_64Tlv => {
assert_eq!(
self.format,
object::BinaryFormat::MachO,
"MachOX86_64Tlv is not supported for this file format"
);
addend += 4; // X86_64_RELOC_TLV has an implicit addend of -4
(
RelocationKind::MachO {
value: object::macho::X86_64_RELOC_TLV,
relative: true,
},
RelocationEncoding::Generic,
32,
)
}
// FIXME
_ => unimplemented!(),
};
self.relocs.push(RelocRecord {
offset,
name: name.clone(),
kind,
encoding,
size,
addend,
});
}
fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) {
match reloc {
Reloc::X86PCRelRodata4 => {
// Not necessary to record this unless we are going to split apart code and its
// jumptbl/rodata.
}
_ => {
panic!("Unhandled reloc");
}
}
}
fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) {
match reloc {
Reloc::X86PCRelRodata4 => {
// Not necessary to record this unless we are going to split apart code and its
// jumptbl/rodata.
}
_ => {
panic!("Unhandled reloc");
}
}
}
}