blob: 6225c73e081cdc7452f79002b7cd7ae3a7288c83 [file] [log] [blame] [edit]
//! Support for egraphs represented in the DataFlowGraph.
use crate::alias_analysis::{AliasAnalysis, LastStores};
use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap};
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
use crate::egraph::elaborate::Elaborator;
use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
use crate::ir::pcc::Fact;
use crate::ir::{
Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value, ValueDef,
ValueListPool,
};
use crate::loop_analysis::LoopAnalysis;
use crate::opts::IsleContext;
use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
use crate::settings::Flags;
use crate::trace;
use crate::unionfind::UnionFind;
use core::cmp::Ordering;
use cranelift_control::ControlPlane;
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::SecondaryMap;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;
use std::hash::Hasher;
mod cost;
mod elaborate;
/// Pass over a Function that does the whole aegraph thing.
///
/// - Removes non-skeleton nodes from the Layout.
/// - Performs a GVN-and-rule-application pass over all Values
/// reachable from the skeleton, potentially creating new Union
/// nodes (i.e., an aegraph) so that some values have multiple
/// representations.
/// - Does "extraction" on the aegraph: selects the best value out of
/// the tree-of-Union nodes for each used value.
/// - Does "scoped elaboration" on the aegraph: chooses one or more
/// locations for pure nodes to become instructions again in the
/// layout, as forced by the skeleton.
///
/// At the beginning and end of this pass, the CLIF should be in a
/// state that passes the verifier and, additionally, has no Union
/// nodes. During the pass, Union nodes may exist, and instructions in
/// the layout may refer to results of instructions that are not
/// placed in the layout.
pub struct EgraphPass<'a> {
/// The function we're operating on.
func: &'a mut Function,
/// Dominator tree for the CFG, used to visit blocks in pre-order
/// so we see value definitions before their uses, and also used for
/// O(1) dominance checks.
domtree: DominatorTreePreorder,
/// Alias analysis, used during optimization.
alias_analysis: &'a mut AliasAnalysis<'a>,
/// Loop analysis results, used for built-in LICM during
/// elaboration.
loop_analysis: &'a LoopAnalysis,
/// Compiler flags.
flags: &'a Flags,
/// Chaos-mode control-plane so we can test that we still get
/// correct results when our heuristics make bad decisions.
ctrl_plane: &'a mut ControlPlane,
/// Which canonical Values do we want to rematerialize in each
/// block where they're used?
///
/// (A canonical Value is the *oldest* Value in an eclass,
/// i.e. tree of union value-nodes).
remat_values: FxHashSet<Value>,
/// Stats collected while we run this pass.
pub(crate) stats: Stats,
/// Union-find that maps all members of a Union tree (eclass) back
/// to the *oldest* (lowest-numbered) `Value`.
pub(crate) eclasses: UnionFind<Value>,
}
// The maximum number of rewrites we will take from a single call into ISLE.
const MATCHES_LIMIT: usize = 5;
/// Context passed through node insertion and optimization.
pub(crate) struct OptimizeCtx<'opt, 'analysis>
where
'analysis: 'opt,
{
// Borrowed from EgraphPass:
pub(crate) func: &'opt mut Function,
pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>,
pub(crate) effectful_gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Value>,
available_block: &'opt mut SecondaryMap<Value, Block>,
pub(crate) eclasses: &'opt mut UnionFind<Value>,
pub(crate) remat_values: &'opt mut FxHashSet<Value>,
pub(crate) stats: &'opt mut Stats,
domtree: &'opt DominatorTreePreorder,
pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
pub(crate) alias_analysis_state: &'opt mut LastStores,
flags: &'opt Flags,
ctrl_plane: &'opt mut ControlPlane,
// Held locally during optimization of one node (recursively):
pub(crate) rewrite_depth: usize,
pub(crate) subsume_values: FxHashSet<Value>,
optimized_values: SmallVec<[Value; MATCHES_LIMIT]>,
}
/// For passing to `insert_pure_enode`. Sometimes the enode already
/// exists as an Inst (from the original CLIF), and sometimes we're in
/// the middle of creating it and want to avoid inserting it if
/// possible until we know we need it.
pub(crate) enum NewOrExistingInst {
New(InstructionData, Type),
Existing(Inst),
}
impl NewOrExistingInst {
fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) {
match self {
NewOrExistingInst::New(data, ty) => (*ty, *data),
NewOrExistingInst::Existing(inst) => {
let ty = dfg.ctrl_typevar(*inst);
(ty, dfg.insts[*inst])
}
}
}
}
impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
where
'analysis: 'opt,
{
/// Optimization of a single instruction.
///
/// This does a few things:
/// - Looks up the instruction in the GVN deduplication map. If we
/// already have the same instruction somewhere else, with the
/// same args, then we can alias the original instruction's
/// results and omit this instruction entirely.
/// - Note that we do this canonicalization based on the
/// instruction with its arguments as *canonical* eclass IDs,
/// that is, the oldest (smallest index) `Value` reachable in
/// the tree-of-unions (whole eclass). This ensures that we
/// properly canonicalize newer nodes that use newer "versions"
/// of a value that are still equal to the older versions.
/// - If the instruction is "new" (not deduplicated), then apply
/// optimization rules:
/// - All of the mid-end rules written in ISLE.
/// - Store-to-load forwarding.
/// - Update the value-to-opt-value map, and update the eclass
/// union-find, if we rewrote the value to different form(s).
pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
// Create the external context for looking up and updating the
// GVN map. This is necessary so that instructions themselves
// do not have to carry all the references or data for a full
// `Eq` or `Hash` impl.
let gvn_context = GVNContext {
union_find: self.eclasses,
value_lists: &self.func.dfg.value_lists,
};
self.stats.pure_inst += 1;
if let NewOrExistingInst::New(..) = inst {
self.stats.new_inst += 1;
}
// Does this instruction already exist? If so, add entries to
// the value-map to rewrite uses of its results to the results
// of the original (existing) instruction. If not, optimize
// the new instruction.
if let Some(&orig_result) = self
.gvn_map
.get(&inst.get_inst_key(&self.func.dfg), &gvn_context)
{
self.stats.pure_inst_deduped += 1;
if let NewOrExistingInst::Existing(inst) = inst {
debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
let result = self.func.dfg.first_result(inst);
debug_assert!(
self.domtree.dominates(
self.available_block[orig_result],
self.get_available_block(inst)
),
"GVN shouldn't replace {result} (available in {}) with non-dominating {orig_result} (available in {})",
self.get_available_block(inst),
self.available_block[orig_result],
);
self.value_to_opt_value[result] = orig_result;
self.func.dfg.merge_facts(result, orig_result);
}
orig_result
} else {
// Now actually insert the InstructionData and attach
// result value (exactly one).
let (inst, result, ty) = match inst {
NewOrExistingInst::New(data, typevar) => {
let inst = self.func.dfg.make_inst(data);
// TODO: reuse return value?
self.func.dfg.make_inst_results(inst, typevar);
let result = self.func.dfg.first_result(inst);
// Add to eclass unionfind.
self.eclasses.add(result);
// New inst. We need to do the analysis of its result.
(inst, result, typevar)
}
NewOrExistingInst::Existing(inst) => {
let result = self.func.dfg.first_result(inst);
let ty = self.func.dfg.ctrl_typevar(inst);
(inst, result, ty)
}
};
self.attach_constant_fact(inst, result, ty);
self.available_block[result] = self.get_available_block(inst);
let opt_value = self.optimize_pure_enode(inst);
for &argument in self.func.dfg.inst_args(inst) {
self.eclasses.pin_index(argument);
}
let gvn_context = GVNContext {
union_find: self.eclasses,
value_lists: &self.func.dfg.value_lists,
};
self.gvn_map
.insert((ty, self.func.dfg.insts[inst]), opt_value, &gvn_context);
self.value_to_opt_value[result] = opt_value;
opt_value
}
}
/// Find the block where a pure instruction first becomes available,
/// defined as the block that is closest to the root where all of
/// its arguments are available. In the unusual case where a pure
/// instruction has no arguments (e.g. get_return_address), we can
/// place it anywhere, so it is available in the entry block.
///
/// This function does not compute available blocks recursively.
/// All of the instruction's arguments must have had their available
/// blocks assigned already.
fn get_available_block(&self, inst: Inst) -> Block {
// Side-effecting instructions have different rules for where
// they become available, so this function does not apply.
debug_assert!(is_pure_for_egraph(self.func, inst));
// Note that the def-point of all arguments to an instruction
// in SSA lie on a line of direct ancestors in the domtree, and
// so do their available-blocks. This means that for any pair of
// arguments, their available blocks are either the same or one
// strictly dominates the other. We just need to find any argument
// whose available block is deepest in the domtree.
self.func.dfg.insts[inst]
.arguments(&self.func.dfg.value_lists)
.iter()
.map(|&v| {
let block = self.available_block[v];
debug_assert!(!block.is_reserved_value());
block
})
.max_by(|&x, &y| {
if self.domtree.dominates(x, y) {
Ordering::Less
} else {
debug_assert!(self.domtree.dominates(y, x));
Ordering::Greater
}
})
.unwrap_or(self.func.layout.entry_block().unwrap())
}
/// Optimizes an enode by applying any matching mid-end rewrite
/// rules (or store-to-load forwarding, which is a special case),
/// unioning together all possible optimized (or rewritten) forms
/// of this expression into an eclass and returning the `Value`
/// that represents that eclass.
fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
// A pure node always has exactly one result.
let orig_value = self.func.dfg.first_result(inst);
let mut optimized_values = std::mem::take(&mut self.optimized_values);
// Limit rewrite depth. When we apply optimization rules, they
// may create new nodes (values) and those are, recursively,
// optimized eagerly as soon as they are created. So we may
// have more than one ISLE invocation on the stack. (This is
// necessary so that as the toplevel builds the
// right-hand-side expression bottom-up, it uses the "latest"
// optimized values for all the constituent parts.) To avoid
// infinite or problematic recursion, we bound the rewrite
// depth to a small constant here.
const REWRITE_LIMIT: usize = 5;
if self.rewrite_depth > REWRITE_LIMIT {
self.stats.rewrite_depth_limit += 1;
return orig_value;
}
self.rewrite_depth += 1;
trace!("Incrementing rewrite depth; now {}", self.rewrite_depth);
// Invoke the ISLE toplevel constructor, getting all new
// values produced as equivalents to this value.
trace!("Calling into ISLE with original value {}", orig_value);
self.stats.rewrite_rule_invoked += 1;
debug_assert!(optimized_values.is_empty());
crate::opts::generated_code::constructor_simplify(
&mut IsleContext { ctx: self },
orig_value,
&mut optimized_values,
);
optimized_values.push(orig_value);
// Remove any values from optimized_values that do not have
// the highest possible available block in the domtree, in
// O(n) time. This loop scans in reverse, establishing the
// loop invariant that all values at indices >= idx have the
// same available block, which is the best available block
// seen so far. Note that orig_value must also be removed if
// it isn't in the best block, so we push it above, which means
// optimized_values is never empty: there's always at least one
// value in best_block.
let mut best_block = self.available_block[*optimized_values.last().unwrap()];
for idx in (0..optimized_values.len() - 1).rev() {
// At the beginning of each iteration, there is a non-empty
// collection of values after idx, which are all available
// at best_block.
let this_block = self.available_block[optimized_values[idx]];
if this_block != best_block {
if self.domtree.dominates(this_block, best_block) {
// If the available block for this value dominates
// the best block we've seen so far, discard all
// the values we already checked and leave only this
// value in the tail of the vector.
optimized_values.truncate(idx + 1);
best_block = this_block;
} else {
// Otherwise the tail of the vector contains values
// which are all better than this value, so we can
// swap any of them in place of this value to delete
// this one in O(1) time.
debug_assert!(self.domtree.dominates(best_block, this_block));
optimized_values.swap_remove(idx);
debug_assert!(optimized_values.len() > idx);
}
}
}
// It's not supposed to matter what order `simplify` returns values in.
self.ctrl_plane.shuffle(&mut optimized_values);
let num_matches = optimized_values.len();
if num_matches > MATCHES_LIMIT {
trace!(
"Reached maximum matches limit; too many optimized values \
({num_matches} > {MATCHES_LIMIT}); ignoring rest.",
);
optimized_values.truncate(MATCHES_LIMIT);
}
trace!(" -> returned from ISLE: {orig_value} -> {optimized_values:?}");
// Create a union of all new values with the original (or
// maybe just one new value marked as "subsuming" the
// original, if present.)
let mut union_value = optimized_values.pop().unwrap();
for optimized_value in optimized_values.drain(..) {
trace!(
"Returned from ISLE for {}, got {:?}",
orig_value,
optimized_value
);
if optimized_value == orig_value {
trace!(" -> same as orig value; skipping");
continue;
}
if self.subsume_values.contains(&optimized_value) {
// Merge in the unionfind so canonicalization
// still works, but take *only* the subsuming
// value, and break now.
self.eclasses.union(optimized_value, union_value);
self.func.dfg.merge_facts(optimized_value, union_value);
union_value = optimized_value;
break;
}
let old_union_value = union_value;
union_value = self.func.dfg.union(old_union_value, optimized_value);
self.available_block[union_value] = best_block;
self.stats.union += 1;
trace!(" -> union: now {}", union_value);
self.eclasses.add(union_value);
self.eclasses.union(old_union_value, optimized_value);
self.func.dfg.merge_facts(old_union_value, optimized_value);
self.eclasses.union(old_union_value, union_value);
}
self.rewrite_depth -= 1;
trace!("Decrementing rewrite depth; now {}", self.rewrite_depth);
debug_assert!(self.optimized_values.is_empty());
self.optimized_values = optimized_values;
union_value
}
/// Optimize a "skeleton" instruction, possibly removing
/// it. Returns `true` if the instruction should be removed from
/// the layout.
fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
self.stats.skeleton_inst += 1;
for &result in self.func.dfg.inst_results(inst) {
self.available_block[result] = self.func.layout.inst_block(inst).unwrap();
}
// First, can we try to deduplicate? We need to keep some copy
// of the instruction around because it's side-effecting, but
// we may be able to reuse an earlier instance of it.
if is_mergeable_for_egraph(self.func, inst) {
let result = self.func.dfg.inst_results(inst)[0];
trace!(" -> mergeable side-effecting op {}", inst);
// Does this instruction already exist? If so, add entries to
// the value-map to rewrite uses of its results to the results
// of the original (existing) instruction. If not, optimize
// the new instruction.
//
// Note that we use the "effectful GVN map", which is
// scoped: because effectful ops are not removed from the
// skeleton (`Layout`), we need to be mindful of whether
// our current position is dominated by an instance of the
// instruction. (See #5796 for details.)
let ty = self.func.dfg.ctrl_typevar(inst);
match self
.effectful_gvn_map
.entry((ty, self.func.dfg.insts[inst]))
{
ScopedEntry::Occupied(o) => {
let orig_result = *o.get();
// Hit in GVN map -- reuse value.
self.value_to_opt_value[result] = orig_result;
trace!(" -> merges result {} to {}", result, orig_result);
true
}
ScopedEntry::Vacant(v) => {
// Otherwise, insert it into the value-map.
self.value_to_opt_value[result] = result;
v.insert(result);
trace!(" -> inserts as new (no GVN)");
false
}
}
}
// Otherwise, if a load or store, process it with the alias
// analysis to see if we can optimize it (rewrite in terms of
// an earlier load or stored value).
else if let Some(new_result) =
self.alias_analysis
.process_inst(self.func, self.alias_analysis_state, inst)
{
self.stats.alias_analysis_removed += 1;
let result = self.func.dfg.first_result(inst);
trace!(
" -> inst {} has result {} replaced with {}",
inst,
result,
new_result
);
self.value_to_opt_value[result] = new_result;
self.func.dfg.merge_facts(result, new_result);
true
}
// Otherwise, generic side-effecting op -- always keep it, and
// set its results to identity-map to original values.
else {
// Set all results to identity-map to themselves
// in the value-to-opt-value map.
for &result in self.func.dfg.inst_results(inst) {
self.value_to_opt_value[result] = result;
self.eclasses.add(result);
}
false
}
}
/// Helper to propagate facts on constant values: if PCC is
/// enabled, then unconditionally add a fact attesting to the
/// Value's concrete value.
fn attach_constant_fact(&mut self, inst: Inst, value: Value, ty: Type) {
if self.flags.enable_pcc() {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} = self.func.dfg.insts[inst]
{
let imm: i64 = imm.into();
self.func.dfg.facts[value] =
Some(Fact::constant(ty.bits().try_into().unwrap(), imm as u64));
}
}
}
}
impl<'a> EgraphPass<'a> {
/// Create a new EgraphPass.
pub fn new(
func: &'a mut Function,
raw_domtree: &'a DominatorTree,
loop_analysis: &'a LoopAnalysis,
alias_analysis: &'a mut AliasAnalysis<'a>,
flags: &'a Flags,
ctrl_plane: &'a mut ControlPlane,
) -> Self {
let num_values = func.dfg.num_values();
let mut domtree = DominatorTreePreorder::new();
domtree.compute(raw_domtree, &func.layout);
Self {
func,
domtree,
loop_analysis,
alias_analysis,
flags,
ctrl_plane,
stats: Stats::default(),
eclasses: UnionFind::with_capacity(num_values),
remat_values: FxHashSet::default(),
}
}
/// Run the process.
pub fn run(&mut self) {
self.remove_pure_and_optimize();
trace!("egraph built:\n{}\n", self.func.display());
if cfg!(feature = "trace-log") {
for (value, def) in self.func.dfg.values_and_defs() {
trace!(" -> {} = {:?}", value, def);
match def {
ValueDef::Result(i, 0) => {
trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]);
}
_ => {}
}
}
}
trace!("stats: {:#?}", self.stats);
trace!("pinned_union_count: {}", self.eclasses.pinned_union_count);
self.elaborate();
}
/// Remove pure nodes from the `Layout` of the function, ensuring
/// that only the "side-effect skeleton" remains, and also
/// optimize the pure nodes. This is the first step of
/// egraph-based processing and turns the pure CFG-based CLIF into
/// a CFG skeleton with a sea of (optimized) nodes tying it
/// together.
///
/// As we walk through the code, we eagerly apply optimization
/// rules; at any given point we have a "latest version" of an
/// eclass of possible representations for a `Value` in the
/// original program, which is itself a `Value` at the root of a
/// union-tree. We keep a map from the original values to these
/// optimized values. When we encounter any instruction (pure or
/// side-effecting skeleton) we rewrite its arguments to capture
/// the "latest" optimized forms of these values. (We need to do
/// this as part of this pass, and not later using a finished map,
/// because the eclass can continue to be updated and we need to
/// only refer to its subset that exists at this stage, to
/// maintain acyclicity.)
fn remove_pure_and_optimize(&mut self) {
let mut cursor = FuncCursor::new(self.func);
let mut value_to_opt_value: SecondaryMap<Value, Value> =
SecondaryMap::with_default(Value::reserved_value());
// Map from instruction to value for hash-consing of pure ops
// into the egraph. This can be a standard (non-scoped)
// hashmap because pure ops have no location: they are
// "outside of" control flow.
//
// Note also that we keep the controlling typevar (the `Type`
// in the tuple below) because it may disambiguate
// instructions that are identical except for type.
let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
CtxHashMap::with_capacity(cursor.func.dfg.num_values());
// Map from instruction to value for GVN'ing of effectful but
// idempotent ops, which remain in the side-effecting
// skeleton. This needs to be scoped because we cannot
// deduplicate one instruction to another that is in a
// non-dominating block.
//
// Note that we can use a ScopedHashMap here without the
// "context" (as needed by CtxHashMap) because in practice the
// ops we want to GVN have all their args inline. Equality on
// the InstructionData itself is conservative: two insts whose
// struct contents compare shallowly equal are definitely
// identical, but identical insts in a deep-equality sense may
// not compare shallowly equal, due to list indirection. This
// is fine for GVN, because it is still sound to skip any
// given GVN opportunity (and keep the original instructions).
//
// As above, we keep the controlling typevar here as part of
// the key: effectful instructions may (as for pure
// instructions) be differentiated only on the type.
let mut effectful_gvn_map: ScopedHashMap<(Type, InstructionData), Value> =
ScopedHashMap::new();
// We assign an "available block" to every value. Values tied to
// the side-effecting skeleton are available in the block where
// they're defined. Results from pure instructions could legally
// float up the domtree so they are available as soon as all
// their arguments are available. Values which identify union
// nodes are available in the same block as all values in the
// eclass, enforced by optimize_pure_enode.
let mut available_block: SecondaryMap<Value, Block> =
SecondaryMap::with_default(Block::reserved_value());
// This is an initial guess at the size we'll need, but we add
// more values as we build simplified alternative expressions so
// this is likely to realloc again later.
available_block.resize(cursor.func.dfg.num_values());
// In domtree preorder, visit blocks. (TODO: factor out an
// iterator from this and elaborator.)
let root = cursor.layout().entry_block().unwrap();
enum StackEntry {
Visit(Block),
Pop,
}
let mut block_stack = vec![StackEntry::Visit(root)];
while let Some(entry) = block_stack.pop() {
match entry {
StackEntry::Visit(block) => {
// We popped this block; push children
// immediately, then process this block.
block_stack.push(StackEntry::Pop);
block_stack.extend(
self.ctrl_plane
.shuffled(self.domtree.children(block))
.map(StackEntry::Visit),
);
effectful_gvn_map.increment_depth();
trace!("Processing block {}", block);
cursor.set_position(CursorPosition::Before(block));
let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);
for &param in cursor.func.dfg.block_params(block) {
trace!("creating initial singleton eclass for blockparam {}", param);
self.eclasses.add(param);
value_to_opt_value[param] = param;
available_block[param] = block;
}
while let Some(inst) = cursor.next_inst() {
trace!("Processing inst {}", inst);
// While we're passing over all insts, create initial
// singleton eclasses for all result and blockparam
// values. Also do initial analysis of all inst
// results.
for &result in cursor.func.dfg.inst_results(inst) {
trace!("creating initial singleton eclass for {}", result);
self.eclasses.add(result);
}
// Rewrite args of *all* instructions using the
// value-to-opt-value map.
cursor.func.dfg.map_inst_values(inst, |arg| {
let new_value = value_to_opt_value[arg];
trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
debug_assert_ne!(new_value, Value::reserved_value());
new_value
});
// Build a context for optimization, with borrows of
// state. We can't invoke a method on `self` because
// we've borrowed `self.func` mutably (as
// `cursor.func`) so we pull apart the pieces instead
// here.
let mut ctx = OptimizeCtx {
func: cursor.func,
value_to_opt_value: &mut value_to_opt_value,
gvn_map: &mut gvn_map,
effectful_gvn_map: &mut effectful_gvn_map,
available_block: &mut available_block,
eclasses: &mut self.eclasses,
rewrite_depth: 0,
subsume_values: FxHashSet::default(),
remat_values: &mut self.remat_values,
stats: &mut self.stats,
domtree: &self.domtree,
alias_analysis: self.alias_analysis,
alias_analysis_state: &mut alias_analysis_state,
flags: self.flags,
ctrl_plane: self.ctrl_plane,
optimized_values: Default::default(),
};
if is_pure_for_egraph(ctx.func, inst) {
// Insert into GVN map and optimize any new nodes
// inserted (recursively performing this work for
// any nodes the optimization rules produce).
let inst = NewOrExistingInst::Existing(inst);
ctx.insert_pure_enode(inst);
// We've now rewritten all uses, or will when we
// see them, and the instruction exists as a pure
// enode in the eclass, so we can remove it.
cursor.remove_inst_and_step_back();
} else {
if ctx.optimize_skeleton_inst(inst) {
cursor.remove_inst_and_step_back();
}
}
}
}
StackEntry::Pop => {
effectful_gvn_map.decrement_depth();
}
}
}
}
/// Scoped elaboration: compute a final ordering of op computation
/// for each block and update the given Func body. After this
/// runs, the function body is back into the state where every
/// Inst with an used result is placed in the layout (possibly
/// duplicated, if our code-motion logic decides this is the best
/// option).
///
/// This works in concert with the domtree. We do a preorder
/// traversal of the domtree, tracking a scoped map from Id to
/// (new) Value. The map's scopes correspond to levels in the
/// domtree.
///
/// At each block, we iterate forward over the side-effecting
/// eclasses, and recursively generate their arg eclasses, then
/// emit the ops themselves.
///
/// To use an eclass in a given block, we first look it up in the
/// scoped map, and get the Value if already present. If not, we
/// need to generate it. We emit the extracted enode for this
/// eclass after recursively generating its args. Eclasses are
/// thus computed "as late as possible", but then memoized into
/// the Id-to-Value map and available to all dominated blocks and
/// for the rest of this block. (This subsumes GVN.)
fn elaborate(&mut self) {
let mut elaborator = Elaborator::new(
self.func,
&self.domtree,
self.loop_analysis,
&mut self.remat_values,
&mut self.stats,
self.ctrl_plane,
);
elaborator.elaborate();
self.check_post_egraph();
}
#[cfg(debug_assertions)]
fn check_post_egraph(&self) {
// Verify that no union nodes are reachable from inst args,
// and that all inst args' defining instructions are in the
// layout.
for block in self.func.layout.blocks() {
for inst in self.func.layout.block_insts(block) {
self.func
.dfg
.inst_values(inst)
.for_each(|arg| match self.func.dfg.value_def(arg) {
ValueDef::Result(i, _) => {
debug_assert!(self.func.layout.inst_block(i).is_some());
}
ValueDef::Union(..) => {
panic!("egraph union node {} still reachable at {}!", arg, inst);
}
_ => {}
})
}
}
}
#[cfg(not(debug_assertions))]
fn check_post_egraph(&self) {}
}
/// Implementation of external-context equality and hashing on
/// InstructionData. This allows us to deduplicate instructions given
/// some context that lets us see its value lists and the mapping from
/// any value to "canonical value" (in an eclass).
struct GVNContext<'a> {
value_lists: &'a ValueListPool,
union_find: &'a UnionFind<Value>,
}
impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
fn ctx_eq(
&self,
(a_ty, a_inst): &(Type, InstructionData),
(b_ty, b_inst): &(Type, InstructionData),
) -> bool {
a_ty == b_ty
&& a_inst.eq(b_inst, self.value_lists, |value| {
self.union_find.find(value)
})
}
}
impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
std::hash::Hash::hash(&ty, state);
inst.hash(state, self.value_lists, |value| self.union_find.find(value));
}
}
/// Statistics collected during egraph-based processing.
#[derive(Clone, Debug, Default)]
pub(crate) struct Stats {
pub(crate) pure_inst: u64,
pub(crate) pure_inst_deduped: u64,
pub(crate) skeleton_inst: u64,
pub(crate) alias_analysis_removed: u64,
pub(crate) new_inst: u64,
pub(crate) union: u64,
pub(crate) subsume: u64,
pub(crate) remat: u64,
pub(crate) rewrite_rule_invoked: u64,
pub(crate) rewrite_depth_limit: u64,
pub(crate) elaborate_visit_node: u64,
pub(crate) elaborate_memoize_hit: u64,
pub(crate) elaborate_memoize_miss: u64,
pub(crate) elaborate_remat: u64,
pub(crate) elaborate_licm_hoist: u64,
pub(crate) elaborate_func: u64,
pub(crate) elaborate_func_pre_insts: u64,
pub(crate) elaborate_func_post_insts: u64,
pub(crate) elaborate_best_cost_fixpoint_iters: u64,
}