//! Alias analysis via last-store tracking.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! We never track which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in their `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured by construction during frontend
//! translation into CLIF and during legalization.
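//!
//! For example (schematic CLIF; the exact textual syntax of the
//! category flags may differ):
//!
//! ```text
//! v2 = load.i64 heap v0+8   ;; accesses "heap" abstract state
//! store table v3, v1+16     ;; accesses "table" state; cannot alias the load above
//! ```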
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: we perform a
//! fixpoint analysis to track the last instruction that *might have*
//! written to a given part of abstract state. We also track the block
//! containing this store.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. However, we can derive a
//! must-alias property from this: if, at a given load or store, we
//! look backward to the "last store" *and* find that it has exactly
//! the same address expression and value type, then we know that the
//! current instruction's access *must* be to the same memory
//! location.
//!
//! To get this must-alias property, we leverage the node hashconsing.
//! We design the `Eq`/`Hash` implementation (the node identity
//! relation) of the `Node` struct so that all loads with (i) the same
//! "last store", (ii) the same address expression, and (iii) the same
//! opcode-and-offset will deduplicate (the first will be computed,
//! and later ones will reuse the same value). Furthermore, we have an
//! optimization that rewrites a load into the stored value of the
//! last store *if* the last store has the same address expression and
//! constant offset.
//!
//! This gives us two optimizations, "redundant load elimination" and
//! "store-to-load forwarding".
//!
//! In theory we could also do *dead-store elimination*: if a store
//! overwrites a value earlier written by another store, *and* no
//! other load/store to that abstract state category occurred in
//! between, *and* no other trapping instruction occurred (at which
//! point we need an up-to-date memory state because
//! post-trap-termination memory state can be observed), *and* we can
//! prove the earlier store could not have trapped, then we can
//! eliminate the earlier store. Because this is so complex, and the
//! conditions for doing it correctly when post-trap state must be
//! correct likely reduce the potential benefit, we don't yet do this.

use crate::flowgraph::ControlFlowGraph;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::has_memory_fence_semantics;
use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
use crate::trace;
use cranelift_entity::{EntityRef, SecondaryMap};
use smallvec::{smallvec, SmallVec};

/// For a given program point, the last-store memory state for each
/// disjoint category of abstract state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct LastStores {
    heap: MemoryState,
    table: MemoryState,
    vmctx: MemoryState,
    other: MemoryState,
}

/// State of memory seen by a load.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum MemoryState {
    /// State at function entry: nothing is known (but it is one
    /// consistent value, so two loads from "entry" state at the same
    /// address will still provide the same result).
    #[default]
    Entry,
    /// State just after a store by the given instruction. The
    /// instruction is a store from which we can forward.
    Store(Inst),
    /// State just before the given instruction. Used for abstract
    /// value merges at merge-points when we cannot name a single
    /// producing site.
    BeforeInst(Inst),
    /// State just after the given instruction. Used when the
    /// instruction may update the associated state, but is not a
    /// store whose value we can cleanly forward. (E.g., perhaps a
    /// barrier of some sort.)
    AfterInst(Inst),
}

/// A `MemoryState`, packed into a `u32`: the low two bits are a
/// variant tag and the upper bits hold the instruction index, if any.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PackedMemoryState(u32);

impl From<MemoryState> for PackedMemoryState {
    fn from(state: MemoryState) -> Self {
        match state {
            MemoryState::Entry => Self(0),
            MemoryState::Store(i) => Self(1 | (i.index() as u32) << 2),
            MemoryState::BeforeInst(i) => Self(2 | (i.index() as u32) << 2),
            MemoryState::AfterInst(i) => Self(3 | (i.index() as u32) << 2),
        }
    }
}

impl PackedMemoryState {
    /// Does this memory state refer to a specific store instruction?
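    ///
    /// A minimal round-trip sketch (the instruction reference is made
    /// up via `EntityRef::new` purely for illustration):
    ///
    /// ```ignore
    /// use cranelift_entity::EntityRef;
    /// let inst = Inst::new(5); // arbitrary instruction reference
    /// let packed = PackedMemoryState::from(MemoryState::Store(inst));
    /// assert_eq!(packed.as_store(), Some(inst));
    /// assert_eq!(PackedMemoryState::from(MemoryState::Entry).as_store(), None);
    /// ```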
    pub fn as_store(&self) -> Option<Inst> {
        if self.0 & 3 == 1 {
            Some(Inst::from_bits(self.0 >> 2))
        } else {
            None
        }
    }
}

impl LastStores {
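    /// Update the last-store state to reflect the effect of `inst`:
    /// an instruction with memory-fence semantics, or a store whose
    /// `MemFlags` are unavailable, conservatively clobbers every
    /// category; a store with known flags updates only its own
    /// category.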
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg[inst].opcode();
        if has_memory_fence_semantics(opcode) {
            self.heap = MemoryState::AfterInst(inst);
            self.table = MemoryState::AfterInst(inst);
            self.vmctx = MemoryState::AfterInst(inst);
            self.other = MemoryState::AfterInst(inst);
        } else if opcode.can_store() {
            if let Some(memflags) = func.dfg[inst].memflags() {
                *self.for_flags(memflags) = MemoryState::Store(inst);
            } else {
                self.heap = MemoryState::AfterInst(inst);
                self.table = MemoryState::AfterInst(inst);
                self.vmctx = MemoryState::AfterInst(inst);
                self.other = MemoryState::AfterInst(inst);
            }
        }
    }

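    /// Select the state slot for the disjoint category named by
    /// `memflags`: heap, table, vmctx, or other.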
    fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState {
        if memflags.heap() {
            &mut self.heap
        } else if memflags.table() {
            &mut self.table
        } else if memflags.vmctx() {
            &mut self.vmctx
        } else {
            &mut self.other
        }
    }

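    /// Meet the state flowing in from a predecessor into this state at
    /// a control-flow merge: any category on which the two states
    /// disagree falls back to `BeforeInst(loc)`, where `loc` identifies
    /// the merge point (here, the first instruction of the merge
    /// block).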
    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
        let meet = |a: MemoryState, b: MemoryState| -> MemoryState {
            match (a, b) {
                (a, b) if a == b => a,
                _ => MemoryState::BeforeInst(loc),
            }
        };

        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}

/// An alias-analysis pass.
pub struct AliasAnalysis {
    /// Memory state seen by each load instruction. We use a hash map
    /// rather than a `SecondaryMap` because this mapping is sparse.
    load_mem_state: FxHashMap<Inst, PackedMemoryState>,
}

impl AliasAnalysis {
    /// Perform an alias analysis pass.
    pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis {
        trace!("alias analysis: input is:\n{:?}", func);
        let block_input = Self::compute_block_input_states(func, cfg);
        let load_mem_state = Self::compute_load_last_stores(func, block_input);
        AliasAnalysis { load_mem_state }
    }

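    /// Compute the `LastStores` state at entry to each block by a
    /// forward dataflow fixpoint over the CFG: propagate each block's
    /// output state to its successors, meeting states at merge points,
    /// and re-process successors until nothing changes.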
    fn compute_block_input_states(
        func: &Function,
        cfg: &ControlFlowGraph,
    ) -> SecondaryMap<Block, Option<LastStores>> {
        let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
        let mut worklist: SmallVec<[Block; 16]> = smallvec![];
        let mut worklist_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        worklist.push(entry);
        worklist_set.insert(entry);
        block_input[entry] = Some(LastStores::default());

        while let Some(block) = worklist.pop() {
            worklist_set.remove(&block);
            let state = block_input[block].clone().unwrap();

            trace!("alias analysis: input to {} is {:?}", block, state);

            let state = func
                .layout
                .block_insts(block)
                .fold(state, |mut state, inst| {
                    state.update(func, inst);
                    trace!("after {}: state is {:?}", inst, state);
                    state
                });

            for succ in cfg.succ_iter(block) {
                let succ_first_inst = func.layout.first_inst(succ).unwrap();
                let succ_state = &mut block_input[succ];
                let old = succ_state.clone();
                if let Some(succ_state) = succ_state.as_mut() {
                    succ_state.meet_from(&state, succ_first_inst);
                } else {
                    *succ_state = Some(state);
                };
                let updated = *succ_state != old;

                if updated && worklist_set.insert(succ) {
                    worklist.push(succ);
                }
            }
        }

        block_input
    }

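    /// Walk each block once, starting from its computed input state,
    /// and record for every plain `load` instruction the memory state
    /// it observes for its `MemFlags` category.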
    fn compute_load_last_stores(
        func: &Function,
        block_input: SecondaryMap<Block, Option<LastStores>>,
    ) -> FxHashMap<Inst, PackedMemoryState> {
        let mut load_mem_state = FxHashMap::default();
        load_mem_state.reserve(func.dfg.num_insts() / 8);

        for block in func.layout.blocks() {
            let mut state = block_input[block].clone().unwrap();

            for inst in func.layout.block_insts(block) {
                trace!(
                    "alias analysis: scanning at {} with state {:?} ({:?})",
                    inst,
                    state,
                    func.dfg[inst],
                );

                // N.B.: we match `Load` specifically, and not any
                // other kinds of loads (or any opcode such that
                // `opcode.can_load()` returns true), because some
                // "can load" instructions actually have very
                // different semantics (are not just a load of a
                // particularly-typed value). For example, atomic
                // (load/store, RMW, CAS) instructions "can load" but
                // definitely should not participate in store-to-load
                // forwarding or redundant-load elimination. Our goal
                // here is to provide a `MemoryState` just for plain
                // old loads whose semantics we can completely reason
                // about.
                if let InstructionData::Load {
                    opcode: Opcode::Load,
                    flags,
                    ..
                } = func.dfg[inst]
                {
                    let mem_state = *state.for_flags(flags);
                    trace!(
                        "alias analysis: at {}: load with mem_state {:?}",
                        inst,
                        mem_state,
                    );

                    load_mem_state.insert(inst, mem_state.into());
                }

                state.update(func, inst);
            }
        }

        load_mem_state
    }

    /// Get the state seen by a load, if any.
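    ///
    /// A minimal usage sketch (hypothetical caller; `func`, `cfg`, and
    /// `load_inst` are assumed to be built elsewhere):
    ///
    /// ```ignore
    /// let aa = AliasAnalysis::new(&func, &cfg);
    /// if let Some(state) = aa.get_state_for_load(load_inst) {
    ///     if let Some(store) = state.as_store() {
    ///         // The last store to this load's category is `store`; its
    ///         // value may be forwarded if the address and type match.
    ///     }
    /// }
    /// ```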
    pub fn get_state_for_load(&self, inst: Inst) -> Option<PackedMemoryState> {
        self.load_mem_state.get(&inst).copied()
    }
}