| //======- CFLGraph.h - Abstract stratified sets implementation. --------======// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// This file defines CFLGraph, an auxiliary data structure used by CFL-based |
| /// alias analysis. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_ANALYSIS_CFLGRAPH_H |
| #define LLVM_ANALYSIS_CFLGRAPH_H |
| |
| #include "AliasAnalysisSummary.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/Analysis/MemoryBuiltins.h" |
| #include "llvm/IR/InstVisitor.h" |
| #include "llvm/IR/Instructions.h" |
| |
| namespace llvm { |
| namespace cflaa { |
| |
| /// \brief The Program Expression Graph (PEG) of CFL analysis |
| /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to |
| /// describe flow-insensitive pointer-related behaviors. Given an LLVM function, |
| /// the main purpose of this graph is to abstract away unrelated facts and |
| /// translate the rest into a form that can be easily digested by CFL analyses. |
| /// Each Node in the graph is an InstantiatedValue, and each edge represent a |
| /// pointer assignment between InstantiatedValue. Pointer |
| /// references/dereferences are not explicitly stored in the graph: we |
| /// implicitly assume that for each node (X, I) it has a dereference edge to (X, |
| /// I+1) and a reference edge to (X, I-1). |
| class CFLGraph { |
| public: |
| typedef InstantiatedValue Node; |
| |
| struct Edge { |
| Node Other; |
| }; |
| |
| typedef std::vector<Edge> EdgeList; |
| |
| struct NodeInfo { |
| EdgeList Edges; |
| AliasAttrs Attr; |
| }; |
| |
| class ValueInfo { |
| std::vector<NodeInfo> Levels; |
| |
| public: |
| bool addNodeToLevel(unsigned Level) { |
| auto NumLevels = Levels.size(); |
| if (NumLevels > Level) |
| return false; |
| Levels.resize(Level + 1); |
| return true; |
| } |
| |
| NodeInfo &getNodeInfoAtLevel(unsigned Level) { |
| assert(Level < Levels.size()); |
| return Levels[Level]; |
| } |
| const NodeInfo &getNodeInfoAtLevel(unsigned Level) const { |
| assert(Level < Levels.size()); |
| return Levels[Level]; |
| } |
| |
| unsigned getNumLevels() const { return Levels.size(); } |
| }; |
| |
| private: |
| typedef DenseMap<Value *, ValueInfo> ValueMap; |
| ValueMap ValueImpls; |
| |
| const NodeInfo *getNode(Node N) const { |
| auto Itr = ValueImpls.find(N.Val); |
| if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) |
| return nullptr; |
| return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); |
| } |
| NodeInfo *getNode(Node N) { |
| auto Itr = ValueImpls.find(N.Val); |
| if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) |
| return nullptr; |
| return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); |
| } |
| |
| public: |
| typedef ValueMap::const_iterator const_value_iterator; |
| |
| bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) { |
| assert(N.Val != nullptr); |
| auto &ValInfo = ValueImpls[N.Val]; |
| auto Changed = ValInfo.addNodeToLevel(N.DerefLevel); |
| ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr; |
| return Changed; |
| } |
| |
| void addAttr(Node N, AliasAttrs Attr) { |
| auto *Info = getNode(N); |
| assert(Info != nullptr); |
| Info->Attr |= Attr; |
| } |
| |
| void addEdge(Node From, Node To, int64_t Offset = 0) { |
| assert(getNode(To) != nullptr); |
| |
| auto *FromInfo = getNode(From); |
| assert(FromInfo != nullptr); |
| FromInfo->Edges.push_back(Edge{To}); |
| } |
| |
| AliasAttrs attrFor(Node N) const { |
| auto *Info = getNode(N); |
| assert(Info != nullptr); |
| return Info->Attr; |
| } |
| |
| iterator_range<const_value_iterator> value_mappings() const { |
| return make_range<const_value_iterator>(ValueImpls.begin(), |
| ValueImpls.end()); |
| } |
| }; |
| |
| ///\brief A builder class used to create CFLGraph instance from a given function |
| /// The CFL-AA that uses this builder must provide its own type as a template |
| /// argument. This is necessary for interprocedural processing: CFLGraphBuilder |
| /// needs a way of obtaining the summary of other functions when callinsts are |
| /// encountered. |
| /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public |
| /// member function that takes a Function& and returns the corresponding summary |
| /// as a const AliasSummary*. |
| template <typename CFLAA> class CFLGraphBuilder { |
| // Input of the builder |
| CFLAA &Analysis; |
| const TargetLibraryInfo &TLI; |
| |
| // Output of the builder |
| CFLGraph Graph; |
| SmallVector<Value *, 4> ReturnedValues; |
| |
| // Helper class |
| /// Gets the edges our graph should have, based on an Instruction* |
| class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { |
| CFLAA &AA; |
| const TargetLibraryInfo &TLI; |
| |
| CFLGraph &Graph; |
| SmallVectorImpl<Value *> &ReturnValues; |
| |
| static bool hasUsefulEdges(ConstantExpr *CE) { |
| // ConstantExpr doesn't have terminators, invokes, or fences, so only |
| // needs |
| // to check for compares. |
| return CE->getOpcode() != Instruction::ICmp && |
| CE->getOpcode() != Instruction::FCmp; |
| } |
| |
| // Returns possible functions called by CS into the given SmallVectorImpl. |
| // Returns true if targets found, false otherwise. |
| static bool getPossibleTargets(CallSite CS, |
| SmallVectorImpl<Function *> &Output) { |
| if (auto *Fn = CS.getCalledFunction()) { |
| Output.push_back(Fn); |
| return true; |
| } |
| |
| // TODO: If the call is indirect, we might be able to enumerate all |
| // potential |
| // targets of the call and return them, rather than just failing. |
| return false; |
| } |
| |
| void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) { |
| assert(Val != nullptr && Val->getType()->isPointerTy()); |
| if (auto GVal = dyn_cast<GlobalValue>(Val)) { |
| if (Graph.addNode(InstantiatedValue{GVal, 0}, |
| getGlobalOrArgAttrFromValue(*GVal))) |
| Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown()); |
| } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) { |
| if (hasUsefulEdges(CExpr)) { |
| if (Graph.addNode(InstantiatedValue{CExpr, 0})) |
| visitConstantExpr(CExpr); |
| } |
| } else |
| Graph.addNode(InstantiatedValue{Val, 0}, Attr); |
| } |
| |
| void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) { |
| assert(From != nullptr && To != nullptr); |
| if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) |
| return; |
| addNode(From); |
| if (To != From) { |
| addNode(To); |
| Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0}, |
| Offset); |
| } |
| } |
| |
| void addDerefEdge(Value *From, Value *To) { |
| assert(From != nullptr && To != nullptr); |
| if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) |
| return; |
| addNode(From); |
| addNode(To); |
| Graph.addNode(InstantiatedValue{From, 1}); |
| Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0}); |
| } |
| |
| public: |
| GetEdgesVisitor(CFLGraphBuilder &Builder) |
| : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph), |
| ReturnValues(Builder.ReturnedValues) {} |
| |
| void visitInstruction(Instruction &) { |
| llvm_unreachable("Unsupported instruction encountered"); |
| } |
| |
| void visitReturnInst(ReturnInst &Inst) { |
| if (auto RetVal = Inst.getReturnValue()) { |
| if (RetVal->getType()->isPointerTy()) { |
| addNode(RetVal); |
| ReturnValues.push_back(RetVal); |
| } |
| } |
| } |
| |
| void visitPtrToIntInst(PtrToIntInst &Inst) { |
| auto *Ptr = Inst.getOperand(0); |
| addNode(Ptr, getAttrEscaped()); |
| } |
| |
| void visitIntToPtrInst(IntToPtrInst &Inst) { |
| auto *Ptr = &Inst; |
| addNode(Ptr, getAttrUnknown()); |
| } |
| |
| void visitCastInst(CastInst &Inst) { |
| auto *Src = Inst.getOperand(0); |
| addAssignEdge(Src, &Inst); |
| } |
| |
| void visitBinaryOperator(BinaryOperator &Inst) { |
| auto *Op1 = Inst.getOperand(0); |
| auto *Op2 = Inst.getOperand(1); |
| addAssignEdge(Op1, &Inst); |
| addAssignEdge(Op2, &Inst); |
| } |
| |
| void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getNewValOperand(); |
| addDerefEdge(Ptr, Val); |
| } |
| |
| void visitAtomicRMWInst(AtomicRMWInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getValOperand(); |
| addDerefEdge(Ptr, Val); |
| } |
| |
| void visitPHINode(PHINode &Inst) { |
| for (Value *Val : Inst.incoming_values()) |
| addAssignEdge(Val, &Inst); |
| } |
| |
| void visitGetElementPtrInst(GetElementPtrInst &Inst) { |
| auto *Op = Inst.getPointerOperand(); |
| addAssignEdge(Op, &Inst); |
| } |
| |
| void visitSelectInst(SelectInst &Inst) { |
| // Condition is not processed here (The actual statement producing |
| // the condition result is processed elsewhere). For select, the |
| // condition is evaluated, but not loaded, stored, or assigned |
| // simply as a result of being the condition of a select. |
| |
| auto *TrueVal = Inst.getTrueValue(); |
| auto *FalseVal = Inst.getFalseValue(); |
| addAssignEdge(TrueVal, &Inst); |
| addAssignEdge(FalseVal, &Inst); |
| } |
| |
| void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); } |
| |
| void visitLoadInst(LoadInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = &Inst; |
| addDerefEdge(Ptr, Val); |
| } |
| |
| void visitStoreInst(StoreInst &Inst) { |
| auto *Ptr = Inst.getPointerOperand(); |
| auto *Val = Inst.getValueOperand(); |
| addDerefEdge(Ptr, Val); |
| } |
| |
| void visitVAArgInst(VAArgInst &Inst) { |
| // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it |
| // does |
| // two things: |
| // 1. Loads a value from *((T*)*Ptr). |
| // 2. Increments (stores to) *Ptr by some target-specific amount. |
| // For now, we'll handle this like a landingpad instruction (by placing |
| // the |
| // result in its own group, and having that group alias externals). |
| addNode(&Inst, getAttrUnknown()); |
| } |
| |
| static bool isFunctionExternal(Function *Fn) { |
| return !Fn->hasExactDefinition(); |
| } |
| |
| bool tryInterproceduralAnalysis(CallSite CS, |
| const SmallVectorImpl<Function *> &Fns) { |
| assert(Fns.size() > 0); |
| |
| if (CS.arg_size() > MaxSupportedArgsInSummary) |
| return false; |
| |
| // Exit early if we'll fail anyway |
| for (auto *Fn : Fns) { |
| if (isFunctionExternal(Fn) || Fn->isVarArg()) |
| return false; |
| // Fail if the caller does not provide enough arguments |
| assert(Fn->arg_size() <= CS.arg_size()); |
| if (!AA.getAliasSummary(*Fn)) |
| return false; |
| } |
| |
| for (auto *Fn : Fns) { |
| auto Summary = AA.getAliasSummary(*Fn); |
| assert(Summary != nullptr); |
| |
| auto &RetParamRelations = Summary->RetParamRelations; |
| for (auto &Relation : RetParamRelations) { |
| auto IRelation = instantiateExternalRelation(Relation, CS); |
| if (IRelation.hasValue()) { |
| Graph.addNode(IRelation->From); |
| Graph.addNode(IRelation->To); |
| Graph.addEdge(IRelation->From, IRelation->To); |
| } |
| } |
| |
| auto &RetParamAttributes = Summary->RetParamAttributes; |
| for (auto &Attribute : RetParamAttributes) { |
| auto IAttr = instantiateExternalAttribute(Attribute, CS); |
| if (IAttr.hasValue()) |
| Graph.addNode(IAttr->IValue, IAttr->Attr); |
| } |
| } |
| |
| return true; |
| } |
| |
| void visitCallSite(CallSite CS) { |
| auto Inst = CS.getInstruction(); |
| |
| // Make sure all arguments and return value are added to the graph first |
| for (Value *V : CS.args()) |
| if (V->getType()->isPointerTy()) |
| addNode(V); |
| if (Inst->getType()->isPointerTy()) |
| addNode(Inst); |
| |
| // Check if Inst is a call to a library function that |
| // allocates/deallocates |
| // on the heap. Those kinds of functions do not introduce any aliases. |
| // TODO: address other common library functions such as realloc(), |
| // strdup(), |
| // etc. |
| if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || |
| isFreeCall(Inst, &TLI)) |
| return; |
| |
| // TODO: Add support for noalias args/all the other fun function |
| // attributes |
| // that we can tack on. |
| SmallVector<Function *, 4> Targets; |
| if (getPossibleTargets(CS, Targets)) |
| if (tryInterproceduralAnalysis(CS, Targets)) |
| return; |
| |
| // Because the function is opaque, we need to note that anything |
| // could have happened to the arguments (unless the function is marked |
| // readonly or readnone), and that the result could alias just about |
| // anything, too (unless the result is marked noalias). |
| if (!CS.onlyReadsMemory()) |
| for (Value *V : CS.args()) { |
| if (V->getType()->isPointerTy()) { |
| // The argument itself escapes. |
| Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped()); |
| // The fate of argument memory is unknown. Note that since |
| // AliasAttrs is transitive with respect to dereference, we only |
| // need to specify it for the first-level memory. |
| Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown()); |
| } |
| } |
| |
| if (Inst->getType()->isPointerTy()) { |
| auto *Fn = CS.getCalledFunction(); |
| if (Fn == nullptr || !Fn->doesNotAlias(0)) |
| // No need to call addNode() since we've added Inst at the |
| // beginning of this function and we know it is not a global. |
| Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); |
| } |
| } |
| |
| /// Because vectors/aggregates are immutable and unaddressable, there's |
| /// nothing we can do to coax a value out of them, other than calling |
| /// Extract{Element,Value}. We can effectively treat them as pointers to |
| /// arbitrary memory locations we can store in and load from. |
| void visitExtractElementInst(ExtractElementInst &Inst) { |
| auto *Ptr = Inst.getVectorOperand(); |
| auto *Val = &Inst; |
| addDerefEdge(Ptr, Val); |
| } |
| |
| void visitInsertElementInst(InsertElementInst &Inst) { |
| auto *Vec = Inst.getOperand(0); |
| auto *Val = Inst.getOperand(1); |
| addAssignEdge(Vec, &Inst); |
| addDerefEdge(&Inst, Val); |
| } |
| |
| void visitLandingPadInst(LandingPadInst &Inst) { |
| // Exceptions come from "nowhere", from our analysis' perspective. |
| // So we place the instruction its own group, noting that said group may |
| // alias externals |
| addNode(&Inst, getAttrUnknown()); |
| } |
| |
| void visitInsertValueInst(InsertValueInst &Inst) { |
| auto *Agg = Inst.getOperand(0); |
| auto *Val = Inst.getOperand(1); |
| addAssignEdge(Agg, &Inst); |
| addDerefEdge(&Inst, Val); |
| } |
| |
| void visitExtractValueInst(ExtractValueInst &Inst) { |
| auto *Ptr = Inst.getAggregateOperand(); |
| addDerefEdge(Ptr, &Inst); |
| } |
| |
| void visitShuffleVectorInst(ShuffleVectorInst &Inst) { |
| auto *From1 = Inst.getOperand(0); |
| auto *From2 = Inst.getOperand(1); |
| addAssignEdge(From1, &Inst); |
| addAssignEdge(From2, &Inst); |
| } |
| |
| void visitConstantExpr(ConstantExpr *CE) { |
| switch (CE->getOpcode()) { |
| default: |
| llvm_unreachable("Unknown instruction type encountered!"); |
| // Build the switch statement using the Instruction.def file. |
| #define HANDLE_INST(NUM, OPCODE, CLASS) \ |
| case Instruction::OPCODE: \ |
| this->visit##OPCODE(*(CLASS *)CE); \ |
| break; |
| #include "llvm/IR/Instruction.def" |
| } |
| } |
| }; |
| |
| // Helper functions |
| |
| // Determines whether or not we an instruction is useless to us (e.g. |
| // FenceInst) |
| static bool hasUsefulEdges(Instruction *Inst) { |
| bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) && |
| !isa<InvokeInst>(Inst) && |
| !isa<ReturnInst>(Inst); |
| return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && |
| !IsNonInvokeRetTerminator; |
| } |
| |
| void addArgumentToGraph(Argument &Arg) { |
| if (Arg.getType()->isPointerTy()) { |
| Graph.addNode(InstantiatedValue{&Arg, 0}, |
| getGlobalOrArgAttrFromValue(Arg)); |
| // Pointees of a formal parameter is known to the caller |
| Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller()); |
| } |
| } |
| |
| // Given an Instruction, this will add it to the graph, along with any |
| // Instructions that are potentially only available from said Instruction |
| // For example, given the following line: |
| // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 |
| // addInstructionToGraph would add both the `load` and `getelementptr` |
| // instructions to the graph appropriately. |
| void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) { |
| if (!hasUsefulEdges(&Inst)) |
| return; |
| |
| Visitor.visit(Inst); |
| } |
| |
| // Builds the graph needed for constructing the StratifiedSets for the given |
| // function |
| void buildGraphFrom(Function &Fn) { |
| GetEdgesVisitor Visitor(*this); |
| |
| for (auto &Bb : Fn.getBasicBlockList()) |
| for (auto &Inst : Bb.getInstList()) |
| addInstructionToGraph(Visitor, Inst); |
| |
| for (auto &Arg : Fn.args()) |
| addArgumentToGraph(Arg); |
| } |
| |
| public: |
| CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn) |
| : Analysis(Analysis), TLI(TLI) { |
| buildGraphFrom(Fn); |
| } |
| |
| const CFLGraph &getCFLGraph() const { return Graph; } |
| const SmallVector<Value *, 4> &getReturnValues() const { |
| return ReturnedValues; |
| } |
| }; |
| } |
| } |
| |
| #endif |