//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners
// that detects multiple performance issues via separate sub-tools.
//
// The instrumentation phase is straightforward:
//   - Take action on every memory access: either inlined instrumentation,
//     or inserted calls to our run-time library.
//   - Optimizations may be applied to avoid instrumenting some of the accesses.
//   - Turn mem{set,cpy,move} intrinsics into library calls.
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "esan"

// Exactly one of these ClTool* options selects the tool; the tools cannot
// be combined due to shadow memory constraints.
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.

static cl::opt<bool> ClInstrumentLoadsAndStores(
    "esan-instrument-loads-and-stores", cl::init(true),
    cl::desc("Instrument loads and stores"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemIntrinsics(
    "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
static cl::opt<bool> ClInstrumentFastpath(
    "esan-instrument-fastpath", cl::init(true),
    cl::desc("Instrument fastpath"), cl::Hidden);
static cl::opt<bool> ClAuxFieldInfo(
    "esan-aux-field-info", cl::init(true),
    cl::desc("Generate binary with auxiliary struct field information"),
    cl::Hidden);

// Experiments show that the performance difference can be 2x or more,
// and accuracy loss is typically negligible, so we turn this on by default.
static cl::opt<bool> ClAssumeIntraCacheLine(
    "esan-assume-intra-cache-line", cl::init(true),
    cl::desc("Assume each memory access touches just one cache line, for "
             "better performance but with a potential loss of accuracy."),
    cl::Hidden);

STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
STATISTIC(NumAccessesWithIrregularSize,
          "Number of accesses with a size outside our targeted callout sizes");
STATISTIC(NumIgnoredStructs, "Number of ignored structs");
STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
STATISTIC(NumAssumedIntraCacheLine,
          "Number of accesses assumed to be intra-cache-line");

static const uint64_t EsanCtorAndDtorPriority = 0;
static const char *const EsanModuleCtorName = "esan.module_ctor";
static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";
// In some cases we need to tell the runtime which tool is in use before the
// ctor runs, so we also set a global variable.
static const char *const EsanWhichToolName = "__esan_which_tool";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
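// ShadowMask retains the low 44 bits of the application address
// (0x00000fffffffffff == (1ULL << 44) - 1).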
static const uint64_t ShadowMask = 0x00000fffffffffffull;
static const uint64_t ShadowOffs[3] = { // Indexed by scale
  0x0000130000000000ull,
  0x0000220000000000ull,
  0x0000440000000000ull,
};
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
  0, // ESAN_None.
  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};

// MaxStructCounterNameSize is a soft size limit to avoid insanely long
// names for those extremely large structs.
static const unsigned MaxStructCounterNameSize = 512;

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;

  return Options;
}

// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
                                                    bool AllowMerging) {
  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
  // We use private linkage for module-local strings. If they can be merged
  // with another one, we set the unnamed_addr attribute.
  GlobalVariable *GV =
      new GlobalVariable(M, StrConst->getType(), true,
                         GlobalValue::PrivateLinkage, StrConst, "");
  if (AllowMerging)
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
  return GV;
}

/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  const char *getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  bool initOnModule(Module &M);
  void initializeCallbacks(Module &M);
  bool shouldIgnoreStructType(StructType *StructTy);
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  void createCacheFragAuxGV(
      Module &M, const DataLayout &DL, StructType *StructTy,
      GlobalVariable *&TypeName, GlobalVariable *&Offset,
      GlobalVariable *&Size);
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  void createDestructor(Module &M, Constant *ToolInfoArg);
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0;
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
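    // For example, a struct with 3 fields gets a 4-slot counter array:
    //   [field0, field1, field2, array].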
    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
  }
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
  static const size_t NumberOfAccessSizes = 5;
  Function *EsanAlignedLoad[NumberOfAccessSizes];
  Function *EsanAlignedStore[NumberOfAccessSizes];
  Function *EsanUnalignedLoad[NumberOfAccessSizes];
  Function *EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
};
} // namespace

char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)

const char *EfficiencySanitizer::getPassName() const {
  return "EfficiencySanitizer";
}

void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}

ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
  return new EfficiencySanitizer(Options);
}

void EfficiencySanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(M.getContext());
  // Initialize the callbacks.
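  // For Idx 0..4 (ByteSize 1, 2, 4, 8, 16) this declares the slowpath entry
  // points __esan_{aligned,unaligned}_{load,store}{1,2,4,8,16}.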
  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    const unsigned ByteSize = 1U << Idx;
    std::string ByteSizeStr = utostr(ByteSize);
    // We'll inline instrumentation for the most common cases (i.e., the
    // aligned, frequent sizes); these callouts are for the slowpath.
    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    EsanAlignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    EsanAlignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    EsanUnalignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    EsanUnalignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
  }
  EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemmoveFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemcpyFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemsetFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt32Ty(), IntptrTy, nullptr));
}

bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    return true;
  return false;
}

void EfficiencySanitizer::createStructCounterName(
    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
  // Append NumFields and the field type IDs to disambiguate structs that
  // have the same name but different fields.
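  // For example, a "struct.Foo { i32, i8* }" would get a name like
  // "struct.Foo#2#15#11" (type IDs appended in reverse field order; the
  // exact ID values depend on this version's Type::TypeID enum).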
  if (StructTy->hasName())
    NameStr += StructTy->getName();
  else
    NameStr += "struct.anon";
  // We allow the actual name to grow beyond MaxStructCounterNameSize:
  // we always append #NumFields and at least one field type ID.
  // Append #NumFields.
  NameStr += "#";
  Twine(StructTy->getNumElements()).toVector(NameStr);
  // Append struct field type IDs in reverse order.
  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    NameStr += "#";
    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    if (NameStr.size() >= MaxStructCounterNameSize)
      break;
  }
  if (StructTy->isLiteral()) {
    // End with # for a literal struct.
    NameStr += "#";
  }
}

// Create global variables with auxiliary information (e.g., struct field size,
// offset, and type name) for better user reports.
void EfficiencySanitizer::createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeName, GlobalVariable *&Offset,
    GlobalVariable *&Size) {
  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  // FieldTypeName.
  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
                                GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> TypeNameVec;
  // FieldOffset.
  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Offset = new GlobalVariable(M, OffsetArrayTy, true,
                              GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> OffsetVec;
  // FieldSize.
  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Size = new GlobalVariable(M, SizeArrayTy, true,
                            GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> SizeVec;
  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    Type *Ty = StructTy->getElementType(i);
    std::string Str;
    raw_string_ostream StrOS(Str);
    Ty->print(StrOS);
    TypeNameVec.push_back(
        ConstantExpr::getPointerCast(
            createPrivateGlobalForString(M, StrOS.str(), true),
            Int8PtrTy));
    OffsetVec.push_back(
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getElementOffset(i)));
    SizeVec.push_back(ConstantInt::get(Int32Ty,
                                       DL.getTypeAllocSize(Ty)));
  }
  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
}

// Create the global variable for the cache-fragmentation tool.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr);

  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    // StructName.
    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // Counters.
    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
        new GlobalVariable(M, CounterArrayTy, false,
                           GlobalVariable::WeakAnyLinkage,
                           ConstantAggregateZero::get(CounterArrayTy),
                           CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    Initializers.push_back(
        ConstantStruct::get(
            StructInfoTy,
            ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
            ConstantInt::get(Int32Ty,
                             DL.getStructLayout(StructTy)->getSizeInBytes()),
            ConstantInt::get(Int32Ty, StructTy->getNumElements()),
            Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Offset, Int32PtrTy),
            Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Size, Int32PtrTy),
            TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) :
                ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           FieldCounterIdx),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           ArrayCounterIdx),
            nullptr));
  }
  // Structs.
  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy,
                          UnitName,
                          ConstantInt::get(Int32Ty, NumStructs),
                          StructInfo,
                          nullptr));
  return CacheFragInfoGV;
}

// Create the tool-specific argument passed to EsanInit and EsanExit.
Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
                                                         const DataLayout &DL) {
  // This structure contains tool-specific information about each compilation
  // unit (module) and is passed to the runtime library.
  GlobalVariable *ToolInfoGV = nullptr;

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  // Compilation unit name.
  auto *UnitName = ConstantExpr::getPointerCast(
      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
      Int8PtrTy);

  // Create the tool-specific variable.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);

  if (ToolInfoGV != nullptr)
    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);

  // Return a null pointer if no tool-specific variable was created.
  return ConstantPointerNull::get(Int8PtrTy);
}

void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
                                                        false),
                                      GlobalValue::InternalLinkage,
                                      EsanModuleDtorName, &M);
  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
  Function *EsanExit = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
                            Int8PtrTy, nullptr));
  EsanExit->setLinkage(Function::ExternalLinkage);
  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
}

bool EfficiencySanitizer::initOnModule(Module &M) {
  Ctx = &M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> IRB(M.getContext());
  IntegerType *OrdTy = IRB.getInt32Ty();
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  IntptrTy = DL.getIntPtrType(M.getContext());
  // Create the variable passed to EsanInit and EsanExit.
  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
  // Constructor
  // We specify the tool type both in the EsanWhichToolName global
  // and as an arg to the init routine as a sanity check.
  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
      /*InitArgs=*/{
          ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
          ToolInfoArg});
  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);

  createDestructor(M, ToolInfoArg);

  new GlobalVariable(M, OrdTy, true,
                     GlobalValue::WeakAnyLinkage,
                     ConstantInt::get(OrdTy,
                                      static_cast<int>(Options.ToolType)),
                     EsanWhichToolName);

  return true;
}

Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
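  // For example, with the working-set tool (Scale == 6, so
  // Offs == ShadowOffs[0] << 6 == 0x0004c00000000000), app address
  // 0x00007f0000001000 maps to:
  //   ((0x00007f0000001000 & ShadowMask) + 0x0004c00000000000) >> 6
  //     == 0x000133c000000040.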
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowOffs[Scale];
  else
    Offs = ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
}

bool EfficiencySanitizer::runOnModule(Module &M) {
  bool Res = initOnModule(M);
  initializeCallbacks(M);
  for (auto &F : M) {
    Res |= runOnFunction(F, M);
  }
  return Res;
}

bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
  // This is required to prevent instrumenting the call to __esan_init from
  // within the module constructor.
  if (&F == EsanCtorFunction)
    return false;
  SmallVector<Instruction *, 8> LoadsAndStores;
  SmallVector<Instruction *, 8> MemIntrinCalls;
  SmallVector<Instruction *, 8> GetElementPtrs;
  bool Res = false;
  const DataLayout &DL = M.getDataLayout();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
          !shouldIgnoreMemoryAccess(&Inst))
        LoadsAndStores.push_back(&Inst);
      else if (isa<MemIntrinsic>(Inst))
        MemIntrinCalls.push_back(&Inst);
      else if (isa<GetElementPtrInst>(Inst))
        GetElementPtrs.push_back(&Inst);
      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    }
  }

  if (ClInstrumentLoadsAndStores) {
    for (auto Inst : LoadsAndStores) {
      Res |= instrumentLoadOrStore(Inst, DL);
    }
  }

  if (ClInstrumentMemIntrinsics) {
    for (auto Inst : MemIntrinCalls) {
      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
  }

  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    for (auto Inst : GetElementPtrs) {
      Res |= instrumentGetElementPtr(Inst, M);
    }
  }

  return Res;
}

bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
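  // A negative index means an irregular size with no per-size entry point:
  // fall back to the generic __esan_unaligned_{load,store}N callout.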
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
  if (Idx < 0) {
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}

// It's simplest to replace the memset/memmove/memcpy intrinsics with
// calls that the runtime library intercepts.
// Our pass is late enough that calls should not turn back into intrinsics.
bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  bool Res = false;
  if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemsetFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else
    llvm_unreachable("Unsupported mem intrinsic type");
  return Res;
}

bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
  bool Res = false;
  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    ++NumIgnoredGEPs;
    return false;
  }
  Type *SourceTy = GepInst->getSourceElementType();
  StructType *StructTy;
  ConstantInt *Idx;
  // Check if the GEP calculates an address from a struct array.
  if (isa<StructType>(SourceTy)) {
    StructTy = cast<StructType>(SourceTy);
    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
  }
  // Walk each index of the GEP (except the first and the last) to catch
  // nested struct field address calculations.
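  // For example, for
  //   %f = getelementptr %struct.S, %struct.S* %p, i64 0, i32 1
  // the iteration at i == 1 sees the typed prefix {0}, which indexes
  // %struct.S itself, and so updates the counter for field 1 of S.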
  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
                                   GepInst->idx_begin() + i);
    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    unsigned CounterIdx = 0;
    if (isa<ArrayType>(Ty)) {
      ArrayType *ArrayTy = cast<ArrayType>(Ty);
      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // The last counter for struct array access.
      CounterIdx = getArrayCounterIdx(StructTy);
    } else if (isa<StructType>(Ty)) {
      StructTy = cast<StructType>(Ty);
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // Get the StructTy's subfield index.
      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
      assert(Idx->getSExtValue() >= 0 &&
             Idx->getSExtValue() < StructTy->getNumElements());
      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    }
    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
  }
  if (Res)
    ++NumInstrumentedGEPs;
  else
    ++NumIgnoredGEPs;
  return Res;
}

bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
                                              StructType *StructTy,
                                              unsigned CounterIdx) {
  GlobalVariable *CounterArray = StructTyMap[StructTy];
  if (CounterArray == nullptr)
    return false;
  IRBuilder<> IRB(I);
  Constant *Indices[2];
  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
  // http://llvm.org/docs/GetElementPtr.html.
  // The first index of the GEP instruction steps through the first operand,
  // i.e., the array itself.
  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
  // The second index is the index within the array.
  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
  Constant *Counter =
      ConstantExpr::getGetElementPtr(
          ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
          CounterArray, Indices);
  Value *Load = IRB.CreateLoad(Counter);
  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
                  Counter);
  return true;
}

int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
                                                  const DataLayout &DL) {
  Type *OrigPtrTy = Addr->getType();
  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
  assert(OrigTy->isSized());
  // The store size in bits is always a multiple of 8, so the division below
  // is exact.
  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    // Irregular sizes do not have per-size call targets.
    NumAccessesWithIrregularSize++;
    return -1;
  }
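  // Sizes 1, 2, 4, 8, 16 map to indices 0-4 via their trailing zero count
  // (e.g., countTrailingZeros(4) == 2, selecting the *4 entry points).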
  size_t Idx = countTrailingZeros(TypeSizeBytes);
  assert(Idx < NumberOfAccessSizes);
  return Idx;
}

bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             const DataLayout &DL, bool IsStore,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
                                                      const DataLayout &DL,
                                                      Value *Addr,
                                                      unsigned Alignment) {
  // Do nothing.
  return true; // Return true to avoid slowpath instrumentation.
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application. Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set. We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}