Importing rustc-1.65.0

Bug: 250026064
Test: ./build.py --lto=thin
Change-Id: If144bbb779543363527e335b12d3ae6fbb4be8f0
diff --git a/src/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
index a8132e5..e249c38e 100644
--- a/src/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -42,7 +42,6 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
@@ -680,7 +679,7 @@
     }
   }
 
-  const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+  const MemoryLocation &Loc = OptLoc.value_or(MemoryLocation());
 
   switch (I->getOpcode()) {
   case Instruction::VAArg:
@@ -988,6 +987,28 @@
   return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V);
 }
 
+bool llvm::isEscapeSource(const Value *V) {
+  if (auto *CB = dyn_cast<CallBase>(V))
+    return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CB,
+                                                                        true);
+
+  // The load case works because isNonEscapingLocalObject considers all
+  // stores to be escapes (it passes true for the StoreCaptures argument
+  // to PointerMayBeCaptured).
+  if (isa<LoadInst>(V))
+    return true;
+
+  // The inttoptr case works because isNonEscapingLocalObject considers all
+  // means of converting or equating a pointer to an int (ptrtoint, ptr store
+  // which could be followed by an integer load, ptr<->int compare) as
+  // escaping, and objects located at well-known addresses via platform-specific
+  // means cannot be considered non-escaping local objects.
+  if (isa<IntToPtrInst>(V))
+    return true;
+
+  return false;
+}
+
 bool llvm::isNotVisibleOnUnwind(const Value *Object,
                                 bool &RequiresNoCaptureBeforeUnwind) {
   RequiresNoCaptureBeforeUnwind = false;
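For context on the newly exported helper: a minimal sketch of how an alias-analysis client might combine llvm::isEscapeSource with capture tracking. The wrapper name provablyDistinctFromLocal is illustrative, not part of this change:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/CaptureTracking.h"
    using namespace llvm;

    // A non-escaping local object cannot alias a pointer that can only be
    // obtained through an escape (a call result, a load, or an inttoptr).
    static bool provablyDistinctFromLocal(const Value *EscapeSrc,
                                          const Value *Object) {
      return isEscapeSource(EscapeSrc) && isIdentifiedFunctionLocal(Object) &&
             !PointerMayBeCaptured(Object, /*ReturnCaptures=*/false,
                                   /*StoreCaptures=*/true);
    }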
diff --git a/src/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/src/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 1577f1e..e3446a1 100644
--- a/src/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -9,9 +9,7 @@
 #include "llvm/Analysis/AliasAnalysisEvaluator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
@@ -19,7 +17,6 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -41,30 +38,48 @@
 
 static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
 
-static void PrintResults(AliasResult AR, bool P, const Value *V1,
-                         const Value *V2, const Module *M) {
+static void PrintResults(AliasResult AR, bool P,
+                         std::pair<const Value *, Type *> Loc1,
+                         std::pair<const Value *, Type *> Loc2,
+                         const Module *M) {
   if (PrintAll || P) {
+    Type *Ty1 = Loc1.second, *Ty2 = Loc2.second;
+    unsigned AS1 = Loc1.first->getType()->getPointerAddressSpace();
+    unsigned AS2 = Loc2.first->getType()->getPointerAddressSpace();
     std::string o1, o2;
     {
       raw_string_ostream os1(o1), os2(o2);
-      V1->printAsOperand(os1, true, M);
-      V2->printAsOperand(os2, true, M);
+      Loc1.first->printAsOperand(os1, false, M);
+      Loc2.first->printAsOperand(os2, false, M);
     }
 
     if (o2 < o1) {
       std::swap(o1, o2);
+      std::swap(Ty1, Ty2);
+      std::swap(AS1, AS2);
       // Change offset sign for the local AR, for printing only.
       AR.swap();
     }
-    errs() << "  " << AR << ":\t" << o1 << ", " << o2 << "\n";
+    errs() << "  " << AR << ":\t";
+    Ty1->print(errs(), false, /* NoDetails */ true);
+    if (AS1 != 0)
+      errs() << " addrspace(" << AS1 << ")";
+    errs() << "* " << o1 << ", ";
+    Ty2->print(errs(), false, /* NoDetails */ true);
+    if (AS2 != 0)
+      errs() << " addrspace(" << AS2 << ")";
+    errs() << "* " << o2 << "\n";
   }
 }
 
-static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
-                                      Value *Ptr, Module *M) {
+static inline void PrintModRefResults(
+    const char *Msg, bool P, Instruction *I,
+    std::pair<const Value *, Type *> Loc, Module *M) {
   if (PrintAll || P) {
     errs() << "  " << Msg << ":  Ptr: ";
-    Ptr->printAsOperand(errs(), true, M);
+    Loc.second->print(errs(), false, /* NoDetails */ true);
+    errs() << "* ";
+    Loc.first->printAsOperand(errs(), false, M);
     errs() << "\t<->" << *I << '\n';
   }
 }
@@ -84,11 +99,6 @@
   }
 }
 
-static inline bool isInterestingPointer(Value *V) {
-  return V->getType()->isPointerTy()
-      && !isa<ConstantPointerNull>(V);
-}
-
 PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) {
   runInternal(F, AM.getResult<AAManager>(F));
   return PreservedAnalyses::all();
@@ -99,38 +109,21 @@
 
   ++FunctionCount;
 
-  SetVector<Value *> Pointers;
+  SetVector<std::pair<const Value *, Type *>> Pointers;
   SmallSetVector<CallBase *, 16> Calls;
   SetVector<Value *> Loads;
   SetVector<Value *> Stores;
 
-  for (auto &I : F.args())
-    if (I.getType()->isPointerTy())    // Add all pointer arguments.
-      Pointers.insert(&I);
-
   for (Instruction &Inst : instructions(F)) {
-    if (Inst.getType()->isPointerTy()) // Add all pointer instructions.
-      Pointers.insert(&Inst);
-    if (EvalAAMD && isa<LoadInst>(&Inst))
-      Loads.insert(&Inst);
-    if (EvalAAMD && isa<StoreInst>(&Inst))
-      Stores.insert(&Inst);
-    if (auto *Call = dyn_cast<CallBase>(&Inst)) {
-      Value *Callee = Call->getCalledOperand();
-      // Skip actual functions for direct function calls.
-      if (!isa<Function>(Callee) && isInterestingPointer(Callee))
-        Pointers.insert(Callee);
-      // Consider formals.
-      for (Use &DataOp : Call->data_ops())
-        if (isInterestingPointer(DataOp))
-          Pointers.insert(DataOp);
-      Calls.insert(Call);
-    } else {
-      // Consider all operands.
-      for (Use &Op : Inst.operands())
-        if (isInterestingPointer(Op))
-          Pointers.insert(Op);
-    }
+    if (auto *LI = dyn_cast<LoadInst>(&Inst)) {
+      Pointers.insert({LI->getPointerOperand(), LI->getType()});
+      Loads.insert(LI);
+    } else if (auto *SI = dyn_cast<StoreInst>(&Inst)) {
+      Pointers.insert({SI->getPointerOperand(),
+                       SI->getValueOperand()->getType()});
+      Stores.insert(SI);
+    } else if (auto *CB = dyn_cast<CallBase>(&Inst))
+      Calls.insert(CB);
   }
 
   if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
@@ -139,20 +132,12 @@
            << " pointers, " << Calls.size() << " call sites\n";
 
   // iterate over the worklist, and run the full (n^2)/2 disambiguations
-  for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
-       I1 != E; ++I1) {
-    auto I1Size = LocationSize::afterPointer();
-    Type *I1ElTy = (*I1)->getType()->getPointerElementType();
-    if (I1ElTy->isSized())
-      I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
-
-    for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
-      auto I2Size = LocationSize::afterPointer();
-      Type *I2ElTy = (*I2)->getType()->getPointerElementType();
-      if (I2ElTy->isSized())
-        I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
-
-      AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
+  for (auto I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) {
+    LocationSize Size1 = LocationSize::precise(DL.getTypeStoreSize(I1->second));
+    for (auto I2 = Pointers.begin(); I2 != I1; ++I2) {
+      LocationSize Size2 =
+          LocationSize::precise(DL.getTypeStoreSize(I2->second));
+      AliasResult AR = AA.alias(I1->first, Size1, I2->first, Size2);
       switch (AR) {
       case AliasResult::NoAlias:
         PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
@@ -231,13 +216,10 @@
 
   // Mod/ref alias analysis: compare all pairs of calls and values
   for (CallBase *Call : Calls) {
-    for (auto Pointer : Pointers) {
-      auto Size = LocationSize::afterPointer();
-      Type *ElTy = Pointer->getType()->getPointerElementType();
-      if (ElTy->isSized())
-        Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
-
-      switch (AA.getModRefInfo(Call, Pointer, Size)) {
+    for (const auto &Pointer : Pointers) {
+      LocationSize Size =
+          LocationSize::precise(DL.getTypeStoreSize(Pointer.second));
+      switch (AA.getModRefInfo(Call, Pointer.first, Size)) {
       case ModRefInfo::NoModRef:
         PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
                            F.getParent());
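With opaque pointers the evaluator can no longer size a location from a pointee type, which is why the code above records the accessed type next to each pointer. A brief sketch of the pattern; the helper name locForLoad is an illustrative assumption:

    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Pair a load's pointer operand with the type it accesses, then size the
    // location from that type rather than from the (opaque) pointee type.
    static std::pair<const Value *, LocationSize>
    locForLoad(const LoadInst *LI, const DataLayout &DL) {
      Type *AccessTy = LI->getType();
      return {LI->getPointerOperand(),
              LocationSize::precise(DL.getTypeStoreSize(AccessTy))};
    }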
diff --git a/src/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/src/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
index 288928a..bb25244 100644
--- a/src/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -13,16 +13,12 @@
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GuardUtils.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
@@ -258,31 +254,6 @@
   return false;
 }
 
-Instruction* AliasSet::getUniqueInstruction() {
-  if (AliasAny)
-    // May have collapses alias set
-    return nullptr;
-  if (begin() != end()) {
-    if (!UnknownInsts.empty())
-      // Another instruction found
-      return nullptr;
-    if (std::next(begin()) != end())
-      // Another instruction found
-      return nullptr;
-    Value *Addr = begin()->getValue();
-    assert(!Addr->user_empty() &&
-           "where's the instruction which added this pointer?");
-    if (std::next(Addr->user_begin()) != Addr->user_end())
-      // Another instruction found -- this is really restrictive
-      // TODO: generalize!
-      return nullptr;
-    return cast<Instruction>(*(Addr->user_begin()));
-  }
-  if (1 != UnknownInsts.size())
-    return nullptr;
-  return cast<Instruction>(UnknownInsts[0]);
-}
-
 void AliasSetTracker::clear() {
   // Delete all the PointerRec entries.
   for (auto &I : PointerMap)
@@ -608,7 +579,7 @@
   AliasAnyAS->Access = AliasSet::ModRefAccess;
   AliasAnyAS->AliasAny = true;
 
-  for (auto Cur : ASVector) {
+  for (auto *Cur : ASVector) {
     // If Cur was already forwarding, just forward to the new AS instead.
     AliasSet *FwdTo = Cur->Forward;
     if (FwdTo) {
diff --git a/src/llvm-project/llvm/lib/Analysis/Analysis.cpp b/src/llvm-project/llvm/lib/Analysis/Analysis.cpp
index 177f38a..460dddc 100644
--- a/src/llvm-project/llvm/lib/Analysis/Analysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/Analysis.cpp
@@ -40,14 +40,14 @@
   initializeDelinearizationPass(Registry);
   initializeDemandedBitsWrapperPassPass(Registry);
   initializeDominanceFrontierWrapperPassPass(Registry);
-  initializeDomViewerPass(Registry);
-  initializeDomPrinterPass(Registry);
-  initializeDomOnlyViewerPass(Registry);
-  initializePostDomViewerPass(Registry);
-  initializeDomOnlyPrinterPass(Registry);
-  initializePostDomPrinterPass(Registry);
-  initializePostDomOnlyViewerPass(Registry);
-  initializePostDomOnlyPrinterPass(Registry);
+  initializeDomViewerWrapperPassPass(Registry);
+  initializeDomPrinterWrapperPassPass(Registry);
+  initializeDomOnlyViewerWrapperPassPass(Registry);
+  initializePostDomViewerWrapperPassPass(Registry);
+  initializeDomOnlyPrinterWrapperPassPass(Registry);
+  initializePostDomPrinterWrapperPassPass(Registry);
+  initializePostDomOnlyViewerWrapperPassPass(Registry);
+  initializePostDomOnlyPrinterWrapperPassPass(Registry);
   initializeAAResultsWrapperPassPass(Registry);
   initializeGlobalsAAWrapperPassPass(Registry);
   initializeIVUsersWrapperPassPass(Registry);
diff --git a/src/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp b/src/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 9d4fe12..7440dbd 100644
--- a/src/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -10,8 +10,8 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/DebugCounter.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/src/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
index 3e0214e..e7e476d 100644
--- a/src/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
@@ -11,18 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
@@ -31,7 +30,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <utility>
 
diff --git a/src/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 0a0b537..c3b032a 100644
--- a/src/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/PhiValues.h"
@@ -45,7 +44,6 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
@@ -105,29 +103,6 @@
 // Useful predicates
 //===----------------------------------------------------------------------===//
 
-/// Returns true if the pointer is one which would have been considered an
-/// escape by isNonEscapingLocalObject.
-static bool isEscapeSource(const Value *V) {
-  if (isa<CallBase>(V))
-    return true;
-
-  // The load case works because isNonEscapingLocalObject considers all
-  // stores to be escapes (it passes true for the StoreCaptures argument
-  // to PointerMayBeCaptured).
-  if (isa<LoadInst>(V))
-    return true;
-
-  // The inttoptr case works because isNonEscapingLocalObject considers all
-  // means of converting or equating a pointer to an int (ptrtoint, ptr store
-  // which could be followed by an integer load, ptr<->int compare) as
-  // escaping, and objects located at well-known addresses via platform-specific
-  // means cannot be considered non-escaping local objects.
-  if (isa<IntToPtrInst>(V))
-    return true;
-
-  return false;
-}
-
 /// Returns the size of the object specified by V or UnknownSize if unknown.
 static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
@@ -234,7 +209,7 @@
   if (Iter.second) {
     Instruction *EarliestCapture = FindEarliestCapture(
         Object, *const_cast<Function *>(I->getFunction()),
-        /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
+        /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT, EphValues);
     if (EarliestCapture) {
       auto Ins = Inst2Obj.insert({EarliestCapture, {}});
       Ins.first->second.push_back(Object);
@@ -661,8 +636,8 @@
       unsigned TypeSize =
           DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
-      Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize);
-      APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize);
+      Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
+      APInt Scale = LE.Scale.sext(MaxIndexSize);
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
@@ -1299,8 +1274,31 @@
     const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
     if (Var.Val.TruncBits == 0 &&
         isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
-      // If V != 0 then abs(VarIndex) >= abs(Scale).
-      MinAbsVarIndex = Var.Scale.abs();
+      // If V != 0, then abs(VarIndex) > 0.
+      MinAbsVarIndex = APInt(Var.Scale.getBitWidth(), 1);
+
+      // Check if abs(V*Scale) >= abs(Scale) holds in the presence of
+      // potentially wrapping math.
+      auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) {
+        if (Var.IsNSW)
+          return true;
+
+        int ValOrigBW = Var.Val.V->getType()->getPrimitiveSizeInBits();
+        // Check if Scale is small enough that abs(V*Scale) >= abs(Scale).
+        // The max value of abs(V) is 2^ValOrigBW - 1. Multiplying with a
+        // constant smaller than 2^(bitwidth(Val) - ValOrigBW) won't wrap.
+        int MaxScaleValueBW = Var.Val.getBitWidth() - ValOrigBW;
+        if (MaxScaleValueBW <= 0)
+          return false;
+        return Var.Scale.ule(
+            APInt::getMaxValue(MaxScaleValueBW).zext(Var.Scale.getBitWidth()));
+      };
+      // Refine MinAbsVarIndex, if abs(Scale*V) >= abs(Scale) holds in the
+      // presence of potentially wrapping math.
+      if (MultiplyByScaleNoWrap(Var)) {
+        // If V != 0 then abs(VarIndex) >= abs(Scale).
+        MinAbsVarIndex = Var.Scale.abs();
+      }
     }
   } else if (DecompGEP1.VarIndices.size() == 2) {
     // VarIndex = Scale*V0 + (-Scale)*V1.
@@ -1370,15 +1368,15 @@
 
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
-  AliasResult Alias = getBestAAResults().alias(
-      MemoryLocation(V2, V2Size),
-      MemoryLocation(SI->getTrueValue(), SISize), AAQI);
+  AliasResult Alias =
+      getBestAAResults().alias(MemoryLocation(SI->getTrueValue(), SISize),
+                               MemoryLocation(V2, V2Size), AAQI);
   if (Alias == AliasResult::MayAlias)
     return AliasResult::MayAlias;
 
-  AliasResult ThisAlias = getBestAAResults().alias(
-      MemoryLocation(V2, V2Size),
-      MemoryLocation(SI->getFalseValue(), SISize), AAQI);
+  AliasResult ThisAlias =
+      getBestAAResults().alias(MemoryLocation(SI->getFalseValue(), SISize),
+                               MemoryLocation(V2, V2Size), AAQI);
   return MergeAliasResults(ThisAlias, Alias);
 }
 
@@ -1500,8 +1498,7 @@
   AAQueryInfo *UseAAQI = BlockInserted ? &NewAAQI : &AAQI;
 
   AliasResult Alias = getBestAAResults().alias(
-      MemoryLocation(V2, V2Size),
-      MemoryLocation(V1Srcs[0], PNSize), *UseAAQI);
+      MemoryLocation(V1Srcs[0], PNSize), MemoryLocation(V2, V2Size), *UseAAQI);
 
   // Early exit if the check of the first PHI source against V2 is MayAlias.
   // Other results are not possible.
@@ -1518,7 +1515,7 @@
     Value *V = V1Srcs[i];
 
     AliasResult ThisAlias = getBestAAResults().alias(
-        MemoryLocation(V2, V2Size), MemoryLocation(V, PNSize), *UseAAQI);
+        MemoryLocation(V, PNSize), MemoryLocation(V2, V2Size), *UseAAQI);
     Alias = MergeAliasResults(ThisAlias, Alias);
     if (Alias == AliasResult::MayAlias)
       break;
@@ -1767,7 +1764,7 @@
   // Make sure that the visited phis cannot reach the Value. This ensures that
   // the Values cannot come from different iterations of a potential cycle the
   // phi nodes could be involved in.
-  for (auto *P : VisitedPhiBBs)
+  for (const auto *P : VisitedPhiBBs)
     if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT))
       return false;
 
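The MultiplyByScaleNoWrap bound above can be exercised in isolation. A worked sketch of the same arithmetic; the free-standing function and the i8 example are illustrative:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // True when V * Scale cannot wrap: V fits in ValOrigBW bits, so
    // abs(V) <= 2^ValOrigBW - 1, and multiplying by any Scale below
    // 2^(BitWidth - ValOrigBW) stays representable.
    static bool multiplyByScaleNoWrap(unsigned ValOrigBW, const APInt &Scale) {
      int MaxScaleValueBW = int(Scale.getBitWidth()) - int(ValOrigBW);
      if (MaxScaleValueBW <= 0)
        return false; // index is as wide as the computation; it may wrap
      return Scale.ule(
          APInt::getMaxValue(MaxScaleValueBW).zext(Scale.getBitWidth()));
    }
    // E.g. an i8 index in 64-bit GEP math: any Scale <= 2^56 - 1 cannot wrap,
    // so MinAbsVarIndex may be refined from 1 up to abs(Scale).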
diff --git a/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index b464071..436b017 100644
--- a/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <string>
 
diff --git a/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 2a5e1f6..ec8d318 100644
--- a/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -13,7 +13,6 @@
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Config/llvm-config.h"
@@ -22,8 +21,8 @@
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ScaledNumber.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScaledNumber.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -48,7 +47,7 @@
              "for debugging missed BFI updates"));
 
 cl::opt<bool> UseIterativeBFIInference(
-    "use-iterative-bfi-inference", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+    "use-iterative-bfi-inference", cl::Hidden,
     cl::desc("Apply an iterative post-processing to infer correct BFI counts"));
 
 cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock(
diff --git a/src/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/src/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index ffb8013..f457287 100644
--- a/src/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -414,8 +415,7 @@
     const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1));
     auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
     if (EstimatedWeight &&
-        EstimatedWeight.getValue() <=
-            static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
+        *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
       UnreachableIdxs.push_back(I - 1);
     else
       ReachableIdxs.push_back(I - 1);
@@ -630,9 +630,10 @@
       if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
         continue;
       // First collapse InstChain
+      const DataLayout &DL = BB->getModule()->getDataLayout();
       for (Instruction *I : llvm::reverse(InstChain)) {
-        CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst,
-                                        cast<Constant>(I->getOperand(1)), true);
+        CmpLHSConst = ConstantFoldBinaryOpOperands(
+            I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
         if (!CmpLHSConst)
           break;
       }
@@ -688,7 +689,7 @@
     if (!Weight)
       return None;
 
-    if (!MaxWeight || MaxWeight.getValue() < Weight.getValue())
+    if (!MaxWeight || *MaxWeight < *Weight)
       MaxWeight = Weight;
   }
 
@@ -827,9 +828,8 @@
     if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
       // If we were able to find estimated weight for the block set it to this
       // block and propagate up the IR.
-      propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT,
-                                    BBWeight.getValue(), BlockWorkList,
-                                    LoopWorkList);
+      propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, BBWeight.value(),
+                                    BlockWorkList, LoopWorkList);
 
   // BlockWorklist/LoopWorkList contains blocks/loops with at least one
   // successor/exit having estimated weight. Try to propagate weight to such
@@ -852,8 +852,7 @@
         if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
           LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
 
-        EstimatedLoopWeight.insert(
-            {LoopBB.getLoopData(), LoopWeight.getValue()});
+        EstimatedLoopWeight.insert({LoopBB.getLoopData(), *LoopWeight});
         // Add all blocks entering the loop into working list.
         getLoopEnterBlocks(LoopBB, BlockWorkList);
       }
@@ -875,7 +874,7 @@
       auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
 
       if (MaxWeight)
-        propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(),
+        propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
                                       BlockWorkList, LoopWorkList);
     }
   } while (!BlockWorkList.empty() || !LoopWorkList.empty());
@@ -913,7 +912,7 @@
       // Scale down loop exiting weight by trip count.
       Weight = std::max(
           static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
-          Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+          Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
               TC);
     }
     bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
@@ -923,15 +922,14 @@
       // 'Unlikely' blocks have twice lower weight.
       Weight = std::max(
           static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
-          Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
-              2);
+          Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
     }
 
     if (Weight)
       FoundEstimatedWeight = true;
 
     auto WeightVal =
-        Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
+        Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
     TotalWeight += WeightVal;
     SuccWeights.push_back(WeightVal);
   }
@@ -1252,7 +1250,7 @@
 
   // Walk the basic blocks in post-order so that we can build up state about
   // the successors of a block iteratively.
-  for (auto BB : post_order(&F.getEntryBlock())) {
+  for (const auto *BB : post_order(&F.getEntryBlock())) {
     LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                       << "\n");
    // With fewer than two successors, there is no probability to set.
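Regarding the ConstantExpr::get to ConstantFoldBinaryOpOperands switch earlier in this file: the folder may fail, which is why the collapse loop now breaks on a null result. A minimal sketch; the wrapper name collapseStep is illustrative:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Fold one step of the instruction chain; returns nullptr (rather than
    // building a constant expression) when the operands do not fold.
    static Constant *collapseStep(const Instruction *I, Constant *LHS,
                                  const DataLayout &DL) {
      return ConstantFoldBinaryOpOperands(
          I->getOpcode(), LHS, cast<Constant>(I->getOperand(1)), DL);
    }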
diff --git a/src/llvm-project/llvm/lib/Analysis/CFG.cpp b/src/llvm-project/llvm/lib/Analysis/CFG.cpp
index ec25ee1..e5dd458 100644
--- a/src/llvm-project/llvm/lib/Analysis/CFG.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CFG.cpp
@@ -127,11 +127,7 @@
 // the outermost loop in the loop nest that contains BB.
 static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
   const Loop *L = LI->getLoopFor(BB);
-  if (L) {
-    while (const Loop *Parent = L->getParentLoop())
-      L = Parent;
-  }
-  return L;
+  return L ? L->getOutermostLoop() : nullptr;
 }
 
 bool llvm::isPotentiallyReachableFromMany(
@@ -153,7 +149,7 @@
   // untrue.
   SmallPtrSet<const Loop *, 8> LoopsWithHoles;
   if (LI && ExclusionSet) {
-    for (auto BB : *ExclusionSet) {
+    for (auto *BB : *ExclusionSet) {
       if (const Loop *L = getOutermostLoop(LI, BB))
         LoopsWithHoles.insert(L);
     }
diff --git a/src/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
index 04ccdc5..f8eba1a 100644
--- a/src/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
@@ -23,7 +23,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
-#include <algorithm>
+#include "llvm/Support/GraphWriter.h"
 
 using namespace llvm;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 1216d03..d70e1b2 100644
--- a/src/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -615,7 +615,7 @@
       auto Src = InstantiatedValue{Val, I};
       // If there's an assignment edge from X to Y, it means Y is reachable from
       // X at S3 and X is reachable from Y at S1
-      for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
+      for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
         propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
                   WorkList);
         propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
@@ -831,14 +831,14 @@
     scan(Fn);
     Iter = Cache.find(&Fn);
     assert(Iter != Cache.end());
-    assert(Iter->second.hasValue());
+    assert(Iter->second);
   }
   return Iter->second;
 }
 
 const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) {
   auto &FunInfo = ensureCached(Fn);
-  if (FunInfo.hasValue())
+  if (FunInfo)
     return &FunInfo->getAliasSummary();
   else
     return nullptr;
diff --git a/src/llvm-project/llvm/lib/Analysis/CFLGraph.h b/src/llvm-project/llvm/lib/Analysis/CFLGraph.h
index 02a13d6..47bb02a 100644
--- a/src/llvm-project/llvm/lib/Analysis/CFLGraph.h
+++ b/src/llvm-project/llvm/lib/Analysis/CFLGraph.h
@@ -403,7 +403,7 @@
         auto &RetParamRelations = Summary->RetParamRelations;
         for (auto &Relation : RetParamRelations) {
           auto IRelation = instantiateExternalRelation(Relation, Call);
-          if (IRelation.hasValue()) {
+          if (IRelation) {
             Graph.addNode(IRelation->From);
             Graph.addNode(IRelation->To);
             Graph.addEdge(IRelation->From, IRelation->To);
@@ -413,7 +413,7 @@
         auto &RetParamAttributes = Summary->RetParamAttributes;
         for (auto &Attribute : RetParamAttributes) {
           auto IAttr = instantiateExternalAttribute(Attribute, Call);
-          if (IAttr.hasValue())
+          if (IAttr)
             Graph.addNode(IAttr->IValue, IAttr->Attr);
         }
       }
@@ -434,7 +434,8 @@
       // introduce any aliases.
       // TODO: address other common library functions such as realloc(),
       // strdup(), etc.
-      if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
+      if (isMallocOrCallocLikeFn(&Call, &TLI) ||
+          getFreedOperand(&Call, &TLI) != nullptr)
         return;
 
       // TODO: Add support for noalias args/all the other fun function
diff --git a/src/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 090dccc..33ed6f8 100644
--- a/src/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -165,7 +165,7 @@
     assert(RetVal != nullptr);
     assert(RetVal->getType()->isPointerTy());
     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
-    if (RetInfo.hasValue())
+    if (RetInfo)
       AddToRetParamRelations(0, RetInfo->Index);
   }
 
@@ -174,7 +174,7 @@
   for (auto &Param : Fn.args()) {
     if (Param.getType()->isPointerTy()) {
       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
-      if (ParamInfo.hasValue())
+      if (ParamInfo)
         AddToRetParamRelations(I + 1, ParamInfo->Index);
     }
     ++I;
@@ -216,7 +216,7 @@
 
     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
       auto Src = InstantiatedValue{Val, I};
-      for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
+      for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
         SetBuilder.addWith(Src, Edge.Other);
     }
   }
@@ -250,14 +250,14 @@
     scan(Fn);
     Iter = Cache.find(Fn);
     assert(Iter != Cache.end());
-    assert(Iter->second.hasValue());
+    assert(Iter->second);
   }
   return Iter->second;
 }
 
 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
   auto &FunInfo = ensureCached(&Fn);
-  if (FunInfo.hasValue())
+  if (FunInfo)
     return &FunInfo->getAliasSummary();
   else
     return nullptr;
@@ -293,15 +293,15 @@
 
   assert(Fn != nullptr);
   auto &MaybeInfo = ensureCached(Fn);
-  assert(MaybeInfo.hasValue());
+  assert(MaybeInfo);
 
   auto &Sets = MaybeInfo->getStratifiedSets();
   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
-  if (!MaybeA.hasValue())
+  if (!MaybeA)
     return AliasResult::MayAlias;
 
   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
-  if (!MaybeB.hasValue())
+  if (!MaybeB)
     return AliasResult::MayAlias;
 
   auto SetA = *MaybeA;
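The hasValue()/getValueOr() rewrites here and in the neighboring files track llvm::Optional's convergence on the std::optional interface. A minimal before/after sketch:

    #include "llvm/ADT/Optional.h"

    int demo(llvm::Optional<int> O) {
      if (!O)                    // was: !O.hasValue()
        return O.value_or(42);   // was: O.getValueOr(42)
      return *O;                 // was: O.getValue()
    }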
diff --git a/src/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp b/src/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
index c60b70a..b2e7422 100644
--- a/src/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -9,6 +9,7 @@
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -27,7 +28,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <iterator>
 
@@ -164,9 +164,9 @@
       InlinedInternalEdges;
 
   CGSCCUpdateResult UR = {
-      RCWorklist, CWorklist, InvalidRefSCCSet,         InvalidSCCSet,
-      nullptr,    nullptr,   PreservedAnalyses::all(), InlinedInternalEdges,
-      {}};
+      RCWorklist,           CWorklist, InvalidRefSCCSet,
+      InvalidSCCSet,        nullptr,   PreservedAnalyses::all(),
+      InlinedInternalEdges, {}};
 
   // Request PassInstrumentation from analysis manager, will use it to run
   // instrumenting callbacks for the passes later.
@@ -174,9 +174,8 @@
 
   PreservedAnalyses PA = PreservedAnalyses::all();
   CG.buildRefSCCs();
-  for (auto RCI = CG.postorder_ref_scc_begin(),
-            RCE = CG.postorder_ref_scc_end();
-       RCI != RCE;) {
+  for (LazyCallGraph::RefSCC &RC :
+       llvm::make_early_inc_range(CG.postorder_ref_sccs())) {
     assert(RCWorklist.empty() &&
            "Should always start with an empty RefSCC worklist");
     // The postorder_ref_sccs range we are walking is lazily constructed, so
@@ -190,7 +189,7 @@
     //
     // We also eagerly increment the iterator to the next position because
     // the CGSCC passes below may delete the current RefSCC.
-    RCWorklist.insert(&*RCI++);
+    RCWorklist.insert(&RC);
 
     do {
       LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
@@ -230,11 +229,15 @@
           LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n");
           continue;
         }
-        if (&C->getOuterRefSCC() != RC) {
-          LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
-                               "RefSCC...\n");
-          continue;
-        }
+        // We used to also check if the current SCC is part of the current
+        // RefSCC and bail if it wasn't, since it should be in RCWorklist.
+        // However, this can cause compile time explosions in some cases on
+        // modules with a huge RefSCC. If a non-trivial number of SCCs in the
+        // huge RefSCC can become their own child RefSCC, we create one child
+        // RefSCC, bail on the current RefSCC, visit the child RefSCC, revisit
+        // the huge RefSCC, and repeat. By visiting all SCCs in the original
+        // RefSCC we create all the child RefSCCs in one pass of the RefSCC,
+        // rather than one pass of the RefSCC creating one child RefSCC at a
+        // time.
 
         // Ensure we can proxy analysis updates from the CGSCC analysis manager
         // into the the Function analysis manager by getting a proxy here.
@@ -264,11 +267,8 @@
           // Check that we didn't miss any update scenario.
           assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
           assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-          assert(&C->getOuterRefSCC() == RC &&
-                 "Processing an SCC in a different RefSCC!");
 
           LastUpdatedC = UR.UpdatedC;
-          UR.UpdatedRC = nullptr;
           UR.UpdatedC = nullptr;
 
           // Check the PassInstrumentation's BeforePass callbacks before
@@ -290,7 +290,6 @@
 
           // Update the SCC and RefSCC if necessary.
           C = UR.UpdatedC ? UR.UpdatedC : C;
-          RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
 
           if (UR.UpdatedC) {
             // If we're updating the SCC, also update the FAM inside the proxy's
@@ -1213,10 +1212,8 @@
   assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
   assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
 
-  // Record the current RefSCC and SCC for higher layers of the CGSCC pass
-  // manager now that all the updates have been applied.
-  if (RC != &InitialRC)
-    UR.UpdatedRC = RC;
+  // Record the current SCC for higher layers of the CGSCC pass manager now
+  // that all the updates have been applied.
   if (C != &InitialC)
     UR.UpdatedC = C;
 
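The rewritten RefSCC walk above uses llvm::make_early_inc_range, which advances the underlying iterator before each body execution, so the element being visited may be deleted. A generic sketch of the idiom; dropTriviallyDeadInstrs is illustrative:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Erasing the current instruction is safe: the range has already moved
    // past it by the time the loop body runs.
    static void dropTriviallyDeadInstrs(BasicBlock &BB) {
      for (Instruction &I : make_early_inc_range(BB))
        if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
          I.eraseFromParent();
    }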
diff --git a/src/llvm-project/llvm/lib/Analysis/CMakeLists.txt b/src/llvm-project/llvm/lib/Analysis/CMakeLists.txt
index aec8412..e59725c 100644
--- a/src/llvm-project/llvm/lib/Analysis/CMakeLists.txt
+++ b/src/llvm-project/llvm/lib/Analysis/CMakeLists.txt
@@ -2,10 +2,7 @@
   include(TensorFlowCompile)
   set(LLVM_INLINER_MODEL_PATH_DEFAULT "models/inliner-Oz")
 
-  # This url points to the most recent most which is known to be compatible with
-  # LLVM. When better models are published, this url should be updated to aid
-  # discoverability.
-  set(LLVM_INLINER_MODEL_CURRENT_URL "https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v1.1/inlining-Oz-99f0063-v1.1.tar.gz")
+  set(LLVM_INLINER_MODEL_CURRENT_URL "<UNSPECIFIED>")
 
   if (DEFINED LLVM_HAVE_TF_AOT)
     tf_find_and_compile(
@@ -101,6 +98,7 @@
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
   MemoryLocation.cpp
+  MemoryProfileInfo.cpp
   MemorySSA.cpp
   MemorySSAUpdater.cpp
   ModelUnderTrainingRunner.cpp
@@ -131,6 +129,7 @@
   SyncDependenceAnalysis.cpp
   SyntheticCountsUtils.cpp
   TFUtils.cpp
+  TensorSpec.cpp
   TargetLibraryInfo.cpp
   TargetTransformInfo.cpp
   Trace.cpp
diff --git a/src/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/src/llvm-project/llvm/lib/Analysis/CallGraph.cpp
index dfbd29b..f855271 100644
--- a/src/llvm-project/llvm/lib/Analysis/CallGraph.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CallGraph.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 
 using namespace llvm;
@@ -70,8 +69,7 @@
   // Check whether the analysis, all analyses on functions, or the function's
   // CFG have been preserved.
   auto PAC = PA.getChecker<CallGraphAnalysis>();
-  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() ||
-           PAC.preservedSet<CFGAnalyses>());
+  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>());
 }
 
 void CallGraph::addToCallGraph(Function *F) {
diff --git a/src/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp b/src/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 930cb13..8438f33 100644
--- a/src/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -28,7 +28,6 @@
 #include "llvm/IR/OptBisect.h"
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -271,7 +270,7 @@
           Calls.count(Call) ||
 
          // If the call edge is not from a call or invoke, or it is an
-          // instrinsic call, then the function pass RAUW'd a call with
+          // intrinsic call, then the function pass RAUW'd a call with
           // another value. This can happen when constant folding happens
           // of well known functions etc.
           (Call->getCalledFunction() &&
@@ -470,7 +469,7 @@
     initializeAnalysisImpl(P);
 
 #ifdef EXPENSIVE_CHECKS
-    uint64_t RefHash = StructuralHash(CG.getModule());
+    uint64_t RefHash = P->structuralHash(CG.getModule());
 #endif
 
     // Actually run this pass on the current SCC.
@@ -480,7 +479,7 @@
     Changed |= LocalChanged;
 
 #ifdef EXPENSIVE_CHECKS
-    if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) {
+    if (!LocalChanged && (RefHash != P->structuralHash(CG.getModule()))) {
       llvm::errs() << "Pass modifies its input and doesn't report it: "
                    << P->getPassName() << "\n";
       llvm_unreachable("Pass modifies its input and doesn't report it");
diff --git a/src/llvm-project/llvm/lib/Analysis/CallPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
index 829532a..65e3184 100644
--- a/src/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
@@ -14,18 +14,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CallPrinter.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/Analysis/HeatUtils.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/HeatUtils.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
 
 using namespace llvm;
 
+namespace llvm {
+template <class GraphType> struct GraphTraits;
+}
+
 // This option shows static (relative) call counts.
 // FIXME:
 // Need to show real counts when profile data is available
@@ -213,6 +218,71 @@
 } // end llvm namespace
 
 namespace {
+void doCallGraphDOTPrinting(
+    Module &M, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+  std::string Filename;
+  if (!CallGraphDotFilenamePrefix.empty())
+    Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot");
+  else
+    Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot");
+  errs() << "Writing '" << Filename << "'...";
+
+  std::error_code EC;
+  raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+
+  CallGraph CG(M);
+  CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
+
+  if (!EC)
+    WriteGraph(File, &CFGInfo);
+  else
+    errs() << "  error opening file for writing!";
+  errs() << "\n";
+}
+
+void viewCallGraph(Module &M,
+                   function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+  CallGraph CG(M);
+  CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
+
+  std::string Title =
+      DOTGraphTraits<CallGraphDOTInfo *>::getGraphName(&CFGInfo);
+  ViewGraph(&CFGInfo, "callgraph", true, Title);
+}
+} // namespace
+
+namespace llvm {
+PreservedAnalyses CallGraphDOTPrinterPass::run(Module &M,
+                                               ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  auto LookupBFI = [&FAM](Function &F) {
+    return &FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+
+  doCallGraphDOTPrinting(M, LookupBFI);
+
+  return PreservedAnalyses::all();
+}
+
+PreservedAnalyses CallGraphViewerPass::run(Module &M,
+                                           ModuleAnalysisManager &AM) {
+
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  auto LookupBFI = [&FAM](Function &F) {
+    return &FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+
+  viewCallGraph(M, LookupBFI);
+
+  return PreservedAnalyses::all();
+}
+} // namespace llvm
+
+namespace {
 // Viewer
 class CallGraphViewer : public ModulePass {
 public:
@@ -234,12 +304,7 @@
     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
   };
 
-  CallGraph CG(M);
-  CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
-
-  std::string Title =
-      DOTGraphTraits<CallGraphDOTInfo *>::getGraphName(&CFGInfo);
-  ViewGraph(&CFGInfo, "callgraph", true, Title);
+  viewCallGraph(M, LookupBFI);
 
   return false;
 }
@@ -266,24 +331,7 @@
     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
   };
 
-  std::string Filename;
-  if (!CallGraphDotFilenamePrefix.empty())
-    Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot");
-  else
-    Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot");
-  errs() << "Writing '" << Filename << "'...";
-
-  std::error_code EC;
-  raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
-
-  CallGraph CG(M);
-  CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
-
-  if (!EC)
-    WriteGraph(File, &CFGInfo);
-  else
-    errs() << "  error opening file for writing!";
-  errs() << "\n";
+  doCallGraphDOTPrinting(M, LookupBFI);
 
   return false;
 }
diff --git a/src/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/src/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
index ba8462e..f4fd660 100644
--- a/src/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -44,15 +45,15 @@
 /// use it where possible. The caching version can use a much higher limit
 /// or have no cap at all.
 static cl::opt<unsigned>
-DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
-                        cl::desc("Maximal number of uses to explore."),
-                        cl::init(20));
+    DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
+                            cl::desc("Maximal number of uses to explore."),
+                            cl::init(100));
 
 unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() {
   return DefaultMaxUsesToExplore;
 }
 
-CaptureTracker::~CaptureTracker() {}
+CaptureTracker::~CaptureTracker() = default;
 
 bool CaptureTracker::shouldExplore(const Use *U) { return true; }
 
@@ -74,8 +75,10 @@
 
 namespace {
   struct SimpleCaptureTracker : public CaptureTracker {
-    explicit SimpleCaptureTracker(bool ReturnCaptures)
-        : ReturnCaptures(ReturnCaptures) {}
+    explicit SimpleCaptureTracker(
+        const SmallPtrSetImpl<const Value *> &EphValues, bool ReturnCaptures)
+        : EphValues(EphValues), ReturnCaptures(ReturnCaptures) {}
 
     void tooManyUses() override { Captured = true; }
 
@@ -83,10 +86,15 @@
       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
         return false;
 
+      if (EphValues.contains(U->getUser()))
+        return false;
+
       Captured = true;
       return true;
     }
 
+    const SmallPtrSetImpl<const Value *> &EphValues;
+
     bool ReturnCaptures;
 
     bool Captured = false;
@@ -154,8 +162,9 @@
   // escape are not in a cycle.
   struct EarliestCaptures : public CaptureTracker {
 
-    EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
-        : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
+    EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT,
+                     const SmallPtrSetImpl<const Value *> &EphValues)
+        : EphValues(EphValues), DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
 
     void tooManyUses() override {
       Captured = true;
@@ -167,6 +176,9 @@
       if (isa<ReturnInst>(I) && !ReturnCaptures)
         return false;
 
+      if (EphValues.contains(I))
+        return false;
+
       if (!EarliestCapture) {
         EarliestCapture = I;
       } else if (EarliestCapture->getParent() == I->getParent()) {
@@ -193,6 +205,8 @@
       return false;
     }
 
+    const SmallPtrSetImpl<const Value *> &EphValues;
+
     Instruction *EarliestCapture = nullptr;
 
     const DominatorTree &DT;
@@ -212,8 +226,18 @@
 /// counts as capturing it or not.  The boolean StoreCaptures specified whether
 /// storing the value (or part of it) into memory anywhere automatically
 /// counts as capturing it or not.
-bool llvm::PointerMayBeCaptured(const Value *V,
-                                bool ReturnCaptures, bool StoreCaptures,
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
+                                bool StoreCaptures, unsigned MaxUsesToExplore) {
+  SmallPtrSet<const Value *, 1> Empty;
+  return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, Empty,
+                              MaxUsesToExplore);
+}
+
+/// Variant of the above function which accepts a set of Values that are
+/// ephemeral and cannot cause pointers to escape.
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
+                                bool StoreCaptures,
+                                const SmallPtrSetImpl<const Value *> &EphValues,
                                 unsigned MaxUsesToExplore) {
   assert(!isa<GlobalValue>(V) &&
          "It doesn't make sense to ask whether a global is captured.");
@@ -224,7 +248,7 @@
   // take advantage of this.
   (void)StoreCaptures;
 
-  SimpleCaptureTracker SCT(ReturnCaptures);
+  SimpleCaptureTracker SCT(EphValues, ReturnCaptures);
   PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
   if (SCT.Captured)
     ++NumCaptured;
@@ -266,14 +290,16 @@
   return CB.Captured;
 }
 
-Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
-                                       bool ReturnCaptures, bool StoreCaptures,
-                                       const DominatorTree &DT,
-                                       unsigned MaxUsesToExplore) {
+Instruction *
+llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
+                          bool StoreCaptures, const DominatorTree &DT,
+                          const SmallPtrSetImpl<const Value *> &EphValues,
+                          unsigned MaxUsesToExplore) {
   assert(!isa<GlobalValue>(V) &&
          "It doesn't make sense to ask whether a global is captured.");
 
-  EarliestCaptures CB(ReturnCaptures, F, DT);
+  EarliestCaptures CB(ReturnCaptures, F, DT, EphValues);
   PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
   if (CB.Captured)
     ++NumCapturedBefore;
@@ -282,6 +308,132 @@
   return CB.EarliestCapture;
 }
 
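+/// Classify how the use U affects capture of the pointer it uses:
+///   NO_CAPTURE  - this use cannot capture the pointer;
+///   MAY_CAPTURE - conservatively treat the use as a potential capture;
+///   PASSTHROUGH - no capture here, but the user's own uses must be traced.
+/// IsDereferenceableOrNull, when given, refines the null-comparison case.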
+UseCaptureKind llvm::DetermineUseCaptureKind(
+    const Use &U,
+    function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) {
+  Instruction *I = cast<Instruction>(U.getUser());
+
+  switch (I->getOpcode()) {
+  case Instruction::Call:
+  case Instruction::Invoke: {
+    auto *Call = cast<CallBase>(I);
+    // Not captured if the callee is readonly, doesn't return a copy through
+    // its return value and doesn't unwind (a readonly function can leak bits
+    // by throwing an exception or not depending on the input value).
+    if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
+        Call->getType()->isVoidTy())
+      return UseCaptureKind::NO_CAPTURE;
+
+    // The pointer is not captured if returned pointer is not captured.
+    // NOTE: CaptureTracking users should not assume that only functions
+    // marked with nocapture do not capture. This means that places like
+    // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
+    // in BasicAA also need to know about this property.
+    if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true))
+      return UseCaptureKind::PASSTHROUGH;
+
+    // Volatile operations effectively capture the memory location that they
+    // load and store to.
+    if (auto *MI = dyn_cast<MemIntrinsic>(Call))
+      if (MI->isVolatile())
+        return UseCaptureKind::MAY_CAPTURE;
+
+    // Calling a function pointer does not in itself cause the pointer to
+    // be captured.  This is a subtle point considering that (for example)
+    // the callee might return its own address.  It is analogous to saying
+    // that loading a value from a pointer does not cause the pointer to be
+    // captured, even though the loaded value might be the pointer itself
+    // (think of self-referential objects).
+    if (Call->isCallee(&U))
+      return UseCaptureKind::NO_CAPTURE;
+
+    // Not captured if only passed via 'nocapture' arguments.
+    if (Call->isDataOperand(&U) &&
+        !Call->doesNotCapture(Call->getDataOperandNo(&U))) {
+      // The parameter is not marked 'nocapture' - captured.
+      return UseCaptureKind::MAY_CAPTURE;
+    }
+    return UseCaptureKind::NO_CAPTURE;
+  }
+  case Instruction::Load:
+    // Volatile loads make the address observable.
+    if (cast<LoadInst>(I)->isVolatile())
+      return UseCaptureKind::MAY_CAPTURE;
+    return UseCaptureKind::NO_CAPTURE;
+  case Instruction::VAArg:
+    // "va-arg" from a pointer does not cause it to be captured.
+    return UseCaptureKind::NO_CAPTURE;
+  case Instruction::Store:
+    // Stored the pointer - conservatively assume it may be captured.
+    // Volatile stores make the address observable.
+    if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
+      return UseCaptureKind::MAY_CAPTURE;
+    return UseCaptureKind::NO_CAPTURE;
+  case Instruction::AtomicRMW: {
+    // atomicrmw conceptually includes both a load and store from
+    // the same location.
+    // As with a store, the location being accessed is not captured,
+    // but the value being stored is.
+    // Volatile stores make the address observable.
+    auto *ARMWI = cast<AtomicRMWInst>(I);
+    if (U.getOperandNo() == 1 || ARMWI->isVolatile())
+      return UseCaptureKind::MAY_CAPTURE;
+    return UseCaptureKind::NO_CAPTURE;
+  }
+  case Instruction::AtomicCmpXchg: {
+    // cmpxchg conceptually includes both a load and store from
+    // the same location.
+    // As with a store, the location being accessed is not captured,
+    // but the value being stored is.
+    // Volatile stores make the address observable.
+    auto *ACXI = cast<AtomicCmpXchgInst>(I);
+    if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile())
+      return UseCaptureKind::MAY_CAPTURE;
+    return UseCaptureKind::NO_CAPTURE;
+  }
+  case Instruction::BitCast:
+  case Instruction::GetElementPtr:
+  case Instruction::PHI:
+  case Instruction::Select:
+  case Instruction::AddrSpaceCast:
+    // The original value is not captured via this if the new value isn't.
+    return UseCaptureKind::PASSTHROUGH;
+  case Instruction::ICmp: {
+    unsigned Idx = U.getOperandNo();
+    unsigned OtherIdx = 1 - Idx;
+    if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
+      // Don't count comparisons of a no-alias return value against null as
+      // captures. This allows us to ignore comparisons of malloc results
+      // with null, for example.
+      if (CPN->getType()->getAddressSpace() == 0)
+        if (isNoAliasCall(U.get()->stripPointerCasts()))
+          return UseCaptureKind::NO_CAPTURE;
+      if (!I->getFunction()->nullPointerIsDefined()) {
+        auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
+        // Comparing a dereferenceable_or_null pointer against null cannot
+        // lead to pointer escapes, because if it is not null it must be a
+        // valid (in-bounds) pointer.
+        const DataLayout &DL = I->getModule()->getDataLayout();
+        if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL))
+          return UseCaptureKind::NO_CAPTURE;
+      }
+    }
+    // Comparison against value stored in global variable. Given the pointer
+    // does not escape, its value cannot be guessed and stored separately in a
+    // global variable.
+    auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
+    if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+      return UseCaptureKind::NO_CAPTURE;
+    // Otherwise, be conservative. There are crazy ways to capture pointers
+    // using comparisons.
+    return UseCaptureKind::MAY_CAPTURE;
+  }
+  default:
+    // Something else - be conservative and say it is captured.
+    return UseCaptureKind::MAY_CAPTURE;
+  }
+}
+
 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
                                 unsigned MaxUsesToExplore) {
   assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
@@ -293,11 +445,10 @@
   SmallSet<const Use *, 20> Visited;
 
   auto AddUses = [&](const Value *V) {
-    unsigned Count = 0;
     for (const Use &U : V->uses()) {
       // If there are lots of uses, conservatively say that the value
       // is captured to avoid taking too much compile time.
-      if (Count++ >= MaxUsesToExplore) {
+      if (Visited.size() >= MaxUsesToExplore) {
         Tracker->tooManyUses();
         return false;
       }
@@ -312,144 +463,22 @@
   if (!AddUses(V))
     return;
 
+  auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) {
+    return Tracker->isDereferenceableOrNull(V, DL);
+  };
   while (!Worklist.empty()) {
     const Use *U = Worklist.pop_back_val();
-    Instruction *I = cast<Instruction>(U->getUser());
-
-    switch (I->getOpcode()) {
-    case Instruction::Call:
-    case Instruction::Invoke: {
-      auto *Call = cast<CallBase>(I);
-      // Not captured if the callee is readonly, doesn't return a copy through
-      // its return value and doesn't unwind (a readonly function can leak bits
-      // by throwing an exception or not depending on the input value).
-      if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
-          Call->getType()->isVoidTy())
-        break;
-
-      // The pointer is not captured if returned pointer is not captured.
-      // NOTE: CaptureTracking users should not assume that only functions
-      // marked with nocapture do not capture. This means that places like
-      // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
-      // in BasicAA also need to know about this property.
-      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
-                                                                      true)) {
-        if (!AddUses(Call))
-          return;
-        break;
-      }
-
-      // Volatile operations effectively capture the memory location that they
-      // load and store to.
-      if (auto *MI = dyn_cast<MemIntrinsic>(Call))
-        if (MI->isVolatile())
-          if (Tracker->captured(U))
-            return;
-
-      // Calling a function pointer does not in itself cause the pointer to
-      // be captured.  This is a subtle point considering that (for example)
-      // the callee might return its own address.  It is analogous to saying
-      // that loading a value from a pointer does not cause the pointer to be
-      // captured, even though the loaded value might be the pointer itself
-      // (think of self-referential objects).
-      if (Call->isCallee(U))
-        break;
-
-      // Not captured if only passed via 'nocapture' arguments.
-      if (Call->isDataOperand(U) &&
-          !Call->doesNotCapture(Call->getDataOperandNo(U))) {
-        // The parameter is not marked 'nocapture' - captured.
-        if (Tracker->captured(U))
-          return;
-      }
-      break;
-    }
-    case Instruction::Load:
-      // Volatile loads make the address observable.
-      if (cast<LoadInst>(I)->isVolatile())
-        if (Tracker->captured(U))
-          return;
-      break;
-    case Instruction::VAArg:
-      // "va-arg" from a pointer does not cause it to be captured.
-      break;
-    case Instruction::Store:
-      // Stored the pointer - conservatively assume it may be captured.
-      // Volatile stores make the address observable.
-      if (U->getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
-        if (Tracker->captured(U))
-          return;
-      break;
-    case Instruction::AtomicRMW: {
-      // atomicrmw conceptually includes both a load and store from
-      // the same location.
-      // As with a store, the location being accessed is not captured,
-      // but the value being stored is.
-      // Volatile stores make the address observable.
-      auto *ARMWI = cast<AtomicRMWInst>(I);
-      if (U->getOperandNo() == 1 || ARMWI->isVolatile())
-        if (Tracker->captured(U))
-          return;
-      break;
-    }
-    case Instruction::AtomicCmpXchg: {
-      // cmpxchg conceptually includes both a load and store from
-      // the same location.
-      // As with a store, the location being accessed is not captured,
-      // but the value being stored is.
-      // Volatile stores make the address observable.
-      auto *ACXI = cast<AtomicCmpXchgInst>(I);
-      if (U->getOperandNo() == 1 || U->getOperandNo() == 2 ||
-          ACXI->isVolatile())
-        if (Tracker->captured(U))
-          return;
-      break;
-    }
-    case Instruction::BitCast:
-    case Instruction::GetElementPtr:
-    case Instruction::PHI:
-    case Instruction::Select:
-    case Instruction::AddrSpaceCast:
-      // The original value is not captured via this if the new value isn't.
-      if (!AddUses(I))
-        return;
-      break;
-    case Instruction::ICmp: {
-      unsigned Idx = U->getOperandNo();
-      unsigned OtherIdx = 1 - Idx;
-      if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
-        // Don't count comparisons of a no-alias return value against null as
-        // captures. This allows us to ignore comparisons of malloc results
-        // with null, for example.
-        if (CPN->getType()->getAddressSpace() == 0)
-          if (isNoAliasCall(U->get()->stripPointerCasts()))
-            break;
-        if (!I->getFunction()->nullPointerIsDefined()) {
-          auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
-          // Comparing a dereferenceable_or_null pointer against null cannot
-          // lead to pointer escapes, because if it is not null it must be a
-          // valid (in-bounds) pointer.
-          if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout()))
-            break;
-        }
-      }
-      // Comparison against value stored in global variable. Given the pointer
-      // does not escape, its value cannot be guessed and stored separately in a
-      // global variable.
-      auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
-      if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
-        break;
-      // Otherwise, be conservative. There are crazy ways to capture pointers
-      // using comparisons.
+    switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) {
+    case UseCaptureKind::NO_CAPTURE:
+      continue;
+    case UseCaptureKind::MAY_CAPTURE:
       if (Tracker->captured(U))
         return;
-      break;
-    }
-    default:
-      // Something else - be conservative and say it is captured.
-      if (Tracker->captured(U))
+      continue;
+    case UseCaptureKind::PASSTHROUGH:
+      if (!AddUses(U->getUser()))
         return;
-      break;
+      continue;
     }
   }
 
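The net effect of this refactor is that the per-use analysis formerly inlined in PointerMayBeCaptured now lives in DetermineUseCaptureKind, which classifies each use as NO_CAPTURE, MAY_CAPTURE, or PASSTHROUGH, and the worklist loop merely dispatches on that result. A self-contained toy model of the new control flow (simplified types, not the LLVM API):

    #include <vector>

    // Toy model of the refactored capture walk; the loop mirrors the new
    // PointerMayBeCaptured structure, with the classification precomputed.
    enum class UseCaptureKind { NO_CAPTURE, MAY_CAPTURE, PASSTHROUGH };

    struct Use {
      UseCaptureKind Kind;       // pretend result of DetermineUseCaptureKind
      std::vector<Use> UserUses; // uses of the user (for PASSTHROUGH)
    };

    bool pointerMayBeCaptured(std::vector<Use> Worklist) {
      while (!Worklist.empty()) {
        Use U = Worklist.back();
        Worklist.pop_back();
        switch (U.Kind) {
        case UseCaptureKind::NO_CAPTURE:   // e.g. a non-volatile load
          continue;
        case UseCaptureKind::MAY_CAPTURE:  // e.g. storing the pointer itself
          return true;
        case UseCaptureKind::PASSTHROUGH:  // e.g. GEP/bitcast/phi/select:
          for (const Use &UU : U.UserUses) // the result aliases the pointer,
            Worklist.push_back(UU);        // so walk its uses too
          continue;
        }
      }
      return false;
    }

    int main() {
      Use Load{UseCaptureKind::NO_CAPTURE, {}};
      Use Gep{UseCaptureKind::PASSTHROUGH,
              {{UseCaptureKind::MAY_CAPTURE, {}}}};
      return pointerMayBeCaptured({Load, Gep}) ? 0 : 1; // captured via the GEP
    }
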
diff --git a/src/llvm-project/llvm/lib/Analysis/CmpInstAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 5b95198..20b1df6 100644
--- a/src/llvm-project/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -18,9 +18,7 @@
 
 using namespace llvm;
 
-unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
-  ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
-                                        : ICI->getPredicate();
+unsigned llvm::getICmpCode(CmpInst::Predicate Pred) {
   switch (Pred) {
       // False -> 0
     case ICmpInst::ICMP_UGT: return 1;  // 001
@@ -63,6 +61,18 @@
          (CmpInst::isSigned(P2) && ICmpInst::isEquality(P1));
 }
 
+Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy,
+                                   CmpInst::Predicate &Pred) {
+  Pred = static_cast<FCmpInst::Predicate>(Code);
+  assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
+         "Unexpected FCmp predicate!");
+  if (Pred == FCmpInst::FCMP_FALSE)
+    return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0);
+  if (Pred == FCmpInst::FCMP_TRUE)
+    return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1);
+  return nullptr;
+}
+
 bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
                                 CmpInst::Predicate &Pred,
                                 Value *&X, APInt &Mask, bool LookThruTrunc) {
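
For background on the change above: getICmpCode maps a predicate to a 3-bit code (per the 001/010/100 comments in this file), so conjunctions and disjunctions of integer compares reduce to bitwise AND/OR on the codes; taking the predicate directly, rather than an instruction plus an invert flag, lets callers pass an already-inverted predicate. A tiny self-contained illustration of the encoding:

    #include <cassert>

    // Toy re-implementation of the 3-bit predicate code used by getICmpCode:
    // bit 0 = "greater", bit 1 = "equal", bit 2 = "less" (unsigned predicates
    // only, for brevity). Names here are illustrative, not the LLVM API.
    enum : unsigned { G = 1, E = 2, L = 4 }; // 001, 010, 100

    int main() {
      // (x > y) || (x == y)  ==>  G|E == 3 (011)  ==>  "uge"
      assert((G | E) == 3);
      // (x <= y) && (x != y) ==>  (L|E) & (L|G) == L (100)  ==>  "ult"
      assert(((L | E) & (L | G)) == L);
      return 0;
    }
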
diff --git a/src/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp b/src/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
index 27c5250..ded842b 100644
--- a/src/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CodeMetrics.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/InstructionCost.h"
@@ -118,13 +117,6 @@
     const BasicBlock *BB, const TargetTransformInfo &TTI,
     const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
   ++NumBlocks;
-  // Use a proxy variable for NumInsts of type InstructionCost, so that it can
-  // use InstructionCost's arithmetic properties such as saturation when this
-  // feature is added to InstructionCost.
-  // When storing the value back to NumInsts, we can assume all costs are Valid
-  // because the IR should not contain any nodes that cannot be costed. If that
-  // happens the cost-model is broken.
-  InstructionCost NumInstsProxy = NumInsts;
   InstructionCost NumInstsBeforeThisBB = NumInsts;
   for (const Instruction &I : *BB) {
     // Skip ephemeral values.
@@ -141,7 +133,8 @@
         // When preparing for LTO, liberally consider calls as inline
         // candidates.
         if (!Call->isNoInline() && IsLoweredToCall &&
-            ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) {
+            ((F->hasInternalLinkage() && F->hasOneLiveUse()) ||
+             PrepareForLTO)) {
           ++NumInlineCandidates;
         }
 
@@ -184,8 +177,7 @@
       if (InvI->cannotDuplicate())
         notDuplicatable = true;
 
-    NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
-    NumInsts = *NumInstsProxy.getValue();
+    NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
   }
 
   if (isa<ReturnInst>(BB->getTerminator()))
@@ -205,6 +197,6 @@
   notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
 
   // Remember NumInsts for this BB.
-  InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB;
-  NumBBInsts[BB] = *NumInstsThisBB.getValue();
+  InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB;
+  NumBBInsts[BB] = NumInstsThisBB;
 }
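
The proxy variable above became unnecessary because NumInsts and NumBBInsts now hold InstructionCost directly, and the cost type itself provides the saturating, validity-tracking arithmetic. A simplified sketch of such a type (not the LLVM class; assumes non-negative costs):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Simplified stand-in for llvm::InstructionCost: a value plus a validity
    // bit; addition saturates instead of wrapping and invalidity is sticky,
    // so totals can be accumulated directly, with no proxy variable.
    struct Cost {
      int64_t V = 0;
      bool Valid = true;
      Cost &operator+=(const Cost &RHS) {
        Valid = Valid && RHS.Valid;
        if (V > std::numeric_limits<int64_t>::max() - RHS.V)
          V = std::numeric_limits<int64_t>::max(); // saturate, don't overflow
        else
          V += RHS.V;
        return *this;
      }
    };

    int main() {
      Cost NumInsts;
      NumInsts += Cost{std::numeric_limits<int64_t>::max(), true};
      NumInsts += Cost{1, true}; // would overflow a plain integer counter
      assert(NumInsts.V == std::numeric_limits<int64_t>::max());
      assert(NumInsts.Valid);
      return 0;
    }
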
diff --git a/src/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/src/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index f6b9551..ae927da 100644
--- a/src/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Config/config.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantFold.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -57,7 +58,6 @@
 #include <cerrno>
 #include <cfenv>
 #include <cmath>
-#include <cstddef>
 #include <cstdint>
 
 using namespace llvm;
@@ -92,7 +92,7 @@
       return ConstantExpr::getBitCast(C, DestTy);
 
     Result <<= BitShift;
-    Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
+    Result |= ElementCI->getValue().zext(Result.getBitWidth());
   }
 
   return nullptr;
@@ -634,6 +634,39 @@
   return ConstantInt::get(IntType->getContext(), ResultVal);
 }
 
+} // anonymous namespace
+
+// If GV is a constant with an initializer, read its representation starting
+// at Offset and return it as a constant array of unsigned char.  Otherwise
+// return null.
+Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
+                                        uint64_t Offset) {
+  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return nullptr;
+
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+  Constant *Init = const_cast<Constant *>(GV->getInitializer());
+  TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
+  if (InitSize < Offset)
+    return nullptr;
+
+  uint64_t NBytes = InitSize - Offset;
+  if (NBytes > UINT16_MAX)
+    // Bail for large initializers in excess of 64K to avoid allocating
+    // too much memory.
+  // Offset is assumed to be less than or equal to InitSize (this
+    // is enforced in ReadDataFromGlobal).
+    return nullptr;
+
+  SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
+  unsigned char *CurPtr = RawBytes.data();
+
+  if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
+    return nullptr;
+
+  return ConstantDataArray::get(GV->getContext(), RawBytes);
+}
+
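
A minimal sketch of the same bounds logic, assuming the initializer is already available as a flat byte buffer (the real function materializes the bytes via ReadDataFromGlobal and returns a ConstantDataArray):

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Hypothetical analogue of ReadByteArrayFromGlobal: slice a constant
    // initializer's bytes starting at Offset, bailing out past 64K.
    std::optional<std::vector<uint8_t>>
    readByteArray(const std::vector<uint8_t> &Init, uint64_t Offset) {
      if (Init.size() < Offset)
        return std::nullopt; // offset lies past the initializer
      uint64_t NBytes = Init.size() - Offset;
      if (NBytes > UINT16_MAX)
        return std::nullopt; // avoid allocating more than 64K
      return std::vector<uint8_t>(Init.begin() + Offset, Init.end());
    }

    int main() {
      std::vector<uint8_t> Init(100, 0x2a);
      auto Bytes = readByteArray(Init, 40);
      return (Bytes && Bytes->size() == 60) ? 0 : 1;
    }
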
 /// If this Offset points exactly to the start of an aggregate element, return
 /// that element, otherwise return nullptr.
 Constant *getConstantAtOffset(Constant *Base, APInt Offset,
@@ -662,8 +695,6 @@
   return C;
 }
 
-} // end anonymous namespace
-
 Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                           const APInt &Offset,
                                           const DataLayout &DL) {
@@ -867,21 +898,6 @@
 
   Type *IntIdxTy = DL.getIndexType(Ptr->getType());
 
-  // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
-  // "inttoptr (sub (ptrtoint Ptr), V)"
-  if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
-    auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
-    assert((!CE || CE->getType() == IntIdxTy) &&
-           "CastGEPIndices didn't canonicalize index types!");
-    if (CE && CE->getOpcode() == Instruction::Sub &&
-        CE->getOperand(0)->isNullValue()) {
-      Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
-      Res = ConstantExpr::getSub(Res, CE->getOperand(1));
-      Res = ConstantExpr::getIntToPtr(Res, ResTy);
-      return ConstantFoldConstant(Res, DL, TLI);
-    }
-  }
-
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     if (!isa<ConstantInt>(Ops[i]))
       return nullptr;
@@ -1015,8 +1031,24 @@
   if (Instruction::isUnaryOp(Opcode))
     return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
 
-  if (Instruction::isBinaryOp(Opcode))
+  if (Instruction::isBinaryOp(Opcode)) {
+    switch (Opcode) {
+    default:
+      break;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      // Handle floating point instructions separately to account for denormals
+      // TODO: If a constant expression is being folded rather than an
+      // instruction, denormals will not be flushed/treated as zero
+      if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
+        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
+      }
+    }
     return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
+  }
 
   if (Instruction::isCast(Opcode))
     return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
@@ -1030,13 +1062,21 @@
                                           GEP->getInRangeIndex());
   }
 
-  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
+  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) {
+    if (CE->isCompare())
+      return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+                                             DL, TLI);
     return CE->getWithOperands(Ops);
+  }
 
   switch (Opcode) {
   default: return nullptr;
   case Instruction::ICmp:
-  case Instruction::FCmp: llvm_unreachable("Invalid for compares");
+  case Instruction::FCmp: {
+    auto *C = cast<CmpInst>(InstOrCE);
+    return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
+                                           DL, TLI, C);
+  }
   case Instruction::Freeze:
     return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
   case Instruction::Call:
@@ -1051,13 +1091,22 @@
   case Instruction::ExtractElement:
     return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
   case Instruction::ExtractValue:
-    return ConstantExpr::getExtractValue(
+    return ConstantFoldExtractValueInstruction(
         Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
   case Instruction::InsertElement:
     return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+  case Instruction::InsertValue:
+    return ConstantFoldInsertValueInstruction(
+        Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(
         Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
+  case Instruction::Load: {
+    const auto *LI = dyn_cast<LoadInst>(InstOrCE);
+    if (LI->isVolatile())
+      return nullptr;
+    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
+  }
   }
 }
 
@@ -1095,11 +1144,10 @@
   }
 
   if (auto *CE = dyn_cast<ConstantExpr>(C)) {
-    if (CE->isCompare())
-      return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
-                                             DL, TLI);
-
-    return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
+    if (Constant *Res =
+            ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
+      return Res;
+    return const_cast<Constant *>(C);
   }
 
   assert(isa<ConstantVector>(C));
@@ -1153,22 +1201,6 @@
     Ops.push_back(Op);
   }
 
-  if (const auto *CI = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           DL, TLI);
-
-  if (const auto *LI = dyn_cast<LoadInst>(I)) {
-    if (LI->isVolatile())
-      return nullptr;
-    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
-  }
-
-  if (auto *IVI = dyn_cast<InsertValueInst>(I))
-    return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices());
-
-  if (auto *EVI = dyn_cast<ExtractValueInst>(I))
-    return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices());
-
   return ConstantFoldInstOperands(I, Ops, DL, TLI);
 }
 
@@ -1185,10 +1217,9 @@
   return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
 }
 
-Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate,
-                                                Constant *Ops0, Constant *Ops1,
-                                                const DataLayout &DL,
-                                                const TargetLibraryInfo *TLI) {
+Constant *llvm::ConstantFoldCompareInstOperands(
+    unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
+    const TargetLibraryInfo *TLI, const Instruction *I) {
   CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
   // fold: icmp (inttoptr x), null         -> icmp x, 0
   // fold: icmp null, (inttoptr x)         -> icmp 0, x
@@ -1290,6 +1321,11 @@
     return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
   }
 
+  // Flush any denormal constant float input according to denormal handling
+  // mode.
+  Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
+  Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
+
   return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
 }
 
@@ -1308,7 +1344,68 @@
     if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
       return C;
 
-  return ConstantExpr::get(Opcode, LHS, RHS);
+  if (ConstantExpr::isDesirableBinOp(Opcode))
+    return ConstantExpr::get(Opcode, LHS, RHS);
+  return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
+}
+
+Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
+                                bool IsOutput) {
+  if (!I || !I->getParent() || !I->getFunction())
+    return Operand;
+
+  ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
+  if (!CFP)
+    return Operand;
+
+  const APFloat &APF = CFP->getValueAPF();
+  Type *Ty = CFP->getType();
+  DenormalMode DenormMode =
+      I->getFunction()->getDenormalMode(Ty->getFltSemantics());
+  DenormalMode::DenormalModeKind Mode =
+      IsOutput ? DenormMode.Output : DenormMode.Input;
+  switch (Mode) {
+  default:
+    llvm_unreachable("unknown denormal mode");
+    return Operand;
+  case DenormalMode::IEEE:
+    return Operand;
+  case DenormalMode::PreserveSign:
+    if (APF.isDenormal()) {
+      return ConstantFP::get(
+          Ty->getContext(),
+          APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
+    }
+    return Operand;
+  case DenormalMode::PositiveZero:
+    if (APF.isDenormal()) {
+      return ConstantFP::get(Ty->getContext(),
+                             APFloat::getZero(Ty->getFltSemantics(), false));
+    }
+    return Operand;
+  }
+  return Operand;
+}
+
+Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
+                                           Constant *RHS, const DataLayout &DL,
+                                           const Instruction *I) {
+  if (Instruction::isBinaryOp(Opcode)) {
+    // Flush denormal inputs if needed.
+    Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
+    Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
+
+    // Calculate constant result.
+    Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
+    if (!C)
+      return nullptr;
+
+    // Flush denormal output if needed.
+    return FlushFPConstant(C, I, /* IsOutput */ true);
+  }
+  // If instruction lacks a parent/function and the denormal mode cannot be
+  // determined, use the default (IEEE).
+  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
 }
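
FlushFPConstant applies the function's denormal-handling mode to constant inputs and outputs: preserve-sign turns a denormal into a zero of the same sign, positive-zero turns it into +0.0, and IEEE leaves it untouched. A small host-side sketch of the same treatment (plain floats rather than APFloat):

    #include <cmath>

    enum class DenormalKind { IEEE, PreserveSign, PositiveZero };

    // Flush a denormal (subnormal) value the way FlushFPConstant does for
    // ConstantFP operands, but on host floats for illustration.
    float flushDenormal(float X, DenormalKind Mode) {
      if (Mode == DenormalKind::IEEE || std::fpclassify(X) != FP_SUBNORMAL)
        return X;
      if (Mode == DenormalKind::PreserveSign)
        return std::copysign(0.0f, X); // keep the sign, drop the magnitude
      return 0.0f;                     // PositiveZero: always +0.0
    }

    int main() {
      float Denorm = 1e-40f; // subnormal in IEEE-754 binary32
      float Z = flushDenormal(-Denorm, DenormalKind::PreserveSign);
      return (Z == 0.0f && std::signbit(Z)) ? 0 : 1; // -0.0: sign preserved
    }
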
 
 Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
@@ -1337,6 +1434,19 @@
             DL, BaseOffset, /*AllowNonInbounds=*/true));
         if (Base->isNullValue()) {
           FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
+        } else {
+          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
+          if (GEP->getNumIndices() == 1 &&
+              GEP->getSourceElementType()->isIntegerTy(8)) {
+            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
+            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
+            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
+            if (Sub && Sub->getType() == IntIdxTy &&
+                Sub->getOpcode() == Instruction::Sub &&
+                Sub->getOperand(0)->isNullValue())
+              FoldedValue = ConstantExpr::getSub(
+                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
+          }
         }
       }
       if (FoldedValue) {
@@ -1389,6 +1499,8 @@
 bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   if (Call->isNoBuiltin())
     return false;
+  if (Call->getFunctionType() != F->getFunctionType())
+    return false;
   switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not depend on
   // FP environment can be folded even in strictfp functions.
@@ -1530,6 +1642,8 @@
   case Intrinsic::experimental_constrained_trunc:
   case Intrinsic::experimental_constrained_nearbyint:
   case Intrinsic::experimental_constrained_rint:
+  case Intrinsic::experimental_constrained_fcmp:
+  case Intrinsic::experimental_constrained_fcmps:
     return true;
   default:
     return false;
@@ -1801,12 +1915,12 @@
 
  // If evaluation raised an FP exception, the result can depend on rounding
   // mode. If the latter is unknown, folding is not possible.
-  if (!ORM || *ORM == RoundingMode::Dynamic)
+  if (ORM && *ORM == RoundingMode::Dynamic)
     return false;
 
   // If FP exceptions are ignored, fold the call, even if such exception is
   // raised.
-  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+  if (EB && *EB != fp::ExceptionBehavior::ebStrict)
     return true;
 
  // Leave the calculation for runtime so that exception flags can be correctly set
@@ -1982,7 +2096,7 @@
     case Intrinsic::experimental_constrained_rint: {
       auto CI = cast<ConstrainedFPIntrinsic>(Call);
       RM = CI->getRoundingMode();
-      if (!RM || RM.getValue() == RoundingMode::Dynamic)
+      if (!RM || *RM == RoundingMode::Dynamic)
         return nullptr;
       break;
     }
@@ -2304,6 +2418,24 @@
   return nullptr;
 }
 
+static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
+                                 const ConstrainedFPIntrinsic *Call) {
+  APFloat::opStatus St = APFloat::opOK;
+  auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
+  FCmpInst::Predicate Cond = FCmp->getPredicate();
+  if (FCmp->isSignaling()) {
+    if (Op1.isNaN() || Op2.isNaN())
+      St = APFloat::opInvalidOp;
+  } else {
+    if (Op1.isSignaling() || Op2.isSignaling())
+      St = APFloat::opInvalidOp;
+  }
+  bool Result = FCmpInst::compare(Op1, Op2, Cond);
+  if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
+    return ConstantInt::get(Call->getType()->getScalarType(), Result);
+  return nullptr;
+}
+
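
evaluateCompare follows IEEE-754: the signaling intrinsic (experimental.constrained.fcmps) raises an invalid-operation exception for any NaN operand, while the quiet one (experimental.constrained.fcmp) raises it only for signaling NaNs; mayFoldConstrained then decides whether folding with that status is allowed. A hedged sketch of just the status computation (signaling-NaN detection is assumed to be supplied by the caller):

    #include <cmath>

    enum class OpStatus { OK, InvalidOp };

    // Mirror of the status logic in evaluateCompare, on host doubles instead
    // of APFloat.
    OpStatus compareStatus(double A, double B, bool Signaling,
                           bool ASignalingNaN, bool BSignalingNaN) {
      if (Signaling) {                      // fcmps: any NaN is invalid
        if (std::isnan(A) || std::isnan(B))
          return OpStatus::InvalidOp;
      } else {                              // fcmp: only signaling NaNs
        if (ASignalingNaN || BSignalingNaN)
          return OpStatus::InvalidOp;
      }
      return OpStatus::OK;
    }

    int main() {
      double QNaN = std::nan("");
      // A quiet compare of a quiet NaN raises nothing; a signaling one does.
      bool Ok = compareStatus(QNaN, 1.0, false, false, false) == OpStatus::OK &&
                compareStatus(QNaN, 1.0, true, false, false) ==
                    OpStatus::InvalidOp;
      return Ok ? 0 : 1;
    }
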
 static Constant *ConstantFoldScalarCall2(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2332,8 +2464,6 @@
   }
 
   if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
-    if (!Ty->isFloatingPointTy())
-      return nullptr;
     const APFloat &Op1V = Op1->getValueAPF();
 
     if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -2363,6 +2493,9 @@
         case Intrinsic::experimental_constrained_frem:
           St = Res.mod(Op2V);
           break;
+        case Intrinsic::experimental_constrained_fcmp:
+        case Intrinsic::experimental_constrained_fcmps:
+          return evaluateCompare(Op1V, Op2V, ConstrIntr);
         }
         if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
                                St))
@@ -2487,6 +2620,11 @@
     case Intrinsic::smin:
     case Intrinsic::umax:
     case Intrinsic::umin:
+      // This is the same as for binary ops - poison propagates.
+      // TODO: Poison handling should be consolidated.
+      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+        return PoisonValue::get(Ty);
+
       if (!C0 && !C1)
         return UndefValue::get(Ty);
       if (!C0 || !C1)
@@ -2553,6 +2691,11 @@
     }
     case Intrinsic::uadd_sat:
     case Intrinsic::sadd_sat:
+      // This is the same as for binary ops - poison propagates.
+      // TODO: Poison handling should be consolidated.
+      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+        return PoisonValue::get(Ty);
+
       if (!C0 && !C1)
         return UndefValue::get(Ty);
       if (!C0 || !C1)
@@ -2563,6 +2706,11 @@
         return ConstantInt::get(Ty, C0->sadd_sat(*C1));
     case Intrinsic::usub_sat:
     case Intrinsic::ssub_sat:
+      // This is the same as for binary ops - poison propagates.
+      // TODO: Poison handling should be consolidated.
+      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+        return PoisonValue::get(Ty);
+
       if (!C0 && !C1)
         return UndefValue::get(Ty);
       if (!C0 || !C1)
@@ -2843,11 +2991,11 @@
     unsigned Width = C0->getBitWidth();
     assert(Scale < Width && "Illegal scale.");
     unsigned ExtendedWidth = Width * 2;
-    APInt Product = (C0->sextOrSelf(ExtendedWidth) *
-                     C1->sextOrSelf(ExtendedWidth)).ashr(Scale);
+    APInt Product =
+        (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
     if (IntrinsicID == Intrinsic::smul_fix_sat) {
-      APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
-      APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+      APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
+      APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
       Product = APIntOps::smin(Product, Max);
       Product = APIntOps::smax(Product, Min);
     }
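
For reference, smul_fix multiplies two fixed-point values by sign-extending to double width, multiplying, and arithmetic-shifting right by the scale; the .sat form additionally clamps to the signed range of the original width, exactly as the sext/ashr/smin/smax sequence above does. A worked Q8.8 example at width 16:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Q8.8 fixed-point multiply, following the smul_fix_sat folding above
    // (16-bit operands, 32-bit intermediate, scale = 8).
    int16_t smulFixSat16(int16_t A, int16_t B, unsigned Scale = 8) {
      int32_t Product = (int32_t(A) * int32_t(B)) >> Scale; // sext, mul, ashr
      int32_t Max = INT16_MAX, Min = INT16_MIN;
      return int16_t(std::clamp(Product, Min, Max));        // saturate to i16
    }

    int main() {
      // 1.5 * 2.0 in Q8.8: 0x0180 * 0x0200 -> 0x0300 (= 3.0)
      assert(smulFixSat16(0x0180, 0x0200) == 0x0300);
      return 0;
    }
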
@@ -2972,7 +3120,7 @@
       }
       return ConstantVector::get(NCs);
     }
-    break;
+    return nullptr;
   }
   case Intrinsic::get_active_lane_mask: {
     auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
@@ -2991,7 +3139,7 @@
       }
       return ConstantVector::get(NCs);
     }
-    break;
+    return nullptr;
   }
   default:
     break;
@@ -3001,7 +3149,7 @@
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
       // Some intrinsics use a scalar type for certain arguments.
-      if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
+      if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {
         Lane[J] = Operands[J];
         continue;
       }
diff --git a/src/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/src/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
index 773f71a..2efa474 100644
--- a/src/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -12,7 +12,6 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Debug.h"
 
-#include <algorithm>
 #include <string>
 
 using namespace llvm;
@@ -111,7 +110,7 @@
   if (Constraints.empty())
     return;
 
-  for (auto &Row : Constraints) {
+  for (const auto &Row : Constraints) {
     SmallVector<std::string, 16> Parts;
     for (unsigned I = 1, S = Row.size(); I < S; ++I) {
       if (Row[I] == 0)
diff --git a/src/llvm-project/llvm/lib/Analysis/CostModel.cpp b/src/llvm-project/llvm/lib/Analysis/CostModel.cpp
index 326baca..3c162f6 100644
--- a/src/llvm-project/llvm/lib/Analysis/CostModel.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CostModel.cpp
@@ -17,7 +17,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CostModel.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
@@ -25,8 +24,8 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/IntrinsicInst.h"
 using namespace llvm;
 
 static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
@@ -41,6 +40,9 @@
                clEnumValN(TargetTransformInfo::TCK_SizeAndLatency,
                           "size-latency", "Code size and latency")));
 
+static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
+    cl::desc("Calculate intrinsics cost based only on argument types"),
+    cl::init(false));
 
 #define CM_NAME "cost-model"
 #define DEBUG_TYPE CM_NAME
@@ -105,7 +107,16 @@
 
   for (BasicBlock &B : *F) {
     for (Instruction &Inst : B) {
-      InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind);
+      InstructionCost Cost;
+      if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) {
+        auto *II = dyn_cast<IntrinsicInst>(&Inst);
+        IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
+                                    InstructionCost::getInvalid(), true);
+        Cost = TTI->getIntrinsicInstrCost(ICA, CostKind);
+      } else {
+        Cost = TTI->getInstructionCost(&Inst, CostKind);
+      }
       if (auto CostVal = Cost.getValue())
         OS << "Cost Model: Found an estimated cost of " << *CostVal;
       else
@@ -119,12 +130,21 @@
 PreservedAnalyses CostModelPrinterPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  OS << "Cost Model for function '" << F.getName() << "'\n";
+  OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n";
   for (BasicBlock &B : F) {
     for (Instruction &Inst : B) {
       // TODO: Use a pass parameter instead of cl::opt CostKind to determine
       // which cost kind to print.
-      InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind);
+      InstructionCost Cost;
+      if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) {
+        auto *II = dyn_cast<IntrinsicInst>(&Inst);
+        IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
+                                    InstructionCost::getInvalid(), true);
+        Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
+      } else {
+        Cost = TTI.getInstructionCost(&Inst, CostKind);
+      }
       if (auto CostVal = Cost.getValue())
         OS << "Cost Model: Found an estimated cost of " << *CostVal;
       else
diff --git a/src/llvm-project/llvm/lib/Analysis/CycleAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/CycleAnalysis.cpp
index 09c7ee6..1799812 100644
--- a/src/llvm-project/llvm/lib/Analysis/CycleAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/CycleAnalysis.cpp
@@ -8,11 +8,15 @@
 
 #include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/IR/CFG.h"
+#include "llvm/IR/CFG.h" // for successors found by ADL in GenericCycleImpl.h
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
+namespace llvm {
+class Module;
+}
+
 template class llvm::GenericCycleInfo<SSAContext>;
 template class llvm::GenericCycle<SSAContext>;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/DDG.cpp b/src/llvm-project/llvm/lib/Analysis/DDG.cpp
index 7e13579..da64ef1 100644
--- a/src/llvm-project/llvm/lib/Analysis/DDG.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DDG.cpp
@@ -17,13 +17,12 @@
 using namespace llvm;
 
 static cl::opt<bool> SimplifyDDG(
-    "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    "ddg-simplify", cl::init(true), cl::Hidden,
     cl::desc(
         "Simplify DDG by merging nodes that have less interesting edges."));
 
-static cl::opt<bool>
-    CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore,
-                   cl::desc("Create pi-block nodes."));
+static cl::opt<bool> CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden,
+                                    cl::desc("Create pi-block nodes."));
 
 #define DEBUG_TYPE "ddg"
 
@@ -34,7 +33,7 @@
 //===--------------------------------------------------------------------===//
 // DDGNode implementation
 //===--------------------------------------------------------------------===//
-DDGNode::~DDGNode() {}
+DDGNode::~DDGNode() = default;
 
 bool DDGNode::collectInstructions(
     llvm::function_ref<bool(Instruction *)> const &Pred,
@@ -96,7 +95,7 @@
     llvm_unreachable("unimplemented type of node");
 
   OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
-  for (auto &E : N.getEdges())
+  for (const auto &E : N.getEdges())
     OS.indent(2) << *E;
   return OS;
 }
@@ -189,7 +188,7 @@
   // Put the basic blocks in program order for correct dependence
   // directions.
   BasicBlockListType BBList;
-  for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
+  for (const auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
     append_range(BBList, SCC);
   std::reverse(BBList.begin(), BBList.end());
   DDGBuilder(*this, D, BBList).populate();
diff --git a/src/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp
index 0d5a936..6b5acd2 100644
--- a/src/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DDGPrinter.cpp
@@ -18,8 +18,8 @@
 
 using namespace llvm;
 
-static cl::opt<bool> DotOnly("dot-ddg-only", cl::init(false), cl::Hidden,
-                             cl::ZeroOrMore, cl::desc("simple ddg dot graph"));
+static cl::opt<bool> DotOnly("dot-ddg-only", cl::Hidden,
+                             cl::desc("simple ddg dot graph"));
 static cl::opt<std::string> DDGDotFilenamePrefix(
     "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden,
     cl::desc("The prefix used for the DDG dot file names."));
diff --git a/src/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/src/llvm-project/llvm/lib/Analysis/Delinearization.cpp
index 670532c..7ab91b9 100644
--- a/src/llvm-project/llvm/lib/Analysis/Delinearization.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/Delinearization.cpp
@@ -24,9 +24,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
@@ -135,7 +133,7 @@
     if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
       bool HasAddRec = false;
       SmallVector<const SCEV *, 0> Operands;
-      for (auto Op : Mul->operands()) {
+      for (const auto *Op : Mul->operands()) {
         const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
         if (Unknown && !isa<CallInst>(Unknown->getValue())) {
           Operands.push_back(Op);
@@ -523,6 +521,44 @@
   return !Subscripts.empty();
 }
 
+bool llvm::tryDelinearizeFixedSizeImpl(
+    ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn,
+    SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<int> &Sizes) {
+  Value *SrcPtr = getLoadStorePointerOperand(Inst);
+
+  // Check the simple case where the array dimensions are fixed size.
+  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+  if (!SrcGEP)
+    return false;
+
+  getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes);
+
+  // Check that the two size arrays are non-empty and equal in length and
+  // value.
+  // TODO: it would be better to let the caller clear Subscripts, similar
+  // to how we handle Sizes.
+  if (Sizes.empty() || Subscripts.size() <= 1) {
+    Subscripts.clear();
+    return false;
+  }
+
+  // Check that for identical base pointers we do not miss index offsets
+  // that have been added before this GEP is applied.
+  Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts();
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
+  if (!SrcBase || SrcBasePtr != SrcBase->getValue()) {
+    Subscripts.clear();
+    return false;
+  }
+
+  assert(Subscripts.size() == Sizes.size() + 1 &&
+         "Expected equal number of entries in the list of size and "
+         "subscript.");
+
+  return true;
+}
+
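
tryDelinearizeFixedSizeImpl recovers multidimensional subscripts from a GEP over a fixed-size array, so an access linearized as A[i*N + j] comes back as subscripts {i, j} with inner sizes {N}. The arithmetic it inverts can be sketched directly (a numeric analogue of what getIndexExpressionsFromGEP extracts symbolically; validity of the recovered subscripts is checked separately by the callers):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Recover subscripts from a linear offset given fixed inner dimension
    // sizes, peeling the innermost dimension first.
    std::vector<uint64_t> delinearize(uint64_t Offset,
                                      const std::vector<uint64_t> &Sizes) {
      std::vector<uint64_t> Subscripts(Sizes.size() + 1);
      for (size_t I = Sizes.size(); I > 0; --I) {
        Subscripts[I] = Offset % Sizes[I - 1];
        Offset /= Sizes[I - 1];
      }
      Subscripts[0] = Offset; // the outermost subscript is what remains
      return Subscripts;
    }

    int main() {
      // A[i][j] with N = 10 columns: offset 57 -> i = 5, j = 7.
      auto S = delinearize(57, {10});
      assert(S[0] == 5 && S[1] == 7);
      return 0;
    }
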
 namespace {
 
 class Delinearization : public FunctionPass {
diff --git a/src/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/src/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
index 117b12f..e01ed48 100644
--- a/src/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
@@ -21,19 +21,13 @@
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
index f827f74..3d2d84e 100644
--- a/src/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -50,7 +50,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Delinearization.h"
@@ -58,10 +57,8 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -109,11 +106,10 @@
 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
 
 static cl::opt<bool>
-    Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    Delinearize("da-delinearize", cl::init(true), cl::Hidden,
                 cl::desc("Try to delinearize array references."));
 static cl::opt<bool> DisableDelinearizationChecks(
-    "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
-    cl::ZeroOrMore,
+    "da-disable-delinearization-checks", cl::Hidden,
     cl::desc(
         "Disable checks that try to statically verify validity of "
         "delinearized subscripts. Enabling this option may result in incorrect "
@@ -121,7 +117,7 @@
         "dimension to underflow or overflow into another dimension."));
 
 static cl::opt<unsigned> MIVMaxLevelThreshold(
-    "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
+    "da-miv-max-level-threshold", cl::init(7), cl::Hidden,
     cl::desc("Maximum depth allowed for the recursive algorithm used to "
              "explore MIV direction vectors."));
 
@@ -787,6 +783,8 @@
 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
   unsigned D = DstLoop->getLoopDepth();
   if (D > CommonLevels)
+    // This tries to make sure that we assign unique numbers to src and dst when
+    // the memory accesses reside in different loops that have the same depth.
     return D - CommonLevels + SrcLevels;
   else
     return D;
@@ -796,10 +794,16 @@
 // Returns true if Expression is loop invariant in LoopNest.
 bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
                                      const Loop *LoopNest) const {
+  // Unlike ScalarEvolution::isLoopInvariant(), we consider an access outside of
+  // any loop as invariant, because we only consider expression evaluation at a
+  // specific position (where the array access takes place), and not across the
+  // entire function.
   if (!LoopNest)
     return true;
-  return SE->isLoopInvariant(Expression, LoopNest) &&
-    isLoopInvariant(Expression, LoopNest->getParentLoop());
+
+  // If the expression is invariant in the outermost loop of the loop nest, it
+  // is invariant anywhere in the loop nest.
+  return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop());
 }
 
 
@@ -890,13 +894,25 @@
   }
 }
 
-// Examine the scev and return true iff it's linear.
+// Examine the scev and return true iff it's affine.
 // Collect any loops mentioned in the set of "Loops".
 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
                                     SmallBitVector &Loops, bool IsSrc) {
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
   if (!AddRec)
     return isLoopInvariant(Expr, LoopNest);
+
+  // The AddRec must depend on one of the containing loops. Otherwise,
+  // mapSrcLoop and mapDstLoop return indices outside the intended range. This
+  // can happen when a subscript in one loop references an IV from a sibling
+  // loop that could not be replaced with a concrete exit value by
+  // getSCEVAtScope.
+  const Loop *L = LoopNest;
+  while (L && AddRec->getLoop() != L)
+    L = L->getParentLoop();
+  if (!L)
+    return false;
+
   const SCEV *Start = AddRec->getStart();
   const SCEV *Step = AddRec->getStepRecurrence(*SE);
   const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
@@ -3318,59 +3334,45 @@
   return true;
 }
 
+/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
+/// arrays accessed are fixed-size arrays. Return true if delinearization was
+/// successful.
 bool DependenceInfo::tryDelinearizeFixedSize(
     Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
     const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
     SmallVectorImpl<const SCEV *> &DstSubscripts) {
+  LLVM_DEBUG({
+    const SCEVUnknown *SrcBase =
+        dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+    const SCEVUnknown *DstBase =
+        dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+    assert(SrcBase && DstBase && SrcBase == DstBase &&
+           "expected src and dst scev unknowns to be equal");
+  });
 
-  Value *SrcPtr = getLoadStorePointerOperand(Src);
-  Value *DstPtr = getLoadStorePointerOperand(Dst);
-  const SCEVUnknown *SrcBase =
-      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
-  const SCEVUnknown *DstBase =
-      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
-  assert(SrcBase && DstBase && SrcBase == DstBase &&
-         "expected src and dst scev unknowns to be equal");
-
-  // Check the simple case where the array dimensions are fixed size.
-  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
-  auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
-  if (!SrcGEP || !DstGEP)
+  SmallVector<int, 4> SrcSizes;
+  SmallVector<int, 4> DstSizes;
+  if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,
+                                   SrcSizes) ||
+      !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,
+                                   DstSizes))
     return false;
 
-  SmallVector<int, 4> SrcSizes, DstSizes;
-  getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
-  getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
-
   // Check that the two size arrays are non-empty and equal in length and
   // value.
-  if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
-      SrcSizes.size() != DstSizes.size() ||
+  if (SrcSizes.size() != DstSizes.size() ||
       !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
     SrcSubscripts.clear();
     DstSubscripts.clear();
     return false;
   }
 
-  Value *SrcBasePtr = SrcGEP->getOperand(0);
-  Value *DstBasePtr = DstGEP->getOperand(0);
-  while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
-    SrcBasePtr = PCast->getOperand(0);
-  while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
-    DstBasePtr = PCast->getOperand(0);
-
-  // Check that for identical base pointers we do not miss index offsets
-  // that have been added before this GEP is applied.
-  if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) {
-    SrcSubscripts.clear();
-    DstSubscripts.clear();
-    return false;
-  }
-
   assert(SrcSubscripts.size() == DstSubscripts.size() &&
-         SrcSubscripts.size() == SrcSizes.size() + 1 &&
-         "Expected equal number of entries in the list of sizes and "
-         "subscripts.");
+         "Expected equal number of entries in the list of SrcSubscripts and "
+         "DstSubscripts.");
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
 
   // In general we cannot safely assume that the subscripts recovered from GEPs
   // are in the range of values defined for their corresponding array
@@ -3406,8 +3408,8 @@
   }
   LLVM_DEBUG({
     dbgs() << "Delinearized subscripts of fixed-size array\n"
-           << "SrcGEP:" << *SrcGEP << "\n"
-           << "DstGEP:" << *DstGEP << "\n";
+           << "SrcGEP:" << *SrcPtr << "\n"
+           << "DstGEP:" << *DstPtr << "\n";
   });
   return true;
 }
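
The guard added to checkSubscript walks the parent chain to confirm that the recurrence's loop actually encloses the access; an induction variable inherited from a sibling loop would otherwise make mapSrcLoop/mapDstLoop produce an out-of-range level. The containment test is a plain parent-pointer walk:

    #include <cassert>

    struct Loop { const Loop *Parent = nullptr; }; // toy loop-nest node

    // True iff RecLoop is LoopNest itself or one of its ancestors, mirroring
    // the AddRec->getLoop() containment check added to checkSubscript.
    bool recurrenceLoopEnclosesAccess(const Loop *LoopNest,
                                      const Loop *RecLoop) {
      for (const Loop *L = LoopNest; L; L = L->Parent)
        if (L == RecLoop)
          return true;
      return false; // sibling loop: reject the subscript as non-affine
    }

    int main() {
      Loop Outer, Inner{&Outer}, Sibling{&Outer};
      assert(recurrenceLoopEnclosesAccess(&Inner, &Outer));    // enclosing
      assert(!recurrenceLoopEnclosesAccess(&Inner, &Sibling)); // sibling
      return 0;
    }
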
diff --git a/src/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/src/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
index 6b90db4..7ee2adf 100644
--- a/src/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Analysis/DependenceGraphBuilder.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DDG.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/src/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 4a792fc..79ea160 100644
--- a/src/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Config/config.h"
-#include "llvm/Support/Casting.h"
 #if defined(LLVM_HAVE_TF_API)
 
 #include "llvm/ADT/BitVector.h"
@@ -273,8 +272,8 @@
 static const std::vector<TensorSpec> getInputFeatures() {
   std::vector<TensorSpec> InputSpecs;
   for (size_t I = 0; I < NumberOfFeatures; ++I)
-    InputSpecs.push_back(
-        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
+    InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
+        TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
   append_range(InputSpecs, TrainingOnlyFeatures);
   return InputSpecs;
 }
@@ -290,8 +289,7 @@
   std::vector<LoggedFeatureSpec> FT;
 
   for (size_t I = 0; I < NumberOfFeatures; ++I)
-    FT.push_back(
-        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
+    FT.push_back({FeatureMap.at(I), None});
   if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
     append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));
 
diff --git a/src/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 39e80c2..02c40d2 100644
--- a/src/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -73,15 +73,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -221,19 +220,19 @@
     // phi nodes at the fringes of the dominance region
     if (!DT.dominates(&LoopHeader, UserBlock)) {
       // all PHI nodes of UserBlock become divergent
-      for (auto &Phi : UserBlock->phis()) {
+      for (const auto &Phi : UserBlock->phis()) {
         analyzeTemporalDivergence(Phi, OuterDivLoop);
       }
       continue;
     }
 
     // Taint outside users of values carried by OuterDivLoop.
-    for (auto &I : *UserBlock) {
+    for (const auto &I : *UserBlock) {
       analyzeTemporalDivergence(I, OuterDivLoop);
     }
 
     // visit all blocks in the dominance region
-    for (auto *SuccBlock : successors(UserBlock)) {
+    for (const auto *SuccBlock : successors(UserBlock)) {
       if (!Visited.insert(SuccBlock).second) {
         continue;
       }
@@ -400,7 +399,7 @@
     }
     for (const BasicBlock &BB : F) {
       OS << "\n           " << BB.getName() << ":\n";
-      for (auto &I : BB.instructionsWithoutDebug()) {
+      for (const auto &I : BB.instructionsWithoutDebug()) {
         OS << (DI.isDivergent(I) ? "DIVERGENT:     " : "               ");
         OS << I << "\n";
       }
diff --git a/src/llvm-project/llvm/lib/Analysis/DomPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
index 6088de5..e9f5103 100644
--- a/src/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
@@ -24,74 +24,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-template<>
-struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
-
-  DOTGraphTraits (bool isSimple=false)
-    : DefaultDOTGraphTraits(isSimple) {}
-
-  std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
-
-    BasicBlock *BB = Node->getBlock();
-
-    if (!BB)
-      return "Post dominance root node";
-
-
-    if (isSimple())
-      return DOTGraphTraits<DOTFuncInfo *>
-        ::getSimpleNodeLabel(BB, nullptr);
-    else
-      return DOTGraphTraits<DOTFuncInfo *>
-        ::getCompleteNodeLabel(BB, nullptr);
-  }
-};
-
-template<>
-struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
-
-  DOTGraphTraits (bool isSimple=false)
-    : DOTGraphTraits<DomTreeNode*>(isSimple) {}
-
-  static std::string getGraphName(DominatorTree *DT) {
-    return "Dominator tree";
-  }
-
-  std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
-    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
-  }
-};
-
-template<>
-struct DOTGraphTraits<PostDominatorTree*>
-  : public DOTGraphTraits<DomTreeNode*> {
-
-  DOTGraphTraits (bool isSimple=false)
-    : DOTGraphTraits<DomTreeNode*>(isSimple) {}
-
-  static std::string getGraphName(PostDominatorTree *DT) {
-    return "Post dominator tree";
-  }
-
-  std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
-    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
-  }
-};
-}
-
-PreservedAnalyses DomTreePrinterPass::run(Function &F,
-                                          FunctionAnalysisManager &AM) {
-  WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "dom", false);
-  return PreservedAnalyses::all();
-}
-
-PreservedAnalyses DomTreeOnlyPrinterPass::run(Function &F,
-                                              FunctionAnalysisManager &AM) {
-  WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "domonly",
-                      true);
-  return PreservedAnalyses::all();
-}
 
 void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) {
 #ifndef NDEBUG
@@ -110,166 +42,167 @@
 }
 
 namespace {
-struct DominatorTreeWrapperPassAnalysisGraphTraits {
+struct LegacyDominatorTreeWrapperPassAnalysisGraphTraits {
   static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
     return &DTWP->getDomTree();
   }
 };
 
-struct DomViewer : public DOTGraphTraitsViewer<
-                       DominatorTreeWrapperPass, false, DominatorTree *,
-                       DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomViewerWrapperPass
+    : public DOTGraphTraitsViewerWrapperPass<
+          DominatorTreeWrapperPass, false, DominatorTree *,
+          LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomViewer()
-      : DOTGraphTraitsViewer<DominatorTreeWrapperPass, false, DominatorTree *,
-                             DominatorTreeWrapperPassAnalysisGraphTraits>(
-            "dom", ID) {
-    initializeDomViewerPass(*PassRegistry::getPassRegistry());
+  DomViewerWrapperPass()
+      : DOTGraphTraitsViewerWrapperPass<
+            DominatorTreeWrapperPass, false, DominatorTree *,
+            LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) {
+    initializeDomViewerWrapperPassPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct DomOnlyViewer : public DOTGraphTraitsViewer<
-                           DominatorTreeWrapperPass, true, DominatorTree *,
-                           DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomOnlyViewerWrapperPass
+    : public DOTGraphTraitsViewerWrapperPass<
+          DominatorTreeWrapperPass, true, DominatorTree *,
+          LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomOnlyViewer()
-      : DOTGraphTraitsViewer<DominatorTreeWrapperPass, true, DominatorTree *,
-                             DominatorTreeWrapperPassAnalysisGraphTraits>(
-            "domonly", ID) {
-    initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+  DomOnlyViewerWrapperPass()
+      : DOTGraphTraitsViewerWrapperPass<
+            DominatorTreeWrapperPass, true, DominatorTree *,
+            LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) {
+    initializeDomOnlyViewerWrapperPassPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct PostDominatorTreeWrapperPassAnalysisGraphTraits {
+struct LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits {
   static PostDominatorTree *getGraph(PostDominatorTreeWrapperPass *PDTWP) {
     return &PDTWP->getPostDomTree();
   }
 };
 
-struct PostDomViewer : public DOTGraphTraitsViewer<
-                          PostDominatorTreeWrapperPass, false,
-                          PostDominatorTree *,
-                          PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomViewerWrapperPass
+    : public DOTGraphTraitsViewerWrapperPass<
+          PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+          LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  PostDomViewer() :
-    DOTGraphTraitsViewer<PostDominatorTreeWrapperPass, false,
-                         PostDominatorTree *,
-                         PostDominatorTreeWrapperPassAnalysisGraphTraits>(
-        "postdom", ID){
-      initializePostDomViewerPass(*PassRegistry::getPassRegistry());
-    }
+  PostDomViewerWrapperPass()
+      : DOTGraphTraitsViewerWrapperPass<
+            PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+            LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom",
+                                                                   ID) {
+    initializePostDomViewerWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
 };
 
-struct PostDomOnlyViewer : public DOTGraphTraitsViewer<
-                            PostDominatorTreeWrapperPass, true,
-                            PostDominatorTree *,
-                            PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomOnlyViewerWrapperPass
+    : public DOTGraphTraitsViewerWrapperPass<
+          PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+          LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  PostDomOnlyViewer() :
-    DOTGraphTraitsViewer<PostDominatorTreeWrapperPass, true,
-                         PostDominatorTree *,
-                         PostDominatorTreeWrapperPassAnalysisGraphTraits>(
-        "postdomonly", ID){
-      initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
-    }
+  PostDomOnlyViewerWrapperPass()
+      : DOTGraphTraitsViewerWrapperPass<
+            PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+            LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>(
+            "postdomonly", ID) {
+    initializePostDomOnlyViewerWrapperPassPass(
+        *PassRegistry::getPassRegistry());
+  }
 };
 } // end anonymous namespace
 
-char DomViewer::ID = 0;
-INITIALIZE_PASS(DomViewer, "view-dom",
+char DomViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(DomViewerWrapperPass, "view-dom",
                 "View dominance tree of function", false, false)
 
-char DomOnlyViewer::ID = 0;
-INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+char DomOnlyViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(DomOnlyViewerWrapperPass, "view-dom-only",
                 "View dominance tree of function (with no function bodies)",
                 false, false)
 
-char PostDomViewer::ID = 0;
-INITIALIZE_PASS(PostDomViewer, "view-postdom",
+char PostDomViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomViewerWrapperPass, "view-postdom",
                 "View postdominance tree of function", false, false)
 
-char PostDomOnlyViewer::ID = 0;
-INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+char PostDomOnlyViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewerWrapperPass, "view-postdom-only",
                 "View postdominance tree of function "
                 "(with no function bodies)",
                 false, false)
 
 namespace {
-struct DomPrinter : public DOTGraphTraitsPrinter<
-                        DominatorTreeWrapperPass, false, DominatorTree *,
-                        DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomPrinterWrapperPass
+    : public DOTGraphTraitsPrinterWrapperPass<
+          DominatorTreeWrapperPass, false, DominatorTree *,
+          LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomPrinter()
-      : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false, DominatorTree *,
-                              DominatorTreeWrapperPassAnalysisGraphTraits>(
-            "dom", ID) {
-    initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+  DomPrinterWrapperPass()
+      : DOTGraphTraitsPrinterWrapperPass<
+            DominatorTreeWrapperPass, false, DominatorTree *,
+            LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) {
+    initializeDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct DomOnlyPrinter : public DOTGraphTraitsPrinter<
-                            DominatorTreeWrapperPass, true, DominatorTree *,
-                            DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomOnlyPrinterWrapperPass
+    : public DOTGraphTraitsPrinterWrapperPass<
+          DominatorTreeWrapperPass, true, DominatorTree *,
+          LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomOnlyPrinter()
-      : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, true, DominatorTree *,
-                              DominatorTreeWrapperPassAnalysisGraphTraits>(
-            "domonly", ID) {
-    initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+  DomOnlyPrinterWrapperPass()
+      : DOTGraphTraitsPrinterWrapperPass<
+            DominatorTreeWrapperPass, true, DominatorTree *,
+            LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) {
+    initializeDomOnlyPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct PostDomPrinter
-  : public DOTGraphTraitsPrinter<
-                            PostDominatorTreeWrapperPass, false,
-                            PostDominatorTree *,
-                            PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomPrinterWrapperPass
+    : public DOTGraphTraitsPrinterWrapperPass<
+          PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+          LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  PostDomPrinter() :
-    DOTGraphTraitsPrinter<PostDominatorTreeWrapperPass, false,
-                          PostDominatorTree *,
-                          PostDominatorTreeWrapperPassAnalysisGraphTraits>(
-        "postdom", ID) {
-      initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
-    }
+  PostDomPrinterWrapperPass()
+      : DOTGraphTraitsPrinterWrapperPass<
+            PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+            LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom",
+                                                                   ID) {
+    initializePostDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
 };
 
-struct PostDomOnlyPrinter
-  : public DOTGraphTraitsPrinter<
-                            PostDominatorTreeWrapperPass, true,
-                            PostDominatorTree *,
-                            PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomOnlyPrinterWrapperPass
+    : public DOTGraphTraitsPrinterWrapperPass<
+          PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+          LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  PostDomOnlyPrinter() :
-    DOTGraphTraitsPrinter<PostDominatorTreeWrapperPass, true,
-                          PostDominatorTree *,
-                          PostDominatorTreeWrapperPassAnalysisGraphTraits>(
-        "postdomonly", ID) {
-      initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
-    }
+  PostDomOnlyPrinterWrapperPass()
+      : DOTGraphTraitsPrinterWrapperPass<
+            PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+            LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>(
+            "postdomonly", ID) {
+    initializePostDomOnlyPrinterWrapperPassPass(
+        *PassRegistry::getPassRegistry());
+  }
 };
 } // end anonymous namespace
 
+char DomPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(DomPrinterWrapperPass, "dot-dom",
+                "Print dominance tree of function to 'dot' file", false, false)
 
-
-char DomPrinter::ID = 0;
-INITIALIZE_PASS(DomPrinter, "dot-dom",
-                "Print dominance tree of function to 'dot' file",
-                false, false)
-
-char DomOnlyPrinter::ID = 0;
-INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+char DomOnlyPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinterWrapperPass, "dot-dom-only",
                 "Print dominance tree of function to 'dot' file "
                 "(with no function bodies)",
                 false, false)
 
-char PostDomPrinter::ID = 0;
-INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
-                "Print postdominance tree of function to 'dot' file",
-                false, false)
+char PostDomPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomPrinterWrapperPass, "dot-postdom",
+                "Print postdominance tree of function to 'dot' file", false,
+                false)
 
-char PostDomOnlyPrinter::ID = 0;
-INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+char PostDomOnlyPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinterWrapperPass, "dot-postdom-only",
                 "Print postdominance tree of function to 'dot' file "
                 "(with no function bodies)",
                 false, false)
@@ -278,34 +211,34 @@
 // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
 // the link time optimization.
 
-FunctionPass *llvm::createDomPrinterPass() {
-  return new DomPrinter();
+FunctionPass *llvm::createDomPrinterWrapperPassPass() {
+  return new DomPrinterWrapperPass();
 }
 
-FunctionPass *llvm::createDomOnlyPrinterPass() {
-  return new DomOnlyPrinter();
+FunctionPass *llvm::createDomOnlyPrinterWrapperPassPass() {
+  return new DomOnlyPrinterWrapperPass();
 }
 
-FunctionPass *llvm::createDomViewerPass() {
-  return new DomViewer();
+FunctionPass *llvm::createDomViewerWrapperPassPass() {
+  return new DomViewerWrapperPass();
 }
 
-FunctionPass *llvm::createDomOnlyViewerPass() {
-  return new DomOnlyViewer();
+FunctionPass *llvm::createDomOnlyViewerWrapperPassPass() {
+  return new DomOnlyViewerWrapperPass();
 }
 
-FunctionPass *llvm::createPostDomPrinterPass() {
-  return new PostDomPrinter();
+FunctionPass *llvm::createPostDomPrinterWrapperPassPass() {
+  return new PostDomPrinterWrapperPass();
 }
 
-FunctionPass *llvm::createPostDomOnlyPrinterPass() {
-  return new PostDomOnlyPrinter();
+FunctionPass *llvm::createPostDomOnlyPrinterWrapperPassPass() {
+  return new PostDomOnlyPrinterWrapperPass();
 }
 
-FunctionPass *llvm::createPostDomViewerPass() {
-  return new PostDomViewer();
+FunctionPass *llvm::createPostDomViewerWrapperPassPass() {
+  return new PostDomViewerWrapperPass();
 }
 
-FunctionPass *llvm::createPostDomOnlyViewerPass() {
-  return new PostDomOnlyViewer();
+FunctionPass *llvm::createPostDomOnlyViewerWrapperPassPass() {
+  return new PostDomOnlyViewerWrapperPass();
 }
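
A minimal sketch of driving one of the renamed legacy printers through the old
pass manager (assumes an existing Module M; the dom.<function>.dot output
naming is approximate):

    llvm::legacy::PassManager PM;
    PM.add(llvm::createDomPrinterWrapperPassPass()); // writes dom.<fn>.dot
    PM.run(M);
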
diff --git a/src/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp b/src/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
index 6e29926..888c167 100644
--- a/src/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Support/GenericDomTree.h"
 #include <algorithm>
@@ -314,98 +315,6 @@
   return *PDT;
 }
 
-void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
-  assert(isUpdateValid({DominatorTree::Insert, From, To}) &&
-         "Inserted edge does not appear in the CFG");
-#endif
-
-  if (!DT && !PDT)
-    return;
-
-  // Won't affect DomTree and PostDomTree; discard update.
-  if (From == To)
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->insertEdge(From, To);
-    if (PDT)
-      PDT->insertEdge(From, To);
-    return;
-  }
-
-  PendUpdates.push_back({DominatorTree::Insert, From, To});
-}
-
-void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
-  if (From == To)
-    return;
-
-  if (!DT && !PDT)
-    return;
-
-  if (!isUpdateValid({DominatorTree::Insert, From, To}))
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->insertEdge(From, To);
-    if (PDT)
-      PDT->insertEdge(From, To);
-    return;
-  }
-
-  PendUpdates.push_back({DominatorTree::Insert, From, To});
-}
-
-void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
-  assert(isUpdateValid({DominatorTree::Delete, From, To}) &&
-         "Deleted edge still exists in the CFG!");
-#endif
-
-  if (!DT && !PDT)
-    return;
-
-  // Won't affect DomTree and PostDomTree; discard update.
-  if (From == To)
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->deleteEdge(From, To);
-    if (PDT)
-      PDT->deleteEdge(From, To);
-    return;
-  }
-
-  PendUpdates.push_back({DominatorTree::Delete, From, To});
-}
-
-void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
-  if (From == To)
-    return;
-
-  if (!DT && !PDT)
-    return;
-
-  if (!isUpdateValid({DominatorTree::Delete, From, To}))
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->deleteEdge(From, To);
-    if (PDT)
-      PDT->deleteEdge(From, To);
-    return;
-  }
-
-  PendUpdates.push_back({DominatorTree::Delete, From, To});
-}
-
 void DomTreeUpdater::dropOutOfDateUpdates() {
   if (Strategy == DomTreeUpdater::UpdateStrategy::Eager)
     return;
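
The removed per-edge helpers are superseded by the batched update API; a
minimal sketch, assuming basic blocks A, B, and C of an already-edited CFG:

    llvm::DomTreeUpdater DTU(&DT, &PDT,
                             llvm::DomTreeUpdater::UpdateStrategy::Lazy);
    DTU.applyUpdates({{llvm::DominatorTree::Insert, A, B},
                      {llvm::DominatorTree::Delete, A, C}});
    DTU.flush(); // force the pending updates through both trees
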
diff --git a/src/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp b/src/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
index a8806fe..ccba913 100644
--- a/src/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -15,7 +15,6 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp b/src/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
index df8b7e1..277ff6b 100644
--- a/src/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/EHPersonalities.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -67,7 +68,10 @@
 }
 
 EHPersonality llvm::getDefaultEHPersonality(const Triple &T) {
-  return EHPersonality::GNU_C;
+  if (T.isPS5())
+    return EHPersonality::GNU_CXX;
+  else
+    return EHPersonality::GNU_C;
 }
 
 bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
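
A sketch of the behavioral change (the PS5 triple spelling is an assumption):

    llvm::EHPersonality P =
        llvm::getDefaultEHPersonality(llvm::Triple("x86_64-sie-ps5"));
    // P == EHPersonality::GNU_CXX; any non-PS5 triple still yields GNU_C.
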
diff --git a/src/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 3351903..782c119 100644
--- a/src/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -12,48 +12,87 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
+#include <deque>
 
 using namespace llvm;
 
-FunctionPropertiesInfo
-FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
+namespace {
+int64_t getNrBlocksFromCond(const BasicBlock &BB) {
+  int64_t Ret = 0;
+  if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+    if (BI->isConditional())
+      Ret += BI->getNumSuccessors();
+  } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+    Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+  }
+  return Ret;
+}
+
+int64_t getUses(const Function &F) {
+  return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+}
+} // namespace
+
+void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) {
+  updateForBB(BB, +1);
+}
+
+void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
+                                         int64_t Direction) {
+  assert(Direction == 1 || Direction == -1);
+  BasicBlockCount += Direction;
+  BlocksReachedFromConditionalInstruction +=
+      (Direction * getNrBlocksFromCond(BB));
+  for (const auto &I : BB) {
+    if (auto *CS = dyn_cast<CallBase>(&I)) {
+      const auto *Callee = CS->getCalledFunction();
+      if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
+        DirectCallsToDefinedFunctions += Direction;
+    }
+    if (I.getOpcode() == Instruction::Load) {
+      LoadInstCount += Direction;
+    } else if (I.getOpcode() == Instruction::Store) {
+      StoreInstCount += Direction;
+    }
+  }
+  TotalInstructionCount += Direction * BB.sizeWithoutDebug();
+}
+
+void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
                                                   const LoopInfo &LI) {
 
-  FunctionPropertiesInfo FPI;
-
-  FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
-
-  for (const auto &BB : F) {
-    ++FPI.BasicBlockCount;
-
-    if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
-      if (BI->isConditional())
-        FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
-    } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
-      FPI.BlocksReachedFromConditionalInstruction +=
-          (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
-    }
-
-    for (const auto &I : BB) {
-      if (auto *CS = dyn_cast<CallBase>(&I)) {
-        const auto *Callee = CS->getCalledFunction();
-        if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
-          ++FPI.DirectCallsToDefinedFunctions;
-      }
-      if (I.getOpcode() == Instruction::Load) {
-        ++FPI.LoadInstCount;
-      } else if (I.getOpcode() == Instruction::Store) {
-        ++FPI.StoreInstCount;
-      }
-    }
-    // Loop Depth of the Basic Block
-    int64_t LoopDepth;
-    LoopDepth = LI.getLoopDepth(&BB);
-    if (FPI.MaxLoopDepth < LoopDepth)
-      FPI.MaxLoopDepth = LoopDepth;
+  Uses = getUses(F);
+  TopLevelLoopCount = llvm::size(LI);
+  MaxLoopDepth = 0;
+  std::deque<const Loop *> Worklist;
+  llvm::append_range(Worklist, LI);
+  while (!Worklist.empty()) {
+    const auto *L = Worklist.front();
+    MaxLoopDepth =
+        std::max(MaxLoopDepth, static_cast<int64_t>(L->getLoopDepth()));
+    Worklist.pop_front();
+    llvm::append_range(Worklist, L->getSubLoops());
   }
-  FPI.TopLevelLoopCount += llvm::size(LI);
+}
+
+FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
+    const Function &F, FunctionAnalysisManager &FAM) {
+
+  FunctionPropertiesInfo FPI;
+  // The const casts are due to the getResult API - there's no mutation of F.
+  const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(F));
+  const auto &DT =
+      FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+  for (const auto &BB : F)
+    if (DT.isReachableFromEntry(&BB))
+      FPI.reIncludeBB(BB);
+  FPI.updateAggregateStats(F, LI);
   return FPI;
 }
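
With the new signature the analysis fetches LoopInfo and DominatorTree itself;
a minimal sketch, assuming a populated FunctionAnalysisManager FAM:

    llvm::FunctionPropertiesInfo FPI =
        llvm::FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM);
    FPI.print(llvm::errs()); // now also reports TotalInstructionCount
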
 
@@ -67,15 +106,15 @@
      << "LoadInstCount: " << LoadInstCount << "\n"
      << "StoreInstCount: " << StoreInstCount << "\n"
      << "MaxLoopDepth: " << MaxLoopDepth << "\n"
-     << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
+     << "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
+     << "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
 }
 
 AnalysisKey FunctionPropertiesAnalysis::Key;
 
 FunctionPropertiesInfo
 FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
-  return FunctionPropertiesInfo::getFunctionPropertiesInfo(
-      F, FAM.getResult<LoopAnalysis>(F));
+  return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM);
 }
 
 PreservedAnalyses
@@ -86,3 +125,127 @@
   AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
   return PreservedAnalyses::all();
 }
+
+FunctionPropertiesUpdater::FunctionPropertiesUpdater(
+    FunctionPropertiesInfo &FPI, const CallBase &CB)
+    : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
+  assert(isa<CallInst>(CB) || isa<InvokeInst>(CB));
+  // For BBs that are likely to change, we subtract their contribution from
+  // the feature totals. Some features, like max loop counts or depths, are
+  // left invalid, as they will be updated post-inlining.
+  SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs;
+  // The CB BB will change - it'll either be split or the callee's body (single
+  // BB) will be pasted in.
+  LikelyToChangeBBs.insert(&CallSiteBB);
+
+  // The caller's entry BB may change due to new alloca instructions.
+  LikelyToChangeBBs.insert(&*Caller.begin());
+
+  // The successors may become unreachable in the case of `invoke` inlining.
+  // We also track successors separately because, together with the CB BB
+  // ('Entry'), they form the boundary between which the inlined callee will
+  // be pasted.
+  Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));
+
+  // Inlining only handles invokes and calls. If this is an invoke, and
+  // inlining it pulls in another invoke, the original landing pad may get
+  // split so as to share its content with other potential users. So the
+  // boundary up to which we need to invalidate and then re-account BB data
+  // is the set of successors of the current landing pad. We can leave out
+  // the current landing pad, too: if it doesn't get split, it will be the
+  // place where traversal stops. Either way, the discounted BBs will be
+  // checked for reachability and re-added.
+  if (const auto *II = dyn_cast<InvokeInst>(&CB)) {
+    const auto *UnwindDest = II->getUnwindDest();
+    Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest));
+  }
+
+  // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop).
+  // We are only interested in the BBs reached by moving past the callsite BB;
+  // they define the frontier past which we don't want to re-process BBs.
+  // Including the callsite BB in this case would prematurely stop the
+  // traversal in finish().
+  Successors.erase(&CallSiteBB);
+
+  for (const auto *BB : Successors)
+    LikelyToChangeBBs.insert(BB);
+
+  // Commit the change. While some of the BBs accounted for above may play a
+  // dual role (e.g. the caller's entry BB may be the same as the callsite
+  // BB), set insertion semantics make sure we account for them only once.
+  // The same convention must be followed in `finish`, too.
+  for (const auto *BB : LikelyToChangeBBs)
+    FPI.updateForBB(*BB, -1);
+}
+
+void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const {
+  // Update feature values from the BBs that were copied from the callee, or
+  // might have been modified because of inlining. The latter have been
+  // subtracted in the FunctionPropertiesUpdater ctor.
+  // There could be successors that were reached before but now are only
+  // reachable from elsewhere in the CFG.
+  // One example is the following diamond CFG (lines are arrows pointing down):
+  //    A
+  //  /   \
+  // B     C
+  // |     |
+  // |     D
+  // |     |
+  // |     E
+  //  \   /
+  //    F
+  // There's a call site in C that is inlined. Upon doing that, it turns out
+  // it expands to
+  //   call void @llvm.trap()
+  //   unreachable
+  // F isn't reachable from C anymore, but we did discount it when we set up
+  // FunctionPropertiesUpdater, so we need to re-include it here.
+  // At the same time, D and E were reachable before, but now are not anymore,
+  // so we need to leave D out (we discounted it at setup), and explicitly
+  // remove E.
+  SetVector<const BasicBlock *> Reinclude;
+  SetVector<const BasicBlock *> Unreachable;
+  const auto &DT =
+      FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(Caller));
+
+  if (&CallSiteBB != &*Caller.begin())
+    Reinclude.insert(&*Caller.begin());
+
+  // Distribute the successors to the 2 buckets.
+  for (const auto *Succ : Successors)
+    if (DT.isReachableFromEntry(Succ))
+      Reinclude.insert(Succ);
+    else
+      Unreachable.insert(Succ);
+
+  // For reinclusion, we want to stop at the reachable successors, which are
+  // at the beginning of the worklist; but we also want to perform a traversal
+  // starting from the callsite BB and ending at those successors.
+  // IncludeSuccessorsMark is the index after which we include successors.
+  const auto IncludeSuccessorsMark = Reinclude.size();
+  bool CSInsertion = Reinclude.insert(&CallSiteBB);
+  (void)CSInsertion;
+  assert(CSInsertion);
+  for (size_t I = 0; I < Reinclude.size(); ++I) {
+    const auto *BB = Reinclude[I];
+    FPI.reIncludeBB(*BB);
+    if (I >= IncludeSuccessorsMark)
+      Reinclude.insert(succ_begin(BB), succ_end(BB));
+  }
+
+  // For exclusion, we don't need to exclude the set of BBs that were successors
+  // before and are now unreachable, because we already did that at setup. For
+  // the rest, as long as a successor is unreachable, we want to explicitly
+  // exclude it.
+  const auto AlreadyExcludedMark = Unreachable.size();
+  for (size_t I = 0; I < Unreachable.size(); ++I) {
+    const auto *U = Unreachable[I];
+    if (I >= AlreadyExcludedMark)
+      FPI.updateForBB(*U, -1);
+    for (const auto *Succ : successors(U))
+      if (!DT.isReachableFromEntry(Succ))
+        Unreachable.insert(Succ);
+  }
+
+  const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller));
+  FPI.updateAggregateStats(Caller, LI);
+  assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, FAM));
+}
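
A sketch of the intended protocol around inlining (assumes an existing
FunctionAnalysisManager FAM and InlineFunctionInfo IFI; error handling
elided):

    llvm::FunctionPropertiesInfo &FPI =
        FAM.getResult<llvm::FunctionPropertiesAnalysis>(Caller);
    llvm::FunctionPropertiesUpdater FPU(FPI, CB); // discount volatile BBs
    llvm::InlineFunction(CB, IFI);                // mutate the caller's CFG
    FPU.finish(FAM); // re-include reachable BBs, drop newly unreachable ones
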
diff --git a/src/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/src/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
index 6869530..e6ef1c7 100644
--- a/src/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -21,11 +21,11 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -68,8 +68,8 @@
   /// should provide this much alignment at least, but this makes it clear we
   /// specifically rely on this amount of alignment.
   struct alignas(8) AlignedMap {
-    AlignedMap() {}
-    AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
+    AlignedMap() = default;
+    AlignedMap(const AlignedMap &Arg) = default;
     GlobalInfoMapType Map;
   };
 
@@ -102,7 +102,7 @@
                 "Insufficient low bits to store our flag and ModRef info.");
 
 public:
-  FunctionInfo() {}
+  FunctionInfo() = default;
   ~FunctionInfo() {
     delete Info.getPointer();
   }
@@ -256,22 +256,6 @@
   return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
 }
 
-FunctionModRefBehavior
-GlobalsAAResult::getModRefBehavior(const CallBase *Call) {
-  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
-
-  if (!Call->hasOperandBundles())
-    if (const Function *F = Call->getCalledFunction())
-      if (FunctionInfo *FI = getFunctionInfo(F)) {
-        if (!isModOrRefSet(FI->getModRefInfo()))
-          Min = FMRB_DoesNotAccessMemory;
-        else if (!isModSet(FI->getModRefInfo()))
-          Min = FMRB_OnlyReadsMemory;
-      }
-
-  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
-}
-
 /// Returns the function info for the function, or null if we don't have
 /// anything useful to say about it.
 GlobalsAAResult::FunctionInfo *
@@ -377,7 +361,7 @@
       if (Call->isDataOperand(&U)) {
         // Detect calls to free.
         if (Call->isArgOperand(&U) &&
-            isFreeCall(I, &GetTLI(*Call->getFunction()))) {
+            getFreedOperand(Call, &GetTLI(*Call->getFunction())) == U) {
           if (Writers)
             Writers->insert(Call->getParent()->getParent());
         } else {
@@ -511,6 +495,18 @@
     Handles.front().I = Handles.begin();
     bool KnowNothing = false;
 
+    // Intrinsics, like any other synchronizing function, can make the
+    // effects of other threads visible. Without nosync we really know
+    // nothing. Similarly, if `nocallback` is missing, the function (or
+    // intrinsic) can call into the module arbitrarily. If both are set, the
+    // function has an effect but will not interact with accesses of internal
+    // globals inside the module. We are conservative here for optnone
+    // functions; this might not be necessary.
+    auto MaySyncOrCallIntoModule = [](const Function &F) {
+      return !F.isDeclaration() || !F.hasNoSync() ||
+             !F.hasFnAttribute(Attribute::NoCallback);
+    };
+
     // Collect the mod/ref properties due to called functions.  We only compute
     // one mod-ref set.
     for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
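
The predicate above reads, by De Morgan, as: only a body-less declaration that
is both nosync and nocallback is guaranteed not to interact with internal
globals. An equivalent standalone sketch (the helper name is hypothetical):

    static bool cannotTouchInternalGlobals(const llvm::Function &F) {
      return F.isDeclaration() && F.hasNoSync() &&
             F.hasFnAttribute(llvm::Attribute::NoCallback);
    }
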
@@ -525,7 +521,7 @@
           // Can't do better than that!
         } else if (F->onlyReadsMemory()) {
           FI.addModRefInfo(ModRefInfo::Ref);
-          if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+          if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
             // This function might call back into the module and read a global -
             // consider every global as possibly being read by this function.
             FI.setMayReadAnyGlobal();
@@ -533,7 +529,7 @@
           FI.addModRefInfo(ModRefInfo::ModRef);
           if (!F->onlyAccessesArgMemory())
             FI.setMayReadAnyGlobal();
-          if (!F->isIntrinsic()) {
+          if (MaySyncOrCallIntoModule(*F)) {
             KnowNothing = true;
             break;
           }
@@ -585,12 +581,7 @@
         // We handle calls specially because the graph-relevant aspects are
         // handled above.
         if (auto *Call = dyn_cast<CallBase>(&I)) {
-          auto &TLI = GetTLI(*Node->getFunction());
-          if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
-            // FIXME: It is completely unclear why this is necessary and not
-            // handled by the above graph code.
-            FI.addModRefInfo(ModRefInfo::ModRef);
-          } else if (Function *Callee = Call->getCalledFunction()) {
+          if (Function *Callee = Call->getCalledFunction()) {
             // The callgraph doesn't include intrinsic calls.
             if (Callee->isIntrinsic()) {
               if (isa<DbgInfoIntrinsic>(Call))
@@ -915,7 +906,7 @@
 
   // Iterate through all the arguments to the called function. If any argument
   // is based on GV, return the conservative result.
-  for (auto &A : Call->args()) {
+  for (const auto &A : Call->args()) {
     SmallVector<const Value*, 4> Objects;
     getUnderlyingObjects(A, Objects);
 
@@ -979,7 +970,7 @@
   }
 }
 
-GlobalsAAResult::~GlobalsAAResult() {}
+GlobalsAAResult::~GlobalsAAResult() = default;
 
 /*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule(
     Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
@@ -1010,6 +1001,24 @@
                                         AM.getResult<CallGraphAnalysis>(M));
 }
 
+PreservedAnalyses RecomputeGlobalsAAPass::run(Module &M,
+                                              ModuleAnalysisManager &AM) {
+  if (auto *G = AM.getCachedResult<GlobalsAA>(M)) {
+    auto &CG = AM.getResult<CallGraphAnalysis>(M);
+    G->NonAddressTakenGlobals.clear();
+    G->UnknownFunctionsWithLocalLinkage = false;
+    G->IndirectGlobals.clear();
+    G->AllocsForIndirectGlobals.clear();
+    G->FunctionInfos.clear();
+    G->FunctionToSCCMap.clear();
+    G->Handles.clear();
+    G->CollectSCCMembership(CG);
+    G->AnalyzeGlobals(M);
+    G->AnalyzeCallGraph(CG, M);
+  }
+  return PreservedAnalyses::all();
+}
+
 char GlobalsAAWrapperPass::ID = 0;
 INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
                       "Globals Alias Analysis", false, true)
diff --git a/src/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/src/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 01681c4..a681c52 100644
--- a/src/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -64,7 +64,7 @@
   // Here we collect the operands and their types for determining whether
   // the structure of the operand use matches between two different candidates.
   for (Use &OI : Inst->operands()) {
-    if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
+    if (isa<CmpInst>(Inst) && RevisedPredicate) {
       // If we have a CmpInst where the predicate is reversed, it means the
       // operands must be reversed as well.
       OperVals.insert(OperVals.begin(), OI.get());
@@ -183,9 +183,9 @@
   assert(isa<CmpInst>(Inst) &&
          "Can only get a predicate from a compare instruction");
 
-  if (RevisedPredicate.hasValue())
-    return RevisedPredicate.getValue();
-  
+  if (RevisedPredicate)
+    return RevisedPredicate.value();
+
   return cast<CmpInst>(Inst)->getPredicate();
 }
 
@@ -193,7 +193,7 @@
   assert(isa<CallInst>(Inst) &&
          "Can only get a name from a call instruction");
 
-  assert(CalleeName.hasValue() && "CalleeName has not been set");
+  assert(CalleeName && "CalleeName has not been set");
 
   return *CalleeName;
 }
@@ -289,14 +289,12 @@
     }
   }
 
-  if (HaveLegalRange) {
-    if (AddedIllegalLastTime)
-      mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
-    for (IRInstructionData *ID : InstrListForBB)
-      this->IDL->push_back(*ID);
-    llvm::append_range(InstrList, InstrListForBB);
-    llvm::append_range(IntegerMapping, IntegerMappingForBB);
-  }
+  if (AddedIllegalLastTime)
+    mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
+  for (IRInstructionData *ID : InstrListForBB)
+    this->IDL->push_back(*ID);
+  llvm::append_range(InstrList, InstrListForBB);
+  llvm::append_range(IntegerMapping, IntegerMappingForBB);
 }
 
 // TODO: This is the same as the MachineOutliner, and should be consolidated
@@ -461,6 +459,18 @@
   // that both of these instructions are not nullptrs.
   FirstInst = FirstInstIt;
   LastInst = LastInstIt;
+
+  // Add the basic blocks contained in the set into the global value numbering.
+  DenseSet<BasicBlock *> BBSet;
+  getBasicBlocks(BBSet);
+  for (BasicBlock *BB : BBSet) {
+    if (ValueToNumber.find(BB) != ValueToNumber.end())
+      continue;
+
+    ValueToNumber.try_emplace(BB, LocalValNumber);
+    NumberToValue.try_emplace(LocalValNumber, BB);
+    LocalValNumber++;
+  }
 }
 
 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
@@ -516,19 +526,13 @@
   for (Value *V : SourceOperands) {
     ArgVal = SourceValueToNumberMapping.find(V)->second;
 
+    // Instead of finding a current mapping, we attempt to insert a set.
     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
         std::make_pair(ArgVal, TargetValueNumbers));
 
-    // Instead of finding a current mapping, we inserted a set.  This means a
-    // mapping did not exist for the source Instruction operand, it has no
-    // current constraints we need to check.
-    if (WasInserted)
-      continue;
-
-    // If a mapping already exists for the source operand to the values in the
-    // other IRSimilarityCandidate we need to iterate over the items in other
-    // IRSimilarityCandidate's Instruction to determine whether there is a valid
-    // mapping of Value to Value.
+    // We need to iterate over the items in the other IRSimilarityCandidate's
+    // Instruction to determine whether there is a valid mapping of
+    // Value to Value.
     DenseSet<unsigned> NewSet;
     for (unsigned &Curr : ValueMappingIt->second)
       // If we can find the value in the mapping, we add it to the new set.
@@ -548,7 +552,6 @@
     if (ValueMappingIt->second.size() != 1)
       continue;
 
-
     unsigned ValToRemove = *ValueMappingIt->second.begin();
     // When there is only one item left in the mapping for an operand, remove
     // the value from the other operands.  If it results in there being no
@@ -791,7 +794,8 @@
     // We have different paths for commutative instructions and non-commutative
     // instructions since commutative instructions could allow multiple mappings
     // to certain values.
-    if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
+    if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
+        !isa<IntrinsicInst>(IA)) {
       if (!compareCommutativeOperandMapping(
               {A, OperValsA, ValueNumberMappingA},
               {B, OperValsB, ValueNumberMappingB}))
@@ -1008,6 +1012,40 @@
     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
   }
+
+  DenseSet<BasicBlock *> BBSet;
+  getBasicBlocks(BBSet);
+  // Find canonical numbers for the BasicBlocks in the current candidate.
+  // This is done by taking the first instruction in the block in the current
+  // candidate, finding the matching value in the source candidate, and then
+  // finding the parent of that value. The canonical number of that parent
+  // block in the source candidate is used as the canonical number of the
+  // block in the current candidate.
+  for (BasicBlock *BB : BBSet) {
+    unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
+
+    // We can skip the BasicBlock if the canonical numbering has already been
+    // found in a separate instruction.
+    if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
+      continue;
+
+    // If the basic block is the starting block, then the shared instruction may
+    // not be the first instruction in the block; instead, it will be the first
+    // instruction in the similarity region.
+    Value *FirstOutlineInst = BB == getStartBB()
+                                  ? frontInstruction()
+                                  : &*BB->instructionsWithoutDebug().begin();
+
+    unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
+    unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
+    unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
+    Value *SourceV = *SourceCand.fromGVN(SourceGVN);
+    BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
+    unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
+    unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
+    CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
+    NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
+  }
 }
 
 void IRSimilarityCandidate::createCanonicalMappingFor(
@@ -1162,11 +1200,12 @@
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
+  Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
 
   populateMapper(Modules, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
 
-  return SimilarityCandidates.getValue();
+  return *SimilarityCandidates;
 }
 
 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
@@ -1175,6 +1214,7 @@
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
+  Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
 
   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
@@ -1182,7 +1222,7 @@
   populateMapper(M, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
 
-  return SimilarityCandidates.getValue();
+  return *SimilarityCandidates;
 }
 
 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
@@ -1196,7 +1236,8 @@
 
 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                        MatchCallsByName, !DisableIntrinsics));
+                                        MatchCallsByName, !DisableIntrinsics,
+                                        false));
   return false;
 }
 
@@ -1214,7 +1255,8 @@
 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
                                                  ModuleAnalysisManager &) {
   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                     MatchCallsByName, !DisableIntrinsics);
+                                     MatchCallsByName, !DisableIntrinsics,
+                                     false);
   IRSI.findSimilarity(M);
   return IRSI;
 }
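
The identifier constructor gained a fifth flag for must-tail call matching;
both the legacy wrapper and the new-PM analysis pass false for now. A sketch
(the parameter names are assumptions):

    llvm::IRSimilarityIdentifier IRSI(/*MatchBranches=*/true,
                                      /*MatchIndirectCalls=*/true,
                                      /*MatchCallsByName=*/false,
                                      /*MatchIntrinsics=*/true,
                                      /*MatchMustTailCalls=*/false);
    llvm::SimilarityGroupList &Groups = IRSI.findSimilarity(M);
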
diff --git a/src/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/src/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 74b0d67..a51e974 100644
--- a/src/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -11,26 +11,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/DemandedBits.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 
@@ -73,20 +63,6 @@
   return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
 }
 
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
-  switch (Kind) {
-  default:
-    break;
-  case RecurKind::Add:
-  case RecurKind::Mul:
-  case RecurKind::FAdd:
-  case RecurKind::FMul:
-  case RecurKind::FMulAdd:
-    return true;
-  }
-  return false;
-}
-
 /// Determines if Phi may have been type-promoted. If Phi has a single user
 /// that ANDs the Phi with a type mask, return the user. RT is updated to
 /// account for the narrower bit width represented by the mask, and the AND
@@ -237,12 +213,10 @@
   return true;
 }
 
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
-                                           Loop *TheLoop, FastMathFlags FuncFMF,
-                                           RecurrenceDescriptor &RedDes,
-                                           DemandedBits *DB,
-                                           AssumptionCache *AC,
-                                           DominatorTree *DT) {
+bool RecurrenceDescriptor::AddReductionVar(
+    PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
+    RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE) {
   if (Phi->getNumIncomingValues() != 2)
     return false;
 
@@ -259,6 +233,12 @@
   // This includes users of the reduction, variables (which form a cycle
   // which ends in the phi node).
   Instruction *ExitInstruction = nullptr;
+
+  // Variable to keep the last visited store instruction. By the end of the
+  // algorithm this variable will be either null or will hold the intermediate
+  // reduction value stored to an invariant address.
+  StoreInst *IntermediateStore = nullptr;
+
   // Indicates that we found a reduction operation in our scan.
   bool FoundReduxOp = false;
 
@@ -324,6 +304,10 @@
   //  - By instructions outside of the loop (safe).
   //      * One value may have several outside users, but all outside
   //        uses must be of the same value.
+  //  - By store instructions with a loop invariant address (safe with
+  //    the following restrictions):
+  //      * If there are several stores, all must have the same address.
+  //      * Final value should be stored in that loop invariant address.
   //  - By an instruction that is not part of the reduction (not safe).
   //    This is either:
   //      * An instruction type other than PHI or the reduction operation.
@@ -331,6 +315,43 @@
   while (!Worklist.empty()) {
     Instruction *Cur = Worklist.pop_back_val();
 
+    // A store instruction is allowed only if it stores the reduction value
+    // to the same loop-invariant memory location.
+    if (auto *SI = dyn_cast<StoreInst>(Cur)) {
+      if (!SE) {
+        LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
+                          << "Scalar Evolution Analysis\n");
+        return false;
+      }
+
+      const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
+      // Check it is the same address as previous stores
+      if (IntermediateStore) {
+        const SCEV *OtherScev =
+            SE->getSCEV(IntermediateStore->getPointerOperand());
+
+        if (OtherScev != PtrScev) {
+          LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
+                            << "inside the loop: " << *SI->getPointerOperand()
+                            << " and "
+                            << *IntermediateStore->getPointerOperand() << '\n');
+          return false;
+        }
+      }
+
+      // Check the pointer is loop invariant
+      if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
+        LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
+                          << "inside the loop: " << *SI->getPointerOperand()
+                          << '\n');
+        return false;
+      }
+
+      // IntermediateStore is always the last store in the loop.
+      IntermediateStore = SI;
+      continue;
+    }
+
     // No Users.
     // If the instruction has no users then this is a broken chain and can't be
     // a reduction variable.
@@ -453,10 +474,17 @@
       // reductions which are represented as a cmp followed by a select.
       InstDesc IgnoredVal(false, nullptr);
       if (VisitedInsts.insert(UI).second) {
-        if (isa<PHINode>(UI))
+        if (isa<PHINode>(UI)) {
           PHIs.push_back(UI);
-        else
+        } else {
+          StoreInst *SI = dyn_cast<StoreInst>(UI);
+          if (SI && SI->getPointerOperand() == Cur) {
+            // Reduction variable chain can only be stored somewhere but it
+            // can't be used as an address.
+            return false;
+          }
           NonPHIs.push_back(UI);
+        }
       } else if (!isa<PHINode>(UI) &&
                  ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                    !isa<SelectInst>(UI)) ||
@@ -476,7 +504,7 @@
 
   // This means we have seen one but not the other instruction of the
   // pattern or more than just a select and cmp. Zero implies that we saw a
-  // llvm.min/max instrinsic, which is always OK.
+  // llvm.min/max intrinsic, which is always OK.
   if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
       NumCmpSelectPatternInst != 0)
     return false;
@@ -484,6 +512,32 @@
   if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
     return false;
 
+  if (IntermediateStore) {
+    // Check that the stored value goes to the phi node again. This way we make sure
+    // that the value stored in IntermediateStore is indeed the final reduction
+    // value.
+    if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
+      LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
+                        << *IntermediateStore << '\n');
+      return false;
+    }
+
+    // If there is an exit instruction, its value should be stored in
+    // IntermediateStore.
+    if (ExitInstruction &&
+        IntermediateStore->getValueOperand() != ExitInstruction) {
+      LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
+                           "store last calculated value of the reduction: "
+                        << *IntermediateStore << '\n');
+      return false;
+    }
+
+    // If all uses are inside the loop (intermediate stores), then the
+    // reduction value after the loop will be the one used in the last store.
+    if (!ExitInstruction)
+      ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
+  }
+
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
@@ -545,9 +599,9 @@
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
-                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
-                          MinWidthCastToRecurrenceType);
+  RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
+                          FMF, ExactFPMathInst, RecurrenceType, IsSigned,
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
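
An illustrative source-level pattern (all names are assumptions) that the new
IntermediateStore handling accepts: the running reduction value is stored to a
single loop-invariant address on every iteration, and the last store holds the
final value:

    int reduceWithIntermediateStore(const int *A, int N, int *Out) {
      int Sum = 0;
      for (int I = 0; I < N; ++I) {
        Sum += A[I];
        *Out = Sum; // same invariant address each trip; final value wins
      }
      return Sum;
    }
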
@@ -771,7 +825,8 @@
 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB, AssumptionCache *AC,
-                                          DominatorTree *DT) {
+                                          DominatorTree *DT,
+                                          ScalarEvolution *SE) {
   BasicBlock *Header = TheLoop->getHeader();
   Function &F = *Header->getParent();
   FastMathFlags FMF;
@@ -780,72 +835,85 @@
   FMF.setNoSignedZeros(
       F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
 
-  if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
     return true;
   }
   if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+                      DT, SE)) {
     LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
                       << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
     return true;
   }
   if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+                      DT, SE)) {
     LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
                       << " PHI." << *Phi << "\n");
     return true;
   }
-  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
-                      DT)) {
+  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
     LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
     return true;
   }
@@ -917,12 +985,37 @@
         SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
       return false;
 
-    // Do not try to sink an instruction multiple times (if multiple operands
-    // are first order recurrences).
-    // TODO: We can support this case, by sinking the instruction after the
-    // 'deepest' previous instruction.
-    if (SinkAfter.find(SinkCandidate) != SinkAfter.end())
-      return false;
+    // Avoid sinking an instruction multiple times (if multiple operands are
+    // first order recurrences) by sinking once - after the latest 'previous'
+    // instruction.
+    auto It = SinkAfter.find(SinkCandidate);
+    if (It != SinkAfter.end()) {
+      auto *OtherPrev = It->second;
+      // Find the earliest entry in the 'sink-after' chain. The last entry in
+      // the chain is the original 'Previous' for a recurrence handled earlier.
+      auto EarlierIt = SinkAfter.find(OtherPrev);
+      while (EarlierIt != SinkAfter.end()) {
+        Instruction *EarlierInst = EarlierIt->second;
+        EarlierIt = SinkAfter.find(EarlierInst);
+        // Bail out if order has not been preserved.
+        if (EarlierIt != SinkAfter.end() &&
+            !DT->dominates(EarlierInst, OtherPrev))
+          return false;
+        OtherPrev = EarlierInst;
+      }
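+      // Illustrative example: if SinkAfter maps SinkCandidate -> P1 and
+      // P1 -> P2, the walk above ends with OtherPrev == P2, the original
+      // 'Previous' of a recurrence handled earlier.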
+      // Bail out if order has not been preserved.
+      if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
+        return false;
+
+      // SinkCandidate is already being sunk after an instruction that comes
+      // after Previous. Nothing left to do.
+      if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
+        return true;
+      // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
+      // re-sunk to Previous, instead of sinking to OtherPrev. Remove
+      // SinkCandidate from SinkAfter to ensure its insert position is updated.
+      SinkAfter.erase(SinkCandidate);
+    }
 
     // If we reach a PHI node that is not dominated by Previous, we reached a
     // header PHI. No need for sinking.
@@ -1052,7 +1145,7 @@
   // to check for a pair of icmp/select, for which we use getNextInstruction and
   // isCorrectOpcode functions to step the right number of instructions, and
   // check the icmp/select pair.
-  // FIXME: We also do not attempt to look through Phi/Select's yet, which might
+  // FIXME: We also do not attempt to look through Select's yet, which might
   // be part of the reduction chain, or attempt to look through And's to find a
   // smaller bitwidth. Subs are also currently not allowed (which are usually
   // treated as part of an add reduction) as they are expected to generally be
@@ -1062,16 +1155,21 @@
   if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
     ExpectedUses = 2;
 
-  auto getNextInstruction = [&](Instruction *Cur) {
-    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
-      // We are expecting a icmp/select pair, which we go to the next select
-      // instruction if we can. We already know that Cur has 2 uses.
-      if (isa<SelectInst>(*Cur->user_begin()))
-        return cast<Instruction>(*Cur->user_begin());
-      else
-        return cast<Instruction>(*std::next(Cur->user_begin()));
+  auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
+    for (auto *User : Cur->users()) {
+      Instruction *UI = cast<Instruction>(User);
+      if (isa<PHINode>(UI))
+        continue;
+      if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
+        // We are expecting an icmp/select pair, so we go to the next select
+        // instruction if we can. We already know that Cur has 2 uses.
+        if (isa<SelectInst>(UI))
+          return UI;
+        continue;
+      }
+      return UI;
     }
-    return cast<Instruction>(*Cur->user_begin());
+    return nullptr;
   };
   auto isCorrectOpcode = [&](Instruction *Cur) {
     if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
@@ -1086,22 +1184,46 @@
     return Cur->getOpcode() == RedOp;
   };
 
+  // Attempt to look through Phis which are part of the reduction chain.
+  unsigned ExtraPhiUses = 0;
+  Instruction *RdxInstr = LoopExitInstr;
+  if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
+    if (ExitPhi->getNumIncomingValues() != 2)
+      return {};
+
+    Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
+    Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
+
+    Instruction *Chain = nullptr;
+    if (Inc0 == Phi)
+      Chain = Inc1;
+    else if (Inc1 == Phi)
+      Chain = Inc0;
+    else
+      return {};
+
+    RdxInstr = Chain;
+    ExtraPhiUses = 1;
+  }
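+  // For example (illustrative IR), a conditional reduction may exit through a
+  // merge phi such as
+  //   %sum.next = phi i32 [ %sum, %loop.header ], [ %add, %if.then ]
+  // where %sum is the header phi and %add is the reduction op; %add then
+  // becomes RdxInstr and the extra phi use is accounted for below.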
+
   // We check the loop exit instruction first (as a quick test) but add it
   // last. We check that its opcode is correct (and don't allow it to be a
   // Sub) and that it has the expected number of uses. It will have one use
   // from the phi and one from an LCSSA value, no matter the type.
-  if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
+  if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
     return {};
 
-  // Check that the Phi has one (or two for min/max) uses.
-  if (!Phi->hasNUses(ExpectedUses))
+  // Check that the Phi has one (or two for min/max) uses, plus an extra use
+  // for conditional reductions.
+  if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
     return {};
+
   Instruction *Cur = getNextInstruction(Phi);
 
   // Each other instruction in the chain should have the expected number of uses
   // and be the correct opcode.
-  while (Cur != LoopExitInstr) {
-    if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
+  while (Cur != RdxInstr) {
+    if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
       return {};
 
     ReductionOperations.push_back(Cur);
diff --git a/src/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/src/llvm-project/llvm/lib/Analysis/IVUsers.cpp
index 0f3929f..8302116 100644
--- a/src/llvm-project/llvm/lib/Analysis/IVUsers.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/IVUsers.cpp
@@ -12,25 +12,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "iv-users"
@@ -278,7 +274,7 @@
     OS << "  ";
     IVUse.getOperandValToReplace()->printAsOperand(OS, false);
     OS << " = " << *getReplacementExpr(IVUse);
-    for (auto PostIncLoop : IVUse.PostIncLoops) {
+    for (const auto *PostIncLoop : IVUse.PostIncLoops) {
       OS << " (post-inc with loop ";
       PostIncLoop->getHeader()->printAsOperand(OS, false);
       OS << ")";
diff --git a/src/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index b112ed2..ebfa1c8 100644
--- a/src/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -13,12 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallVisitor.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -31,7 +26,7 @@
 // The percent threshold for the direct-call target (this call site vs the
 // remaining call count) for it to be considered as the promotion target.
 static cl::opt<unsigned> ICPRemainingPercentThreshold(
-    "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore,
+    "icp-remaining-percent-threshold", cl::init(30), cl::Hidden,
     cl::desc("The percentage threshold against remaining unpromoted indirect "
              "call count for the promotion"));
 
@@ -39,14 +34,14 @@
 // total call count) for it to be considered as the promotion target.
 static cl::opt<unsigned>
     ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5),
-                             cl::Hidden, cl::ZeroOrMore,
+                             cl::Hidden,
                              cl::desc("The percentage threshold against total "
                                       "count for the promotion"));
 
 // Set the maximum number of targets to promote for a single indirect-call
 // callsite.
 static cl::opt<unsigned>
-    MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
+    MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden,
                      cl::desc("Max number of promotions for a single indirect "
                               "call callsite"));
 
diff --git a/src/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp b/src/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
index f6e3dd3..3fafc30 100644
--- a/src/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -13,14 +13,15 @@
 
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ReplayInlineAdvisor.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
@@ -55,6 +56,11 @@
                         cl::desc("Scale to limit the cost of inline deferral"),
                         cl::init(2), cl::Hidden);
 
+static cl::opt<bool>
+    AnnotateInlinePhase("annotate-inline-phase", cl::Hidden, cl::init(false),
+                        cl::desc("If true, annotate inline advisor remarks "
+                                 "with LTO and pass information."));
+
 extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
 
 namespace {
@@ -80,7 +86,8 @@
   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
     if (IsInliningRecommended)
       ORE.emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+        return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(),
+                                        "NotInlined", DLoc, Block)
                << "'" << NV("Callee", Callee) << "' is not AlwaysInline into '"
                << NV("Caller", Caller)
                << "': " << NV("Reason", Result.getFailureReason());
@@ -99,7 +106,8 @@
   llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) +
                                          "; " + inlineCostStr(*OIC));
   ORE.emit([&]() {
-    return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+    return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(),
+                                    "NotInlined", DLoc, Block)
            << "'" << NV("Callee", Callee) << "' is not inlined into '"
            << NV("Caller", Caller)
            << "': " << NV("Reason", Result.getFailureReason());
@@ -108,12 +116,16 @@
 
 void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() {
   if (EmitRemarks)
-    emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+    emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC,
+                               /* ForProfileContext= */ false,
+                               Advisor->getAnnotatedInlinePassName());
 }
 
 void DefaultInlineAdvice::recordInliningImpl() {
   if (EmitRemarks)
-    emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+    emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC,
+                               /* ForProfileContext= */ false,
+                               Advisor->getAnnotatedInlinePassName());
 }
 
 llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
@@ -146,7 +158,7 @@
   };
   return llvm::shouldInline(
       CB, GetInlineCost, ORE,
-      Params.EnableDeferral.getValueOr(EnableInlineDeferral));
+      Params.EnableDeferral.value_or(EnableInlineDeferral));
 }
 
 std::unique_ptr<InlineAdvice>
@@ -185,18 +197,18 @@
 
 bool InlineAdvisorAnalysis::Result::tryCreate(
     InlineParams Params, InliningAdvisorMode Mode,
-    const ReplayInlinerSettings &ReplaySettings) {
+    const ReplayInlinerSettings &ReplaySettings, InlineContext IC) {
   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
   switch (Mode) {
   case InliningAdvisorMode::Default:
     LLVM_DEBUG(dbgs() << "Using default inliner heuristic.\n");
-    Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
+    Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params, IC));
     // Restrict replay to the default advisor; ML advisors are stateful, so
     // replay will need augmentations to interleave with them correctly.
     if (!ReplaySettings.ReplayFile.empty()) {
       Advisor = llvm::getReplayInlineAdvisor(M, FAM, M.getContext(),
                                              std::move(Advisor), ReplaySettings,
-                                             /* EmitRemarks =*/true);
+                                             /* EmitRemarks =*/true, IC);
     }
     break;
   case InliningAdvisorMode::Development:
@@ -442,7 +454,7 @@
 }
 
 void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
-  if (!DLoc.get()) {
+  if (!DLoc) {
     return;
   }
 
@@ -499,8 +511,12 @@
       PassName);
 }
 
-InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM)
-    : M(M), FAM(FAM) {
+InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+                             Optional<InlineContext> IC)
+    : M(M), FAM(FAM), IC(IC),
+      AnnotatedInlinePassName((IC && AnnotateInlinePhase)
+                                  ? llvm::AnnotateInlinePassName(*IC)
+                                  : DEBUG_TYPE) {
   if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
     ImportedFunctionsStats =
         std::make_unique<ImportedFunctionsInliningStatistics>();
@@ -522,6 +538,48 @@
                                                  Advice);
 }
 
+static inline const char *getLTOPhase(ThinOrFullLTOPhase LTOPhase) {
+  switch (LTOPhase) {
+  case (ThinOrFullLTOPhase::None):
+    return "main";
+  case (ThinOrFullLTOPhase::ThinLTOPreLink):
+  case (ThinOrFullLTOPhase::FullLTOPreLink):
+    return "prelink";
+  case (ThinOrFullLTOPhase::ThinLTOPostLink):
+  case (ThinOrFullLTOPhase::FullLTOPostLink):
+    return "postlink";
+  }
+  llvm_unreachable("unreachable");
+}
+
+static inline const char *getInlineAdvisorContext(InlinePass IP) {
+  switch (IP) {
+  case (InlinePass::AlwaysInliner):
+    return "always-inline";
+  case (InlinePass::CGSCCInliner):
+    return "cgscc-inline";
+  case (InlinePass::EarlyInliner):
+    return "early-inline";
+  case (InlinePass::MLInliner):
+    return "ml-inline";
+  case (InlinePass::ModuleInliner):
+    return "module-inline";
+  case (InlinePass::ReplayCGSCCInliner):
+    return "replay-cgscc-inline";
+  case (InlinePass::ReplaySampleProfileInliner):
+    return "replay-sample-profile-inline";
+  case (InlinePass::SampleProfileInliner):
+    return "sample-profile-inline";
+  }
+
+  llvm_unreachable("unreachable");
+}
+
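+// For example (values illustrative), an InlineContext with
+// LTOPhase == ThinLTOPostLink and Pass == InlinePass::CGSCCInliner is
+// annotated as "postlink-cgscc-inline".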
+std::string llvm::AnnotateInlinePassName(InlineContext IC) {
+  return std::string(getLTOPhase(IC.LTOPhase)) + "-" +
+         std::string(getInlineAdvisorContext(IC.Pass));
+}
+
 InlineAdvisor::MandatoryInliningKind
 InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM,
                                 OptimizationRemarkEmitter &ORE) {
@@ -536,7 +594,7 @@
   auto TrivialDecision =
       llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
 
-  if (TrivialDecision.hasValue()) {
+  if (TrivialDecision) {
     if (TrivialDecision->isSuccess())
       return MandatoryInliningKind::Always;
     else
@@ -568,3 +626,22 @@
     IA->getAdvisor()->print(OS);
   return PreservedAnalyses::all();
 }
+
+PreservedAnalyses InlineAdvisorAnalysisPrinterPass::run(
+    LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+    CGSCCUpdateResult &UR) {
+  const auto &MAMProxy =
+      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
+
+  if (InitialC.size() == 0) {
+    OS << "SCC is empty!\n";
+    return PreservedAnalyses::all();
+  }
+  Module &M = *InitialC.begin()->getFunction().getParent();
+  const auto *IA = MAMProxy.getCachedResult<InlineAdvisorAnalysis>(M);
+  if (!IA)
+    OS << "No Inline Advisor\n";
+  else
+    IA->getAdvisor()->print(OS);
+  return PreservedAnalyses::all();
+}
diff --git a/src/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/src/llvm-project/llvm/lib/Analysis/InlineCost.cpp
index cd5314e..8192ed5 100644
--- a/src/llvm-project/llvm/lib/Analysis/InlineCost.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/InlineCost.cpp
@@ -18,11 +18,11 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -42,6 +42,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
+#include <limits>
 
 using namespace llvm;
 
@@ -51,24 +52,33 @@
 
 static cl::opt<int>
     DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
-                     cl::ZeroOrMore,
                      cl::desc("Default amount of inlining to perform"));
 
+// We introduce this option since there is a minor compile-time win by avoiding
+// addition of TTI attributes (target-features in particular) to inline
+// candidates when they are guaranteed to be the same as top level methods in
+// some use cases. If we avoid adding the attribute, we need an option to avoid
+// checking these attributes.
+static cl::opt<bool> IgnoreTTIInlineCompatible(
+    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
+    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
+             "during inline cost calculation"));
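+// Illustrative use: passing -ignore-tti-inline-compatible (via -mllvm when
+// driving clang) bypasses the TTI.areInlineCompatible() callee/caller check
+// performed later in this file.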
+
 static cl::opt<bool> PrintInstructionComments(
     "print-instruction-comments", cl::Hidden, cl::init(false),
     cl::desc("Prints comments for instruction based on inline cost analysis"));
 
 static cl::opt<int> InlineThreshold(
-    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+    "inline-threshold", cl::Hidden, cl::init(225),
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
+    "inlinehint-threshold", cl::Hidden, cl::init(325),
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
-                          cl::init(45), cl::ZeroOrMore,
+                          cl::init(45),
                           cl::desc("Threshold for inlining cold callsites"));
 
 static cl::opt<bool> InlineEnableCostBenefitAnalysis(
@@ -76,12 +86,11 @@
     cl::desc("Enable the cost-benefit analysis for the inliner"));
 
 static cl::opt<int> InlineSavingsMultiplier(
-    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
+    "inline-savings-multiplier", cl::Hidden, cl::init(8),
     cl::desc("Multiplier to multiply cycle savings by during inlining"));
 
 static cl::opt<int>
     InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
-                        cl::ZeroOrMore,
                         cl::desc("The maximum size of a callee that get's "
                                  "inlined without sufficient cycle savings"));
 
@@ -89,26 +98,25 @@
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
+    "inlinecold-threshold", cl::Hidden, cl::init(45),
     cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
-                         cl::ZeroOrMore,
                          cl::desc("Threshold for hot callsites "));
 
 static cl::opt<int> LocallyHotCallSiteThreshold(
-    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
+    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
     cl::desc("Threshold for locally hot callsites "));
 
 static cl::opt<int> ColdCallSiteRelFreq(
-    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
     cl::desc("Maximum block frequency, expressed as a percentage of caller's "
              "entry frequency, for a callsite to be cold in the absence of "
              "profile information."));
 
 static cl::opt<int> HotCallSiteRelFreq(
-    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
+    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
     cl::desc("Minimum block frequency, expressed as a multiple of caller's "
              "entry frequency, for a callsite to be hot in the absence of "
              "profile information."));
@@ -117,14 +125,25 @@
     "inline-call-penalty", cl::Hidden, cl::init(25),
     cl::desc("Call penalty that is applied per callsite when inlining"));
 
+static cl::opt<size_t>
+    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
+                       cl::init(std::numeric_limits<size_t>::max()),
+                       cl::desc("Do not inline functions with a stack size "
+                                "that exceeds the specified limit"));
+
+static cl::opt<size_t> RecurStackSizeThreshold(
+    "recursive-inline-max-stacksize", cl::Hidden,
+    cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
+    cl::desc("Do not inline recursive functions with a stack "
+             "size that exceeds the specified limit"));
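+// Illustrative use: -inline-max-stacksize=4096 (value hypothetical) refuses
+// to inline any callee whose accumulated alloca size exceeds 4096 bytes.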
+
 static cl::opt<bool> OptComputeFullInlineCost(
-    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+    "inline-cost-full", cl::Hidden,
     cl::desc("Compute the full inline cost of a call site even when the cost "
              "exceeds the threshold."));
 
 static cl::opt<bool> InlineCallerSupersetNoBuiltin(
     "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
-    cl::ZeroOrMore,
     cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
              "attributes."));
 
@@ -132,28 +151,9 @@
     "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
     cl::desc("Disables evaluation of GetElementPtr with constant operands"));
 
-namespace {
-/// This function behaves more like CallBase::hasFnAttr: when it looks for the
-/// requested attribute, it check both the call instruction and the called
-/// function (if it's available and operand bundles don't prohibit that).
-Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
-  Attribute CallAttr = CB.getFnAttr(AttrKind);
-  if (CallAttr.isValid())
-    return CallAttr;
-
-  // Operand bundles override attributes on the called function, but don't
-  // override attributes directly present on the call instruction.
-  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
-    if (const Function *F = CB.getCalledFunction())
-      return F->getFnAttribute(AttrKind);
-
-  return {};
-}
-} // namespace
-
 namespace llvm {
 Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
-  Attribute Attr = getFnAttr(CB, AttrKind);
+  Attribute Attr = CB.getFnAttr(AttrKind);
   int AttrValue;
   if (Attr.getValueAsString().getAsInteger(10, AttrValue))
     return None;
@@ -185,8 +185,8 @@
 
 public:
   InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
-  virtual void emitInstructionAnnot(const Instruction *I,
-                                    formatted_raw_ostream &OS) override;
+  void emitInstructionAnnot(const Instruction *I,
+                            formatted_raw_ostream &OS) override;
 };
 
 /// Carry out call site analysis, in order to evaluate inlinability.
@@ -202,7 +202,7 @@
   friend class InstVisitor<CallAnalyzer, bool>;
 
 protected:
-  virtual ~CallAnalyzer() {}
+  virtual ~CallAnalyzer() = default;
   /// The TargetTransformInfo available for this compilation.
   const TargetTransformInfo &TTI;
 
@@ -356,7 +356,7 @@
   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
 
   /// Keep track of dead blocks due to the constant arguments.
-  SetVector<BasicBlock *> DeadBlocks;
+  SmallPtrSet<BasicBlock *, 16> DeadBlocks;
 
   /// The mapping of the blocks to their known unique successors due to the
   /// constant arguments.
@@ -389,8 +389,7 @@
   bool canFoldInboundsGEP(GetElementPtrInst &I);
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
   bool simplifyCallSite(Function *F, CallBase &Call);
-  template <typename Callable>
-  bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyInstruction(Instruction &I);
   bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
@@ -708,8 +707,8 @@
       BlockFrequencyInfo *BFI = &(GetBFI(F));
       assert(BFI && "BFI must be available");
       auto ProfileCount = BFI->getBlockProfileCount(BB);
-      assert(ProfileCount.hasValue());
-      if (ProfileCount.getValue() == 0)
+      assert(ProfileCount);
+      if (ProfileCount.value() == 0)
         ColdSize += Cost - CostAtBBStart;
     }
 
@@ -833,14 +832,14 @@
       }
 
       auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
-      assert(ProfileCount.hasValue());
-      CurrentSavings *= ProfileCount.getValue();
+      assert(ProfileCount);
+      CurrentSavings *= ProfileCount.value();
       CycleSavings += CurrentSavings;
     }
 
     // Compute the cycle savings per call.
     auto EntryProfileCount = F.getEntryCount();
-    assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
+    assert(EntryProfileCount && EntryProfileCount->getCount());
     auto EntryCount = EntryProfileCount->getCount();
     CycleSavings += EntryCount / 2;
     CycleSavings = CycleSavings.udiv(EntryCount);
@@ -849,7 +848,7 @@
     auto *CallerBB = CandidateCall.getParent();
     BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
     CycleSavings += getCallsiteCost(this->CandidateCall, DL);
-    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();
+    CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
 
     // Remove the cost of the cold basic blocks.
     int Size = Cost - ColdSize;
@@ -919,7 +918,7 @@
 
     if (auto Result = costBenefitAnalysis()) {
       DecidedByCostBenefit = true;
-      if (Result.getValue())
+      if (*Result)
         return InlineResult::success();
       else
         return InlineResult::failure("Cost over threshold.");
@@ -987,6 +986,8 @@
     if (F.getCallingConv() == CallingConv::Cold)
       Cost += InlineConstants::ColdccPenalty;
 
+    LLVM_DEBUG(dbgs() << "      Initial cost: " << Cost << "\n");
+
     // Check if we're done. This can happen due to bonuses and penalties.
     if (Cost >= Threshold && !ComputeFullInlineCost)
       return InlineResult::failure("high cost");
@@ -1011,7 +1012,7 @@
         BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
         CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
         Writer(this) {
-    AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
+    AllowRecursiveCall = *Params.AllowRecursiveCall;
   }
 
   /// Annotation Writer for instruction details
@@ -1029,7 +1030,7 @@
     return None;
   }
 
-  virtual ~InlineCostCallAnalyzer() {}
+  virtual ~InlineCostCallAnalyzer() = default;
   int getThreshold() const { return Threshold; }
   int getCost() const { return Cost; }
   Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
@@ -1212,6 +1213,10 @@
     set(InlineCostFeatureIndex::ColdCcPenalty,
         (F.getCallingConv() == CallingConv::Cold));
 
+    set(InlineCostFeatureIndex::LastCallToStaticBonus,
+        (F.hasLocalLinkage() && F.hasOneLiveUse() &&
+         &F == CandidateCall.getCalledFunction()));
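+    // i.e. the callee is a local function whose only remaining use is this
+    // call site, so inlining it would likely allow the definition to be
+    // removed.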
+
     // FIXME: we shouldn't repeat this logic in both the Features and Cost
     // analyzer - instead, we should abstract it to a common method in the
     // CallAnalyzer
@@ -1271,7 +1276,7 @@
   auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
   if (C) {
     OS << ", simplified to ";
-    C.getValue()->print(OS, true);
+    (*C)->print(OS, true);
   }
   OS << "\n";
 }
@@ -1510,13 +1515,7 @@
   };
 
   if (!DisableGEPConstOperand)
-    if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-          SmallVector<Constant *, 2> Indices;
-          for (unsigned int Index = 1; Index < COps.size(); ++Index)
-            Indices.push_back(COps[Index]);
-          return ConstantExpr::getGetElementPtr(
-              I.getSourceElementType(), COps[0], Indices, I.isInBounds());
-        }))
+    if (simplifyInstruction(I))
       return true;
 
   if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
@@ -1534,11 +1533,8 @@
 }
 
 /// Simplify \p I if its operands are constants and update SimplifiedValues.
-/// \p Evaluate is a callable specific to instruction type that evaluates the
-/// instruction when all the operands are constants.
-template <typename Callable>
-bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
-  SmallVector<Constant *, 2> COps;
+bool CallAnalyzer::simplifyInstruction(Instruction &I) {
+  SmallVector<Constant *> COps;
   for (Value *Op : I.operands()) {
     Constant *COp = dyn_cast<Constant>(Op);
     if (!COp)
@@ -1547,7 +1543,7 @@
       return false;
     COps.push_back(COp);
   }
-  auto *C = Evaluate(COps);
+  auto *C = ConstantFoldInstOperands(&I, COps, DL);
   if (!C)
     return false;
   SimplifiedValues[&I] = C;
@@ -1577,9 +1573,7 @@
 
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getBitCast(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offsets through casts
@@ -1599,9 +1593,7 @@
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Propagate constants through ptrtoint.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getPtrToInt(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offset pairs when converted to a plain integer provided the
@@ -1631,9 +1623,7 @@
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // Propagate constants through inttoptr.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getIntToPtr(COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Track base/offset pairs when round-tripped through a pointer without
@@ -1656,9 +1646,7 @@
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Propagate constants through casts.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // Disable SROA in the face of arbitrary casts we don't explicitly list
@@ -1799,12 +1787,12 @@
 
   // return min(A, B) if B is valid.
   auto MinIfValid = [](int A, Optional<int> B) {
-    return B ? std::min(A, B.getValue()) : A;
+    return B ? std::min(A, B.value()) : A;
   };
 
   // return max(A, B) if B is valid.
   auto MaxIfValid = [](int A, Optional<int> B) {
-    return B ? std::max(A, B.getValue()) : A;
+    return B ? std::max(A, B.value()) : A;
   };
 
   // Various bonus percentages. These are multiplied by Threshold to get the
@@ -1864,7 +1852,7 @@
       // current threshold, but AutoFDO + ThinLTO currently relies on this
       // behavior to prevent inlining of hot callsites during ThinLTO
       // compile phase.
-      Threshold = HotCallSiteThreshold.getValue();
+      Threshold = *HotCallSiteThreshold;
     } else if (isColdCallSite(Call, CallerBFI)) {
       LLVM_DEBUG(dbgs() << "Cold callsite.\n");
       // Do not apply bonuses for a cold callsite including the
@@ -1915,9 +1903,7 @@
 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   // First try to handle simplified comparisons.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   if (I.getOpcode() == Instruction::FCmp)
@@ -1993,11 +1979,11 @@
 
   Value *SimpleV = nullptr;
   if (auto FI = dyn_cast<FPMathOperator>(&I))
-    SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
+    SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
                             FI->getFastMathFlags(), DL);
   else
     SimpleV =
-        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
+        simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
 
   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
     SimplifiedValues[&I] = C;
@@ -2027,7 +2013,7 @@
   if (!COp)
     COp = SimplifiedValues.lookup(Op);
 
-  Value *SimpleV = SimplifyFNegInst(
+  Value *SimpleV = simplifyFNegInst(
       COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
 
   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
@@ -2076,9 +2062,7 @@
 
 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
   // Constant folding for extract value is trivial.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // SROA can't look through these, but they may be free.
@@ -2087,11 +2071,7 @@
 
 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
   // Constant folding for insert value is trivial.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
-                                            /*InsertedValueOperand*/ COps[1],
-                                            I.getIndices());
-      }))
+  if (simplifyInstruction(I))
     return true;
 
   // SROA can't look through these, but they may be free.
@@ -2145,14 +2125,14 @@
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  Value *Callee = Call.getCalledOperand();
-  Function *F = dyn_cast_or_null<Function>(Callee);
+  Function *F = Call.getCalledFunction();
   bool IsIndirectCall = !F;
   if (IsIndirectCall) {
     // Check if this happens to be an indirect function call to a known function
     // in this inline context. If not, we've done all we can.
+    Value *Callee = Call.getCalledOperand();
     F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-    if (!F) {
+    if (!F || F->getFunctionType() != Call.getFunctionType()) {
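+      // Either the simplified callee is unknown or its signature does not
+      // match the call site, so keep treating this as an indirect call.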
       onCallArgumentSetup(Call);
 
       if (!Call.onlyReadsMemory())
@@ -2470,8 +2450,7 @@
     // If the caller is a recursive function then we don't want to inline
     // functions which allocate a lot of stack space because it would increase
     // the caller stack usage dramatically.
-    if (IsCallerRecursive &&
-        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+    if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
       auto IR =
           InlineResult::failure("recursive and allocates too much stack space");
       if (ORE)
@@ -2561,7 +2540,7 @@
     NewDead.push_back(Succ);
     while (!NewDead.empty()) {
       BasicBlock *Dead = NewDead.pop_back_val();
-      if (DeadBlocks.insert(Dead))
+      if (DeadBlocks.insert(Dead).second)
         // Continue growing the dead block lists.
         for (BasicBlock *S : successors(Dead))
           if (IsNewlyDead(S))
@@ -2716,6 +2695,11 @@
   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
     return InlineResult::failure("noduplicate");
 
+  // If the callee's stack size exceeds the user-specified threshold,
+  // do not let it be inlined.
+  if (AllocatedSize > StackSizeThreshold)
+    return InlineResult::failure("stacksize");
+
   return finalizeAnalysis();
 }
 
@@ -2754,7 +2738,8 @@
   // object, and always returns the same object (which is overwritten on each
   // GetTLI call). Therefore we copy the first result.
   auto CalleeTLI = GetTLI(*Callee);
-  return TTI.areInlineCompatible(Caller, Callee) &&
+  return (IgnoreTTIInlineCompatible ||
+          TTI.areInlineCompatible(Caller, Callee)) &&
          GetTLI(*Caller).areInlineCompatible(CalleeTLI,
                                              InlineCallerSupersetNoBuiltin) &&
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
@@ -2873,6 +2858,9 @@
   // Calls to functions with always-inline attributes should be inlined
   // whenever possible.
   if (Call.hasFnAttr(Attribute::AlwaysInline)) {
+    if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
+      return InlineResult::failure("noinline call site attribute");
+
     auto IsViable = isInlineViable(*Callee);
     if (IsViable.isSuccess())
       return InlineResult::success();
@@ -2920,7 +2908,7 @@
   auto UserDecision =
       llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
 
-  if (UserDecision.hasValue()) {
+  if (UserDecision) {
     if (UserDecision->isSuccess())
       return llvm::InlineCost::getAlways("always inline attribute");
     return llvm::InlineCost::getNever(UserDecision->getFailureReason());
diff --git a/src/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
index a2e231e..2371ecb 100644
--- a/src/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -15,33 +15,32 @@
 #ifdef LLVM_HAVE_TF_API
 #include "llvm/Analysis/Utils/TFUtils.h"
 #endif
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <algorithm>
-#include <deque>
-
 using namespace llvm;
 
 AnalysisKey InlineSizeEstimatorAnalysis::Key;
 
-#define DEBUG_TYPE "inline-size-estimator"
-
 #ifdef LLVM_HAVE_TF_API
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <deque>
+
 cl::opt<std::string> TFIR2NativeModelPath(
     "ml-inliner-ir2native-model", cl::Hidden,
     cl::desc("Path to saved model evaluating native size from IR."));
 
+#define DEBUG_TYPE "inline-size-estimator"
 namespace {
 unsigned getMaxInstructionID() {
 #define LAST_OTHER_INST(NR) return NR;
@@ -261,10 +260,10 @@
 namespace llvm {
 class TFModelEvaluator {};
 } // namespace llvm
-InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
     InlineSizeEstimatorAnalysis &&) {}
-InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
 InlineSizeEstimatorAnalysis::Result
 InlineSizeEstimatorAnalysis::run(const Function &F,
                                  FunctionAnalysisManager &FAM) {
diff --git a/src/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/src/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 9fee57c..78e7f45 100644
--- a/src/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -68,7 +68,7 @@
 
 void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
   FirstSpecialInsts.erase(BB);
-  for (auto &I : *BB) {
+  for (const auto &I : *BB) {
     NumInstScanned++;
     if (isSpecialInstruction(&I)) {
       FirstSpecialInsts[BB] = &I;
@@ -101,7 +101,7 @@
 
 void InstructionPrecedenceTracking::validateAll() const {
   // Check that for every known block the cached value is correct.
-  for (auto &It : FirstSpecialInsts)
+  for (const auto &It : FirstSpecialInsts)
     validate(It.first);
 }
 #endif
diff --git a/src/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/src/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 60895d3..21fe448 100644
--- a/src/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -20,7 +20,6 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -36,13 +35,10 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/KnownBits.h"
 #include <algorithm>
 using namespace llvm;
@@ -52,28 +48,30 @@
 
 enum { RecursionLimit = 3 };
 
-STATISTIC(NumExpand,  "Number of expansions");
+STATISTIC(NumExpand, "Number of expansions");
 STATISTIC(NumReassoc, "Number of reassociations");
 
-static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyAndInst(Value *, Value *, const SimplifyQuery &,
+                              unsigned);
 static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
 static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
                              const SimplifyQuery &, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
+static Value *simplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
                             unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+static Value *simplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
                             const SimplifyQuery &, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
+static Value *simplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
                               unsigned);
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                                const SimplifyQuery &Q, unsigned MaxRecurse);
-static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
-static Value *SimplifyCastInst(unsigned, Value *, Type *,
-                               const SimplifyQuery &, unsigned);
-static Value *SimplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
+static Value *simplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyXorInst(Value *, Value *, const SimplifyQuery &,
+                              unsigned);
+static Value *simplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &,
+                               unsigned);
+static Value *simplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
                               const SimplifyQuery &, unsigned);
-static Value *SimplifySelectInst(Value *, Value *, Value *,
+static Value *simplifySelectInst(Value *, Value *, Value *,
                                  const SimplifyQuery &, unsigned);
 
 static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
@@ -120,15 +118,11 @@
 
 /// For a boolean type or a vector of boolean type, return false or a vector
 /// with every element false.
-static Constant *getFalse(Type *Ty) {
-  return ConstantInt::getFalse(Ty);
-}
+static Constant *getFalse(Type *Ty) { return ConstantInt::getFalse(Ty); }
 
 /// For a boolean type or a vector of boolean type, return true or a vector
 /// with every element true.
-static Constant *getTrue(Type *Ty) {
-  return ConstantInt::getTrue(Ty);
-}
+static Constant *getTrue(Type *Ty) { return ConstantInt::getTrue(Ty); }
 
 /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
 static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
@@ -141,7 +135,7 @@
   if (CPred == Pred && CLHS == LHS && CRHS == RHS)
     return true;
   return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
-    CRHS == LHS;
+         CRHS == LHS;
 }
 
 /// Simplify comparison with true or false branch of select:
@@ -153,7 +147,7 @@
                                  Value *RHS, Value *Cond,
                                  const SimplifyQuery &Q, unsigned MaxRecurse,
                                  Constant *TrueOrFalse) {
-  Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+  Value *SimplifiedCmp = simplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
   if (SimplifiedCmp == Cond) {
     // %cmp simplified to the select condition (%cond).
     return TrueOrFalse;
@@ -196,17 +190,17 @@
   // checks whether folding it does not convert a well-defined value into
   // poison.
   if (match(FCmp, m_Zero()) && impliesPoison(TCmp, Cond))
-    if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+    if (Value *V = simplifyAndInst(Cond, TCmp, Q, MaxRecurse))
       return V;
   // If the true value simplified to true, then the result of the compare
   // is equal to "Cond || FCmp".
   if (match(TCmp, m_One()) && impliesPoison(FCmp, Cond))
-    if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+    if (Value *V = simplifyOrInst(Cond, FCmp, Q, MaxRecurse))
       return V;
   // Finally, if the false value simplified to true and the true value to
   // false, then the result of the compare is equal to "!Cond".
   if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
-    if (Value *V = SimplifyXorInst(
+    if (Value *V = simplifyXorInst(
             Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse))
       return V;
   return nullptr;
@@ -248,12 +242,12 @@
   if (!B || B->getOpcode() != OpcodeToExpand)
     return nullptr;
   Value *B0 = B->getOperand(0), *B1 = B->getOperand(1);
-  Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(),
-                           MaxRecurse);
+  Value *L =
+      simplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), MaxRecurse);
   if (!L)
     return nullptr;
-  Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(),
-                           MaxRecurse);
+  Value *R =
+      simplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), MaxRecurse);
   if (!R)
     return nullptr;
 
@@ -265,7 +259,7 @@
   }
 
   // Otherwise, return "L op' R" if it simplifies.
-  Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
+  Value *S = simplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
   if (!S)
     return nullptr;
 
@@ -275,8 +269,8 @@
 
 /// Try to simplify binops of form "A op (B op' C)" or the commuted variant by
 /// distributing op over op'.
-static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode,
-                                     Value *L, Value *R,
+static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, Value *L,
+                                     Value *R,
                                      Instruction::BinaryOps OpcodeToExpand,
                                      const SimplifyQuery &Q,
                                      unsigned MaxRecurse) {
@@ -293,7 +287,7 @@
 
 /// Generic simplifications for associative binary operations.
 /// Returns the simpler value, or null if none was found.
-static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
+static Value *simplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
                                        Value *LHS, Value *RHS,
                                        const SimplifyQuery &Q,
                                        unsigned MaxRecurse) {
@@ -313,12 +307,13 @@
     Value *C = RHS;
 
     // Does "B op C" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+    if (Value *V = simplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
       // It does!  Return "A op V" if it simplifies or is already available.
       // If V equals B then "A op V" is just the LHS.
-      if (V == B) return LHS;
+      if (V == B)
+        return LHS;
       // Otherwise return "A op V" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
+      if (Value *W = simplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -332,12 +327,13 @@
     Value *C = Op1->getOperand(1);
 
     // Does "A op B" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
+    if (Value *V = simplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
       // It does!  Return "V op C" if it simplifies or is already available.
       // If V equals B then "V op C" is just the RHS.
-      if (V == B) return RHS;
+      if (V == B)
+        return RHS;
       // Otherwise return "V op C" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
+      if (Value *W = simplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -355,12 +351,13 @@
     Value *C = RHS;
 
     // Does "C op A" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+    if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
       // It does!  Return "V op B" if it simplifies or is already available.
       // If V equals A then "V op B" is just the LHS.
-      if (V == A) return LHS;
+      if (V == A)
+        return LHS;
       // Otherwise return "V op B" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
+      if (Value *W = simplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -374,12 +371,13 @@
     Value *C = Op1->getOperand(1);
 
     // Does "C op A" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+    if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
       // It does!  Return "B op V" if it simplifies or is already available.
       // If V equals C then "B op V" is just the RHS.
-      if (V == C) return RHS;
+      if (V == C)
+        return RHS;
       // Otherwise return "B op V" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
+      if (Value *W = simplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -393,7 +391,7 @@
 /// try to simplify the binop by seeing whether evaluating it on both branches
 /// of the select results in the same value. Returns the common value if so,
 /// otherwise returns null.
-static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
+static Value *threadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
                                     Value *RHS, const SimplifyQuery &Q,
                                     unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -412,11 +410,11 @@
   Value *TV;
   Value *FV;
   if (SI == LHS) {
-    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
+    TV = simplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+    FV = simplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
   } else {
-    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
+    TV = simplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+    FV = simplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
   }
 
   // If they simplified to the same value, then return the common value.
@@ -471,7 +469,7 @@
 /// We can simplify %cmp1 to true, because both branches of select are
 /// less than 3. We compose new comparison by substituting %tmp with both
 /// branches of select and see if it can be simplified.
-static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+static Value *threadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
                                   Value *RHS, const SimplifyQuery &Q,
                                   unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -517,7 +515,7 @@
 /// try to simplify the binop by seeing whether evaluating it on the incoming
 /// phi values yields the same result for every value. If so returns the common
 /// value, otherwise returns null.
-static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
+static Value *threadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
                                  Value *RHS, const SimplifyQuery &Q,
                                  unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -542,10 +540,10 @@
   Value *CommonValue = nullptr;
   for (Value *Incoming : PI->incoming_values()) {
     // If the incoming value is the phi node itself, it can safely be skipped.
-    if (Incoming == PI) continue;
-    Value *V = PI == LHS ?
-      SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
-      SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
+    if (Incoming == PI)
+      continue;
+    Value *V = PI == LHS ? simplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse)
+                         : simplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // than previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -560,7 +558,7 @@
 /// comparison by seeing whether comparing with all of the incoming phi values
 /// yields the same result every time. If so returns the common result,
 /// otherwise returns null.
-static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+static Value *threadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
                                const SimplifyQuery &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
@@ -584,11 +582,12 @@
     Value *Incoming = PI->getIncomingValue(u);
     Instruction *InTI = PI->getIncomingBlock(u)->getTerminator();
     // If the incoming value is the phi node itself, it can safely be skipped.
-    if (Incoming == PI) continue;
+    if (Incoming == PI)
+      continue;
     // Change the context instruction to the "edge" that flows into the phi.
     // This is important because that is where incoming is actually "evaluated"
     // even though it is used later somewhere else.
-    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
+    Value *V = simplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
                                MaxRecurse);
     // If the operation failed to simplify, or simplified to a different
     // value than the previous ones, then give up.
@@ -604,8 +603,20 @@
                                        Value *&Op0, Value *&Op1,
                                        const SimplifyQuery &Q) {
   if (auto *CLHS = dyn_cast<Constant>(Op0)) {
-    if (auto *CRHS = dyn_cast<Constant>(Op1))
+    if (auto *CRHS = dyn_cast<Constant>(Op1)) {
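+      // Unlike the generic constant folding below, ConstantFoldFPInstOperands
+      // can consult the context instruction (e.g. for the enclosing
+      // function's denormal-handling mode), so route FP opcodes through it
+      // when a context instruction is available.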
+      switch (Opcode) {
+      default:
+        break;
+      case Instruction::FAdd:
+      case Instruction::FSub:
+      case Instruction::FMul:
+      case Instruction::FDiv:
+      case Instruction::FRem:
+        if (Q.CxtI != nullptr)
+          return ConstantFoldFPInstOperands(Opcode, CLHS, CRHS, Q.DL, Q.CxtI);
+      }
       return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
+    }
 
     // Canonicalize the constant to the RHS if this is a commutative operation.
     if (Instruction::isCommutative(Opcode))
@@ -616,7 +627,7 @@
 
 /// Given operands for an Add, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
+static Value *simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
                               const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
     return C;
@@ -647,8 +658,7 @@
 
   // X + ~X -> -1   since   ~X = -X-1
   Type *Ty = Op0->getType();
-  if (match(Op0, m_Not(m_Specific(Op1))) ||
-      match(Op1, m_Not(m_Specific(Op0))))
+  if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
     return Constant::getAllOnesValue(Ty);
 
   // add nsw/nuw (xor Y, signmask), signmask --> Y
@@ -664,12 +674,12 @@
 
   // i1 add -> xor.
   if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+    if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
-                                          MaxRecurse))
+  if (Value *V =
+          simplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -684,45 +694,37 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
+Value *llvm::simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
                              const SimplifyQuery &Query) {
-  return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
+  return ::simplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
 }
 
 /// Compute the base pointer and cumulative constant offsets for V.
 ///
 /// This strips all constant offsets off of V, leaving it the base pointer, and
-/// accumulates the total constant offset applied in the returned constant. It
-/// returns 0 if V is not a pointer, and returns the constant '0' if there are
-/// no constant offsets applied.
+/// accumulates the total constant offset applied in the returned APInt.
+/// It returns zero if there are no constant offsets applied.
 ///
-/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
-/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
-/// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
-                                                bool AllowNonInbounds = false) {
+/// This is very similar to stripAndAccumulateConstantOffsets(), except it
+/// normalizes the offset bitwidth to the stripped pointer type, not the
+/// original pointer type.
+static APInt stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
+                                            bool AllowNonInbounds = false) {
   assert(V->getType()->isPtrOrPtrVectorTy());
 
   APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
-
   V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
   // As that strip may trace through `addrspacecast`, need to sext or trunc
   // the offset calculated.
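   // (For instance, stripping through an addrspacecast between address spaces
   // whose index types have different widths leaves Offset at the old width,
   // so it must be resized to the stripped pointer's index width.)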
-  Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
-  Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
-
-  Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
-  if (VectorType *VecTy = dyn_cast<VectorType>(V->getType()))
-    return ConstantVector::getSplat(VecTy->getElementCount(), OffsetIntPtr);
-  return OffsetIntPtr;
+  return Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(V->getType()));
 }
 
 /// Compute the constant difference between two pointer values.
 /// If the difference is not a constant, returns zero.
 static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
                                           Value *RHS) {
-  Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
-  Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+  APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+  APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
 
   // If LHS and RHS are not related via constant offsets to the same base
   // value, there is nothing we can do here.
@@ -733,12 +735,15 @@
   //    LHS - RHS
   //  = (LHSOffset + Base) - (RHSOffset + Base)
   //  = LHSOffset - RHSOffset
-  return ConstantExpr::getSub(LHSOffset, RHSOffset);
+  Constant *Res = ConstantInt::get(LHS->getContext(), LHSOffset - RHSOffset);
+  if (auto *VecTy = dyn_cast<VectorType>(LHS->getType()))
+    Res = ConstantVector::getSplat(VecTy->getElementCount(), Res);
+  return Res;
 }
 
 /// Given operands for a Sub, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+static Value *simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                               const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
     return C;
@@ -784,17 +789,17 @@
   Value *X = nullptr, *Y = nullptr, *Z = Op1;
   if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
     // See if "V === Y - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
+    if (Value *V = simplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse - 1))
       // It does!  Now see if "X + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
+      if (Value *W = simplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse - 1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
       }
     // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+    if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1))
       // It does!  Now see if "Y + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
+      if (Value *W = simplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse - 1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
@@ -806,17 +811,17 @@
   X = Op0;
   if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
     // See if "V === X - Y" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+    if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1))
       // It does!  Now see if "V - Z" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
+      if (Value *W = simplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse - 1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
       }
     // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+    if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1))
       // It does!  Now see if "V - Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
+      if (Value *W = simplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse - 1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
@@ -828,9 +833,9 @@
   Z = Op0;
   if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
     // See if "V === Z - X" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
+    if (Value *V = simplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse - 1))
       // It does!  Now see if "V + Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
+      if (Value *W = simplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse - 1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
@@ -841,22 +846,21 @@
       match(Op1, m_Trunc(m_Value(Y))))
     if (X->getType() == Y->getType())
       // See if "V === X - Y" simplifies.
-      if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+      if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1))
         // It does!  Now see if "trunc V" simplifies.
-        if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
+        if (Value *W = simplifyCastInst(Instruction::Trunc, V, Op0->getType(),
                                         Q, MaxRecurse - 1))
           // It does, return the simplified "trunc V".
           return W;
 
   // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
-  if (match(Op0, m_PtrToInt(m_Value(X))) &&
-      match(Op1, m_PtrToInt(m_Value(Y))))
+  if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y))))
     if (Constant *Result = computePointerDifference(Q.DL, X, Y))
       return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
 
   // i1 sub -> xor.
   if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+    if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1))
       return V;
 
   // Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -871,14 +875,14 @@
   return nullptr;
 }
 
-Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *llvm::simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const SimplifyQuery &Q) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+  return ::simplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
 }
 
 /// Given operands for a Mul, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                               unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
     return C;
@@ -906,12 +910,12 @@
 
   // i1 mul -> and.
   if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
-    if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
+    if (Value *V = simplifyAndInst(Op0, Op1, Q, MaxRecurse - 1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
-                                          MaxRecurse))
+  if (Value *V =
+          simplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // Mul distributes over Add. Try some generic simplifications based on this.
@@ -922,22 +926,22 @@
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
-                                         MaxRecurse))
+    if (Value *V =
+            threadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
-                                      MaxRecurse))
+    if (Value *V =
+            threadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
       return V;
 
   return nullptr;
 }
 
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyMulInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Check for common or similar folds of integer division or integer remainder.
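 /// For instance, X / X folds to 1 and X % X folds to 0; a zero divisor would
 /// be immediate UB, so the X == 0 case need not be preserved.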
@@ -1026,7 +1030,7 @@
 /// when we can prove a relationship between the operands.
 static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
                        const SimplifyQuery &Q, unsigned MaxRecurse) {
-  Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+  Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
   Constant *C = dyn_cast_or_null<Constant>(V);
   return (C && C->isAllOnesValue());
 }
@@ -1122,13 +1126,13 @@
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
@@ -1164,13 +1168,13 @@
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If X / Y == 0, then X % Y == X.
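   // (This follows from X == (X / Y) * Y + (X % Y): a zero quotient leaves
   // X % Y equal to X.)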
@@ -1182,7 +1186,7 @@
 
 /// Given operands for an SDiv, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                unsigned MaxRecurse) {
   // If two operands are negated and no signed overflow, return -1.
   if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true))
@@ -1191,24 +1195,24 @@
   return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
 }
 
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifySDivInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Given operands for a UDiv, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                unsigned MaxRecurse) {
   return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse);
 }
 
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyUDivInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Given operands for an SRem, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                unsigned MaxRecurse) {
   // If the divisor is 0, the result is undefined, so assume the divisor is -1.
   // srem Op0, (sext i1 X) --> srem Op0, -1 --> 0
@@ -1223,19 +1227,19 @@
   return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse);
 }
 
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifySRemInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Given operands for a URem, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                unsigned MaxRecurse) {
   return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse);
 }
 
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyURemInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Returns true if a shift by \c Amount always yields poison.
@@ -1268,7 +1272,7 @@
 
 /// Given operands for an Shl, LShr or AShr, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
+static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
                             Value *Op1, bool IsNSW, const SimplifyQuery &Q,
                             unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
@@ -1297,13 +1301,13 @@
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If any bits in the shift amount make that value greater than or equal to
@@ -1338,11 +1342,11 @@
 
 /// Given operands for an Shl, LShr or AShr, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
-                                 Value *Op1, bool isExact, const SimplifyQuery &Q,
-                                 unsigned MaxRecurse) {
+static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
+                                 Value *Op1, bool isExact,
+                                 const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (Value *V =
-          SimplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse))
+          simplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse))
     return V;
 
   // X >> X -> 0
@@ -1356,7 +1360,8 @@
 
   // The low bit cannot be shifted out of an exact shift if it is set.
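   // (Under 'exact', shifting a set low bit out would yield poison; the only
   // well-defined execution is a shift amount of zero, which returns Op0.)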
   if (isExact) {
-    KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+    KnownBits Op0Known =
+        computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
     if (Op0Known.One[0])
       return Op0;
   }
@@ -1366,10 +1371,10 @@
 
 /// Given operands for an Shl, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+static Value *simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                               const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (Value *V =
-          SimplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse))
+          simplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse))
     return V;
 
   // undef << X -> 0
@@ -1392,18 +1397,18 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *llvm::simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const SimplifyQuery &Q) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+  return ::simplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
 }
 
 /// Given operands for an LShr, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+static Value *simplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                                const SimplifyQuery &Q, unsigned MaxRecurse) {
-  if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
+  if (Value *V = simplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
                                     MaxRecurse))
-      return V;
+    return V;
 
   // (X << A) >> A -> X
   Value *X;
@@ -1429,16 +1434,16 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *llvm::simplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                               const SimplifyQuery &Q) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+  return ::simplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
 }
 
 /// Given operands for an AShr, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+static Value *simplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                                const SimplifyQuery &Q, unsigned MaxRecurse) {
-  if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
+  if (Value *V = simplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
                                     MaxRecurse))
     return V;
 
@@ -1462,9 +1467,9 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *llvm::simplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                               const SimplifyQuery &Q) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+  return ::simplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
 }
 
 /// Commuted variants are assumed to be handled by calling this function again
@@ -1581,17 +1586,11 @@
 /// with the parameters swapped.
 static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
   ICmpInst::Predicate Pred0, Pred1;
-  Value *A ,*B;
+  Value *A, *B;
   if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
       !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
     return nullptr;
 
-  // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
-  // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
-  // can eliminate Op1 from this 'and'.
-  if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
-    return Op0;
-
   // Check for any combination of predicates that are guaranteed to be disjoint.
   if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
       (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
@@ -1606,17 +1605,11 @@
 /// with the parameters swapped.
 static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
   ICmpInst::Predicate Pred0, Pred1;
-  Value *A ,*B;
+  Value *A, *B;
   if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
       !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
     return nullptr;
 
-  // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
-  // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
-  // can eliminate Op0 from this 'or'.
-  if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
-    return Op1;
-
   // Check for any combination of predicates that cover the entire range of
   // possibilities.
   if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
@@ -1812,6 +1805,27 @@
   return nullptr;
 }
 
+/// Try to simplify and/or of icmp with ctpop intrinsic.
+static Value *simplifyAndOrOfICmpsWithCtpop(ICmpInst *Cmp0, ICmpInst *Cmp1,
+                                            bool IsAnd) {
+  ICmpInst::Predicate Pred0, Pred1;
+  Value *X;
+  const APInt *C;
+  if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
+                          m_APInt(C))) ||
+      !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())) || C->isZero())
+    return nullptr;
+
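+  // Key fact for both folds: X == 0 implies ctpop(X) == 0, and C was checked
+  // to be non-zero above, so the ctpop comparison adds nothing beyond
+  // comparing X against zero.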
+  // (ctpop(X) == C) || (X != 0) --> X != 0 where C > 0
+  if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_NE)
+    return Cmp1;
+  // (ctpop(X) != C) && (X == 0) --> X == 0 where C > 0
+  if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_EQ)
+    return Cmp1;
+
+  return nullptr;
+}
+
 static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
                                  const SimplifyQuery &Q) {
   if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
@@ -1833,6 +1847,11 @@
   if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
     return X;
 
+  if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, true))
+    return X;
+  if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, true))
+    return X;
+
   if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
     return X;
   if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
@@ -1909,6 +1928,11 @@
   if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
     return X;
 
+  if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, false))
+    return X;
+  if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, false))
+    return X;
+
   if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
     return X;
   if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
@@ -1917,8 +1941,8 @@
   return nullptr;
 }
 
-static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
-                                   FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS,
+                                   FCmpInst *RHS, bool IsAnd) {
   Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
   Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
   if (LHS0->getType() != RHS0->getType())
@@ -1955,8 +1979,8 @@
   return nullptr;
 }
 
-static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
-                                  Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
+                                  Value *Op1, bool IsAnd) {
   // Look through casts of the 'and' operands to find compares.
   auto *Cast0 = dyn_cast<CastInst>(Op0);
   auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -2017,7 +2041,7 @@
 
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                               unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
     return C;
@@ -2043,8 +2067,7 @@
     return Op0;
 
   // A & ~A  =  ~A & A  =  0
-  if (match(Op0, m_Not(m_Specific(Op1))) ||
-      match(Op1, m_Not(m_Specific(Op0))))
+  if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
     return Constant::getNullValue(Op0->getType());
 
   // (A | ?) & A = A
@@ -2117,8 +2140,8 @@
     return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
-                                          MaxRecurse))
+  if (Value *V =
+          simplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // And distributes over Or.  Try some generic simplifications based on this.
@@ -2142,16 +2165,16 @@
     // If the operation is with the result of a select instruction, check
     // whether operating on either branch of the select always yields the same
     // value.
-    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
-                                         MaxRecurse))
+    if (Value *V =
+            threadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse))
       return V;
   }
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
-                                      MaxRecurse))
+    if (Value *V =
+            threadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // Assuming the effective width of Y is not larger than A, i.e. all bits
@@ -2174,8 +2197,7 @@
     const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
     const unsigned EffWidthY = YKnown.countMaxActiveBits();
     if (EffWidthY <= ShftCnt) {
-      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
-                                                Q.DT);
+      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
       const unsigned EffWidthX = XKnown.countMaxActiveBits();
       const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
       const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
@@ -2197,11 +2219,20 @@
       match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y))))
     return Constant::getNullValue(Op0->getType());
 
+  if (Op0->getType()->isIntOrIntVectorTy(1)) {
+    // Op0&Op1 -> Op0 where Op0 implies Op1
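+    //   e.g. (icmp ult X, 4) & (icmp ult X, 8) --> icmp ult X, 4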
+    if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false))
+      return Op0;
+    // Op0&Op1 -> Op1 where Op1 implies Op0
+    if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false))
+      return Op1;
+  }
+
   return nullptr;
 }
 
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyAndInst(Op0, Op1, Q, RecursionLimit);
 }
 
 static Value *simplifyOrLogic(Value *X, Value *Y) {
@@ -2289,7 +2320,7 @@
 
 /// Given operands for an Or, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                              unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
     return C;
@@ -2334,6 +2365,31 @@
     }
   }
 
+  // A funnel shift (rotate) can be decomposed into simpler shifts. See if we
+  // are mixing in another shift that is redundant with the funnel shift.
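+  // Informally, fshl(X, W, Y) == (X << Y) | (W >> (BitWidth - Y)) with the
+  // shift amount taken modulo the bit width, so every bit produced by
+  // (shl X, Y) is already present in the fshl result; the fshr/lshr pair
+  // below is the mirror image.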
+
+  // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+  // (shl X, Y) | (fshl X, ?, Y) --> fshl X, ?, Y
+  if (match(Op0,
+            m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) &&
+      match(Op1, m_Shl(m_Specific(X), m_Specific(Y))))
+    return Op0;
+  if (match(Op1,
+            m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) &&
+      match(Op0, m_Shl(m_Specific(X), m_Specific(Y))))
+    return Op1;
+
+  // (fshr ?, X, Y) | (lshr X, Y) --> fshr ?, X, Y
+  // (lshr X, Y) | (fshr ?, X, Y) --> fshr ?, X, Y
+  if (match(Op0,
+            m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) &&
+      match(Op1, m_LShr(m_Specific(X), m_Specific(Y))))
+    return Op0;
+  if (match(Op1,
+            m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) &&
+      match(Op0, m_LShr(m_Specific(X), m_Specific(Y))))
+    return Op1;
+
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
     return V;
 
@@ -2346,8 +2402,8 @@
     return Op0;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
-                                          MaxRecurse))
+  if (Value *V =
+          simplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // Or distributes over And.  Try some generic simplifications based on this.
@@ -2366,8 +2422,8 @@
     // If the operation is with the result of a select instruction, check
     // whether operating on either branch of the select always yields the same
     // value.
-    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
-                                         MaxRecurse))
+    if (Value *V =
+            threadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse))
       return V;
   }
 
@@ -2389,8 +2445,7 @@
           return A;
       }
       // Or commutes, try both ways.
-      if (C1->isMask() &&
-          match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
+      if (C1->isMask() && match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
         // Add commutes, try both ways.
         if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
           return B;
@@ -2401,19 +2456,28 @@
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+    if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
       return V;
 
+  if (Op0->getType()->isIntOrIntVectorTy(1)) {
+    // Op0|Op1 -> Op1 where Op0 implies Op1
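+    //   e.g. (icmp ult X, 4) | (icmp ult X, 8) --> icmp ult X, 8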
+    if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false))
+      return Op1;
+    // Op0|Op1 -> Op0 where Op1 implies Op0
+    if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false))
+      return Op0;
+  }
+
   return nullptr;
 }
 
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyOrInst(Op0, Op1, Q, RecursionLimit);
 }
 
 /// Given operands for a Xor, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                               unsigned MaxRecurse) {
   if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
     return C;
@@ -2435,8 +2499,7 @@
     return Constant::getNullValue(Op0->getType());
 
   // A ^ ~A  =  ~A ^ A  =  -1
-  if (match(Op0, m_Not(m_Specific(Op1))) ||
-      match(Op1, m_Not(m_Specific(Op0))))
+  if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
     return Constant::getAllOnesValue(Op0->getType());
 
   auto foldAndOrNot = [](Value *X, Value *Y) -> Value * {
@@ -2467,8 +2530,8 @@
     return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
-                                          MaxRecurse))
+  if (Value *V =
+          simplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -2483,19 +2546,18 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
-  return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+  return ::simplifyXorInst(Op0, Op1, Q, RecursionLimit);
 }
 
-
-static Type *GetCompareTy(Value *Op) {
+static Type *getCompareTy(Value *Op) {
   return CmpInst::makeCmpResultType(Op->getType());
 }
 
 /// Rummage around inside V looking for something equivalent to the comparison
 /// "LHS Pred RHS". Return such a value if found, otherwise return null.
 /// Helper function for analyzing max/min idioms.
-static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+static Value *extractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
                                          Value *LHS, Value *RHS) {
   SelectInst *SI = dyn_cast<SelectInst>(V);
   if (!SI)
@@ -2512,6 +2574,70 @@
   return nullptr;
 }
 
+/// Return true if the underlying object (storage) must be disjoint from
+/// storage returned by any noalias return call.
+static bool isAllocDisjoint(const Value *V) {
+  // For allocas, we consider only static ones (dynamic
+  // allocas might be transformed into calls to malloc that are not
+  // simultaneously live with the compared-to allocation). For globals, we
+  // exclude symbols that might be resolved lazily to symbols in another
+  // dynamically-loaded library (and, thus, could be malloc'ed by the
+  // implementation).
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+    return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+    return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
+            GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
+           !GV->isThreadLocal();
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasByValAttr();
+  return false;
+}
+
+/// Return true if V1 and V2 are each the base of some distinct storage region
+/// [V, object_size(V)], and those regions do not overlap.  Note that
+/// zero-sized regions *are* possible, and that a zero-sized region does not
+/// overlap with any other.
+static bool haveNonOverlappingStorage(const Value *V1, const Value *V2) {
+  // Global variables always exist, so they always exist during the lifetime
+  // of each other and all allocas.  Global variables themselves usually have
+  // non-overlapping storage, but since their addresses are constants, the
+  // case involving two globals does not reach here and is instead handled in
+  // constant folding.
+  //
+  // Two different allocas usually have different addresses...
+  //
+  // However, if there's an @llvm.stackrestore dynamically in between two
+  // allocas, they may have the same address. It's tempting to reduce the
+  // scope of the problem by only looking at *static* allocas here. That would
+  // cover the majority of allocas while significantly reducing the likelihood
+  // of having an @llvm.stackrestore pop up in the middle. However, it's not
+  // actually impossible for an @llvm.stackrestore to pop up in the middle of
+  // an entry block. Also, if we have a block that's not attached to a
+  // function, we can't tell if it's "static" under the current definition.
+  // Theoretically, this problem could be fixed by creating a new kind of
+  // instruction kind specifically for static allocas. Such a new instruction
+  // could be required to be at the top of the entry block, thus preventing it
+  // from being subject to a @llvm.stackrestore. Instcombine could even
+  // convert regular allocas into these special allocas. It'd be nifty.
+  // However, until then, this problem remains open.
+  //
+  // So, we'll assume that two non-empty allocas have different addresses
+  // for now.
+  auto isByValArg = [](const Value *V) {
+    const Argument *A = dyn_cast<Argument>(V);
+    return A && A->hasByValAttr();
+  };
+
+  // Byval args are backed by storage that does not overlap with other byval
+  // args, allocas, or globals.
+  if (isByValArg(V1))
+    return isa<AllocaInst>(V2) || isa<GlobalVariable>(V2) || isByValArg(V2);
+  if (isByValArg(V2))
+    return isa<AllocaInst>(V1) || isa<GlobalVariable>(V1) || isByValArg(V1);
+
+  return isa<AllocaInst>(V1) &&
+         (isa<AllocaInst>(V2) || isa<GlobalVariable>(V2));
+}
+
 // A significant optimization not implemented here is assuming that alloca
 // addresses are not equal to incoming argument values. They don't *alias*,
 // as we say, but that doesn't mean they aren't equal, so we take a
@@ -2540,9 +2666,8 @@
 // If the C and C++ standards are ever made sufficiently restrictive in this
 // area, it may be possible to update LLVM's semantics accordingly and reinstate
 // this optimization.
-static Constant *
-computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
-                   const SimplifyQuery &Q) {
+static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
+                                    Value *RHS, const SimplifyQuery &Q) {
   const DataLayout &DL = Q.DL;
   const TargetLibraryInfo *TLI = Q.TLI;
   const DominatorTree *DT = Q.DT;
@@ -2557,8 +2682,7 @@
   if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) &&
       llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
                            IIQ.UseInstrInfo))
-    return ConstantInt::get(GetCompareTy(LHS),
-                            !CmpInst::isTrueWhenEqual(Pred));
+    return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred));
 
   // We can only fold certain predicates on pointer comparisons.
   switch (Pred) {
@@ -2592,77 +2716,41 @@
   // Even if an non-inbounds GEP occurs along the path we can still optimize
   // equality comparisons concerning the result.
   bool AllowNonInbounds = ICmpInst::isEquality(Pred);
-  Constant *LHSOffset =
-      stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds);
-  Constant *RHSOffset =
-      stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds);
+  APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds);
+  APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds);
 
   // If LHS and RHS are related via constant offsets to the same base
   // value, we can replace it with an icmp which just compares the offsets.
   if (LHS == RHS)
-    return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+    return ConstantInt::get(getCompareTy(LHS),
+                            ICmpInst::compare(LHSOffset, RHSOffset, Pred));
 
   // Various optimizations for (in)equality comparisons.
   if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
     // Different non-empty allocations that exist at the same time have
-    // different addresses (if the program can tell). Global variables always
-    // exist, so they always exist during the lifetime of each other and all
-    // allocas. Two different allocas usually have different addresses...
-    //
-    // However, if there's an @llvm.stackrestore dynamically in between two
-    // allocas, they may have the same address. It's tempting to reduce the
-    // scope of the problem by only looking at *static* allocas here. That would
-    // cover the majority of allocas while significantly reducing the likelihood
-    // of having an @llvm.stackrestore pop up in the middle. However, it's not
-    // actually impossible for an @llvm.stackrestore to pop up in the middle of
-    // an entry block. Also, if we have a block that's not attached to a
-    // function, we can't tell if it's "static" under the current definition.
-    // Theoretically, this problem could be fixed by creating a new kind of
-    // instruction kind specifically for static allocas. Such a new instruction
-    // could be required to be at the top of the entry block, thus preventing it
-    // from being subject to a @llvm.stackrestore. Instcombine could even
-    // convert regular allocas into these special allocas. It'd be nifty.
-    // However, until then, this problem remains open.
-    //
-    // So, we'll assume that two non-empty allocas have different addresses
-    // for now.
-    //
-    // With all that, if the offsets are within the bounds of their allocations
-    // (and not one-past-the-end! so we can't use inbounds!), and their
-    // allocations aren't the same, the pointers are not equal.
-    //
-    // Note that it's not necessary to check for LHS being a global variable
-    // address, due to canonicalization and constant folding.
-    if (isa<AllocaInst>(LHS) &&
-        (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
-      ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
-      ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+    // different addresses (if the program can tell). If the offsets are
+    // within the bounds of their allocations (and not one-past-the-end!
+    // so we can't use inbounds!), and their allocations aren't the same,
+    // the pointers are not equal.
+    if (haveNonOverlappingStorage(LHS, RHS)) {
       uint64_t LHSSize, RHSSize;
       ObjectSizeOpts Opts;
-      Opts.NullIsUnknownSize =
-          NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction());
-      if (LHSOffsetCI && RHSOffsetCI &&
-          getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
-          getObjectSize(RHS, RHSSize, DL, TLI, Opts)) {
-        const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
-        const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
-        if (!LHSOffsetValue.isNegative() &&
-            !RHSOffsetValue.isNegative() &&
-            LHSOffsetValue.ult(LHSSize) &&
-            RHSOffsetValue.ult(RHSSize)) {
-          return ConstantInt::get(GetCompareTy(LHS),
-                                  !CmpInst::isTrueWhenEqual(Pred));
-        }
-      }
-
-      // Repeat the above check but this time without depending on DataLayout
-      // or being able to compute a precise size.
-      if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
-          !cast<PointerType>(RHS->getType())->isEmptyTy() &&
-          LHSOffset->isNullValue() &&
-          RHSOffset->isNullValue())
-        return ConstantInt::get(GetCompareTy(LHS),
+      Opts.EvalMode = ObjectSizeOpts::Mode::Min;
+      auto *F = [](Value *V) -> Function * {
+        if (auto *I = dyn_cast<Instruction>(V))
+          return I->getFunction();
+        if (auto *A = dyn_cast<Argument>(V))
+          return A->getParent();
+        return nullptr;
+      }(LHS);
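+      // Whether a null base pointer has unknown size depends on whether null
+      // is a defined address in the enclosing function; with no function to
+      // query, conservatively treat null as having unknown size.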
+      Opts.NullIsUnknownSize = F ? NullPointerIsDefined(F) : true;
+      if (getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
+          getObjectSize(RHS, RHSSize, DL, TLI, Opts) &&
+          !LHSOffset.isNegative() && !RHSOffset.isNegative() &&
+          LHSOffset.ult(LHSSize) && RHSOffset.ult(RHSSize)) {
+        return ConstantInt::get(getCompareTy(LHS),
                                 !CmpInst::isTrueWhenEqual(Pred));
+      }
     }
 
     // If one side of the equality comparison must come from a noalias call
@@ -2680,29 +2768,16 @@
     };
 
     // Is the set of underlying objects all things which must be disjoint from
-    // noalias calls. For allocas, we consider only static ones (dynamic
-    // allocas might be transformed into calls to malloc not simultaneously
-    // live with the compared-to allocation). For globals, we exclude symbols
-    // that might be resolve lazily to symbols in another dynamically-loaded
-    // library (and, thus, could be malloc'ed by the implementation).
+    // noalias calls.  We assume that indexing from such disjoint storage
+    // into the heap is undefined, and thus offsets can be safely ignored.
     auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
-      return all_of(Objects, [](const Value *V) {
-        if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
-          return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
-        if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
-          return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
-                  GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
-                 !GV->isThreadLocal();
-        if (const Argument *A = dyn_cast<Argument>(V))
-          return A->hasByValAttr();
-        return false;
-      });
+      return all_of(Objects, ::isAllocDisjoint);
     };
 
     if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
         (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
-        return ConstantInt::get(GetCompareTy(LHS),
-                                !CmpInst::isTrueWhenEqual(Pred));
+      return ConstantInt::get(getCompareTy(LHS),
+                              !CmpInst::isTrueWhenEqual(Pred));
 
     // Fold comparisons for non-escaping pointer even if the allocation call
     // cannot be elided. We cannot fold malloc comparison to null. Also, the
@@ -2719,7 +2794,7 @@
     // FIXME: We should also fold the compare when the pointer escapes, but the
     // compare dominates the pointer escape
     if (MI && !PointerMayBeCaptured(MI, true, true))
-      return ConstantInt::get(GetCompareTy(LHS),
+      return ConstantInt::get(getCompareTy(LHS),
                               CmpInst::isFalseWhenEqual(Pred));
   }
 
@@ -2730,7 +2805,7 @@
 /// Fold an icmp when its operands have i1 scalar type.
 static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
                                   Value *RHS, const SimplifyQuery &Q) {
-  Type *ITy = GetCompareTy(LHS); // The return type.
+  Type *ITy = getCompareTy(LHS); // The return type.
   Type *OpTy = LHS->getType();   // The operand type.
   if (!OpTy->isIntOrIntVectorTy(1))
     return nullptr;
@@ -2768,7 +2843,8 @@
     case CmpInst::ICMP_SLE: // X <=s 0 -> true
       return getTrue(ITy);
 
-    default: break;
+    default:
+      break;
     }
   } else if (match(RHS, m_One())) {
     switch (Pred) {
@@ -2792,7 +2868,8 @@
     case CmpInst::ICMP_SGE: // X >=s -1 -> true
       return getTrue(ITy);
 
-    default: break;
+    default:
+      break;
     }
   }
 
@@ -2800,7 +2877,7 @@
   default:
     break;
   case ICmpInst::ICMP_UGE:
-    if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
+    if (isImpliedCondition(RHS, LHS, Q.DL).value_or(false))
       return getTrue(ITy);
     break;
   case ICmpInst::ICMP_SGE:
@@ -2811,11 +2888,11 @@
     ///  0  |  1  |  1 (0 >= -1)  |  1
     ///  1  |  0  |  0 (-1 >= 0)  |  0
     ///  1  |  1  |  1 (-1 >= -1) |  1
-    if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+    if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false))
       return getTrue(ITy);
     break;
   case ICmpInst::ICMP_ULE:
-    if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+    if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false))
       return getTrue(ITy);
     break;
   }
@@ -2829,7 +2906,7 @@
   if (!match(RHS, m_Zero()))
     return nullptr;
 
-  Type *ITy = GetCompareTy(LHS); // The return type.
+  Type *ITy = getCompareTy(LHS); // The return type.
   switch (Pred) {
   default:
     llvm_unreachable("Unknown ICmp predicate!");
@@ -2888,7 +2965,7 @@
 
 static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
                                        Value *RHS, const InstrInfoQuery &IIQ) {
-  Type *ITy = GetCompareTy(RHS); // The return type.
+  Type *ITy = getCompareTy(RHS); // The return type.
 
   Value *X;
   // Sign-bit checks can be optimized to true/false after unsigned
@@ -2935,10 +3012,11 @@
   return nullptr;
 }
 
-static Value *simplifyICmpWithBinOpOnLHS(
-    CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS,
-    const SimplifyQuery &Q, unsigned MaxRecurse) {
-  Type *ITy = GetCompareTy(RHS); // The return type.
+static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
+                                         BinaryOperator *LBO, Value *RHS,
+                                         const SimplifyQuery &Q,
+                                         unsigned MaxRecurse) {
+  Type *ITy = getCompareTy(RHS); // The return type.
 
   Value *Y = nullptr;
   // icmp pred (or X, Y), X
@@ -3073,7 +3151,6 @@
   return nullptr;
 }
 
-
 // If only one of the icmp's operands has NSW flags, try to prove that:
 //
 //   icmp slt (x + C1), (x +nsw C2)
@@ -3108,7 +3185,6 @@
          (C2->slt(*C1) && C1->isNonPositive());
 }
 
-
 /// TODO: A large part of this logic is duplicated in InstCombine's
 /// foldICmpBinOp(). We should be able to share that and avoid the code
 /// duplication.
@@ -3145,7 +3221,7 @@
 
     // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
     if ((A == RHS || B == RHS) && NoLHSWrapProblem)
-      if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+      if (Value *V = simplifyICmpInst(Pred, A == RHS ? B : A,
                                       Constant::getNullValue(RHS->getType()), Q,
                                       MaxRecurse - 1))
         return V;
@@ -3153,7 +3229,7 @@
     // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
     if ((C == LHS || D == LHS) && NoRHSWrapProblem)
       if (Value *V =
-              SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
+              simplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
                                C == LHS ? D : C, Q, MaxRecurse - 1))
         return V;
 
@@ -3181,7 +3257,7 @@
         Y = A;
         Z = C;
       }
-      if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
+      if (Value *V = simplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
         return V;
     }
   }
@@ -3201,15 +3277,15 @@
     if (match(RHS, m_APInt(C))) {
       if (C->isStrictlyPositive()) {
         if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE)
-          return ConstantInt::getTrue(GetCompareTy(RHS));
+          return ConstantInt::getTrue(getCompareTy(RHS));
         if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ)
-          return ConstantInt::getFalse(GetCompareTy(RHS));
+          return ConstantInt::getFalse(getCompareTy(RHS));
       }
       if (C->isNonNegative()) {
         if (Pred == ICmpInst::ICMP_SLE)
-          return ConstantInt::getTrue(GetCompareTy(RHS));
+          return ConstantInt::getTrue(getCompareTy(RHS));
         if (Pred == ICmpInst::ICMP_SGT)
-          return ConstantInt::getFalse(GetCompareTy(RHS));
+          return ConstantInt::getFalse(getCompareTy(RHS));
       }
     }
   }
@@ -3232,9 +3308,9 @@
         Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
         match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) {
       if (Pred == ICmpInst::ICMP_EQ)
-        return ConstantInt::getFalse(GetCompareTy(RHS));
+        return ConstantInt::getFalse(getCompareTy(RHS));
       if (Pred == ICmpInst::ICMP_NE)
-        return ConstantInt::getTrue(GetCompareTy(RHS));
+        return ConstantInt::getTrue(getCompareTy(RHS));
     }
   }
 
@@ -3243,9 +3319,9 @@
   // (1 << X) <=u 0x8000 --> true
   if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) {
     if (Pred == ICmpInst::ICMP_UGT)
-      return ConstantInt::getFalse(GetCompareTy(RHS));
+      return ConstantInt::getFalse(getCompareTy(RHS));
     if (Pred == ICmpInst::ICMP_ULE)
-      return ConstantInt::getTrue(GetCompareTy(RHS));
+      return ConstantInt::getTrue(getCompareTy(RHS));
   }
 
   if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
@@ -3258,22 +3334,22 @@
       if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
           !Q.IIQ.isExact(RBO))
         break;
-      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+      if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
                                       RBO->getOperand(0), Q, MaxRecurse - 1))
-          return V;
+        return V;
       break;
     case Instruction::SDiv:
       if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
           !Q.IIQ.isExact(RBO))
         break;
-      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+      if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
                                       RBO->getOperand(0), Q, MaxRecurse - 1))
         return V;
       break;
     case Instruction::AShr:
       if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
         break;
-      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+      if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
                                       RBO->getOperand(0), Q, MaxRecurse - 1))
         return V;
       break;
@@ -3284,7 +3360,7 @@
         break;
       if (!NSW && ICmpInst::isSigned(Pred))
         break;
-      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+      if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
                                       RBO->getOperand(0), Q, MaxRecurse - 1))
         return V;
       break;
@@ -3294,12 +3370,12 @@
   return nullptr;
 }
 
-/// Simplify integer comparisons where at least one operand of the compare
+/// simplify integer comparisons where at least one operand of the compare
 /// matches an integer min/max idiom.
 static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
                                      Value *RHS, const SimplifyQuery &Q,
                                      unsigned MaxRecurse) {
-  Type *ITy = GetCompareTy(LHS); // The return type.
+  Type *ITy = getCompareTy(LHS); // The return type.
   Value *A, *B;
   CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
   CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
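
The EqP correspondence can be spot-checked with std::min/std::max standing in for the smin/smax idiom (a sketch of the property, not the LLVM matcher):

    #include <algorithm>
    #include <cassert>

    int main() {
      // "A == smax(A, B)" holds exactly when "A sge B", and dually
      // "A == smin(A, B)" holds exactly when "A sle B".
      for (int a = -2; a <= 2; ++a)
        for (int b = -2; b <= 2; ++b) {
          assert((a == std::max(a, b)) == (a >= b));
          assert((a == std::min(a, b)) == (a <= b));
        }
      return 0;
    }
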
@@ -3344,13 +3420,13 @@
     case CmpInst::ICMP_SLE:
       // Equivalent to "A EqP B".  This may be the same as the condition tested
       // in the max/min; if so, we can just return that.
-      if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+      if (Value *V = extractEquivalentCondition(LHS, EqP, A, B))
         return V;
-      if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+      if (Value *V = extractEquivalentCondition(RHS, EqP, A, B))
         return V;
       // Otherwise, see if "A EqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
+        if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
           return V;
       break;
     case CmpInst::ICMP_NE:
@@ -3358,13 +3434,13 @@
       CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
       // Equivalent to "A InvEqP B".  This may be the same as the condition
       // tested in the max/min; if so, we can just return that.
-      if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+      if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B))
         return V;
-      if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+      if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B))
         return V;
       // Otherwise, see if "A InvEqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
+        if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
           return V;
       break;
     }
@@ -3418,13 +3494,13 @@
     case CmpInst::ICMP_ULE:
       // Equivalent to "A EqP B".  This may be the same as the condition tested
       // in the max/min; if so, we can just return that.
-      if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+      if (Value *V = extractEquivalentCondition(LHS, EqP, A, B))
         return V;
-      if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+      if (Value *V = extractEquivalentCondition(RHS, EqP, A, B))
         return V;
       // Otherwise, see if "A EqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
+        if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
           return V;
       break;
     case CmpInst::ICMP_NE:
@@ -3432,13 +3508,13 @@
       CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
       // Equivalent to "A InvEqP B".  This may be the same as the condition
       // tested in the max/min; if so, we can just return that.
-      if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+      if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B))
         return V;
-      if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+      if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B))
         return V;
       // Otherwise, see if "A InvEqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
+        if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
           return V;
       break;
     }
@@ -3494,11 +3570,10 @@
         continue;
 
       CallInst *Assume = cast<CallInst>(AssumeVH);
-      if (Optional<bool> Imp =
-              isImpliedCondition(Assume->getArgOperand(0), Predicate, LHS, RHS,
-                                 Q.DL))
+      if (Optional<bool> Imp = isImpliedCondition(Assume->getArgOperand(0),
+                                                  Predicate, LHS, RHS, Q.DL))
         if (isValidAssumeForContext(Assume, Q.CxtI, Q.DT))
-          return ConstantInt::get(GetCompareTy(LHS), *Imp);
+          return ConstantInt::get(getCompareTy(LHS), *Imp);
     }
   }
 
@@ -3507,7 +3582,7 @@
 
 /// Given operands for an ICmpInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                                const SimplifyQuery &Q, unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
   assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
@@ -3522,7 +3597,7 @@
   }
   assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
 
-  Type *ITy = GetCompareTy(LHS); // The return type.
+  Type *ITy = getCompareTy(LHS); // The return type.
 
   // icmp poison, X -> poison
   if (isa<PoisonValue>(RHS))
@@ -3584,15 +3659,15 @@
         Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
       if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
         // Transfer the cast to the constant.
-        if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+        if (Value *V = simplifyICmpInst(Pred, SrcOp,
                                         ConstantExpr::getIntToPtr(RHSC, SrcTy),
-                                        Q, MaxRecurse-1))
+                                        Q, MaxRecurse - 1))
           return V;
       } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
         if (RI->getOperand(0)->getType() == SrcTy)
           // Compare without the cast.
-          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
-                                          Q, MaxRecurse-1))
+          if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q,
+                                          MaxRecurse - 1))
             return V;
       }
     }
@@ -3603,9 +3678,9 @@
       if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
         if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
           // Compare X and Y.  Note that signed predicates become unsigned.
-          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
-                                          SrcOp, RI->getOperand(0), Q,
-                                          MaxRecurse-1))
+          if (Value *V =
+                  simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), SrcOp,
+                                   RI->getOperand(0), Q, MaxRecurse - 1))
             return V;
       }
       // Fold (zext X) ule (sext X), (zext X) sge (sext X) to true.
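
The zext-versus-sext folds admit an exhaustive i8 check (illustrative C++ only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For every i8 value X, (zext X) ule (sext X) and (zext X) sge (sext X).
      for (int v = -128; v <= 127; ++v) {
        int8_t x = static_cast<int8_t>(v);
        uint32_t zext = static_cast<uint8_t>(x);       // zero-extended bits
        int32_t sextS = x;                             // sign-extended value
        uint32_t sextU = static_cast<uint32_t>(sextS); // same bits, unsigned
        assert(zext <= sextU);                         // ule -> true
        assert(static_cast<int32_t>(zext) >= sextS);   // sge -> true
      }
      return 0;
    }
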
@@ -3628,15 +3703,16 @@
         // If the re-extended constant didn't change then this is effectively
         // also a case of comparing two zero-extended values.
         if (RExt == CI && MaxRecurse)
-          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
-                                        SrcOp, Trunc, Q, MaxRecurse-1))
+          if (Value *V = simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+                                          SrcOp, Trunc, Q, MaxRecurse - 1))
             return V;
 
         // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
         // there.  Use this to work out the result of the comparison.
         if (RExt != CI) {
           switch (Pred) {
-          default: llvm_unreachable("Unknown ICmp predicate!");
+          default:
+            llvm_unreachable("Unknown ICmp predicate!");
           // LHS <u RHS.
           case ICmpInst::ICMP_EQ:
           case ICmpInst::ICMP_UGT:
@@ -3652,15 +3728,15 @@
           // is non-negative then LHS <s RHS.
           case ICmpInst::ICMP_SGT:
           case ICmpInst::ICMP_SGE:
-            return CI->getValue().isNegative() ?
-              ConstantInt::getTrue(CI->getContext()) :
-              ConstantInt::getFalse(CI->getContext());
+            return CI->getValue().isNegative()
+                       ? ConstantInt::getTrue(CI->getContext())
+                       : ConstantInt::getFalse(CI->getContext());
 
           case ICmpInst::ICMP_SLT:
           case ICmpInst::ICMP_SLE:
-            return CI->getValue().isNegative() ?
-              ConstantInt::getFalse(CI->getContext()) :
-              ConstantInt::getTrue(CI->getContext());
+            return CI->getValue().isNegative()
+                       ? ConstantInt::getFalse(CI->getContext())
+                       : ConstantInt::getTrue(CI->getContext());
           }
         }
       }
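
A concrete instance of the RExt != CI case, with the constant 0x180 against a zext from i8 (a hand-checked sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Truncating 0x180 to i8 gives 0x80; re-zero-extending yields 0x80,
      // which differs from 0x180, so the zext'd LHS (at most 0xFF) is always
      // unsigned-less than the constant.
      const uint32_t C = 0x180;
      for (uint32_t zext = 0; zext <= 0xFF; ++zext)
        assert(zext < C); // eq/ugt/uge fold to false; ne/ult/ule to true
      // 0x180 is non-negative as i32, so slt/sle fold to true and
      // sgt/sge to false, matching the switch above.
      return 0;
    }
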
@@ -3672,8 +3748,8 @@
       if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
         if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
           // Compare X and Y.  Note that the predicate does not change.
-          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
-                                          Q, MaxRecurse-1))
+          if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q,
+                                          MaxRecurse - 1))
             return V;
       }
       // Fold (sext X) uge (zext X), (sext X) sle (zext X) to true.
@@ -3696,14 +3772,16 @@
         // If the re-extended constant didn't change then this is effectively
         // also a case of comparing two sign-extended values.
         if (RExt == CI && MaxRecurse)
-          if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+          if (Value *V =
+                  simplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse - 1))
             return V;
 
         // Otherwise the upper bits of LHS are all equal, while RHS has varying
         // bits there.  Use this to work out the result of the comparison.
         if (RExt != CI) {
           switch (Pred) {
-          default: llvm_unreachable("Unknown ICmp predicate!");
+          default:
+            llvm_unreachable("Unknown ICmp predicate!");
           case ICmpInst::ICMP_EQ:
             return ConstantInt::getFalse(CI->getContext());
           case ICmpInst::ICMP_NE:
@@ -3713,14 +3791,14 @@
           // LHS >s RHS.
           case ICmpInst::ICMP_SGT:
           case ICmpInst::ICMP_SGE:
-            return CI->getValue().isNegative() ?
-              ConstantInt::getTrue(CI->getContext()) :
-              ConstantInt::getFalse(CI->getContext());
+            return CI->getValue().isNegative()
+                       ? ConstantInt::getTrue(CI->getContext())
+                       : ConstantInt::getFalse(CI->getContext());
           case ICmpInst::ICMP_SLT:
           case ICmpInst::ICMP_SLE:
-            return CI->getValue().isNegative() ?
-              ConstantInt::getFalse(CI->getContext()) :
-              ConstantInt::getTrue(CI->getContext());
+            return CI->getValue().isNegative()
+                       ? ConstantInt::getFalse(CI->getContext())
+                       : ConstantInt::getTrue(CI->getContext());
 
           // If LHS is non-negative then LHS <u RHS.  If LHS is negative then
           // LHS >u RHS.
@@ -3728,18 +3806,18 @@
           case ICmpInst::ICMP_UGE:
             // Comparison is true iff the LHS <s 0.
             if (MaxRecurse)
-              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
-                                              Constant::getNullValue(SrcTy),
-                                              Q, MaxRecurse-1))
+              if (Value *V = simplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+                                              Constant::getNullValue(SrcTy), Q,
+                                              MaxRecurse - 1))
                 return V;
             break;
           case ICmpInst::ICMP_ULT:
           case ICmpInst::ICMP_ULE:
             // Comparison is true iff the LHS >=s 0.
             if (MaxRecurse)
-              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
-                                              Constant::getNullValue(SrcTy),
-                                              Q, MaxRecurse-1))
+              if (Value *V = simplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+                                              Constant::getNullValue(SrcTy), Q,
+                                              MaxRecurse - 1))
                 return V;
             break;
           }
@@ -3783,26 +3861,26 @@
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
-    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+    if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   // If the comparison is with the result of a phi instruction, check whether
   // doing the compare with each incoming phi value yields a common result.
   if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
-    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+    if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   return nullptr;
 }
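
The select-threading idea in C++ terms (a sketch of the concept only, not the implementation):

    #include <cassert>

    // If comparing either arm of the select yields the same result, the
    // whole compare collapses to that result.
    static bool cmpOverSelect(bool cond) {
      int sel = cond ? 4 : 6; // select cond, 4, 6
      return sel > 0;         // 4 > 0 and 6 > 0, so this is always true
    }

    int main() {
      assert(cmpOverSelect(true) && cmpOverSelect(false));
      return 0;
    }
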
 
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const SimplifyQuery &Q) {
-  return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+  return ::simplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
 }
 
 /// Given operands for an FCmpInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                                FastMathFlags FMF, const SimplifyQuery &Q,
                                unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
@@ -3810,7 +3888,8 @@
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI,
+                                             Q.CxtI);
 
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
@@ -3818,7 +3897,7 @@
   }
 
   // Fold trivial predicates.
-  Type *RetTy = GetCompareTy(LHS);
+  Type *RetTy = getCompareTy(LHS);
   if (Pred == FCmpInst::FCMP_FALSE)
     return getFalse(RetTy);
   if (Pred == FCmpInst::FCMP_TRUE)
@@ -3938,23 +4017,29 @@
       // The ordered relationship and minnum/maxnum guarantee that we do not
       // have NaN constants, so ordered/unordered preds are handled the same.
       switch (Pred) {
-      case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
+      case FCmpInst::FCMP_OEQ:
+      case FCmpInst::FCMP_UEQ:
         // minnum(X, LesserC)  == C --> false
         // maxnum(X, GreaterC) == C --> false
         return getFalse(RetTy);
-      case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
+      case FCmpInst::FCMP_ONE:
+      case FCmpInst::FCMP_UNE:
         // minnum(X, LesserC)  != C --> true
         // maxnum(X, GreaterC) != C --> true
         return getTrue(RetTy);
-      case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
-      case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
+      case FCmpInst::FCMP_OGE:
+      case FCmpInst::FCMP_UGE:
+      case FCmpInst::FCMP_OGT:
+      case FCmpInst::FCMP_UGT:
         // minnum(X, LesserC)  >= C --> false
         // minnum(X, LesserC)  >  C --> false
         // maxnum(X, GreaterC) >= C --> true
         // maxnum(X, GreaterC) >  C --> true
         return ConstantInt::get(RetTy, IsMaxNum);
-      case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
-      case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
+      case FCmpInst::FCMP_OLE:
+      case FCmpInst::FCMP_ULE:
+      case FCmpInst::FCMP_OLT:
+      case FCmpInst::FCMP_ULT:
         // minnum(X, LesserC)  <= C --> true
         // minnum(X, LesserC)  <  C --> true
         // maxnum(X, GreaterC) <= C --> false
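
These minnum/maxnum folds can be sanity-checked with std::fmin, which shares minnum's NaN behavior (illustrative only, not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      // minnum(X, 1.0) never exceeds 1.0, so a compare against the larger
      // constant 2.0 has one answer for every X, NaN included (fmin returns
      // the non-NaN operand).
      double xs[] = {-3.0, 0.5, 7.0, NAN};
      for (double x : xs) {
        double m = std::fmin(x, 1.0);
        assert(m != 2.0);   // one/une -> true, oeq/ueq -> false
        assert(m < 2.0);    // olt/ult (and ole/ule) -> true
        assert(!(m > 2.0)); // ogt/ugt (and oge/uge) -> false
      }
      return 0;
    }
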
@@ -3992,21 +4077,21 @@
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
-    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+    if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   // If the comparison is with the result of a phi instruction, check whether
   // doing the compare with each incoming phi value yields a common result.
   if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
-    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+    if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   return nullptr;
 }
 
-Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               FastMathFlags FMF, const SimplifyQuery &Q) {
-  return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
+  return ::simplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
 }
 
 static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
@@ -4073,22 +4158,21 @@
     };
 
     if (auto *B = dyn_cast<BinaryOperator>(I))
-      return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), NewOps[0],
+      return PreventSelfSimplify(simplifyBinOp(B->getOpcode(), NewOps[0],
                                                NewOps[1], Q, MaxRecurse - 1));
 
     if (CmpInst *C = dyn_cast<CmpInst>(I))
-      return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), NewOps[0],
+      return PreventSelfSimplify(simplifyCmpInst(C->getPredicate(), NewOps[0],
                                                  NewOps[1], Q, MaxRecurse - 1));
 
     if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
-      return PreventSelfSimplify(SimplifyGEPInst(
+      return PreventSelfSimplify(simplifyGEPInst(
           GEP->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1),
           GEP->isInBounds(), Q, MaxRecurse - 1));
 
     if (isa<SelectInst>(I))
-      return PreventSelfSimplify(
-          SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q,
-                             MaxRecurse - 1));
+      return PreventSelfSimplify(simplifySelectInst(
+          NewOps[0], NewOps[1], NewOps[2], Q, MaxRecurse - 1));
     // TODO: We could hand off more cases to instsimplify here.
   }
 
@@ -4114,14 +4198,6 @@
   if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
     return nullptr;
 
-  if (CmpInst *C = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
-                                           ConstOps[1], Q.DL, Q.TLI);
-
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL);
-
   return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
 }
 
@@ -4184,7 +4260,8 @@
 /// Try to simplify a select instruction when its condition operand is an
 /// integer comparison.
 static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
-                                         Value *FalseVal, const SimplifyQuery &Q,
+                                         Value *FalseVal,
+                                         const SimplifyQuery &Q,
                                          unsigned MaxRecurse) {
   ICmpInst::Predicate Pred;
   Value *CmpLHS, *CmpRHS;
@@ -4204,7 +4281,8 @@
     Value *X, *Y;
     SelectPatternFlavor SPF =
         matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
-                                     X, Y).Flavor;
+                                     X, Y)
+            .Flavor;
     if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
       APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
                                     X->getType()->getScalarSizeInBits());
@@ -4256,8 +4334,8 @@
   }
 
   // Check for other compares that behave like bit test.
-  if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred,
-                                              TrueVal, FalseVal))
+  if (Value *V =
+          simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, TrueVal, FalseVal))
     return V;
 
   // If we have a scalar equality comparison, then we know the value in one of
@@ -4267,18 +4345,18 @@
   // because each element of a vector select is chosen independently.
   if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
     if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
-                               /* AllowRefinement */ false, MaxRecurse) ==
-            TrueVal ||
+                               /* AllowRefinement */ false,
+                               MaxRecurse) == TrueVal ||
         simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
-                               /* AllowRefinement */ false, MaxRecurse) ==
-            TrueVal)
+                               /* AllowRefinement */ false,
+                               MaxRecurse) == TrueVal)
       return FalseVal;
     if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
-                               /* AllowRefinement */ true, MaxRecurse) ==
-            FalseVal ||
+                               /* AllowRefinement */ true,
+                               MaxRecurse) == FalseVal ||
         simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q,
-                               /* AllowRefinement */ true, MaxRecurse) ==
-            FalseVal)
+                               /* AllowRefinement */ true,
+                               MaxRecurse) == FalseVal)
       return FalseVal;
   }
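
The classic scalar instance of this equality fold, rendered in C++ (a sketch, not the IR-level machinery):

    #include <cassert>

    // Substituting the compared constant into the untaken arm shows that
    // (x == 0) ? 0 : x always produces x, so the select simplifies away.
    static int selectEq(int x) { return (x == 0) ? 0 : x; }

    int main() {
      for (int x = -3; x <= 3; ++x)
        assert(selectEq(x) == x);
      return 0;
    }
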
 
@@ -4297,11 +4375,11 @@
   // This transform is safe if we do not have (do not care about) -0.0 or if
   // at least one operand is known to not be -0.0. Otherwise, the select can
   // change the sign of a zero operand.
-  bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) &&
-                          Q.CxtI->hasNoSignedZeros();
+  bool HasNoSignedZeros =
+      Q.CxtI && isa<FPMathOperator>(Q.CxtI) && Q.CxtI->hasNoSignedZeros();
   const APFloat *C;
   if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) ||
-                          (match(F, m_APFloat(C)) && C->isNonZero())) {
+      (match(F, m_APFloat(C)) && C->isNonZero())) {
     // (T == F) ? T : F --> F
     // (F == T) ? T : F --> F
     if (Pred == FCmpInst::FCMP_OEQ)
@@ -4318,7 +4396,7 @@
 
 /// Given operands for a SelectInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                  const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (auto *CondC = dyn_cast<Constant>(Cond)) {
     if (auto *TrueC = dyn_cast<Constant>(TrueVal))
@@ -4434,14 +4512,14 @@
   return nullptr;
 }
 
-Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+Value *llvm::simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                 const SimplifyQuery &Q) {
-  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
+  return ::simplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
 }
 
 /// Given operands for a GetElementPtrInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr,
+static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
                               ArrayRef<Value *> Indices, bool InBounds,
                               const SimplifyQuery &Q, unsigned) {
   // The type of the GEP pointer operand.
@@ -4468,6 +4546,13 @@
     }
   }
 
+  // For opaque pointers an all-zero GEP is a no-op. For typed pointers,
+  // it may be equivalent to a bitcast.
+  if (Ptr->getType()->getScalarType()->isOpaquePointerTy() &&
+      Ptr->getType() == GEPTy &&
+      all_of(Indices, [](const auto *V) { return match(V, m_Zero()); }))
+    return Ptr;
+
   // getelementptr poison, idx -> poison
   // getelementptr baseptr, poison -> poison
   if (isa<PoisonValue>(Ptr) ||
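
A minimal standalone sketch of exercising the new all-zero-GEP fold through the public entry point; it assumes LLVM 15-style headers and opaque pointers and omits error handling:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Ctx.setOpaquePointers(true); // the fold above fires for opaque pointers
      Module M("demo", Ctx);
      IRBuilder<> B(Ctx);
      auto *G = new GlobalVariable(M, B.getInt32Ty(), /*isConstant=*/false,
                                   GlobalValue::ExternalLinkage,
                                   /*Initializer=*/nullptr, "g");
      Value *Idx[] = {B.getInt64(0)};
      SimplifyQuery Q(M.getDataLayout());
      // An all-zero GEP over an opaque pointer simplifies to the base pointer.
      Value *V = simplifyGEPInst(B.getInt32Ty(), G, Idx, /*InBounds=*/false, Q);
      return V == G ? 0 : 1;
    }
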
@@ -4572,16 +4657,16 @@
   return ConstantFoldConstant(CE, Q.DL);
 }
 
-Value *llvm::SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
+Value *llvm::simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
                              bool InBounds, const SimplifyQuery &Q) {
-  return ::SimplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit);
+  return ::simplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit);
 }
 
 /// Given operands for an InsertValueInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
-                                      ArrayRef<unsigned> Idxs, const SimplifyQuery &Q,
-                                      unsigned) {
+static Value *simplifyInsertValueInst(Value *Agg, Value *Val,
+                                      ArrayRef<unsigned> Idxs,
+                                      const SimplifyQuery &Q, unsigned) {
   if (Constant *CAgg = dyn_cast<Constant>(Agg))
     if (Constant *CVal = dyn_cast<Constant>(Val))
       return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -4606,13 +4691,13 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+Value *llvm::simplifyInsertValueInst(Value *Agg, Value *Val,
                                      ArrayRef<unsigned> Idxs,
                                      const SimplifyQuery &Q) {
-  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
+  return ::simplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
+Value *llvm::simplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
                                        const SimplifyQuery &Q) {
   // Try to constant fold.
   auto *VecC = dyn_cast<Constant>(Vec);
@@ -4649,7 +4734,7 @@
 
 /// Given operands for an ExtractValueInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+static Value *simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
                                        const SimplifyQuery &, unsigned) {
   if (auto *CAgg = dyn_cast<Constant>(Agg))
     return ConstantFoldExtractValueInstruction(CAgg, Idxs);
@@ -4672,14 +4757,14 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+Value *llvm::simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
                                       const SimplifyQuery &Q) {
-  return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
+  return ::simplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
 }
 
 /// Given operands for an ExtractElementInst, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
+static Value *simplifyExtractElementInst(Value *Vec, Value *Idx,
                                          const SimplifyQuery &Q, unsigned) {
   auto *VecVTy = cast<VectorType>(Vec->getType());
   if (auto *CVec = dyn_cast<Constant>(Vec)) {
@@ -4716,13 +4801,13 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
+Value *llvm::simplifyExtractElementInst(Value *Vec, Value *Idx,
                                         const SimplifyQuery &Q) {
-  return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
+  return ::simplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
 }
 
 /// See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
+static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
                               const SimplifyQuery &Q) {
   // WARNING: no matter how worthwhile it may seem, we can not perform PHI CSE
   //          here, because the PHI we may succeed simplifying to was not
@@ -4734,14 +4819,15 @@
   bool HasUndefInput = false;
   for (Value *Incoming : IncomingValues) {
     // If the incoming value is the phi node itself, it can safely be skipped.
-    if (Incoming == PN) continue;
+    if (Incoming == PN)
+      continue;
     if (Q.isUndefValue(Incoming)) {
       // Remember that we saw an undef value, but otherwise ignore them.
       HasUndefInput = true;
       continue;
     }
     if (CommonValue && Incoming != CommonValue)
-      return nullptr;  // Not the same, bail out.
+      return nullptr; // Not the same, bail out.
     CommonValue = Incoming;
   }
 
@@ -4750,17 +4836,18 @@
   if (!CommonValue)
     return UndefValue::get(PN->getType());
 
-  // If we have a PHI node like phi(X, undef, X), where X is defined by some
-  // instruction, we cannot return X as the result of the PHI node unless it
-  // dominates the PHI block.
-  if (HasUndefInput)
+  if (HasUndefInput) {
+    // If we have a PHI node like phi(X, undef, X), where X is defined by some
+    // instruction, we cannot return X as the result of the PHI node unless it
+    // dominates the PHI block.
     return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+  }
 
   return CommonValue;
 }
 
-static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
-                               Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) {
+static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+                               const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (auto *C = dyn_cast<Constant>(Op))
     return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
 
@@ -4793,9 +4880,9 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+Value *llvm::simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
                               const SimplifyQuery &Q) {
-  return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
+  return ::simplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
 }
 
 /// For the given destination element of a shuffle, peek through shuffles to
@@ -4849,7 +4936,7 @@
   return RootVec;
 }
 
-static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
+static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
                                         ArrayRef<int> Mask, Type *RetTy,
                                         const SimplifyQuery &Q,
                                         unsigned MaxRecurse) {
@@ -4965,14 +5052,14 @@
 }
 
 /// Given operands for a ShuffleVectorInst, fold the result or return null.
-Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
+Value *llvm::simplifyShuffleVectorInst(Value *Op0, Value *Op1,
                                        ArrayRef<int> Mask, Type *RetTy,
                                        const SimplifyQuery &Q) {
-  return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
+  return ::simplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
 }
 
-static Constant *foldConstant(Instruction::UnaryOps Opcode,
-                              Value *&Op, const SimplifyQuery &Q) {
+static Constant *foldConstant(Instruction::UnaryOps Opcode, Value *&Op,
+                              const SimplifyQuery &Q) {
   if (auto *C = dyn_cast<Constant>(Op))
     return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
   return nullptr;
@@ -4993,7 +5080,7 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+Value *llvm::simplifyFNegInst(Value *Op, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
 }
@@ -5044,15 +5131,10 @@
   return nullptr;
 }
 
-// TODO: Move this out to a header file:
-static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
-  return (EB == fp::ebIgnore || FMF.noNaNs());
-}
-
 /// Given operands for an FAdd, see if we can fold the result.  If not, this
 /// returns null.
 static Value *
-SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                  const SimplifyQuery &Q, unsigned MaxRecurse,
                  fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
                  RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5114,7 +5196,7 @@
 /// Given operands for an FSub, see if we can fold the result.  If not, this
 /// returns null.
 static Value *
-SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                  const SimplifyQuery &Q, unsigned MaxRecurse,
                  fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
                  RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5125,24 +5207,28 @@
   if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
     return C;
 
-  if (!isDefaultFPEnvironment(ExBehavior, Rounding))
-    return nullptr;
-
   // fsub X, +0 ==> X
-  if (match(Op1, m_PosZeroFP()))
-    return Op0;
+  if (canIgnoreSNaN(ExBehavior, FMF) &&
+      (!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) ||
+       FMF.noSignedZeros()))
+    if (match(Op1, m_PosZeroFP()))
+      return Op0;
 
   // fsub X, -0 ==> X, when we know X is not -0
-  if (match(Op1, m_NegZeroFP()) &&
-      (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
-    return Op0;
+  if (canIgnoreSNaN(ExBehavior, FMF))
+    if (match(Op1, m_NegZeroFP()) &&
+        (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+      return Op0;
 
   // fsub -0.0, (fsub -0.0, X) ==> X
   // fsub -0.0, (fneg X) ==> X
   Value *X;
-  if (match(Op0, m_NegZeroFP()) &&
-      match(Op1, m_FNeg(m_Value(X))))
-    return X;
+  if (canIgnoreSNaN(ExBehavior, FMF))
+    if (match(Op0, m_NegZeroFP()) && match(Op1, m_FNeg(m_Value(X))))
+      return X;
+
+  if (!isDefaultFPEnvironment(ExBehavior, Rounding))
+    return nullptr;
 
   // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
   // fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
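
The rounding-mode guard on "fsub X, +0 ==> X" exists because an exact zero difference is delivered as -0.0 under roundTowardNegative; a standalone demonstration (illustrative only, not part of the patch):

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    #pragma STDC FENV_ACCESS ON // required for conforming fesetround use

    int main() {
      std::fesetround(FE_DOWNWARD);
      volatile double x = 0.0; // +0.0
      volatile double r = x - 0.0;
      assert(std::signbit(r));  // r is -0.0 under round-toward-negative,
      assert(!std::signbit(x)); // so the fold would change X's zero sign.
      return 0;
    }
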
@@ -5165,7 +5251,7 @@
   return nullptr;
 }
 
-static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q, unsigned MaxRecurse,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
@@ -5196,8 +5282,8 @@
   // 2. Ignore non-zero negative numbers because sqrt would produce NAN.
   // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0.
   Value *X;
-  if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) &&
-      FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros())
+  if (Op0 == Op1 && match(Op0, m_Sqrt(m_Value(X))) && FMF.allowReassoc() &&
+      FMF.noNaNs() && FMF.noSignedZeros())
     return X;
 
   return nullptr;
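
Counterexamples behind the reassoc/nnan/nsz requirements on sqrt(X) * sqrt(X) -> X (illustrative only, not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      // Without nnan: sqrt(-1) * sqrt(-1) is NaN, not -1.
      assert(std::isnan(std::sqrt(-1.0) * std::sqrt(-1.0)));
      // Without nsz: sqrt(-0.0) is -0.0, and (-0.0) * (-0.0) is +0.0.
      assert(!std::signbit(std::sqrt(-0.0) * std::sqrt(-0.0)));
      // And the product may differ from X by an ulp, hence allowReassoc.
      return 0;
    }
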
@@ -5205,7 +5291,7 @@
 
 /// Given the operands for an FMul, see if we can fold the result
 static Value *
-SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                  const SimplifyQuery &Q, unsigned MaxRecurse,
                  fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
                  RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5214,43 +5300,43 @@
       return C;
 
   // Now apply simplifications that do not require rounding.
-  return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding);
+  return simplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding);
 }
 
-Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
-  return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                             Rounding);
 }
 
-Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
-  return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                             Rounding);
 }
 
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
-  return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                             Rounding);
 }
 
-Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
                              const SimplifyQuery &Q,
                              fp::ExceptionBehavior ExBehavior,
                              RoundingMode Rounding) {
-  return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                            Rounding);
 }
 
 static Value *
-SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                  const SimplifyQuery &Q, unsigned,
                  fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
                  RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5296,16 +5382,16 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
-  return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                             Rounding);
 }
 
 static Value *
-SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                  const SimplifyQuery &Q, unsigned,
                  fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
                  RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5334,11 +5420,11 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q,
                               fp::ExceptionBehavior ExBehavior,
                               RoundingMode Rounding) {
-  return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+  return ::simplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
                             Rounding);
 }
 
@@ -5360,8 +5446,8 @@
 /// If not, this returns null.
 /// Try to use FastMathFlags when folding the result.
 static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
-                             const FastMathFlags &FMF,
-                             const SimplifyQuery &Q, unsigned MaxRecurse) {
+                             const FastMathFlags &FMF, const SimplifyQuery &Q,
+                             unsigned MaxRecurse) {
   switch (Opcode) {
   case Instruction::FNeg:
     return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
@@ -5370,56 +5456,56 @@
   }
 }
 
-Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
   return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
                           const SimplifyQuery &Q) {
   return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
 }
 
 /// Given operands for a BinaryOperator, see if we can fold the result.
 /// If not, this returns null.
-static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                             const SimplifyQuery &Q, unsigned MaxRecurse) {
   switch (Opcode) {
   case Instruction::Add:
-    return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
+    return simplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
   case Instruction::Sub:
-    return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
+    return simplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
   case Instruction::Mul:
-    return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyMulInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::SDiv:
-    return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+    return simplifySDivInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::UDiv:
-    return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyUDivInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::SRem:
-    return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+    return simplifySRemInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::URem:
-    return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyURemInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::Shl:
-    return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
+    return simplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
   case Instruction::LShr:
-    return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
+    return simplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
   case Instruction::AShr:
-    return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
+    return simplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
   case Instruction::And:
-    return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyAndInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::Or:
-    return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyOrInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::Xor:
-    return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+    return simplifyXorInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::FAdd:
-    return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   case Instruction::FSub:
-    return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   case Instruction::FMul:
-    return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   case Instruction::FDiv:
-    return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   case Instruction::FRem:
-    return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -5428,49 +5514,50 @@
 /// Given operands for a BinaryOperator, see if we can fold the result.
 /// If not, this returns null.
 /// Try to use FastMathFlags when folding the result.
-static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                             const FastMathFlags &FMF, const SimplifyQuery &Q,
                             unsigned MaxRecurse) {
   switch (Opcode) {
   case Instruction::FAdd:
-    return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+    return simplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
   case Instruction::FSub:
-    return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+    return simplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
   case Instruction::FMul:
-    return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+    return simplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
   case Instruction::FDiv:
-    return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
+    return simplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
   default:
-    return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+    return simplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
   }
 }
 
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            const SimplifyQuery &Q) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
+  return ::simplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            FastMathFlags FMF, const SimplifyQuery &Q) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+  return ::simplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
 }
 
 /// Given operands for a CmpInst, see if we can fold the result.
-static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const SimplifyQuery &Q, unsigned MaxRecurse) {
   if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
-    return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
-  return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+    return simplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+  return simplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
 }
 
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                              const SimplifyQuery &Q) {
-  return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+  return ::simplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
 }
 
-static bool IsIdempotent(Intrinsic::ID ID) {
+static bool isIdempotent(Intrinsic::ID ID) {
   switch (ID) {
-  default: return false;
+  default:
+    return false;
 
   // Unary idempotent: f(f(x)) = f(x)
   case Intrinsic::fabs:
@@ -5486,7 +5573,7 @@
   }
 }
 
-static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
+static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
                                    const DataLayout &DL) {
   GlobalValue *PtrSym;
   APInt PtrOffset;
@@ -5546,7 +5633,7 @@
                                      const SimplifyQuery &Q) {
   // Idempotent functions return the same result when called repeatedly.
   Intrinsic::ID IID = F->getIntrinsicID();
-  if (IsIdempotent(IID))
+  if (isIdempotent(IID))
     if (auto *II = dyn_cast<IntrinsicInst>(Op0))
       if (II->getIntrinsicID() == IID)
         return II;
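
A quick check of the unary-idempotence property used above, with fabs standing in for the intrinsic (illustrative only, not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      // f(f(x)) == f(x) for fabs, so fabs(fabs(x)) folds to fabs(x).
      double xs[] = {-2.5, -0.0, 3.0};
      for (double x : xs)
        assert(std::fabs(std::fabs(x)) == std::fabs(x));
      return 0;
    }
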
@@ -5554,15 +5641,18 @@
   Value *X;
   switch (IID) {
   case Intrinsic::fabs:
-    if (SignBitMustBeZero(Op0, Q.TLI)) return Op0;
+    if (SignBitMustBeZero(Op0, Q.TLI))
+      return Op0;
     break;
   case Intrinsic::bswap:
     // bswap(bswap(x)) -> x
-    if (match(Op0, m_BSwap(m_Value(X)))) return X;
+    if (match(Op0, m_BSwap(m_Value(X))))
+      return X;
     break;
   case Intrinsic::bitreverse:
     // bitreverse(bitreverse(x)) -> x
-    if (match(Op0, m_BitReverse(m_Value(X)))) return X;
+    if (match(Op0, m_BitReverse(m_Value(X))))
+      return X;
     break;
   case Intrinsic::ctpop: {
     // If everything but the lowest bit is zero, that bit is the pop-count. Ex:
@@ -5576,30 +5666,34 @@
   case Intrinsic::exp:
     // exp(log(x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
-        match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X;
+        match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X))))
+      return X;
     break;
   case Intrinsic::exp2:
     // exp2(log2(x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
-        match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X;
+        match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
+      return X;
     break;
   case Intrinsic::log:
     // log(exp(x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
-        match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X;
+        match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
+      return X;
     break;
   case Intrinsic::log2:
     // log2(exp2(x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
         (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
-         match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
-                                                m_Value(X))))) return X;
+         match(Op0,
+               m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), m_Value(X)))))
+      return X;
     break;
   case Intrinsic::log10:
     // log10(pow(10.0, x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
-        match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
-                                               m_Value(X)))) return X;
+        match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X))))
+      return X;
     break;
   case Intrinsic::floor:
   case Intrinsic::trunc:
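
The exp/log round-trips above are only value-preserving up to rounding, which is why each fold requires hasAllowReassoc(); a small probe (illustrative only, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      // exp(log(10.0)) typically differs from 10.0 by an ulp, so the
      // exact fold to X is only licensed under reassoc-style FMF.
      double x = 10.0;
      double y = std::exp(std::log(x));
      std::printf("x=%.17g exp(log(x))=%.17g diff=%g\n", x, y, y - x);
      return 0;
    }
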
@@ -5821,7 +5915,7 @@
   case Intrinsic::load_relative:
     if (auto *C0 = dyn_cast<Constant>(Op0))
       if (auto *C1 = dyn_cast<Constant>(Op1))
-        return SimplifyRelativeLoad(C0, C1, Q.DL);
+        return simplifyRelativeLoad(C0, C1, Q.DL);
     break;
   case Intrinsic::powi:
     if (auto *Power = dyn_cast<ConstantInt>(Op1)) {
@@ -5848,7 +5942,8 @@
   case Intrinsic::maximum:
   case Intrinsic::minimum: {
     // If the arguments are the same, this is a no-op.
-    if (Op0 == Op1) return Op0;
+    if (Op0 == Op1)
+      return Op0;
 
     // Canonicalize constant operand as Op1.
     if (isa<Constant>(Op0))
@@ -5901,14 +5996,14 @@
 
     break;
   }
-  case Intrinsic::experimental_vector_extract: {
+  case Intrinsic::vector_extract: {
     Type *ReturnType = F->getReturnType();
 
     // (extract_vector (insert_vector _, X, 0), 0) -> X
     unsigned IdxN = cast<ConstantInt>(Op1)->getZExtValue();
     Value *X = nullptr;
-    if (match(Op0, m_Intrinsic<Intrinsic::experimental_vector_insert>(
-                       m_Value(), m_Value(X), m_Zero())) &&
+    if (match(Op0, m_Intrinsic<Intrinsic::vector_insert>(m_Value(), m_Value(X),
+                                                         m_Zero())) &&
         IdxN == 0 && X->getType() == ReturnType)
       return X;
 
@@ -6004,8 +6099,8 @@
     Value *Op2 = Call->getArgOperand(2);
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
     if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q,
-                                FPI->getExceptionBehavior().getValue(),
-                                FPI->getRoundingMode().getValue()))
+                                FPI->getExceptionBehavior().value(),
+                                FPI->getRoundingMode().value()))
       return V;
     return nullptr;
   }
@@ -6049,7 +6144,7 @@
 
     return nullptr;
   }
-  case Intrinsic::experimental_vector_insert: {
+  case Intrinsic::vector_insert: {
     Value *Vec = Call->getArgOperand(0);
     Value *SubVec = Call->getArgOperand(1);
     Value *Idx = Call->getArgOperand(2);
@@ -6059,8 +6154,8 @@
     // where: Y is X, or Y is undef
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
     Value *X = nullptr;
-    if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
-                          m_Value(X), m_Zero())) &&
+    if (match(SubVec,
+              m_Intrinsic<Intrinsic::vector_extract>(m_Value(X), m_Zero())) &&
         (Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 &&
         X->getType() == ReturnType)
       return X;
@@ -6069,43 +6164,33 @@
   }
   case Intrinsic::experimental_constrained_fadd: {
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
-    return SimplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
-                            FPI->getFastMathFlags(), Q,
-                            FPI->getExceptionBehavior().getValue(),
-                            FPI->getRoundingMode().getValue());
-    break;
+    return simplifyFAddInst(
+        FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+        Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
   }
   case Intrinsic::experimental_constrained_fsub: {
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
-    return SimplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
-                            FPI->getFastMathFlags(), Q,
-                            FPI->getExceptionBehavior().getValue(),
-                            FPI->getRoundingMode().getValue());
-    break;
+    return simplifyFSubInst(
+        FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+        Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
   }
   case Intrinsic::experimental_constrained_fmul: {
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
-    return SimplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
-                            FPI->getFastMathFlags(), Q,
-                            FPI->getExceptionBehavior().getValue(),
-                            FPI->getRoundingMode().getValue());
-    break;
+    return simplifyFMulInst(
+        FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+        Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
   }
   case Intrinsic::experimental_constrained_fdiv: {
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
-    return SimplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
-                            FPI->getFastMathFlags(), Q,
-                            FPI->getExceptionBehavior().getValue(),
-                            FPI->getRoundingMode().getValue());
-    break;
+    return simplifyFDivInst(
+        FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+        Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
   }
   case Intrinsic::experimental_constrained_frem: {
     auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
-    return SimplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
-                            FPI->getFastMathFlags(), Q,
-                            FPI->getExceptionBehavior().getValue(),
-                            FPI->getRoundingMode().getValue());
-    break;
+    return simplifyFRemInst(
+        FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+        Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
   }
   default:
     return nullptr;
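
Note: the hunk above is part of two mechanical migrations that run through this whole file: the Simplify* entry points move to LLVM's lowerCamelCase convention (simplifyFAddInst and friends), and llvm::Optional accessors move to their std::optional-style spellings. The unreachable break statements after each return are dropped along the way. A minimal sketch of the Optional migration, assuming nothing beyond llvm::Optional itself:

    #include "llvm/ADT/Optional.h"

    int pick(llvm::Optional<int> Mode) {
      // Before this import: Mode.hasValue(), Mode.getValue(), Mode.getValueOr(0)
      if (Mode)                    // operator bool replaces hasValue()
        return Mode.value();       // value() replaces getValue()
      return Mode.value_or(0);     // value_or() replaces getValueOr()
    }
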
@@ -6133,7 +6218,7 @@
   return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
 }
 
-Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) {
   // musttail calls can only be simplified if they are also DCEd.
   // As we can't guarantee this here, don't simplify them.
   if (Call->isMustTailCall())
@@ -6156,8 +6241,17 @@
   return nullptr;
 }
 
+Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) {
+  assert(isa<ConstrainedFPIntrinsic>(Call));
+  if (Value *V = tryConstantFoldCall(Call, Q))
+    return V;
+  if (Value *Ret = simplifyIntrinsic(Call, Q))
+    return Ret;
+  return nullptr;
+}
+
 /// Given operands for a Freeze, see if we can fold the result.
-static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+static Value *simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
   // Use a utility function defined in ValueTracking.
   if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT))
     return Op0;
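
Note: simplifyConstrainedFPCall, added just above, gives callers a dedicated entry point that attempts constant folding and then intrinsic simplification specifically for constrained FP intrinsics. A hedged sketch of a hypothetical call site (CB and SQ are assumed names, not code from this patch):

    // Fold a constrained FP intrinsic if possible, else keep the call.
    if (auto *CFP = dyn_cast<ConstrainedFPIntrinsic>(CB))
      if (Value *V = llvm::simplifyConstrainedFPCall(CFP, SQ))
        CFP->replaceAllUsesWith(V);
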
@@ -6165,11 +6259,11 @@
   return nullptr;
 }
 
-Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
-  return ::SimplifyFreezeInst(Op0, Q);
+Value *llvm::simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+  return ::simplifyFreezeInst(Op0, Q);
 }
 
-static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp,
+static Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp,
                                const SimplifyQuery &Q) {
   if (LI->isVolatile())
     return nullptr;
@@ -6213,134 +6307,134 @@
     }
     break;
   case Instruction::FNeg:
-    Result = SimplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
+    Result = simplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
     break;
   case Instruction::FAdd:
-    Result = SimplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+    Result = simplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::Add:
-    Result = SimplifyAddInst(
+    Result = simplifyAddInst(
         NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
         Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
     break;
   case Instruction::FSub:
-    Result = SimplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+    Result = simplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::Sub:
-    Result = SimplifySubInst(
+    Result = simplifySubInst(
         NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
         Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
     break;
   case Instruction::FMul:
-    Result = SimplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+    Result = simplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::Mul:
-    Result = SimplifyMulInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyMulInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::SDiv:
-    Result = SimplifySDivInst(NewOps[0], NewOps[1], Q);
+    Result = simplifySDivInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::UDiv:
-    Result = SimplifyUDivInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyUDivInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::FDiv:
-    Result = SimplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+    Result = simplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::SRem:
-    Result = SimplifySRemInst(NewOps[0], NewOps[1], Q);
+    Result = simplifySRemInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::URem:
-    Result = SimplifyURemInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyURemInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::FRem:
-    Result = SimplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+    Result = simplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::Shl:
-    Result = SimplifyShlInst(
+    Result = simplifyShlInst(
         NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
         Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
     break;
   case Instruction::LShr:
-    Result = SimplifyLShrInst(NewOps[0], NewOps[1],
+    Result = simplifyLShrInst(NewOps[0], NewOps[1],
                               Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
     break;
   case Instruction::AShr:
-    Result = SimplifyAShrInst(NewOps[0], NewOps[1],
+    Result = simplifyAShrInst(NewOps[0], NewOps[1],
                               Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
     break;
   case Instruction::And:
-    Result = SimplifyAndInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyAndInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::Or:
-    Result = SimplifyOrInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyOrInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::Xor:
-    Result = SimplifyXorInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyXorInst(NewOps[0], NewOps[1], Q);
     break;
   case Instruction::ICmp:
-    Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
+    Result = simplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
                               NewOps[1], Q);
     break;
   case Instruction::FCmp:
-    Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
+    Result = simplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
                               NewOps[1], I->getFastMathFlags(), Q);
     break;
   case Instruction::Select:
-    Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
+    Result = simplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
     break;
   case Instruction::GetElementPtr: {
     auto *GEPI = cast<GetElementPtrInst>(I);
     Result =
-        SimplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
+        simplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
                         makeArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q);
     break;
   }
   case Instruction::InsertValue: {
     InsertValueInst *IV = cast<InsertValueInst>(I);
-    Result = SimplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
+    Result = simplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
     break;
   }
   case Instruction::InsertElement: {
-    Result = SimplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
+    Result = simplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
     break;
   }
   case Instruction::ExtractValue: {
     auto *EVI = cast<ExtractValueInst>(I);
-    Result = SimplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
+    Result = simplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
     break;
   }
   case Instruction::ExtractElement: {
-    Result = SimplifyExtractElementInst(NewOps[0], NewOps[1], Q);
+    Result = simplifyExtractElementInst(NewOps[0], NewOps[1], Q);
     break;
   }
   case Instruction::ShuffleVector: {
     auto *SVI = cast<ShuffleVectorInst>(I);
-    Result = SimplifyShuffleVectorInst(
+    Result = simplifyShuffleVectorInst(
         NewOps[0], NewOps[1], SVI->getShuffleMask(), SVI->getType(), Q);
     break;
   }
   case Instruction::PHI:
-    Result = SimplifyPHINode(cast<PHINode>(I), NewOps, Q);
+    Result = simplifyPHINode(cast<PHINode>(I), NewOps, Q);
     break;
   case Instruction::Call: {
     // TODO: Use NewOps
-    Result = SimplifyCall(cast<CallInst>(I), Q);
+    Result = simplifyCall(cast<CallInst>(I), Q);
     break;
   }
   case Instruction::Freeze:
-    Result = llvm::SimplifyFreezeInst(NewOps[0], Q);
+    Result = llvm::simplifyFreezeInst(NewOps[0], Q);
     break;
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
 #include "llvm/IR/Instruction.def"
 #undef HANDLE_CAST_INST
-    Result = SimplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
+    Result = simplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
     break;
   case Instruction::Alloca:
     // No simplifications for Alloca and it can't be constant folded.
     Result = nullptr;
     break;
   case Instruction::Load:
-    Result = SimplifyLoadInst(cast<LoadInst>(I), NewOps[0], Q);
+    Result = simplifyLoadInst(cast<LoadInst>(I), NewOps[0], Q);
     break;
   }
 
@@ -6350,7 +6444,7 @@
   return Result == I ? UndefValue::get(I->getType()) : Result;
 }
 
-Value *llvm::SimplifyInstructionWithOperands(Instruction *I,
+Value *llvm::simplifyInstructionWithOperands(Instruction *I,
                                              ArrayRef<Value *> NewOps,
                                              const SimplifyQuery &SQ,
                                              OptimizationRemarkEmitter *ORE) {
@@ -6359,7 +6453,7 @@
   return ::simplifyInstructionWithOperands(I, NewOps, SQ, ORE);
 }
 
-Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
+Value *llvm::simplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
                                  OptimizationRemarkEmitter *ORE) {
   SmallVector<Value *, 8> Ops(I->operands());
   return ::simplifyInstructionWithOperands(I, Ops, SQ, ORE);
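
Note: the big switch above and these wrappers complete the InstructionSimplify rename from SimplifyXxx to simplifyXxx; downstream callers update mechanically. As a sketch (Inst and the analysis objects are placeholder names):

    // Before: Value *V = SimplifyInstruction(Inst, {DL, TLI, DT, AC});
    Value *V = simplifyInstruction(Inst, {DL, TLI, DT, AC});
    if (V)
      Inst->replaceAllUsesWith(V);
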
@@ -6410,7 +6504,7 @@
     I = Worklist[Idx];
 
     // See if this instruction simplifies.
-    SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
+    SimpleV = simplifyInstruction(I, {DL, TLI, DT, AC});
     if (!SimpleV) {
       if (UnsimplifiedUsers)
         UnsimplifiedUsers->insert(I);
@@ -6473,6 +6567,6 @@
 }
 template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &,
                                                   Function &);
-}
+} // namespace llvm
 
 void InstSimplifyFolder::anchor() {}
diff --git a/src/llvm-project/llvm/lib/Analysis/Interval.cpp b/src/llvm-project/llvm/lib/Analysis/Interval.cpp
index e228ec4..f7fffcb 100644
--- a/src/llvm-project/llvm/lib/Analysis/Interval.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/Interval.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/Analysis/Interval.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp b/src/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
index e8e9593..20a905e 100644
--- a/src/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -9,14 +9,13 @@
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InstIterator.h"
@@ -30,12 +29,15 @@
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
-#include <cstddef>
 #include <iterator>
 #include <string>
 #include <tuple>
 #include <utility>
 
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/ADT/ScopeExit.h"
+#endif
+
 using namespace llvm;
 
 #define DEBUG_TYPE "lcg"
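
Note: ScopeExit.h is only needed by EXPENSIVE_CHECKS-guarded verification in this file, so the include now lives behind the same guard. The pattern it supports, as a sketch (verify() stands in for the real checks):

    #ifdef EXPENSIVE_CHECKS
    #include "llvm/ADT/ScopeExit.h"
    #endif

    void mutate(LazyCallGraph &G) {
    #ifdef EXPENSIVE_CHECKS
      // Verify on every return path, but only in expensive-checks builds.
      auto Verify = llvm::make_scope_exit([&] { /* G.verify(); */ });
    #endif
      // ... graph mutation ...
    }
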
diff --git a/src/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/src/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index e311b40..2fae260 100644
--- a/src/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -38,7 +38,6 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
-#include <map>
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -919,10 +918,10 @@
   // transfer rule on the full set since we may be able to locally infer
   // interesting facts.
   Optional<ConstantRange> LHSRes = getRangeFor(CI->getOperand(0), CI, BB);
-  if (!LHSRes.hasValue())
+  if (!LHSRes)
     // More work to do before applying this transfer rule.
     return None;
-  const ConstantRange &LHSRange = LHSRes.getValue();
+  const ConstantRange &LHSRange = LHSRes.value();
 
   const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
 
@@ -943,12 +942,12 @@
   // @foo()), 32"
   Optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB);
   Optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB);
-  if (!LHSRes.hasValue() || !RHSRes.hasValue())
+  if (!LHSRes || !RHSRes)
     // More work to do before applying this transfer rule.
     return None;
 
-  const ConstantRange &LHSRange = LHSRes.getValue();
-  const ConstantRange &RHSRange = RHSRes.getValue();
+  const ConstantRange &LHSRange = LHSRes.value();
+  const ConstantRange &RHSRange = RHSRes.value();
   return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
 }
 
@@ -956,13 +955,6 @@
     BinaryOperator *BO, BasicBlock *BB) {
   assert(BO->getOperand(0)->getType()->isSized() &&
          "all operands to binary operators are sized");
-  if (BO->getOpcode() == Instruction::Xor) {
-    // Xor is the only operation not supported by ConstantRange::binaryOp().
-    LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
-                      << "' - overdefined (unknown binary operator).\n");
-    return ValueLatticeElement::getOverdefined();
-  }
-
   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) {
     unsigned NoWrapKind = 0;
     if (OBO->hasNoUnsignedWrap())
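
Note: the removed early-out is possible because ConstantRange::binaryOp() now supports Instruction::Xor, so the generic path below computes a conservative range for xor instead of returning overdefined. A minimal sketch:

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    ConstantRange xorRange(const ConstantRange &L, const ConstantRange &R) {
      // With this LLVM, binaryOp() also accepts Instruction::Xor.
      return L.binaryOp(Instruction::Xor, R);
    }
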
@@ -1020,7 +1012,7 @@
 
   // Handle extractvalue of insertvalue, to allow further simplification
   // based on replaced *.with.overflow intrinsics.
-  if (Value *V = SimplifyExtractValueInst(
+  if (Value *V = simplifyExtractValueInst(
           EVI->getAggregateOperand(), EVI->getIndices(),
           EVI->getModule()->getDataLayout()))
     return getBlockValue(V, BB, EVI);
@@ -1141,7 +1133,7 @@
     ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C);
     if (!CR.isEmptySet())
       return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
-          CR.getUnsignedMin().zextOrSelf(BitWidth), APInt(BitWidth, 0)));
+          CR.getUnsignedMin().zext(BitWidth), APInt(BitWidth, 0)));
   }
 
   return ValueLatticeElement::getOverdefined();
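
Note: the OrSelf variants are being retired in this LLVM; APInt::zext (and sext/trunc) now accept a target width equal to the current width, which makes zextOrSelf redundant. Sketch:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void demo() {
      APInt X(16, 42);
      APInt Wide = X.zext(32); // widens as before
      APInt Same = X.zext(16); // now legal: no-op instead of an assert
    }
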
@@ -1278,7 +1270,7 @@
   if (auto *CI = dyn_cast<CastInst>(Usr)) {
     assert(CI->getOperand(0) == Op && "Operand 0 isn't Op");
     if (auto *C = dyn_cast_or_null<ConstantInt>(
-            SimplifyCastInst(CI->getOpcode(), OpConst,
+            simplifyCastInst(CI->getOpcode(), OpConst,
                              CI->getDestTy(), DL))) {
       return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
     }
@@ -1290,7 +1282,7 @@
     Value *LHS = Op0Match ? OpConst : BO->getOperand(0);
     Value *RHS = Op1Match ? OpConst : BO->getOperand(1);
     if (auto *C = dyn_cast_or_null<ConstantInt>(
-            SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
+            simplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
       return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
     }
   } else if (isa<FreezeInst>(Usr)) {
@@ -1361,7 +1353,7 @@
               ValueLatticeElement OpLatticeVal =
                   getValueFromCondition(Op, Condition, isTrueDest);
               if (Optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) {
-                Result = constantFoldUser(Usr, Op, OpConst.getValue(), DL);
+                Result = constantFoldUser(Usr, Op, *OpConst, DL);
                 break;
               }
             }
@@ -1432,8 +1424,9 @@
   if (Constant *VC = dyn_cast<Constant>(Val))
     return ValueLatticeElement::get(VC);
 
-  ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo)
-      .getValueOr(ValueLatticeElement::getOverdefined());
+  ValueLatticeElement LocalResult =
+      getEdgeValueLocal(Val, BBFrom, BBTo)
+          .value_or(ValueLatticeElement::getOverdefined());
   if (hasSingleValue(LocalResult))
     // Can't get any more precise here
     return LocalResult;
@@ -1886,6 +1879,11 @@
   }
 }
 
+void LazyValueInfo::clear(const Module *M) {
+  if (PImpl) {
+    getImpl(PImpl, AC, M).clear();
+  }
+}
 
 void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
   if (PImpl) {
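
Note: LazyValueInfo::clear() is a new public hook that drops the whole lattice cache while keeping the analysis object alive; a pass that rewrites large parts of a module could use it instead of per-value invalidation. A hedged two-line sketch (LVI and M are assumed to be in scope):

    // Drop all cached lattice values for the module in one call.
    LVI.clear(&M);
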
@@ -1898,7 +1896,7 @@
     const BasicBlock *BB, formatted_raw_ostream &OS) {
   // Find if there are lattice values defined for arguments of the function.
   auto *F = BB->getParent();
-  for (auto &Arg : F->args()) {
+  for (const auto &Arg : F->args()) {
     ValueLatticeElement Result = LVIImpl->getValueInBlock(
         const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB));
     if (Result.isUnknown())
@@ -1934,12 +1932,12 @@
   printResult(ParentBB);
   // Print the LVI analysis results for the immediate successor blocks, that
   // are dominated by `ParentBB`.
-  for (auto *BBSucc : successors(ParentBB))
+  for (const auto *BBSucc : successors(ParentBB))
     if (DT.dominates(ParentBB, BBSucc))
       printResult(BBSucc);
 
   // Print LVI in blocks where `I` is used.
-  for (auto *U : I->users())
+  for (const auto *U : I->users())
     if (auto *UseI = dyn_cast<Instruction>(U))
       if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent()))
         printResult(UseI->getParent());
diff --git a/src/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 031bf3b..381d62a 100644
--- a/src/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -68,6 +68,7 @@
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -392,14 +393,14 @@
     return;
 
   // Dumps all divergent values in F, arguments and then instructions.
-  for (auto &Arg : F->args()) {
+  for (const auto &Arg : F->args()) {
     OS << (isDivergent(&Arg) ? "DIVERGENT: " : "           ");
     OS << Arg << "\n";
   }
   // Iterate instructions using instructions() to ensure a deterministic order.
   for (const BasicBlock &BB : *F) {
     OS << "\n           " << BB.getName() << ":\n";
-    for (auto &I : BB.instructionsWithoutDebug()) {
+    for (const auto &I : BB.instructionsWithoutDebug()) {
       OS << (isDivergent(&I) ? "DIVERGENT:     " : "               ");
       OS << I << "\n";
     }
diff --git a/src/llvm-project/llvm/lib/Analysis/Lint.cpp b/src/llvm-project/llvm/lib/Analysis/Lint.cpp
index f9a7a5b..8b0f2a8 100644
--- a/src/llvm-project/llvm/lib/Analysis/Lint.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/Lint.cpp
@@ -44,7 +44,6 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -69,9 +68,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdint>
@@ -169,8 +166,8 @@
 };
 } // end anonymous namespace
 
-// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, ...)                                                         \
+// Check - We know that cond should be true; if not, print an error message.
+#define Check(C, ...)                                                          \
   do {                                                                         \
     if (!(C)) {                                                                \
       CheckFailed(__VA_ARGS__);                                                \
@@ -181,8 +178,8 @@
 void Lint::visitFunction(Function &F) {
   // This isn't undefined behavior, it's just a little unusual, and it's a
   // fairly common mistake to neglect to name a function.
-  Assert(F.hasName() || F.hasLocalLinkage(),
-         "Unusual: Unnamed function with non-local linkage", &F);
+  Check(F.hasName() || F.hasLocalLinkage(),
+        "Unusual: Unnamed function with non-local linkage", &F);
 
   // TODO: Check for irreducible control flow.
 }
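
Note: from here down, Lint's reporting macro is renamed from Assert to Check (mirroring the same rename in the IR Verifier), so it no longer suggests an assert(3)-style abort; the macro records a message via CheckFailed and bails out of the current visit while compilation continues. Usage keeps the same shape (Op and I below are placeholders):

    // Report a lint finding and stop checking this instruction; the
    // compilation itself continues.
    Check(!isa<UndefValue>(Op), "Undefined behavior: Undef operand", &I);
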
@@ -195,23 +192,23 @@
 
   if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                  /*OffsetOk=*/false))) {
-    Assert(I.getCallingConv() == F->getCallingConv(),
-           "Undefined behavior: Caller and callee calling convention differ",
-           &I);
+    Check(I.getCallingConv() == F->getCallingConv(),
+          "Undefined behavior: Caller and callee calling convention differ",
+          &I);
 
     FunctionType *FT = F->getFunctionType();
     unsigned NumActualArgs = I.arg_size();
 
-    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
-                          : FT->getNumParams() == NumActualArgs,
-           "Undefined behavior: Call argument count mismatches callee "
-           "argument count",
-           &I);
+    Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+                         : FT->getNumParams() == NumActualArgs,
+          "Undefined behavior: Call argument count mismatches callee "
+          "argument count",
+          &I);
 
-    Assert(FT->getReturnType() == I.getType(),
-           "Undefined behavior: Call return type mismatches "
-           "callee return type",
-           &I);
+    Check(FT->getReturnType() == I.getType(),
+          "Undefined behavior: Call return type mismatches "
+          "callee return type",
+          &I);
 
     // Check argument types (in case the callee was casted) and attributes.
     // TODO: Verify that caller and callee attributes are compatible.
@@ -221,10 +218,10 @@
       Value *Actual = *AI;
       if (PI != PE) {
         Argument *Formal = &*PI++;
-        Assert(Formal->getType() == Actual->getType(),
-               "Undefined behavior: Call argument type mismatches "
-               "callee parameter type",
-               &I);
+        Check(Formal->getType() == Actual->getType(),
+              "Undefined behavior: Call argument type mismatches "
+              "callee parameter type",
+              &I);
 
         // Check that noalias arguments don't alias other arguments. This is
         // not fully precise because we don't know the sizes of the dereferenced
@@ -232,7 +229,7 @@
         if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
           AttributeList PAL = I.getAttributes();
           unsigned ArgNo = 0;
-          for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
+          for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
             // Skip ByVal arguments since they will be memcpy'd to the callee's
             // stack so we're not really passing the pointer anyway.
             if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
@@ -242,9 +239,9 @@
               continue;
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
               AliasResult Result = AA->alias(*AI, *BI);
-              Assert(Result != AliasResult::MustAlias &&
-                         Result != AliasResult::PartialAlias,
-                     "Unusual: noalias argument aliases another argument", &I);
+              Check(Result != AliasResult::MustAlias &&
+                        Result != AliasResult::PartialAlias,
+                    "Unusual: noalias argument aliases another argument", &I);
             }
           }
         }
@@ -271,10 +268,10 @@
         if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
           continue;
         Value *Obj = findValue(Arg, /*OffsetOk=*/true);
-        Assert(!isa<AllocaInst>(Obj),
-               "Undefined behavior: Call with \"tail\" keyword references "
-               "alloca",
-               &I);
+        Check(!isa<AllocaInst>(Obj),
+              "Undefined behavior: Call with \"tail\" keyword references "
+              "alloca",
+              &I);
       }
     }
   }
@@ -302,9 +299,9 @@
                                               /*OffsetOk=*/false)))
         if (Len->getValue().isIntN(32))
           Size = LocationSize::precise(Len->getValue().getZExtValue());
-      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
-                 AliasResult::MustAlias,
-             "Undefined behavior: memcpy source and destination overlap", &I);
+      Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+                AliasResult::MustAlias,
+            "Undefined behavior: memcpy source and destination overlap", &I);
       break;
     }
     case Intrinsic::memcpy_inline: {
@@ -319,9 +316,9 @@
       // isn't expressive enough for what we really want to do. Known partial
       // overlap is not distinguished from the case where nothing is known.
       const LocationSize LS = LocationSize::precise(Size);
-      Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
-                 AliasResult::MustAlias,
-             "Undefined behavior: memcpy source and destination overlap", &I);
+      Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
+                AliasResult::MustAlias,
+            "Undefined behavior: memcpy source and destination overlap", &I);
       break;
     }
     case Intrinsic::memmove: {
@@ -338,11 +335,17 @@
                            MSI->getDestAlign(), nullptr, MemRef::Write);
       break;
     }
+    case Intrinsic::memset_inline: {
+      MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
+      visitMemoryReference(I, MemoryLocation::getForDest(MSII),
+                           MSII->getDestAlign(), nullptr, MemRef::Write);
+      break;
+    }
 
     case Intrinsic::vastart:
-      Assert(I.getParent()->getParent()->isVarArg(),
-             "Undefined behavior: va_start called in a non-varargs function",
-             &I);
+      Check(I.getParent()->getParent()->isVarArg(),
+            "Undefined behavior: va_start called in a non-varargs function",
+            &I);
 
       visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
                            nullptr, MemRef::Read | MemRef::Write);
@@ -367,20 +370,22 @@
       break;
     case Intrinsic::get_active_lane_mask:
       if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
-        Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 "
-               "must be greater than 0", &I);
+        Check(!TripCount->isZero(),
+              "get_active_lane_mask: operand #2 "
+              "must be greater than 0",
+              &I);
       break;
     }
 }
 
 void Lint::visitReturnInst(ReturnInst &I) {
   Function *F = I.getParent()->getParent();
-  Assert(!F->doesNotReturn(),
-         "Unusual: Return statement in function with noreturn attribute", &I);
+  Check(!F->doesNotReturn(),
+        "Unusual: Return statement in function with noreturn attribute", &I);
 
   if (Value *V = I.getReturnValue()) {
     Value *Obj = findValue(V, /*OffsetOk=*/true);
-    Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
+    Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
   }
 }
 
@@ -395,39 +400,39 @@
 
   Value *Ptr = const_cast<Value *>(Loc.Ptr);
   Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
-  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
-         "Undefined behavior: Null pointer dereference", &I);
-  Assert(!isa<UndefValue>(UnderlyingObject),
-         "Undefined behavior: Undef pointer dereference", &I);
-  Assert(!isa<ConstantInt>(UnderlyingObject) ||
-             !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
-         "Unusual: All-ones pointer dereference", &I);
-  Assert(!isa<ConstantInt>(UnderlyingObject) ||
-             !cast<ConstantInt>(UnderlyingObject)->isOne(),
-         "Unusual: Address one pointer dereference", &I);
+  Check(!isa<ConstantPointerNull>(UnderlyingObject),
+        "Undefined behavior: Null pointer dereference", &I);
+  Check(!isa<UndefValue>(UnderlyingObject),
+        "Undefined behavior: Undef pointer dereference", &I);
+  Check(!isa<ConstantInt>(UnderlyingObject) ||
+            !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
+        "Unusual: All-ones pointer dereference", &I);
+  Check(!isa<ConstantInt>(UnderlyingObject) ||
+            !cast<ConstantInt>(UnderlyingObject)->isOne(),
+        "Unusual: Address one pointer dereference", &I);
 
   if (Flags & MemRef::Write) {
     if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
-      Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
-             &I);
-    Assert(!isa<Function>(UnderlyingObject) &&
-               !isa<BlockAddress>(UnderlyingObject),
-           "Undefined behavior: Write to text section", &I);
+      Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+            &I);
+    Check(!isa<Function>(UnderlyingObject) &&
+              !isa<BlockAddress>(UnderlyingObject),
+          "Undefined behavior: Write to text section", &I);
   }
   if (Flags & MemRef::Read) {
-    Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
-           &I);
-    Assert(!isa<BlockAddress>(UnderlyingObject),
-           "Undefined behavior: Load from block address", &I);
+    Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+          &I);
+    Check(!isa<BlockAddress>(UnderlyingObject),
+          "Undefined behavior: Load from block address", &I);
   }
   if (Flags & MemRef::Callee) {
-    Assert(!isa<BlockAddress>(UnderlyingObject),
-           "Undefined behavior: Call to block address", &I);
+    Check(!isa<BlockAddress>(UnderlyingObject),
+          "Undefined behavior: Call to block address", &I);
   }
   if (Flags & MemRef::Branchee) {
-    Assert(!isa<Constant>(UnderlyingObject) ||
-               isa<BlockAddress>(UnderlyingObject),
-           "Undefined behavior: Branch to non-blockaddress", &I);
+    Check(!isa<Constant>(UnderlyingObject) ||
+              isa<BlockAddress>(UnderlyingObject),
+          "Undefined behavior: Branch to non-blockaddress", &I);
   }
 
   // Check for buffer overflows and misalignment.
@@ -461,17 +466,17 @@
 
     // Accesses from before the start or after the end of the object are not
     // defined.
-    Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
-               (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
-           "Undefined behavior: Buffer overflow", &I);
+    Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
+              (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
+          "Undefined behavior: Buffer overflow", &I);
 
     // Accesses that say that the memory is more aligned than it is are not
     // defined.
     if (!Align && Ty && Ty->isSized())
       Align = DL->getABITypeAlign(Ty);
     if (BaseAlign && Align)
-      Assert(*Align <= commonAlignment(*BaseAlign, Offset),
-             "Undefined behavior: Memory reference address is misaligned", &I);
+      Check(*Align <= commonAlignment(*BaseAlign, Offset),
+            "Undefined behavior: Memory reference address is misaligned", &I);
   }
 }
 
@@ -486,34 +491,34 @@
 }
 
 void Lint::visitXor(BinaryOperator &I) {
-  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
-         "Undefined result: xor(undef, undef)", &I);
+  Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+        "Undefined result: xor(undef, undef)", &I);
 }
 
 void Lint::visitSub(BinaryOperator &I) {
-  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
-         "Undefined result: sub(undef, undef)", &I);
+  Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+        "Undefined result: sub(undef, undef)", &I);
 }
 
 void Lint::visitLShr(BinaryOperator &I) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
                                                         /*OffsetOk=*/false)))
-    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
-           "Undefined result: Shift count out of range", &I);
+    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+          "Undefined result: Shift count out of range", &I);
 }
 
 void Lint::visitAShr(BinaryOperator &I) {
   if (ConstantInt *CI =
           dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
-    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
-           "Undefined result: Shift count out of range", &I);
+    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+          "Undefined result: Shift count out of range", &I);
 }
 
 void Lint::visitShl(BinaryOperator &I) {
   if (ConstantInt *CI =
           dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
-    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
-           "Undefined result: Shift count out of range", &I);
+    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+          "Undefined result: Shift count out of range", &I);
 }
 
 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
@@ -554,30 +559,30 @@
 }
 
 void Lint::visitSDiv(BinaryOperator &I) {
-  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
-         "Undefined behavior: Division by zero", &I);
+  Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+        "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitUDiv(BinaryOperator &I) {
-  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
-         "Undefined behavior: Division by zero", &I);
+  Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+        "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitSRem(BinaryOperator &I) {
-  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
-         "Undefined behavior: Division by zero", &I);
+  Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+        "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitURem(BinaryOperator &I) {
-  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
-         "Undefined behavior: Division by zero", &I);
+  Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+        "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitAllocaInst(AllocaInst &I) {
   if (isa<ConstantInt>(I.getArraySize()))
     // This isn't undefined behavior, it's just an obvious pessimization.
-    Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
-           "Pessimization: Static alloca outside of entry block", &I);
+    Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+          "Pessimization: Static alloca outside of entry block", &I);
 
   // TODO: Check for an unusual size (MSB set?)
 }
@@ -591,14 +596,14 @@
   visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None,
                        nullptr, MemRef::Branchee);
 
-  Assert(I.getNumDestinations() != 0,
-         "Undefined behavior: indirectbr with no destinations", &I);
+  Check(I.getNumDestinations() != 0,
+        "Undefined behavior: indirectbr with no destinations", &I);
 }
 
 void Lint::visitExtractElementInst(ExtractElementInst &I) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
                                                         /*OffsetOk=*/false)))
-    Assert(
+    Check(
         CI->getValue().ult(
             cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
         "Undefined result: extractelement index out of range", &I);
@@ -607,18 +612,18 @@
 void Lint::visitInsertElementInst(InsertElementInst &I) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
                                                         /*OffsetOk=*/false)))
-    Assert(CI->getValue().ult(
-               cast<FixedVectorType>(I.getType())->getNumElements()),
-           "Undefined result: insertelement index out of range", &I);
+    Check(CI->getValue().ult(
+              cast<FixedVectorType>(I.getType())->getNumElements()),
+          "Undefined result: insertelement index out of range", &I);
 }
 
 void Lint::visitUnreachableInst(UnreachableInst &I) {
   // This isn't undefined behavior, it's merely suspicious.
-  Assert(&I == &I.getParent()->front() ||
-             std::prev(I.getIterator())->mayHaveSideEffects(),
-         "Unusual: unreachable immediately preceded by instruction without "
-         "side effects",
-         &I);
+  Check(&I == &I.getParent()->front() ||
+            std::prev(I.getIterator())->mayHaveSideEffects(),
+        "Unusual: unreachable immediately preceded by instruction without "
+        "side effects",
+        &I);
 }
 
 /// findValue - Look through bitcasts and simple memory reference patterns
@@ -681,17 +686,12 @@
                                CE->getOperand(0)->getType(), CE->getType(),
                                *DL))
         return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
-    } else if (CE->getOpcode() == Instruction::ExtractValue) {
-      ArrayRef<unsigned> Indices = CE->getIndices();
-      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
-        if (W != V)
-          return findValueImpl(W, OffsetOk, Visited);
     }
   }
 
   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
+    if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
       return findValueImpl(W, OffsetOk, Visited);
   } else if (auto *C = dyn_cast<Constant>(V)) {
     Value *W = ConstantFoldConstant(C, *DL, TLI);
diff --git a/src/llvm-project/llvm/lib/Analysis/Loads.cpp b/src/llvm-project/llvm/lib/Analysis/Loads.cpp
index cd0d4d6..938d950 100644
--- a/src/llvm-project/llvm/lib/Analysis/Loads.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/Loads.cpp
@@ -13,19 +13,14 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 
@@ -410,7 +405,10 @@
                                        Instruction *ScanFrom,
                                        const DominatorTree *DT,
                                        const TargetLibraryInfo *TLI) {
-  APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+  TypeSize TySize = DL.getTypeStoreSize(Ty);
+  if (TySize.isScalable())
+    return false;
+  APInt Size(DL.getIndexTypeSizeInBits(V->getType()), TySize.getFixedValue());
   return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT, TLI);
 }
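
Note: getTypeStoreSize returns a TypeSize, which is scalable for types like <vscale x 4 x i32>; converting a scalable size to APInt via getFixedValue() would assert, so the query now conservatively answers false first. The guard pattern, as a sketch:

    TypeSize TS = DL.getTypeStoreSize(Ty);
    if (TS.isScalable())
      return false;                        // size not known at compile time
    uint64_t Bytes = TS.getFixedValue();   // safe: Ty has a fixed size
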
 
@@ -509,8 +507,8 @@
     if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
       return Val;
 
-    TypeSize StoreSize = DL.getTypeStoreSize(Val->getType());
-    TypeSize LoadSize = DL.getTypeStoreSize(AccessTy);
+    TypeSize StoreSize = DL.getTypeSizeInBits(Val->getType());
+    TypeSize LoadSize = DL.getTypeSizeInBits(AccessTy);
     if (TypeSize::isKnownLE(LoadSize, StoreSize))
       if (auto *C = dyn_cast<Constant>(Val))
         return ConstantFoldLoadFromConst(C, AccessTy, DL);
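
Note: switching the guard from getTypeStoreSize to getTypeSizeInBits makes the containment test bit-exact: store sizes are rounded up to whole bytes, so i1 and i8 both have a one-byte store size even though only one bit of the i1 is defined. A sketch of the distinction:

    TypeSize StoreBits = DL.getTypeSizeInBits(StoredTy); // i1 -> 1
    TypeSize LoadBits  = DL.getTypeSizeInBits(LoadTy);   // i8 -> 8
    // The bit-based test rejects forwarding an i1 store to an i8 load,
    // where the byte-based test would have accepted it.
    bool CanForward = TypeSize::isKnownLE(LoadBits, StoreBits);
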
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2ab78d2..8311b48 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -47,6 +47,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
@@ -60,12 +61,12 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <cstdlib>
 #include <iterator>
 #include <utility>
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "loop-accesses"
 
@@ -129,6 +130,11 @@
     cl::desc("Enable conflict detection in loop-access analysis"),
     cl::init(true));
 
+static cl::opt<unsigned> MaxForkedSCEVDepth(
+    "max-forked-scev-depth", cl::Hidden,
+    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
+    cl::init(5));
+
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
 }
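
Note: MaxForkedSCEVDepth bounds the recursion of findForkedSCEVs, the new walk added near the end of this file that decomposes select-fed GEPs into multiple candidate SCEVs. The option is threaded in as the initial Depth; a hedged sketch of the call shape (the real call site is elsewhere in this patch):

    SmallVector<std::pair<const SCEV *, bool>, 2> Scevs;
    findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
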
@@ -172,7 +178,8 @@
     : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
       AddressSpace(RtCheck.Pointers[Index]
                        .PointerValue->getType()
-                       ->getPointerAddressSpace()) {
+                       ->getPointerAddressSpace()),
+      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
   Members.push_back(Index);
 }
 
@@ -189,21 +196,20 @@
 ///
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+                                    Type *AccessTy, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides,
-                                    PredicatedScalarEvolution &PSE) {
-  // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+                                    PredicatedScalarEvolution &PSE,
+                                    bool NeedsFreeze) {
   ScalarEvolution *SE = PSE.getSE();
 
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
-  if (SE->isLoopInvariant(Sc, Lp)) {
-    ScStart = ScEnd = Sc;
+  if (SE->isLoopInvariant(PtrExpr, Lp)) {
+    ScStart = ScEnd = PtrExpr;
   } else {
-    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
     assert(AR && "Invalid addrec expression");
     const SCEV *Ex = PSE.getBackedgeTakenCount();
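
Note: insert() now receives the pointer's SCEV (PtrExpr) and the access type from the caller instead of recomputing them: with opaque pointers the element type can no longer be read off Ptr->getType(), and with forked pointers one Value can map to several candidate SCEVs. The NeedsFreeze flag records whether the expression may be poison or undef and so must be frozen before use in a runtime check. A hypothetical caller (all names illustrative):

    // One runtime-check entry per candidate SCEV for this pointer.
    for (const auto &P : TranslatedPtrs)   // {const SCEV *, bool NeedsFreeze}
      RtCheck.insert(TheLoop, Ptr, P.first, AccessTy, IsWrite,
                     DepId, ASId, PSE, P.second);
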
 
@@ -227,15 +233,103 @@
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
   Type *IdxTy = DL.getIndexType(Ptr->getType());
-  const SCEV *EltSizeSCEV =
-      SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
+                        NeedsFreeze);
 }
 
-SmallVector<RuntimePointerCheck, 4>
-RuntimePointerChecking::generateChecks() const {
+void RuntimePointerChecking::tryToCreateDiffCheck(
+    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
+  if (!CanUseDiffCheck)
+    return;
+
+  // If either group contains multiple different pointers, bail out.
+  // TODO: Support multiple pointers by using the minimum or maximum pointer,
+  // depending on src & sink.
+  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  PointerInfo *Src = &Pointers[CGI.Members[0]];
+  PointerInfo *Sink = &Pointers[CGJ.Members[0]];
+
+  // If either pointer is read and written, multiple checks may be needed. Bail
+  // out.
+  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
+      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  ArrayRef<unsigned> AccSrc =
+      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
+  ArrayRef<unsigned> AccSink =
+      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
+  // If either pointer is accessed multiple times, there may not be a clear
+  // src/sink relation. Bail out for now.
+  if (AccSrc.size() != 1 || AccSink.size() != 1) {
+    CanUseDiffCheck = false;
+    return;
+  }
+  // If the sink is accessed before src, swap src/sink.
+  if (AccSink[0] < AccSrc[0])
+    std::swap(Src, Sink);
+
+  auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
+  auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
+  if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
+      SinkAR->getLoop() != DC.getInnermostLoop()) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  const DataLayout &DL =
+      SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
+  SmallVector<Instruction *, 4> SrcInsts =
+      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
+  SmallVector<Instruction *, 4> SinkInsts =
+      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
+  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
+  Type *DstTy = getLoadStoreType(SinkInsts[0]);
+  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
+    CanUseDiffCheck = false;
+    return;
+  }
+  unsigned AllocSize =
+      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
+  IntegerType *IntTy =
+      IntegerType::get(Src->PointerValue->getContext(),
+                       DL.getPointerSizeInBits(CGI.AddressSpace));
+
+  // Only constant steps matching the AllocSize are supported at the moment;
+  // this simplifies the difference computation. It can be extended in the
+  // future.
+  auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
+  if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
+      Step->getAPInt().abs() != AllocSize) {
+    CanUseDiffCheck = false;
+    return;
+  }
+
+  // When counting down, the dependence distance needs to be swapped.
+  if (Step->getValue()->isNegative())
+    std::swap(SinkAR, SrcAR);
+
+  const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
+  const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
+  if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
+      isa<SCEVCouldNotCompute>(SrcStartInt)) {
+    CanUseDiffCheck = false;
+    return;
+  }
+  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
+                          Src->NeedsFreeze || Sink->NeedsFreeze);
+}
+
+SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
   SmallVector<RuntimePointerCheck, 4> Checks;
 
   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
@@ -243,8 +337,10 @@
       const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
       const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
 
-      if (needsChecking(CGI, CGJ))
+      if (needsChecking(CGI, CGJ)) {
+        tryToCreateDiffCheck(CGI, CGJ);
         Checks.push_back(std::make_pair(&CGI, &CGJ));
+      }
     }
   }
   return Checks;
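
Note: tryToCreateDiffCheck collects a cheaper alternative predicate for suitable src/sink pairs: when both groups are a single pointer striding by the same constant AllocSize, the usual two-sided interval overlap test can be replaced by one pointer-difference comparison. Conceptually (a sketch of the shape of the checks, not the literal expansion code):

    // Classic check per pair: two comparisons.
    bool MayAlias = Src.Start < Sink.End && Sink.Start < Src.End;
    // Diff check: one comparison against the distance the accesses cover.
    // (Sketch only; the real expansion works on the SCEV start values
    // computed above, scaled by AllocSize and the vectorized trip count.)
    bool NeedRuntimeFallback = (SinkStart - SrcStart) < CoveredBytes;
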
@@ -285,11 +381,12 @@
   return addPointer(
       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
-      *RtCheck.SE);
+      RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
 }
 
 bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                          const SCEV *End, unsigned AS,
+                                         bool NeedsFreeze,
                                          ScalarEvolution &SE) {
   assert(AddressSpace == AS &&
          "all pointers in a checking group must be in the same address space");
@@ -314,6 +411,7 @@
     High = End;
 
   Members.push_back(Index);
+  this->NeedsFreeze |= NeedsFreeze;
   return true;
 }
 
@@ -371,9 +469,11 @@
 
   unsigned TotalComparisons = 0;
 
-  DenseMap<Value *, unsigned> PositionMap;
-  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
-    PositionMap[Pointers[Index].PointerValue] = Index;
+  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+    Iter.first->second.push_back(Index);
+  }
 
   // We need to keep track of what pointers we've already seen so we
   // don't process them twice.
@@ -404,34 +504,35 @@
       auto PointerI = PositionMap.find(MI->getPointer());
       assert(PointerI != PositionMap.end() &&
              "pointer in equivalence class not found in PositionMap");
-      unsigned Pointer = PointerI->second;
-      bool Merged = false;
-      // Mark this pointer as seen.
-      Seen.insert(Pointer);
+      for (unsigned Pointer : PointerI->second) {
+        bool Merged = false;
+        // Mark this pointer as seen.
+        Seen.insert(Pointer);
 
-      // Go through all the existing sets and see if we can find one
-      // which can include this pointer.
-      for (RuntimeCheckingPtrGroup &Group : Groups) {
-        // Don't perform more than a certain amount of comparisons.
-        // This should limit the cost of grouping the pointers to something
-        // reasonable.  If we do end up hitting this threshold, the algorithm
-        // will create separate groups for all remaining pointers.
-        if (TotalComparisons > MemoryCheckMergeThreshold)
-          break;
+        // Go through all the existing sets and see if we can find one
+        // which can include this pointer.
+        for (RuntimeCheckingPtrGroup &Group : Groups) {
+          // Don't perform more than a certain amount of comparisons.
+          // This should limit the cost of grouping the pointers to something
+          // reasonable.  If we do end up hitting this threshold, the algorithm
+          // will create separate groups for all remaining pointers.
+          if (TotalComparisons > MemoryCheckMergeThreshold)
+            break;
 
-        TotalComparisons++;
+          TotalComparisons++;
 
-        if (Group.addPointer(Pointer, *this)) {
-          Merged = true;
-          break;
+          if (Group.addPointer(Pointer, *this)) {
+            Merged = true;
+            break;
+          }
         }
-      }
 
-      if (!Merged)
-        // We couldn't add this pointer to any existing set or the threshold
-        // for the number of comparisons has been reached. Create a new group
-        // to hold the current pointer.
-        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+        if (!Merged)
+          // We couldn't add this pointer to any existing set or the threshold
+          // for the number of comparisons has been reached. Create a new group
+          // to hold the current pointer.
+          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+      }
     }
 
     // We've computed the grouped checks for this partition.
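
Note: because the same pointer value can now appear in Pointers more than once (one entry per access type/SCEV), PositionMap maps each value to all of its indices and the grouping loop above walks every index. The new shape, in brief:

    DenseMap<Value *, SmallVector<unsigned>> PositionMap;
    for (unsigned I = 0; I != Pointers.size(); ++I)
      PositionMap[Pointers[I].PointerValue].push_back(I); // may hold > 1 index
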
@@ -522,19 +623,19 @@
       : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
 
   /// Register a load and whether it is only read from.
-  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses.insert(MemAccessInfo(Ptr, false));
+    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
     if (IsReadOnly)
       ReadOnlyPtr.insert(Ptr);
   }
 
   /// Register a store.
-  void addStore(MemoryLocation &Loc) {
+  void addStore(MemoryLocation &Loc, Type *AccessTy) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses.insert(MemAccessInfo(Ptr, true));
+    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
   }
 
   /// Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +646,11 @@
   /// we will attempt to use additional run-time checks in order to get
   /// the bounds of the pointer.
   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
-                            MemAccessInfo Access,
+                            MemAccessInfo Access, Type *AccessTy,
                             const ValueToValueMap &Strides,
                             DenseMap<Value *, unsigned> &DepSetId,
                             Loop *TheLoop, unsigned &RunningDepId,
-                            unsigned ASId, bool ShouldCheckStride,
-                            bool Assume);
+                            unsigned ASId, bool ShouldCheckStride, bool Assume);
 
   /// Check whether we can check the pointers at runtime for
   /// non-intersection.
@@ -559,7 +659,7 @@
   /// (i.e. the pointers have computable bounds).
   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                        Loop *TheLoop, const ValueToValueMap &Strides,
-                       bool ShouldCheckWrap = false);
+                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -583,14 +683,15 @@
   MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
 
 private:
-  typedef SetVector<MemAccessInfo> PtrAccessSet;
+  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
   /// Go over all memory access and check whether runtime pointer checks
   /// are needed and build sets of dependency check candidates.
   void processMemAccesses();
 
-  /// Set of all accesses.
-  PtrAccessSet Accesses;
+  /// Map of all accesses. Values are the types used to access memory pointed to
+  /// by the pointer.
+  PtrAccessMap Accesses;
 
   /// The loop being checked.
   const Loop *TheLoop;
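
Note: Accesses becomes a map keyed by (pointer, is-write) whose values are every type used to access that pointer; with opaque pointers, Ptr->getType()->getPointerElementType() is going away, so the access type must be carried explicitly (see addLoad/addStore above and the isNoWrap change below). In brief, assuming MemAccessInfo is LAA's PointerIntPair of (Value *, IsWrite):

    MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> Accesses;
    Accesses[MemAccessInfo(Ptr, /*IsWrite=*/true)].insert(AccessTy);
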
@@ -630,11 +731,8 @@
 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
-                                const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, bool Assume) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+                                const SCEV *PtrScev, Loop *L, bool Assume) {
   // The bounds for loop-invariant pointer is trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
@@ -652,12 +750,12 @@
 
 /// Check whether a pointer address cannot wrap.
 static bool isNoWrap(PredicatedScalarEvolution &PSE,
-                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
+                     const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+                     Loop *L) {
   const SCEV *PtrScev = PSE.getSCEV(Ptr);
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
 
-  Type *AccessTy = Ptr->getType()->getPointerElementType();
   int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
   if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
     return true;
@@ -688,8 +786,142 @@
   }
 }
 
+// Walk back through the IR for a pointer, looking for a select like the
+// following:
+//
+//  %offset = select i1 %cmp, i64 %a, i64 %b
+//  %addr = getelementptr double, double* %base, i64 %offset
+//  %ld = load double, double* %addr, align 8
+//
+// We won't be able to form a single SCEVAddRecExpr from this since the
+// address for each loop iteration depends on %cmp. We could potentially
+// produce multiple valid SCEVAddRecExprs, though, and check all of them for
+// memory safety/aliasing if needed.
+//
+// If we encounter some IR we don't yet handle, or something obviously fine
+// like a constant, then we just add the SCEV for that term to the list passed
+// in by the caller. If we have a node that may potentially yield a valid
+// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
+// ourselves before adding to the list.
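+//
+// As an illustrative sketch (hypothetical values, not drawn from a real test
+// case), the example above would be decomposed into two candidate SCEVs, one
+// per select arm:
+//   (%base + 8 * %a)  and  (%base + 8 * %b)
+// so that later runtime checks can bound both possible address streams.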
+static void
+findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
+                SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
+                unsigned Depth) {
+  // If the SCEV for our Value is already a SCEVAddRecExpr, if the Value is
+  // loop invariant or not an instruction, or if we've exceeded the recursion
+  // limit, just return whatever we have regardless of whether it can be used
+  // for a forked pointer or not, along with an indication of whether it might
+  // be a poison or undef value.
+  const SCEV *Scev = SE->getSCEV(Ptr);
+  if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
+      !isa<Instruction>(Ptr) || Depth == 0) {
+    ScevList.push_back(
+        std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+    return;
+  }
+
+  Depth--;
+
+  auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
+    return S.second;
+  };
+
+  Instruction *I = cast<Instruction>(Ptr);
+  unsigned Opcode = I->getOpcode();
+  switch (Opcode) {
+  case Instruction::GetElementPtr: {
+    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+    Type *SourceTy = GEP->getSourceElementType();
+    // We only handle base + single offset GEPs here for now.
+    // Not dealing with preexisting gathers yet, so no vectors.
+    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
+      ScevList.push_back(
+          std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
+      break;
+    }
+    SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
+    SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
+    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
+
+    // See if we need to freeze our fork...
+    bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
+                       any_of(OffsetScevs, UndefPoisonCheck);
+
+    // Check that we only have a single fork, on either the base or the offset.
+    // Copy the SCEV across for the one without a fork in order to generate
+    // the full SCEV for both sides of the GEP.
+    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
+      BaseScevs.push_back(BaseScevs[0]);
+    else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
+      OffsetScevs.push_back(OffsetScevs[0]);
+    else {
+      ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
+      break;
+    }
+
+    // Find the pointer type we need to extend to.
+    Type *IntPtrTy = SE->getEffectiveSCEVType(
+        SE->getSCEV(GEP->getPointerOperand())->getType());
+
+    // Find the size of the type being pointed to. We only have a single
+    // index term (guarded above) so we don't need to index into arrays or
+    // structures, just get the size of the scalar value.
+    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
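+    // (E.g., for `getelementptr double, double* %p, i64 %off` - a
+    // hypothetical illustration - Size would be the SCEV constant 8.)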
+
+    // Scale up the offsets by the size of the type, then add to the bases.
+    const SCEV *Scaled1 = SE->getMulExpr(
+        Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
+    const SCEV *Scaled2 = SE->getMulExpr(
+        Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
+    ScevList.push_back(std::make_pair(
+        SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
+    ScevList.push_back(std::make_pair(
+        SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
+    break;
+  }
+  case Instruction::Select: {
+    SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
+    // A select means we've found a forked pointer, but we currently only
+    // support a single select per pointer, so if there's another one behind
+    // this we just bail out and return the generic SCEV.
+    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
+    if (ChildScevs.size() == 2) {
+      ScevList.push_back(ChildScevs[0]);
+      ScevList.push_back(ChildScevs[1]);
+    } else
+      ScevList.push_back(
+          std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+    break;
+  }
+  default:
+    // Just return the current SCEV if we haven't handled the instruction yet.
+    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
+    ScevList.push_back(
+        std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+    break;
+  }
+}
+
+static SmallVector<std::pair<const SCEV *, bool>>
+findForkedPointer(PredicatedScalarEvolution &PSE,
+                  const ValueToValueMap &StridesMap, Value *Ptr,
+                  const Loop *L) {
+  ScalarEvolution *SE = PSE.getSE();
+  assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
+  SmallVector<std::pair<const SCEV *, bool>> Scevs;
+  findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
+
+  // For now, we will only accept a forked pointer with two possible SCEVs.
+  if (Scevs.size() == 2)
+    return Scevs;
+
+  return {
+      std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
+}
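+// Note: callers treat a two-element result as a fork and emit runtime checks
+// for both candidate SCEVs; a single-element result falls back to the usual
+// symbolic-stride translation of the pointer.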
+
 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
-                                          MemAccessInfo Access,
+                                          MemAccessInfo Access, Type *AccessTy,
                                           const ValueToValueMap &StridesMap,
                                           DenseMap<Value *, unsigned> &DepSetId,
                                           Loop *TheLoop, unsigned &RunningDepId,
@@ -697,42 +929,64 @@
                                           bool Assume) {
   Value *Ptr = Access.getPointer();
 
-  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
-    return false;
+  SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
+      findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
 
-  // When we run after a failing dependency check we have to make sure
-  // we don't have wrapping pointers.
-  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
-    auto *Expr = PSE.getSCEV(Ptr);
-    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+  for (auto &P : TranslatedPtrs) {
+    const SCEV *PtrExpr = P.first;
+    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
       return false;
-    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+
+    // When we run after a failing dependency check we have to make sure
+    // we don't have wrapping pointers.
+    if (ShouldCheckWrap) {
+      // Skip wrap checking when translating pointers.
+      if (TranslatedPtrs.size() > 1)
+        return false;
+
+      if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
+        auto *Expr = PSE.getSCEV(Ptr);
+        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+          return false;
+        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+      }
+    }
+    // If there's only one option for Ptr, look it up after bounds and wrap
+    // checking, because assumptions might have been added to PSE.
+    if (TranslatedPtrs.size() == 1)
+      TranslatedPtrs[0] = std::make_pair(
+          replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
   }
 
-  // The id of the dependence set.
-  unsigned DepId;
+  for (auto &P : TranslatedPtrs) {
+    const SCEV *PtrExpr = P.first;
 
-  if (isDependencyCheckNeeded()) {
-    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
-    unsigned &LeaderId = DepSetId[Leader];
-    if (!LeaderId)
-      LeaderId = RunningDepId++;
-    DepId = LeaderId;
-  } else
-    // Each access has its own dependence set.
-    DepId = RunningDepId++;
+    // The id of the dependence set.
+    unsigned DepId;
 
-  bool IsWrite = Access.getInt();
-  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+    if (isDependencyCheckNeeded()) {
+      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+      unsigned &LeaderId = DepSetId[Leader];
+      if (!LeaderId)
+        LeaderId = RunningDepId++;
+      DepId = LeaderId;
+    } else
+      // Each access has its own dependence set.
+      DepId = RunningDepId++;
+
+    bool IsWrite = Access.getInt();
+    RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
+                   P.second);
+    LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+  }
 
   return true;
- }
+}
 
 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                      ScalarEvolution *SE, Loop *TheLoop,
                                      const ValueToValueMap &StridesMap,
-                                     bool ShouldCheckWrap) {
+                                     Value *&UncomputablePtr, bool ShouldCheckWrap) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -756,7 +1010,7 @@
     unsigned RunningDepId = 1;
     DenseMap<Value *, unsigned> DepSetId;
 
-    SmallVector<MemAccessInfo, 4> Retries;
+    SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
 
     // First, count how many write and read accesses are in the alias set. Also
     // collect MemAccessInfos for later.
@@ -788,12 +1042,15 @@
     }
 
     for (auto &Access : AccessInfos) {
-      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
-                                RunningDepId, ASId, ShouldCheckWrap, false)) {
-        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
-                          << *Access.getPointer() << '\n');
-        Retries.push_back(Access);
-        CanDoAliasSetRT = false;
+      for (const auto &AccessTy : Accesses[Access]) {
+        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+                                  DepSetId, TheLoop, RunningDepId, ASId,
+                                  ShouldCheckWrap, false)) {
+          LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+                            << *Access.getPointer() << '\n');
+          Retries.push_back({Access, AccessTy});
+          CanDoAliasSetRT = false;
+        }
       }
     }
 
@@ -815,13 +1072,17 @@
       // We know that we need these checks, so we can now be more aggressive
       // and add further checks if required (overflow checks).
       CanDoAliasSetRT = true;
-      for (auto Access : Retries)
-        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
-                                  TheLoop, RunningDepId, ASId,
+      for (auto Retry : Retries) {
+        MemAccessInfo Access = Retry.first;
+        Type *AccessTy = Retry.second;
+        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+                                  DepSetId, TheLoop, RunningDepId, ASId,
                                   ShouldCheckWrap, /*Assume=*/true)) {
           CanDoAliasSetRT = false;
+          UncomputablePtr = Access.getPointer();
           break;
         }
+      }
     }
 
     CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +1147,12 @@
   LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
   LLVM_DEBUG({
     for (auto A : Accesses)
-      dbgs() << "\t" << *A.getPointer() << " (" <<
-                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
-                                         "read-only" : "read")) << ")\n";
+      dbgs() << "\t" << *A.first.getPointer() << " ("
+             << (A.first.getInt()
+                     ? "write"
+                     : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
+                                                                : "read"))
+             << ")\n";
   });
 
   // The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,13 +1171,13 @@
     UnderlyingObjToAccessMap ObjToLastAccess;
 
     // Set of accesses to check after all writes have been processed.
-    PtrAccessSet DeferredAccesses;
+    PtrAccessMap DeferredAccesses;
 
     // Iterate over each alias set twice, once to process read/write pointers,
     // and then to process read-only pointers.
     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
       bool UseDeferred = SetIteration > 0;
-      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
 
       for (const auto &AV : AS) {
         Value *Ptr = AV.getValue();
@@ -921,10 +1185,10 @@
         // For a single memory access in AliasSetTracker, Accesses may contain
         // both read and write, and they both need to be handled for CheckDeps.
         for (const auto &AC : S) {
-          if (AC.getPointer() != Ptr)
+          if (AC.first.getPointer() != Ptr)
             continue;
 
-          bool IsWrite = AC.getInt();
+          bool IsWrite = AC.first.getInt();
 
           // If we're using the deferred access set, then it contains only
           // reads.
@@ -946,7 +1210,9 @@
           // consecutive as "read-only" pointers (so that we check
           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
           if (!UseDeferred && IsReadOnlyPtr) {
-            DeferredAccesses.insert(Access);
+            // We only use the pointer keys; the type vector values don't
+            // matter.
+            DeferredAccesses.insert({Access, {}});
             continue;
           }
 
@@ -1235,9 +1501,7 @@
   Value *Ptr0 = VL[0];
 
   using DistOrdPair = std::pair<int64_t, int>;
-  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
-    return L.first < R.first;
-  };
+  auto Compare = llvm::less_first();
   std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
   Offsets.emplace(0, 0);
   int Cnt = 1;
@@ -1445,13 +1709,13 @@
 
   const SCEV *CastedDist = &Dist;
   const SCEV *CastedProduct = Product;
-  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
-  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
+  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
   // backedgeTakenCount is non-negative, so we zero extend Product.
-  if (DistTypeSize > ProductTypeSize)
+  if (DistTypeSizeBits > ProductTypeSizeBits)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
@@ -1518,8 +1782,8 @@
   Value *BPtr = B.getPointer();
   bool AIsWrite = A.getInt();
   bool BIsWrite = B.getInt();
-  Type *ATy = APtr->getType()->getPointerElementType();
-  Type *BTy = BPtr->getType()->getPointerElementType();
+  Type *ATy = getLoadStoreType(InstMap[AIdx]);
+  Type *BTy = getLoadStoreType(InstMap[BIdx]);
 
   // Two reads are independent.
   if (!AIsWrite && !BIsWrite)
@@ -1842,8 +2106,6 @@
 void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
                                  const TargetLibraryInfo *TLI,
                                  DominatorTree *DT) {
-  typedef SmallPtrSet<Value*, 16> ValueSet;
-
   // Holds the Load and Store instructions.
   SmallVector<LoadInst *, 16> Loads;
   SmallVector<StoreInst *, 16> Stores;
@@ -1975,22 +2237,26 @@
   // for read and once for write, it will only appear once (on the write
   // list). This is okay, since we are going to check for conflicts between
   // writes and between reads and writes, but not between reads and reads.
-  ValueSet Seen;
+  SmallSet<std::pair<Value *, Type *>, 16> Seen;
 
   // Record uniform store addresses to identify if we have multiple stores
   // to the same address.
-  ValueSet UniformStores;
+  SmallPtrSet<Value *, 16> UniformStores;
 
   for (StoreInst *ST : Stores) {
     Value *Ptr = ST->getPointerOperand();
 
-    if (isUniform(Ptr))
+    if (isUniform(Ptr)) {
+      // Record store instructions to loop-invariant addresses.
+      StoresToInvariantAddresses.push_back(ST);
       HasDependenceInvolvingLoopInvariantAddress |=
           !UniformStores.insert(Ptr).second;
+    }
 
     // If we did *not* see this pointer before, insert it into the read-write
     // list. At this phase it is only a 'write' list.
-    if (Seen.insert(Ptr).second) {
+    Type *AccessTy = getLoadStoreType(ST);
+    if (Seen.insert({Ptr, AccessTy}).second) {
       ++NumReadWrites;
 
       MemoryLocation Loc = MemoryLocation::get(ST);
@@ -2001,9 +2267,9 @@
         Loc.AATags.TBAA = nullptr;
 
       visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                    [&Accesses, Loc](Value *Ptr) {
+                    [&Accesses, AccessTy, Loc](Value *Ptr) {
                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                      Accesses.addStore(NewLoc);
+                      Accesses.addStore(NewLoc, AccessTy);
                     });
     }
   }
@@ -2027,7 +2293,8 @@
     // read a few words, modify, and write a few words, and some of the
     // words may be written to the same address.
     bool IsReadOnlyPtr = false;
-    if (Seen.insert(Ptr).second ||
+    Type *AccessTy = getLoadStoreType(LD);
+    if (Seen.insert({Ptr, AccessTy}).second ||
         !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
       ++NumReads;
       IsReadOnlyPtr = true;
@@ -2049,9 +2316,9 @@
       Loc.AATags.TBAA = nullptr;
 
     visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                  [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+                  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                    Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
                   });
   }
 
@@ -2069,10 +2336,14 @@
 
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
-  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
-                                                  TheLoop, SymbolicStrides);
+  Value *UncomputablePtr = nullptr;
+  bool CanDoRTIfNeeded =
+      Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
+                               SymbolicStrides, UncomputablePtr, false);
   if (!CanDoRTIfNeeded) {
-    recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
+    auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+    recordAnalysis("CantIdentifyArrayBounds", I) 
+        << "cannot identify array bounds";
     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                       << "the array bounds.\n");
     CanVecMem = false;
@@ -2099,12 +2370,14 @@
       PtrRtChecking->Need = true;
 
       auto *SE = PSE->getSE();
-      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
-                                                 SymbolicStrides, true);
+      UncomputablePtr = nullptr;
+      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
+          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
 
       // Check that we found the bounds for the pointer.
       if (!CanDoRTIfNeeded) {
-        recordAnalysis("CantCheckMemDepsAtRunTime")
+        auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+        recordAnalysis("CantCheckMemDepsAtRunTime", I)
             << "cannot check memory dependencies at runtime";
         LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
         CanVecMem = false;
@@ -2129,13 +2402,61 @@
         dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
                << (PtrRtChecking->Need ? "" : " don't")
                << " need runtime memory checks.\n");
-  else {
-    recordAnalysis("UnsafeMemDep")
-        << "unsafe dependent memory operations in loop. Use "
-           "#pragma loop distribute(enable) to allow loop distribution "
-           "to attempt to isolate the offending operations into a separate "
-           "loop";
-    LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+  else
+    emitUnsafeDependenceRemark();
+}
+
+void LoopAccessInfo::emitUnsafeDependenceRemark() {
+  auto Deps = getDepChecker().getDependences();
+  if (!Deps)
+    return;
+  auto Found = std::find_if(
+      Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
+        return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
+               MemoryDepChecker::VectorizationSafetyStatus::Safe;
+      });
+  if (Found == Deps->end())
+    return;
+  MemoryDepChecker::Dependence Dep = *Found;
+
+  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+
+  // Emit remark for first unsafe dependence
+  OptimizationRemarkAnalysis &R =
+      recordAnalysis("UnsafeDep", Dep.getDestination(*this))
+      << "unsafe dependent memory operations in loop. Use "
+         "#pragma loop distribute(enable) to allow loop distribution "
+         "to attempt to isolate the offending operations into a separate "
+         "loop";
+
+  switch (Dep.Type) {
+  case MemoryDepChecker::Dependence::NoDep:
+  case MemoryDepChecker::Dependence::Forward:
+  case MemoryDepChecker::Dependence::BackwardVectorizable:
+    llvm_unreachable("Unexpected dependence");
+  case MemoryDepChecker::Dependence::Backward:
+    R << "\nBackward loop carried data dependence.";
+    break;
+  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
+    R << "\nForward loop carried data dependence that prevents "
+         "store-to-load forwarding.";
+    break;
+  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
+    R << "\nBackward loop carried data dependence that prevents "
+         "store-to-load forwarding.";
+    break;
+  case MemoryDepChecker::Dependence::Unknown:
+    R << "\nUnknown data dependence.";
+    break;
+  }
+
+  if (Instruction *I = Dep.getSource(*this)) {
+    DebugLoc SourceLoc = I->getDebugLoc();
+    if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
+      SourceLoc = DD->getDebugLoc();
+    if (SourceLoc)
+      R << " Memory location is the same as accessed at "
+        << ore::NV("Location", SourceLoc);
   }
 }
 
@@ -2212,12 +2533,12 @@
   // The Stride can be positive/negative, so we sign extend Stride;
   // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
-  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
+  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
   const SCEV *CastedStride = StrideExpr;
   const SCEV *CastedBECount = BETakenCount;
   ScalarEvolution *SE = PSE->getSE();
-  if (BETypeSize >= StrideTypeSize)
+  if (BETypeSizeBits >= StrideTypeSizeBits)
     CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
   else
     CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
@@ -2232,7 +2553,7 @@
                   "at most once.\n");
     return;
   }
-  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
 
   SymbolicStrides[Ptr] = Stride;
   StrideSet.insert(Stride);
@@ -2242,10 +2563,12 @@
                                const TargetLibraryInfo *TLI, AAResults *AA,
                                DominatorTree *DT, LoopInfo *LI)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
-      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
+      PtrRtChecking(nullptr),
       DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
-  if (canAnalyzeLoop())
+  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
+  if (canAnalyzeLoop()) {
     analyzeLoop(AA, LI, TLI, DT);
+  }
 }
 
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2267,7 +2590,7 @@
 
   if (auto *Dependences = DepChecker->getDependences()) {
     OS.indent(Depth) << "Dependences:\n";
-    for (auto &Dep : *Dependences) {
+    for (const auto &Dep : *Dependences) {
       Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
       OS << "\n";
     }
@@ -2283,7 +2606,7 @@
                    << "found in loop.\n";
 
   OS.indent(Depth) << "SCEV assumptions:\n";
-  PSE->getUnionPredicate().print(OS, Depth);
+  PSE->getPredicate().print(OS, Depth);
 
   OS << "\n";
 
@@ -2301,7 +2624,7 @@
   if (!LAI)
     LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
 
-  return *LAI.get();
+  return *LAI;
 }
 
 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp b/src/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 4d6f8a6..8d71b31 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -8,12 +8,9 @@
 
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/PassManagerImpl.h"
 
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index ba014bd..85f2dad 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -103,14 +103,24 @@
   return StepRec == &ElemSize;
 }
 
-/// Compute the trip count for the given loop \p L. Return the SCEV expression
-/// for the trip count or nullptr if it cannot be computed.
-static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
+/// Compute the trip count for the given loop \p L or assume a default value if
+/// it is not a compile time constant. Return the SCEV expression for the trip
+/// count.
+static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
+                                    ScalarEvolution &SE) {
   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      !isa<SCEVConstant>(BackedgeTakenCount))
-    return nullptr;
-  return SE.getTripCountFromExitCount(BackedgeTakenCount);
+  const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+                           isa<SCEVConstant>(BackedgeTakenCount))
+                              ? SE.getTripCountFromExitCount(BackedgeTakenCount)
+                              : nullptr;
+
+  if (!TripCount) {
+    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
+               << " could not be computed, using DefaultTripCount\n");
+    TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
+  }
+
+  return TripCount;
 }
 
 //===----------------------------------------------------------------------===//
@@ -274,27 +284,19 @@
     return 1;
   }
 
-  const SCEV *TripCount = computeTripCount(L, SE);
-  if (!TripCount) {
-    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
-                      << " could not be computed, using DefaultTripCount\n");
-    const SCEV *ElemSize = Sizes.back();
-    TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
-  }
+  const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
+  assert(TripCount && "Expecting valid TripCount");
   LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
 
-  // If the indexed reference is 'consecutive' the cost is
-  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
-  const SCEV *RefCost = TripCount;
-
-  if (isConsecutive(L, CLS)) {
-    const SCEV *Coeff = getLastCoefficient();
-    const SCEV *ElemSize = Sizes.back();
-    const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+  const SCEV *RefCost = nullptr;
+  const SCEV *Stride = nullptr;
+  if (isConsecutive(L, Stride, CLS)) {
+    // If the indexed reference is 'consecutive' the cost is
+    // (TripCount*Stride)/CLS.
+    assert(Stride != nullptr &&
+           "Stride should not be null for consecutive access!");
     Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
     const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
-    if (SE.isKnownNegative(Stride))
-      Stride = SE.getNegativeSCEV(Stride);
     Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
     TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
     const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
@@ -303,10 +305,33 @@
     LLVM_DEBUG(dbgs().indent(4)
                << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
                << *RefCost << "\n");
-  } else
+  } else {
+    // If the indexed reference is not 'consecutive', the cost is proportional
+    // to the trip count and to the depth of the dimension that the subject
+    // loop's subscript is accessing. We try to estimate this by multiplying
+    // the cost
+    // by the trip counts of loops corresponding to the inner dimensions. For
+    // example, given the indexed reference 'A[i][j][k]', and assuming the
+    // i-loop is in the innermost position, the cost would be equal to the
+    // iterations of the i-loop multiplied by iterations of the j-loop.
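+    //
+    // As a hypothetical worked example: with TC(i) = 100 and TC(j) = 50, the
+    // estimated cost of the reference would be 100 * 50 = 5000.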
+    RefCost = TripCount;
+
+    int Index = getSubscriptIndex(L);
+    assert(Index >= 0 && "Cound not locate a valid Index");
+
+    for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(I));
+      assert(AR && AR->getLoop() && "Expecting valid loop");
+      const SCEV *TripCount =
+          computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
+      Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
+      RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType),
+                              SE.getNoopOrAnyExtend(TripCount, WiderType));
+    }
+
     LLVM_DEBUG(dbgs().indent(4)
-               << "Access is not consecutive: RefCost=TripCount=" << *RefCost
-               << "\n");
+               << "Access is not consecutive: RefCost=" << *RefCost << "\n");
+  }
+  assert(RefCost && "Expecting a valid RefCost");
 
   // Attempt to fold RefCost into a constant.
   if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
@@ -319,6 +344,26 @@
   return CacheCost::InvalidCost;
 }
 
+bool IndexedReference::tryDelinearizeFixedSize(
+    const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
+  SmallVector<int, 4> ArraySizes;
+  if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
+                                   ArraySizes))
+    return false;
+
+  // Populate Sizes with SCEV expressions to be used in calculations later.
+  for (auto Idx : seq<unsigned>(1, Subscripts.size()))
+    Sizes.push_back(
+        SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
+
+  LLVM_DEBUG({
+    dbgs() << "Delinearized subscripts of fixed-size array\n"
+           << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst)
+           << "\n";
+  });
+  return true;
+}
+
 bool IndexedReference::delinearize(const LoopInfo &LI) {
   assert(Subscripts.empty() && "Subscripts should be empty");
   assert(Sizes.empty() && "Sizes should be empty");
@@ -340,13 +385,25 @@
       return false;
     }
 
+    bool IsFixedSize = false;
+    // Try to delinearize fixed-size arrays.
+    if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
+      IsFixedSize = true;
+      // The last element of Sizes is the element size.
+      Sizes.push_back(ElemSize);
+      LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+                                  << "', AccessFn: " << *AccessFn << "\n");
+    }
+
     AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
 
-    LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
-                                << "', AccessFn: " << *AccessFn << "\n");
-
-    llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
-                      SE.getElementSize(&StoreOrLoadInst));
+    // Try to delinearize parametric-size arrays.
+    if (!IsFixedSize) {
+      LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+                                  << "', AccessFn: " << *AccessFn << "\n");
+      llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
+                        SE.getElementSize(&StoreOrLoadInst));
+    }
 
     if (Subscripts.empty() || Sizes.empty() ||
         Subscripts.size() != Sizes.size()) {
@@ -403,7 +460,8 @@
   return allCoeffForLoopAreZero;
 }
 
-bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
+                                     unsigned CLS) const {
   // The indexed reference is 'consecutive' if the only coefficient that uses
   // the loop induction variable is the last one...
   const SCEV *LastSubscript = Subscripts.back();
@@ -417,13 +475,35 @@
   // ...and the access stride is less than the cache line size.
   const SCEV *Coeff = getLastCoefficient();
   const SCEV *ElemSize = Sizes.back();
-  const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+  Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
+  // FIXME: This assumes that all values are signed integers, which may be
+  // incorrect in unusual code, and incorrectly uses sext instead of zext.
+  // For example:
+  // for (uint32_t i = 0; i < 512; ++i) {
+  //   uint8_t trunc = i;
+  //   A[trunc] = 42;
+  // }
+  // This consecutively iterates twice over A. If `trunc` is sign-extended,
+  // we would conclude that this may iterate backwards over the array.
+  // However, LoopCacheAnalysis is heuristic anyway and transformations must
+  // not result in wrong optimizations if the heuristic was incorrect.
+  Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
+                         SE.getNoopOrSignExtend(ElemSize, WiderType));
   const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
 
   Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
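+  // (The negation above keeps only the stride's magnitude; direction is
+  // irrelevant to the cache-line comparison below.)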
   return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
 }
 
+int IndexedReference::getSubscriptIndex(const Loop &L) const {
+  for (auto Idx : seq<int>(0, getNumSubscripts())) {
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
+    if (AR && AR->getLoop() == &L) {
+      return Idx;
+    }
+  }
+  return -1;
+}
+
 const SCEV *IndexedReference::getLastCoefficient() const {
   const SCEV *LastSubscript = getLastSubscript();
   auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
@@ -550,7 +630,7 @@
 
       bool Added = false;
       for (ReferenceGroupTy &RefGroup : RefGroups) {
-        const IndexedReference &Representative = *RefGroup.front().get();
+        const IndexedReference &Representative = *RefGroup.front();
         LLVM_DEBUG({
           dbgs() << "References:\n";
           dbgs().indent(2) << *R << "\n";
@@ -574,8 +654,8 @@
         Optional<bool> HasSpacialReuse =
             R->hasSpacialReuse(Representative, CLS, AA);
 
-        if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
-            (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
+        if ((HasTemporalReuse && *HasTemporalReuse) ||
+            (HasSpacialReuse && *HasSpacialReuse)) {
           RefGroup.push_back(std::move(R));
           Added = true;
           break;
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/src/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
index b161c49..693b9eb 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
@@ -14,7 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/IVDescriptors.h"
@@ -30,7 +29,6 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -38,9 +36,7 @@
 #include "llvm/IR/PrintPasses.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 using namespace llvm;
 
 // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
@@ -429,12 +425,12 @@
 
 // Check that 'BB' doesn't have any uses outside of the loop 'L'.
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
-                               const DominatorTree &DT) {
+                               const DominatorTree &DT, bool IgnoreTokens) {
   for (const Instruction &I : BB) {
     // Tokens can't be used in PHI nodes and live-out tokens prevent loop
     // optimizations, so for the purposes of considering LCSSA form, we
     // can ignore them.
-    if (I.getType()->isTokenTy())
+    if (IgnoreTokens && I.getType()->isTokenTy())
       continue;
 
     for (const Use &U : I.uses()) {
@@ -459,20 +455,20 @@
   return true;
 }
 
-bool Loop::isLCSSAForm(const DominatorTree &DT) const {
+bool Loop::isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens) const {
   // For each block we check that it doesn't have any uses outside of this loop.
   return all_of(this->blocks(), [&](const BasicBlock *BB) {
-    return isBlockInLCSSAForm(*this, *BB, DT);
+    return isBlockInLCSSAForm(*this, *BB, DT, IgnoreTokens);
   });
 }
 
-bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT,
-                                  const LoopInfo &LI) const {
+bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI,
+                                  bool IgnoreTokens) const {
   // For each block we check that it doesn't have any uses outside of its
   // innermost loop. This process will transitively guarantee that the current
   // loop and all of the nested loops are in LCSSA form.
   return all_of(this->blocks(), [&](const BasicBlock *BB) {
-    return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
+    return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, IgnoreTokens);
   });
 }
 
@@ -486,11 +482,8 @@
 bool Loop::isSafeToClone() const {
   // Return false if any loop blocks contain indirectbrs, or there are any calls
   // to noduplicate functions.
-  // FIXME: it should be ok to clone CallBrInst's if we correctly update the
-  // operand list to reflect the newly cloned labels.
   for (BasicBlock *BB : this->blocks()) {
-    if (isa<IndirectBrInst>(BB->getTerminator()) ||
-        isa<CallBrInst>(BB->getTerminator()))
+    if (isa<IndirectBrInst>(BB->getTerminator()))
       return false;
 
     for (Instruction &I : *BB)
@@ -740,6 +733,7 @@
   bool Changed = FoundIB;
   for (unsigned NIters = 0; Changed; ++NIters) {
     assert(NIters < Unloop.getNumBlocks() && "runaway iterative algorithm");
+    (void) NIters;
 
     // Iterate over the postorder list of blocks, propagating the nearest loop
     // from successors to predecessors as before.
@@ -1085,13 +1079,13 @@
 }
 
 bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
-  return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+  return getOptionalBoolLoopAttribute(TheLoop, Name).value_or(false);
 }
 
 llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
                                                       StringRef Name) {
   const MDOperand *AttrMD =
-      findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+      findStringMetadataForLoop(TheLoop, Name).value_or(nullptr);
   if (!AttrMD)
     return None;
 
@@ -1104,7 +1098,7 @@
 
 int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
                               int Default) {
-  return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
+  return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default);
 }
 
 bool llvm::isFinite(const Loop *L) {
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
index 675bb7a..bff796f 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopNestAnalysis.cpp
@@ -13,8 +13,7 @@
 
 #include "llvm/Analysis/LoopNestAnalysis.h"
 #include "llvm/ADT/BreadthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/PostDominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/Analysis/ValueTracking.h"
 
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/src/llvm-project/llvm/lib/Analysis/LoopPass.cpp
index b720bab..5d824ae 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopPass.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopPass.cpp
@@ -13,14 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/OptBisect.h"
-#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -192,12 +190,12 @@
         PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
         TimeRegion PassTimer(getPassTimer(P));
 #ifdef EXPENSIVE_CHECKS
-        uint64_t RefHash = StructuralHash(F);
+        uint64_t RefHash = P->structuralHash(F);
 #endif
         LocalChanged = P->runOnLoop(CurrentLoop, *this);
 
 #ifdef EXPENSIVE_CHECKS
-        if (!LocalChanged && (RefHash != StructuralHash(F))) {
+        if (!LocalChanged && (RefHash != P->structuralHash(F))) {
           llvm::errs() << "Pass modifies its input and doesn't report it: "
                        << P->getPassName() << "\n";
           llvm_unreachable("Pass modifies its input and doesn't report it");
diff --git a/src/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/src/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
index 15095d6..84f1eff 100644
--- a/src/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -13,7 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopUnrollAnalyzer.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Operator.h"
 
 using namespace llvm;
 
@@ -84,9 +87,9 @@
   const DataLayout &DL = I.getModule()->getDataLayout();
   if (auto FI = dyn_cast<FPMathOperator>(&I))
     SimpleV =
-        SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+        simplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
   else
-    SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+    SimpleV = simplifyBinOp(I.getOpcode(), LHS, RHS, DL);
 
   if (SimpleV) {
     SimplifiedValues[&I] = SimpleV;
@@ -155,7 +158,7 @@
   // i32 0).
   if (CastInst::castIsValid(I.getOpcode(), Op, I.getType())) {
     const DataLayout &DL = I.getModule()->getDataLayout();
-    if (Value *V = SimplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) {
+    if (Value *V = simplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) {
       SimplifiedValues[&I] = V;
       return true;
     }
@@ -192,7 +195,7 @@
   }
 
   const DataLayout &DL = I.getModule()->getDataLayout();
-  if (Value *V = SimplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) {
+  if (Value *V = simplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) {
     SimplifiedValues[&I] = V;
     return true;
   }
diff --git a/src/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp b/src/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 0480c1c..f55de71 100644
--- a/src/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -13,30 +13,25 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/MLInlineAdvisor.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InlineModelFeatureMaps.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ReleaseModeModelRunner.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Config/config.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Path.h"
-
-#include <limits>
-#include <unordered_map>
-#include <unordered_set>
 
 using namespace llvm;
 
 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
 
@@ -44,7 +39,7 @@
 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
   auto AOTRunner =
       std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
-          M.getContext(), FeatureNameMap, DecisionName);
+          M.getContext(), FeatureMap, DecisionName);
   return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
 }
 #endif
@@ -57,15 +52,21 @@
              "blocking any further inlining."),
     cl::init(2.0));
 
+static cl::opt<bool> KeepFPICache(
+    "ml-advisor-keep-fpi-cache", cl::Hidden,
+    cl::desc(
+        "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
+    cl::init(false));
+
 // clang-format off
-const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
+const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
+#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1}),
 // InlineCost features - these must come first
-#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
   INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
 #undef POPULATE_NAMES
 
 // Non-cost features
-#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
+#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1}),
   INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
 #undef POPULATE_NAMES
 };
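+// Each entry above expands to a one-element int64 tensor spec; e.g., for a
+// hypothetical feature named "callee_basic_block_count" it would be
+//   TensorSpec::createSpec<int64_t>("callee_basic_block_count", {1})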
@@ -138,7 +139,10 @@
   return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
 }
 
-void MLInlineAdvisor::onPassEntry() {
+void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
+  if (!LastSCC || ForceStop)
+    return;
+  FPICache.clear();
   // Function passes executed between InlinerPass runs may have changed the
   // module-wide features.
   // The cgscc pass manager rules are such that:
@@ -154,8 +158,8 @@
   // care about the nature of the Edge (call or ref).
   NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
   while (!NodesInLastSCC.empty()) {
-    const auto *N = NodesInLastSCC.front();
-    NodesInLastSCC.pop_front();
+    const auto *N = *NodesInLastSCC.begin();
+    NodesInLastSCC.erase(N);
     // The Function wrapped by N could have been deleted since we last saw it.
     if (N->isDead()) {
       assert(!N->getFunction().isDeclaration());
@@ -168,34 +172,52 @@
       assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
       auto I = AllNodes.insert(AdjNode);
       if (I.second)
-        NodesInLastSCC.push_back(AdjNode);
+        NodesInLastSCC.insert(AdjNode);
     }
   }
 
   EdgeCount -= EdgesOfLastSeenNodes;
   EdgesOfLastSeenNodes = 0;
+
+  // (Re)use NodesInLastSCC to remember the nodes in the SCC right now,
+  // in case the SCC is split before onPassExit and some nodes are split out.
+  assert(NodesInLastSCC.empty());
+  for (const auto &N : *LastSCC)
+    NodesInLastSCC.insert(&N);
 }
 
 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
-  if (!LastSCC)
+  // No need to keep this around - function passes will invalidate it.
+  if (!KeepFPICache)
+    FPICache.clear();
+  if (!LastSCC || ForceStop)
     return;
   // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
   // we update the node count and edge count from the subset of these nodes that
   // survived.
-  assert(NodesInLastSCC.empty());
-  assert(NodeCount >= LastSCC->size());
   EdgesOfLastSeenNodes = 0;
+
+  // Check on the nodes that were in the SCC at onPassEntry.
+  for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
+    if ((*I)->isDead())
+      NodesInLastSCC.erase(*I++);
+    else
+      EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
+  }
+
+  // Check on nodes that may have been added to the SCC.
   for (const auto &N : *LastSCC) {
     assert(!N.isDead());
-    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
-    NodesInLastSCC.push_back(&N);
+    auto I = NodesInLastSCC.insert(&N);
+    if (I.second)
+      EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
   }
+  assert(NodeCount >= NodesInLastSCC.size());
   assert(EdgeCount >= EdgesOfLastSeenNodes);
 }
 
 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
-  return FAM.getResult<FunctionPropertiesAnalysis>(F)
-      .DirectCallsToDefinedFunctions;
+  return getCachedFPI(F).DirectCallsToDefinedFunctions;
 }
 
 // Update the internal state of the advisor, and force invalidate feature
@@ -208,13 +230,15 @@
   assert(!ForceStop);
   Function *Caller = Advice.getCaller();
   Function *Callee = Advice.getCallee();
-
   // The caller features aren't valid anymore.
   {
     PreservedAnalyses PA = PreservedAnalyses::all();
     PA.abandon<FunctionPropertiesAnalysis>();
+    PA.abandon<DominatorTreeAnalysis>();
+    PA.abandon<LoopAnalysis>();
     FAM.invalidate(*Caller, PA);
   }
+  Advice.updateCachedCallerFPI(FAM);
   int64_t IRSizeAfter =
       getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
   CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
@@ -227,15 +251,13 @@
   // For edges, we 'forget' the edges that the caller and callee used to have
   // before inlining, and add back what they currently have together.
   int64_t NewCallerAndCalleeEdges =
-      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
-          .DirectCallsToDefinedFunctions;
+      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
 
   if (CalleeWasDeleted)
     --NodeCount;
   else
     NewCallerAndCalleeEdges +=
-        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
-            .DirectCallsToDefinedFunctions;
+        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
   EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
   assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
 }
@@ -248,7 +270,19 @@
   return Ret;
 }
 
+FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
+  auto InsertPair =
+      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
+  if (!InsertPair.second)
+    return InsertPair.first->second;
+  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
+  return InsertPair.first->second;
+}
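+// Note: getCachedFPI is a plain memoization wrapper - insert() both probes
+// and reserves the cache slot, so FunctionPropertiesAnalysis runs at most
+// once per function until the cache is cleared on pass entry/exit.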
+
 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
+  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
+    return Skip;
+
   auto &Caller = *CB.getCaller();
   auto &Callee = *CB.getCalledFunction();
 
@@ -307,8 +341,8 @@
     NrCtantParams += (isa<Constant>(*I));
   }
 
-  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
-  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
+  auto &CallerBefore = getCachedFPI(Caller);
+  auto &CalleeBefore = getCachedFPI(Callee);
 
   *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
       CalleeBefore.BasicBlockCount;
@@ -348,9 +382,19 @@
       this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
 }
 
+std::unique_ptr<InlineAdvice>
+MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
+  if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
+           .isReachableFromEntry(CB.getParent()))
+    return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
+  return nullptr;
+}
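+// (Rationale, as we read it: callsites in unreachable blocks belong to dead
+// code that later passes will delete, so inlining them would waste model
+// evaluations and skew the advisor's size bookkeeping.)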
+
 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                   bool Advice) {
   // Make sure we track inlinings in all cases - mandatory or not.
+  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
+    return Skip;
   if (Advice && !ForceStop)
     return getMandatoryAdviceImpl(CB);
 
@@ -366,16 +410,47 @@
   return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
 }
 
+void MLInlineAdvisor::print(raw_ostream &OS) const {
+  OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
+     << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
+  OS << "[MLInlineAdvisor] FPI:\n";
+  for (auto I : FPICache) {
+    OS << I.getFirst()->getName() << ":\n";
+    I.getSecond().print(OS);
+    OS << "\n";
+  }
+  OS << "\n";
+}
+
+MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
+                               OptimizationRemarkEmitter &ORE,
+                               bool Recommendation)
+    : InlineAdvice(Advisor, CB, ORE, Recommendation),
+      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
+      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
+      CallerAndCalleeEdges(Advisor->isForcedToStop()
+                               ? 0
+                               : (Advisor->getLocalCalls(*Caller) +
+                                  Advisor->getLocalCalls(*Callee))),
+      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
+  if (Recommendation)
+    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
+}
+
 void MLInlineAdvice::reportContextForRemark(
     DiagnosticInfoOptimizationBase &OR) {
   using namespace ore;
   OR << NV("Callee", Callee->getName());
   for (size_t I = 0; I < NumberOfFeatures; ++I)
-    OR << NV(FeatureNameMap[I],
+    OR << NV(FeatureMap[I].name(),
              *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
   OR << NV("ShouldInline", isInliningRecommended());
 }
 
+void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
+  FPU->finish(FAM);
+}
+
 void MLInlineAdvice::recordInliningImpl() {
   ORE.emit([&]() {
     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
@@ -397,6 +472,7 @@
 
 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
     const InlineResult &Result) {
+  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
   ORE.emit([&]() {
     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                                DLoc, Block);
@@ -405,6 +481,7 @@
   });
 }
 void MLInlineAdvice::recordUnattemptedInliningImpl() {
+  assert(!FPU);
   ORE.emit([&]() {
     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
     reportContextForRemark(R);
diff --git a/src/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
index 0064234..305ae3e 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
index 82617c7..4dd5c76 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -9,14 +9,11 @@
 #include "llvm/Analysis/MemDerefPrinter.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/src/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
index 4b08e64..413ec6d 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -17,7 +17,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/Utils/Local.h"
@@ -43,6 +43,8 @@
 #include <cassert>
 #include <cstdint>
 #include <iterator>
+#include <numeric>
+#include <type_traits>
 #include <utility>
 
 using namespace llvm;
@@ -62,6 +64,42 @@
   AnyAlloc           = AllocLike | ReallocLike
 };
 
+enum class MallocFamily {
+  Malloc,
+  CPPNew,             // new(unsigned int)
+  CPPNewAligned,      // new(unsigned int, align_val_t)
+  CPPNewArray,        // new[](unsigned int)
+  CPPNewArrayAligned, // new[](unsigned long, align_val_t)
+  MSVCNew,            // new(unsigned int)
+  MSVCArrayNew,       // new[](unsigned int)
+  VecMalloc,
+  KmpcAllocShared,
+};
+
+StringRef mangledNameForMallocFamily(const MallocFamily &Family) {
+  switch (Family) {
+  case MallocFamily::Malloc:
+    return "malloc";
+  case MallocFamily::CPPNew:
+    return "_Znwm";
+  case MallocFamily::CPPNewAligned:
+    return "_ZnwmSt11align_val_t";
+  case MallocFamily::CPPNewArray:
+    return "_Znam";
+  case MallocFamily::CPPNewArrayAligned:
+    return "_ZnamSt11align_val_t";
+  case MallocFamily::MSVCNew:
+    return "??2@YAPAXI@Z";
+  case MallocFamily::MSVCArrayNew:
+    return "??_U@YAPAXI@Z";
+  case MallocFamily::VecMalloc:
+    return "vec_malloc";
+  case MallocFamily::KmpcAllocShared:
+    return "__kmpc_alloc_shared";
+  }
+  llvm_unreachable("missing an alloc family");
+}
+
 struct AllocFnsTy {
   AllocType AllocTy;
   unsigned NumParams;
@@ -69,53 +107,49 @@
   int FstParam, SndParam;
   // Alignment parameter for aligned_alloc and aligned new
   int AlignParam;
+  // Name of the default allocator function used to group malloc/free calls by family
+  MallocFamily Family;
 };
 
+// clang-format off
 // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
 // know which functions are nounwind, noalias, nocapture parameters, etc.
 static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
-    {LibFunc_malloc,                            {MallocLike,       1,  0, -1, -1}},
-    {LibFunc_vec_malloc,                        {MallocLike,       1,  0, -1, -1}},
-    {LibFunc_valloc,                            {MallocLike,       1,  0, -1, -1}},
-    {LibFunc_Znwj,                              {OpNewLike,        1,  0, -1, -1}}, // new(unsigned int)
-    {LibFunc_ZnwjRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1}}, // new(unsigned int, nothrow)
-    {LibFunc_ZnwjSt11align_val_t,               {OpNewLike,        2,  0, -1,  1}}, // new(unsigned int, align_val_t)
-    {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1}}, // new(unsigned int, align_val_t, nothrow)
-    {LibFunc_Znwm,                              {OpNewLike,        1,  0, -1, -1}}, // new(unsigned long)
-    {LibFunc_ZnwmRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1}}, // new(unsigned long, nothrow)
-    {LibFunc_ZnwmSt11align_val_t,               {OpNewLike,        2,  0, -1,  1}}, // new(unsigned long, align_val_t)
-    {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1}}, // new(unsigned long, align_val_t, nothrow)
-    {LibFunc_Znaj,                              {OpNewLike,        1,  0, -1, -1}}, // new[](unsigned int)
-    {LibFunc_ZnajRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1}}, // new[](unsigned int, nothrow)
-    {LibFunc_ZnajSt11align_val_t,               {OpNewLike,        2,  0, -1,  1}}, // new[](unsigned int, align_val_t)
-    {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1}}, // new[](unsigned int, align_val_t, nothrow)
-    {LibFunc_Znam,                              {OpNewLike,        1,  0, -1, -1}}, // new[](unsigned long)
-    {LibFunc_ZnamRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1}}, // new[](unsigned long, nothrow)
-    {LibFunc_ZnamSt11align_val_t,               {OpNewLike,        2,  0, -1,  1}}, // new[](unsigned long, align_val_t)
-    {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1}}, // new[](unsigned long, align_val_t, nothrow)
-    {LibFunc_msvc_new_int,                      {OpNewLike,        1,  0, -1, -1}}, // new(unsigned int)
-    {LibFunc_msvc_new_int_nothrow,              {MallocLike,       2,  0, -1, -1}}, // new(unsigned int, nothrow)
-    {LibFunc_msvc_new_longlong,                 {OpNewLike,        1,  0, -1, -1}}, // new(unsigned long long)
-    {LibFunc_msvc_new_longlong_nothrow,         {MallocLike,       2,  0, -1, -1}}, // new(unsigned long long, nothrow)
-    {LibFunc_msvc_new_array_int,                {OpNewLike,        1,  0, -1, -1}}, // new[](unsigned int)
-    {LibFunc_msvc_new_array_int_nothrow,        {MallocLike,       2,  0, -1, -1}}, // new[](unsigned int, nothrow)
-    {LibFunc_msvc_new_array_longlong,           {OpNewLike,        1,  0, -1, -1}}, // new[](unsigned long long)
-    {LibFunc_msvc_new_array_longlong_nothrow,   {MallocLike,       2,  0, -1, -1}}, // new[](unsigned long long, nothrow)
-    {LibFunc_aligned_alloc,                     {AlignedAllocLike, 2,  1, -1,  0}},
-    {LibFunc_memalign,                          {AlignedAllocLike, 2,  1, -1,  0}},
-    {LibFunc_calloc,                            {CallocLike,       2,  0,  1, -1}},
-    {LibFunc_vec_calloc,                        {CallocLike,       2,  0,  1, -1}},
-    {LibFunc_realloc,                           {ReallocLike,      2,  1, -1, -1}},
-    {LibFunc_vec_realloc,                       {ReallocLike,      2,  1, -1, -1}},
-    {LibFunc_reallocf,                          {ReallocLike,      2,  1, -1, -1}},
-    {LibFunc_strdup,                            {StrDupLike,       1, -1, -1, -1}},
-    {LibFunc_strndup,                           {StrDupLike,       2,  1, -1, -1}},
-    {LibFunc___kmpc_alloc_shared,               {MallocLike,       1,  0, -1, -1}},
-    {LibFunc_rust_alloc,                        {MallocLike,       2,  0, -1,  1}},
-    {LibFunc_rust_alloc_zeroed,                 {CallocLike,       2,  0, -1,  1}},
-    {LibFunc_rust_realloc,                      {ReallocLike,      4,  3, -1,  2}},
-    // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
+    {LibFunc_vec_malloc,                        {MallocLike,       1,  0, -1, -1, MallocFamily::VecMalloc}},
+    {LibFunc_Znwj,                              {OpNewLike,        1,  0, -1, -1, MallocFamily::CPPNew}},             // new(unsigned int)
+    {LibFunc_ZnwjRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1, MallocFamily::CPPNew}},             // new(unsigned int, nothrow)
+    {LibFunc_ZnwjSt11align_val_t,               {OpNewLike,        2,  0, -1,  1, MallocFamily::CPPNewAligned}},      // new(unsigned int, align_val_t)
+    {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1, MallocFamily::CPPNewAligned}},      // new(unsigned int, align_val_t, nothrow)
+    {LibFunc_Znwm,                              {OpNewLike,        1,  0, -1, -1, MallocFamily::CPPNew}},             // new(unsigned long)
+    {LibFunc_ZnwmRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1, MallocFamily::CPPNew}},             // new(unsigned long, nothrow)
+    {LibFunc_ZnwmSt11align_val_t,               {OpNewLike,        2,  0, -1,  1, MallocFamily::CPPNewAligned}},      // new(unsigned long, align_val_t)
+    {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1, MallocFamily::CPPNewAligned}},      // new(unsigned long, align_val_t, nothrow)
+    {LibFunc_Znaj,                              {OpNewLike,        1,  0, -1, -1, MallocFamily::CPPNewArray}},        // new[](unsigned int)
+    {LibFunc_ZnajRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1, MallocFamily::CPPNewArray}},        // new[](unsigned int, nothrow)
+    {LibFunc_ZnajSt11align_val_t,               {OpNewLike,        2,  0, -1,  1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t)
+    {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow)
+    {LibFunc_Znam,                              {OpNewLike,        1,  0, -1, -1, MallocFamily::CPPNewArray}},        // new[](unsigned long)
+    {LibFunc_ZnamRKSt9nothrow_t,                {MallocLike,       2,  0, -1, -1, MallocFamily::CPPNewArray}},        // new[](unsigned long, nothrow)
+    {LibFunc_ZnamSt11align_val_t,               {OpNewLike,        2,  0, -1,  1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t)
+    {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike,       3,  0, -1,  1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow)
+    {LibFunc_msvc_new_int,                      {OpNewLike,        1,  0, -1, -1, MallocFamily::MSVCNew}},            // new(unsigned int)
+    {LibFunc_msvc_new_int_nothrow,              {MallocLike,       2,  0, -1, -1, MallocFamily::MSVCNew}},            // new(unsigned int, nothrow)
+    {LibFunc_msvc_new_longlong,                 {OpNewLike,        1,  0, -1, -1, MallocFamily::MSVCNew}},            // new(unsigned long long)
+    {LibFunc_msvc_new_longlong_nothrow,         {MallocLike,       2,  0, -1, -1, MallocFamily::MSVCNew}},            // new(unsigned long long, nothrow)
+    {LibFunc_msvc_new_array_int,                {OpNewLike,        1,  0, -1, -1, MallocFamily::MSVCArrayNew}},       // new[](unsigned int)
+    {LibFunc_msvc_new_array_int_nothrow,        {MallocLike,       2,  0, -1, -1, MallocFamily::MSVCArrayNew}},       // new[](unsigned int, nothrow)
+    {LibFunc_msvc_new_array_longlong,           {OpNewLike,        1,  0, -1, -1, MallocFamily::MSVCArrayNew}},       // new[](unsigned long long)
+    {LibFunc_msvc_new_array_longlong_nothrow,   {MallocLike,       2,  0, -1, -1, MallocFamily::MSVCArrayNew}},       // new[](unsigned long long, nothrow)
+    {LibFunc_memalign,                          {AlignedAllocLike, 2,  1, -1,  0, MallocFamily::Malloc}},
+    {LibFunc_vec_calloc,                        {CallocLike,       2,  0,  1, -1, MallocFamily::VecMalloc}},
+    {LibFunc_vec_realloc,                       {ReallocLike,      2,  1, -1, -1, MallocFamily::VecMalloc}},
+    {LibFunc_strdup,                            {StrDupLike,       1, -1, -1, -1, MallocFamily::Malloc}},
+    {LibFunc_dunder_strdup,                     {StrDupLike,       1, -1, -1, -1, MallocFamily::Malloc}},
+    {LibFunc_strndup,                           {StrDupLike,       2,  1, -1, -1, MallocFamily::Malloc}},
+    {LibFunc_dunder_strndup,                    {StrDupLike,       2,  1, -1, -1, MallocFamily::Malloc}},
+    {LibFunc___kmpc_alloc_shared,               {MallocLike,       1,  0, -1, -1, MallocFamily::KmpcAllocShared}},
 };
+// clang-format on
 
 static const Function *getCalledFunction(const Value *V,
                                          bool &IsNoBuiltin) {
@@ -139,6 +173,11 @@
 static Optional<AllocFnsTy>
 getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
                              const TargetLibraryInfo *TLI) {
+  // Don't perform a slow TLI lookup if this function doesn't return a pointer
+  // and thus can't be an allocation function.
+  if (!Callee->getReturnType()->isPointerTy())
+    return None;
+
   // Make sure that the function is available.
   LibFunc TLIFn;
   if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
@@ -220,87 +259,117 @@
   Result.AllocTy = MallocLike;
   Result.NumParams = Callee->getNumOperands();
   Result.FstParam = Args.first;
-  Result.SndParam = Args.second.getValueOr(-1);
+  Result.SndParam = Args.second.value_or(-1);
   // Allocsize has no way to specify an alignment argument
   Result.AlignParam = -1;
   return Result;
 }
 
+static AllocFnKind getAllocFnKind(const Value *V) {
+  if (const auto *CB = dyn_cast<CallBase>(V)) {
+    Attribute Attr = CB->getFnAttr(Attribute::AllocKind);
+    if (Attr.isValid())
+      return AllocFnKind(Attr.getValueAsInt());
+  }
+  return AllocFnKind::Unknown;
+}
+
+static AllocFnKind getAllocFnKind(const Function *F) {
+  Attribute Attr = F->getFnAttribute(Attribute::AllocKind);
+  if (Attr.isValid())
+    return AllocFnKind(Attr.getValueAsInt());
+  return AllocFnKind::Unknown;
+}
+
+static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) {
+  return (getAllocFnKind(V) & Wanted) != AllocFnKind::Unknown;
+}
+
+static bool checkFnAllocKind(const Function *F, AllocFnKind Wanted) {
+  return (getAllocFnKind(F) & Wanted) != AllocFnKind::Unknown;
+}
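// A worked illustration of the bitmask test above, assuming AllocFnKind is a
// bitmask enum whose zero value is Unknown (as the comparisons imply):
// checkFnAllocKind is true iff the callee's alloc kind shares at least one
// bit with Wanted. E.g. for a function carrying
// AllocFnKind::Alloc | AllocFnKind::Zeroed:
//   (Alloc | Zeroed) & Alloc == Alloc   != Unknown  -> true
//   (Alloc | Zeroed) & Free  == Unknown             -> false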
+
 /// Tests if a value is a call or invoke to a library function that
 /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
 /// like).
 bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, AnyAlloc, TLI).hasValue();
+  return getAllocationData(V, AnyAlloc, TLI).has_value() ||
+         checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
 }
 bool llvm::isAllocationFn(
-    const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
-  return getAllocationData(V, AnyAlloc, GetTLI).hasValue();
+    const Value *V,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+  return getAllocationData(V, AnyAlloc, GetTLI).has_value() ||
+         checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
 }
 
 /// Tests if a value is a call or invoke to a library function that
 /// allocates uninitialized memory (such as malloc).
 static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, MallocOrOpNewLike, TLI).hasValue();
+  return getAllocationData(V, MallocOrOpNewLike, TLI).has_value();
 }
 
 /// Tests if a value is a call or invoke to a library function that
 /// allocates uninitialized memory with alignment (such as aligned_alloc).
 static bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, AlignedAllocLike, TLI)
-      .hasValue();
+  return getAllocationData(V, AlignedAllocLike, TLI).has_value();
 }
 
 /// Tests if a value is a call or invoke to a library function that
 /// allocates zero-filled memory (such as calloc).
 static bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, CallocLike, TLI).hasValue();
+  return getAllocationData(V, CallocLike, TLI).has_value();
 }
 
 /// Tests if a value is a call or invoke to a library function that
 /// allocates memory similar to malloc or calloc.
 bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, MallocOrCallocLike, TLI).hasValue();
+  return getAllocationData(V, MallocOrCallocLike, TLI).has_value();
 }
 
 /// Tests if a value is a call or invoke to a library function that
 /// allocates memory (either malloc, calloc, or strdup like).
 bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, AllocLike, TLI).hasValue();
-}
-
-/// Tests if a value is a call or invoke to a library function that
-/// reallocates memory (e.g., realloc).
-bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
-  return getAllocationData(V, ReallocLike, TLI).hasValue();
+  return getAllocationData(V, AllocLike, TLI).has_value() ||
+         checkFnAllocKind(V, AllocFnKind::Alloc);
 }
 
 /// Tests if a functions is a call or invoke to a library function that
 /// reallocates memory (e.g., realloc).
 bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
-  return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
+  return getAllocationDataForFunction(F, ReallocLike, TLI).has_value() ||
+         checkFnAllocKind(F, AllocFnKind::Realloc);
 }
 
-bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) {
-  assert(isAllocationFn(CB, TLI));
+Value *llvm::getReallocatedOperand(const CallBase *CB,
+                                   const TargetLibraryInfo *TLI) {
+  if (getAllocationData(CB, ReallocLike, TLI).has_value()) {
+    // All currently supported realloc functions reallocate the first argument.
+    return CB->getArgOperand(0);
+  }
+  if (checkFnAllocKind(CB, AllocFnKind::Realloc))
+    return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer);
+  return nullptr;
+}
 
+bool llvm::isRemovableAlloc(const CallBase *CB, const TargetLibraryInfo *TLI) {
   // Note: Removability is highly dependent on the source language.  For
   // example, recent C++ requires direct calls to the global allocation
   // [basic.stc.dynamic.allocation] to be observable unless part of a new
   // expression [expr.new paragraph 13].
 
-  // Historically we've treated the C family allocation routines as removable
+  // Historically we've treated the C family allocation routines and operator
+  // new as removable.
   return isAllocLikeFn(CB, TLI);
 }
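// A source-level illustration of the removability note above (hypothetical
// C++, not from the patch): the first allocation comes from a new-expression
// and may be elided if unused; the second is a direct call to the global
// allocation function, which recent C++ requires to remain observable.
//   int *A = new int;                       // part of a new-expression: elidable
//   void *B = ::operator new(sizeof(int));  // direct call: observable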
 
 Value *llvm::getAllocAlignment(const CallBase *V,
                                const TargetLibraryInfo *TLI) {
-  assert(isAllocationFn(V, TLI));
-
   const Optional<AllocFnsTy> FnData = getAllocationData(V, AnyAlloc, TLI);
-  if (!FnData.hasValue() || FnData->AlignParam < 0) {
-    return nullptr;
+  if (FnData && FnData->AlignParam >= 0) {
+    return V->getOperand(FnData->AlignParam);
   }
-  return V->getOperand(FnData->AlignParam);
+  return V->getArgOperandWithAttribute(Attribute::AllocAlign);
 }
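// Hedged sketch of the attribute fallback above, assuming the allocalign
// parameter attribute and an illustrative declaration:
//   declare ptr @my_alloc(i64 allocalign, i64)
// For a call `%p = call ptr @my_alloc(i64 64, i64 256)`, getAllocAlignment
// returns the `i64 64` operand even though @my_alloc is not in the TLI table.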
 
 /// When we're compiling N-bit code, and the user uses parameters that are
@@ -320,9 +389,8 @@
 }
 
 Optional<APInt>
-llvm::getAllocSize(const CallBase *CB,
-                   const TargetLibraryInfo *TLI,
-                   std::function<const Value*(const Value*)> Mapper) {
+llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI,
+                   function_ref<const Value *(const Value *)> Mapper) {
   // Note: This handles both explicitly listed allocation functions and
   // allocsize.  The code structure could stand to be cleaned up a bit.
   Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI);
@@ -347,7 +415,7 @@
       if (!Arg)
         return None;
 
-      APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
+      APInt MaxSize = Arg->getValue().zext(IntTyBits);
       if (Size.ugt(MaxSize))
         Size = MaxSize + 1;
     }
@@ -382,10 +450,12 @@
   return Size;
 }
 
-Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc,
+Constant *llvm::getInitialValueOfAllocation(const Value *V,
                                             const TargetLibraryInfo *TLI,
                                             Type *Ty) {
-  assert(isAllocationFn(Alloc, TLI));
+  auto *Alloc = dyn_cast<CallBase>(V);
+  if (!Alloc)
+    return nullptr;
 
   // malloc and aligned_alloc are uninitialized (undef)
   if (isMallocLikeFn(Alloc, TLI) || isAlignedAllocLikeFn(Alloc, TLI))
@@ -395,48 +465,99 @@
   if (isCallocLikeFn(Alloc, TLI))
     return Constant::getNullValue(Ty);
 
+  AllocFnKind AK = getAllocFnKind(Alloc);
+  if ((AK & AllocFnKind::Uninitialized) != AllocFnKind::Unknown)
+    return UndefValue::get(Ty);
+  if ((AK & AllocFnKind::Zeroed) != AllocFnKind::Unknown)
+    return Constant::getNullValue(Ty);
+
   return nullptr;
 }
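// Example of the attribute-based fallback above (assumed declaration): an
// allocator marked allockind("alloc,zeroed") reports the Zeroed bit, so
// getInitialValueOfAllocation returns a zero Constant of the requested type,
// mirroring the calloc path; allockind("alloc,uninitialized") yields undef.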
 
+struct FreeFnsTy {
+  unsigned NumParams;
+  // Name of the default allocator function used to group malloc/free calls by family
+  MallocFamily Family;
+};
+
+// clang-format off
+static const std::pair<LibFunc, FreeFnsTy> FreeFnData[] = {
+    {LibFunc_vec_free,                           {1, MallocFamily::VecMalloc}},
+    {LibFunc_ZdlPv,                              {1, MallocFamily::CPPNew}},             // operator delete(void*)
+    {LibFunc_ZdaPv,                              {1, MallocFamily::CPPNewArray}},        // operator delete[](void*)
+    {LibFunc_msvc_delete_ptr32,                  {1, MallocFamily::MSVCNew}},            // operator delete(void*)
+    {LibFunc_msvc_delete_ptr64,                  {1, MallocFamily::MSVCNew}},            // operator delete(void*)
+    {LibFunc_msvc_delete_array_ptr32,            {1, MallocFamily::MSVCArrayNew}},       // operator delete[](void*)
+    {LibFunc_msvc_delete_array_ptr64,            {1, MallocFamily::MSVCArrayNew}},       // operator delete[](void*)
+    {LibFunc_ZdlPvj,                             {2, MallocFamily::CPPNew}},             // delete(void*, uint)
+    {LibFunc_ZdlPvm,                             {2, MallocFamily::CPPNew}},             // delete(void*, ulong)
+    {LibFunc_ZdlPvRKSt9nothrow_t,                {2, MallocFamily::CPPNew}},             // delete(void*, nothrow)
+    {LibFunc_ZdlPvSt11align_val_t,               {2, MallocFamily::CPPNewAligned}},      // delete(void*, align_val_t)
+    {LibFunc_ZdaPvj,                             {2, MallocFamily::CPPNewArray}},        // delete[](void*, uint)
+    {LibFunc_ZdaPvm,                             {2, MallocFamily::CPPNewArray}},        // delete[](void*, ulong)
+    {LibFunc_ZdaPvRKSt9nothrow_t,                {2, MallocFamily::CPPNewArray}},        // delete[](void*, nothrow)
+    {LibFunc_ZdaPvSt11align_val_t,               {2, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t)
+    {LibFunc_msvc_delete_ptr32_int,              {2, MallocFamily::MSVCNew}},            // delete(void*, uint)
+    {LibFunc_msvc_delete_ptr64_longlong,         {2, MallocFamily::MSVCNew}},            // delete(void*, ulonglong)
+    {LibFunc_msvc_delete_ptr32_nothrow,          {2, MallocFamily::MSVCNew}},            // delete(void*, nothrow)
+    {LibFunc_msvc_delete_ptr64_nothrow,          {2, MallocFamily::MSVCNew}},            // delete(void*, nothrow)
+    {LibFunc_msvc_delete_array_ptr32_int,        {2, MallocFamily::MSVCArrayNew}},       // delete[](void*, uint)
+    {LibFunc_msvc_delete_array_ptr64_longlong,   {2, MallocFamily::MSVCArrayNew}},       // delete[](void*, ulonglong)
+    {LibFunc_msvc_delete_array_ptr32_nothrow,    {2, MallocFamily::MSVCArrayNew}},       // delete[](void*, nothrow)
+    {LibFunc_msvc_delete_array_ptr64_nothrow,    {2, MallocFamily::MSVCArrayNew}},       // delete[](void*, nothrow)
+    {LibFunc___kmpc_free_shared,                 {2, MallocFamily::KmpcAllocShared}},    // OpenMP Offloading RTL free
+    {LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewAligned}},      // delete(void*, align_val_t, nothrow)
+    {LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t, nothrow)
+    {LibFunc_ZdlPvjSt11align_val_t,              {3, MallocFamily::CPPNewAligned}},      // delete(void*, unsigned int, align_val_t)
+    {LibFunc_ZdlPvmSt11align_val_t,              {3, MallocFamily::CPPNewAligned}},      // delete(void*, unsigned long, align_val_t)
+    {LibFunc_ZdaPvjSt11align_val_t,              {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned int, align_val_t)
+    {LibFunc_ZdaPvmSt11align_val_t,              {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned long, align_val_t)
+};
+// clang-format on
+
+Optional<FreeFnsTy> getFreeFunctionDataForFunction(const Function *Callee,
+                                                   const LibFunc TLIFn) {
+  const auto *Iter =
+      find_if(FreeFnData, [TLIFn](const std::pair<LibFunc, FreeFnsTy> &P) {
+        return P.first == TLIFn;
+      });
+  if (Iter == std::end(FreeFnData))
+    return None;
+  return Iter->second;
+}
+
+Optional<StringRef> llvm::getAllocationFamily(const Value *I,
+                                              const TargetLibraryInfo *TLI) {
+  bool IsNoBuiltin;
+  const Function *Callee = getCalledFunction(I, IsNoBuiltin);
+  if (Callee == nullptr || IsNoBuiltin)
+    return None;
+  LibFunc TLIFn;
+
+  if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn)) {
+    // Callee is some known library function.
+    const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI);
+    if (AllocData)
+      return mangledNameForMallocFamily(AllocData.value().Family);
+    const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn);
+    if (FreeData)
+      return mangledNameForMallocFamily(FreeData.value().Family);
+  }
+  // Callee isn't a known library function; still check its attributes.
+  if (checkFnAllocKind(I, AllocFnKind::Free | AllocFnKind::Alloc |
+                              AllocFnKind::Realloc)) {
+    Attribute Attr = cast<CallBase>(I)->getFnAttr("alloc-family");
+    if (Attr.isValid())
+      return Attr.getValueAsString();
+  }
+  return None;
+}
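// Sketch of the attribute path above with an assumed custom allocator pair:
//   declare ptr @my_alloc(i64) allockind("alloc") "alloc-family"="my_family"
//   declare void @my_free(ptr) allockind("free") "alloc-family"="my_family"
// Both calls report the family "my_family", letting passes check that an
// allocation and the deallocation reaching it come from the same allocator.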
+
 /// isLibFreeFunction - Returns true if the function is a builtin free()
 bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
-  unsigned ExpectedNumParams;
-  if (TLIFn == LibFunc_free ||
-      TLIFn == LibFunc_ZdlPv || // operator delete(void*)
-      TLIFn == LibFunc_ZdaPv || // operator delete[](void*)
-      TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*)
-      TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*)
-      TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*)
-      TLIFn == LibFunc_msvc_delete_array_ptr64)   // operator delete[](void*)
-    ExpectedNumParams = 1;
-  else if (TLIFn == LibFunc_ZdlPvj ||              // delete(void*, uint)
-           TLIFn == LibFunc_ZdlPvm ||              // delete(void*, ulong)
-           TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
-           TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t)
-           TLIFn == LibFunc_ZdaPvj ||              // delete[](void*, uint)
-           TLIFn == LibFunc_ZdaPvm ||              // delete[](void*, ulong)
-           TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
-           TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t)
-           TLIFn == LibFunc_msvc_delete_ptr32_int ||      // delete(void*, uint)
-           TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
-           TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
-           TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
-           TLIFn == LibFunc_msvc_delete_array_ptr32_int ||      // delete[](void*, uint)
-           TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
-           TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
-           TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow || // delete[](void*, nothrow)
-           TLIFn == LibFunc___kmpc_free_shared) // OpenMP Offloading RTL free
-    ExpectedNumParams = 2;
-  else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow)
-           TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow)
-           TLIFn == LibFunc_ZdlPvjSt11align_val_t || // delete(void*, unsigned long, align_val_t)
-           TLIFn == LibFunc_ZdlPvmSt11align_val_t || // delete(void*, unsigned long, align_val_t)
-           TLIFn == LibFunc_ZdaPvjSt11align_val_t || // delete[](void*, unsigned int, align_val_t)
-           TLIFn == LibFunc_ZdaPvmSt11align_val_t || // delete[](void*, unsigned long, align_val_t)
-           TLIFn == LibFunc_rust_dealloc)
-    ExpectedNumParams = 3;
-  else
-    return false;
+  Optional<FreeFnsTy> FnData = getFreeFunctionDataForFunction(F, TLIFn);
+  if (!FnData)
+    return checkFnAllocKind(F, AllocFnKind::Free);
 
   // Check free prototype.
   // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
@@ -444,7 +565,7 @@
   FunctionType *FTy = F->getFunctionType();
   if (!FTy->getReturnType()->isVoidTy())
     return false;
-  if (FTy->getNumParams() != ExpectedNumParams)
+  if (FTy->getNumParams() != FnData->NumParams)
     return false;
   if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
     return false;
@@ -452,21 +573,25 @@
   return true;
 }
 
-/// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+Value *llvm::getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI) {
   bool IsNoBuiltinCall;
-  const Function *Callee = getCalledFunction(I, IsNoBuiltinCall);
+  const Function *Callee = getCalledFunction(CB, IsNoBuiltinCall);
   if (Callee == nullptr || IsNoBuiltinCall)
     return nullptr;
 
   LibFunc TLIFn;
-  if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
-    return nullptr;
+  if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn) &&
+      isLibFreeFunction(Callee, TLIFn)) {
+    // All currently supported free functions free the first argument.
+    return CB->getArgOperand(0);
+  }
 
-  return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
+  if (checkFnAllocKind(CB, AllocFnKind::Free))
+    return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer);
+
+  return nullptr;
 }
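// Minimal caller-side usage sketch: where clients previously matched the call
// itself with isFreeCall, they now ask for the freed pointer directly (this
// mirrors the MemoryDependenceAnalysis update later in this patch):
//   if (const CallBase *CB = dyn_cast<CallBase>(Inst))
//     if (Value *FreedOp = getFreedOperand(CB, &TLI))
//       Loc = MemoryLocation::getAfter(FreedOp);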
 
-
 //===----------------------------------------------------------------------===//
 //  Utility functions to compute size of objects.
 //
@@ -495,11 +620,21 @@
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI,
                                  bool MustSucceed) {
+  return lowerObjectSizeCall(ObjectSize, DL, TLI, /*AAResults=*/nullptr,
+                             MustSucceed);
+}
+
+Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+                                 const DataLayout &DL,
+                                 const TargetLibraryInfo *TLI, AAResults *AA,
+                                 bool MustSucceed) {
   assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
          "ObjectSize must be a call to llvm.objectsize!");
 
   bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
   ObjectSizeOpts EvalOptions;
+  EvalOptions.AA = AA;
+
   // Unless we have to fold this to something, try to be as accurate as
   // possible.
   if (MustSucceed)
@@ -563,7 +698,7 @@
 
 APInt ObjectSizeOffsetVisitor::align(APInt Size, MaybeAlign Alignment) {
   if (Options.RoundToAlign && Alignment)
-    return APInt(IntTyBits, alignTo(Size.getZExtValue(), Alignment));
+    return APInt(IntTyBits, alignTo(Size.getZExtValue(), *Alignment));
   return Size;
 }
 
@@ -645,10 +780,10 @@
   if (!I.getAllocatedType()->isSized())
     return unknown();
 
-  if (isa<ScalableVectorType>(I.getAllocatedType()))
+  TypeSize ElemSize = DL.getTypeAllocSize(I.getAllocatedType());
+  if (ElemSize.isScalable() && Options.EvalMode != ObjectSizeOpts::Mode::Min)
     return unknown();
-
-  APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
+  APInt Size(IntTyBits, ElemSize.getKnownMinSize());
   if (!I.isArrayAllocation())
     return std::make_pair(align(Size, I.getAlign()), Zero);
 
@@ -679,8 +814,7 @@
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
-  auto Mapper = [](const Value *V) { return V; };
-  if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper))
+  if (Optional<APInt> Size = getAllocSize(&CB, TLI))
     return std::make_pair(*Size, Zero);
   return unknown();
 }
@@ -729,42 +863,161 @@
   return unknown();
 }
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
-  ++ObjectVisitorLoad;
-  return unknown();
+SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset(
+    LoadInst &Load, BasicBlock &BB, BasicBlock::iterator From,
+    SmallDenseMap<BasicBlock *, SizeOffsetType, 8> &VisitedBlocks,
+    unsigned &ScannedInstCount) {
+  constexpr unsigned MaxInstsToScan = 128;
+
+  auto Where = VisitedBlocks.find(&BB);
+  if (Where != VisitedBlocks.end())
+    return Where->second;
+
+  auto Unknown = [this, &BB, &VisitedBlocks]() {
+    return VisitedBlocks[&BB] = unknown();
+  };
+  auto Known = [&BB, &VisitedBlocks](SizeOffsetType SO) {
+    return VisitedBlocks[&BB] = SO;
+  };
+
+  do {
+    Instruction &I = *From;
+
+    if (I.isDebugOrPseudoInst())
+      continue;
+
+    if (++ScannedInstCount > MaxInstsToScan)
+      return Unknown();
+
+    if (!I.mayWriteToMemory())
+      continue;
+
+    if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      AliasResult AR =
+          Options.AA->alias(SI->getPointerOperand(), Load.getPointerOperand());
+      switch ((AliasResult::Kind)AR) {
+      case AliasResult::NoAlias:
+        continue;
+      case AliasResult::MustAlias:
+        if (SI->getValueOperand()->getType()->isPointerTy())
+          return Known(compute(SI->getValueOperand()));
+        else
+          return Unknown(); // No handling of non-pointer values by `compute`.
+      default:
+        return Unknown();
+      }
+    }
+
+    if (auto *CB = dyn_cast<CallBase>(&I)) {
+      Function *Callee = CB->getCalledFunction();
+      // Bail out on indirect call.
+      if (!Callee)
+        return Unknown();
+
+      LibFunc TLIFn;
+      if (!TLI || !TLI->getLibFunc(*CB->getCalledFunction(), TLIFn) ||
+          !TLI->has(TLIFn))
+        return Unknown();
+
+      // TODO: There are probably more interesting cases to support here.
+      if (TLIFn != LibFunc_posix_memalign)
+        return Unknown();
+
+      AliasResult AR =
+          Options.AA->alias(CB->getOperand(0), Load.getPointerOperand());
+      switch ((AliasResult::Kind)AR) {
+      case AliasResult::NoAlias:
+        continue;
+      case AliasResult::MustAlias:
+        break;
+      default:
+        return Unknown();
+      }
+
+      // Is the error status of posix_memalign correctly checked? If not, it
+      // would be incorrect to assume it succeeded and that the load doesn't
+      // see the previous value.
+      Optional<bool> Checked = isImpliedByDomCondition(
+          ICmpInst::ICMP_EQ, CB, ConstantInt::get(CB->getType(), 0), &Load, DL);
+      if (!Checked || !*Checked)
+        return Unknown();
+
+      Value *Size = CB->getOperand(2);
+      auto *C = dyn_cast<ConstantInt>(Size);
+      if (!C)
+        return Unknown();
+
+      return Known({C->getValue(), APInt(C->getValue().getBitWidth(), 0)});
+    }
+
+    return Unknown();
+  } while (From-- != BB.begin());
+
+  SmallVector<SizeOffsetType> PredecessorSizeOffsets;
+  for (auto *PredBB : predecessors(&BB)) {
+    PredecessorSizeOffsets.push_back(findLoadSizeOffset(
+        Load, *PredBB, BasicBlock::iterator(PredBB->getTerminator()),
+        VisitedBlocks, ScannedInstCount));
+    if (!bothKnown(PredecessorSizeOffsets.back()))
+      return Unknown();
+  }
+
+  if (PredecessorSizeOffsets.empty())
+    return Unknown();
+
+  return Known(std::accumulate(PredecessorSizeOffsets.begin() + 1,
+                               PredecessorSizeOffsets.end(),
+                               PredecessorSizeOffsets.front(),
+                               [this](SizeOffsetType LHS, SizeOffsetType RHS) {
+                                 return combineSizeOffset(LHS, RHS);
+                               }));
 }
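// The posix_memalign case above matches patterns of this shape (illustrative
// C; the size argument must be a constant and the successful status check
// must dominate the load, per isImpliedByDomCondition):
//   void *p;
//   if (posix_memalign(&p, 64, 256) == 0) {
//     ...   // a load of p here is credited with a 256-byte allocation
//   }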
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
-  // too complex to analyze statically.
-  return unknown();
+SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst &LI) {
+  if (!Options.AA) {
+    ++ObjectVisitorLoad;
+    return unknown();
+  }
+
+  SmallDenseMap<BasicBlock *, SizeOffsetType, 8> VisitedBlocks;
+  unsigned ScannedInstCount = 0;
+  SizeOffsetType SO =
+      findLoadSizeOffset(LI, *LI.getParent(), BasicBlock::iterator(LI),
+                         VisitedBlocks, ScannedInstCount);
+  if (!bothKnown(SO))
+    ++ObjectVisitorLoad;
+  return SO;
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS,
+                                                          SizeOffsetType RHS) {
+  if (!bothKnown(LHS) || !bothKnown(RHS))
+    return unknown();
+
+  switch (Options.EvalMode) {
+  case ObjectSizeOpts::Mode::Min:
+    return (getSizeWithOverflow(LHS).slt(getSizeWithOverflow(RHS))) ? LHS : RHS;
+  case ObjectSizeOpts::Mode::Max:
+    return (getSizeWithOverflow(LHS).sgt(getSizeWithOverflow(RHS))) ? LHS : RHS;
+  case ObjectSizeOpts::Mode::Exact:
+    return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) ? LHS
+                                                                   : unknown();
+  }
+  llvm_unreachable("missing an eval mode");
+}
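// Worked example for combineSizeOffset, assuming getSizeWithOverflow yields
// the remaining size (size minus offset): for two known pairs with remaining
// sizes of 24 and 40 bytes, Mode::Min picks the 24-byte pair, Mode::Max the
// 40-byte pair, and Mode::Exact returns unknown() because they differ.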
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) {
+  auto IncomingValues = PN.incoming_values();
+  return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(),
+                         compute(*IncomingValues.begin()),
+                         [this](SizeOffsetType LHS, Value *VRHS) {
+                           return combineSizeOffset(LHS, compute(VRHS));
+                         });
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
-  SizeOffsetType TrueSide  = compute(I.getTrueValue());
-  SizeOffsetType FalseSide = compute(I.getFalseValue());
-  if (bothKnown(TrueSide) && bothKnown(FalseSide)) {
-    if (TrueSide == FalseSide) {
-        return TrueSide;
-    }
-
-    APInt TrueResult = getSizeWithOverflow(TrueSide);
-    APInt FalseResult = getSizeWithOverflow(FalseSide);
-
-    if (TrueResult == FalseResult) {
-      return TrueSide;
-    }
-    if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
-      if (TrueResult.slt(FalseResult))
-        return TrueSide;
-      return FalseSide;
-    }
-    if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
-      if (TrueResult.sgt(FalseResult))
-        return TrueSide;
-      return FalseSide;
-    }
-  }
-  return unknown();
+  return combineSizeOffset(compute(I.getTrueValue()),
+                           compute(I.getFalseValue()));
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
@@ -809,7 +1062,7 @@
 
     // Erase any instructions we inserted as part of the traversal.
     for (Instruction *I : InsertedInstructions) {
-      I->replaceAllUsesWith(UndefValue::get(I->getType()));
+      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
       I->eraseFromParent();
     }
   }
@@ -938,7 +1191,7 @@
   return unknown();
 }
 
-SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) {
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) {
   return unknown();
 }
 
@@ -956,10 +1209,10 @@
     SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
 
     if (!bothKnown(EdgeData)) {
-      OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
+      OffsetPHI->replaceAllUsesWith(PoisonValue::get(IntTy));
       OffsetPHI->eraseFromParent();
       InsertedInstructions.erase(OffsetPHI);
-      SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
+      SizePHI->replaceAllUsesWith(PoisonValue::get(IntTy));
       SizePHI->eraseFromParent();
       InsertedInstructions.erase(SizePHI);
       return unknown();
diff --git a/src/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 36df462..fce9d5b 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -27,11 +27,7 @@
 #include "llvm/Analysis/PhiValues.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -44,7 +40,6 @@
 #include "llvm/IR/PredIteratorCache.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -53,10 +48,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cassert>
-#include <cstdint>
 #include <iterator>
 #include <utility>
 
@@ -146,10 +139,12 @@
     return ModRefInfo::ModRef;
   }
 
-  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
-    // calls to free() deallocate the entire structure
-    Loc = MemoryLocation::getAfter(CI->getArgOperand(0));
-    return ModRefInfo::Mod;
+  if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
+    if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
+      // calls to free() deallocate the entire structure
+      Loc = MemoryLocation::getAfter(FreedOp);
+      return ModRefInfo::Mod;
+    }
   }
 
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -414,20 +409,17 @@
       isInvariantLoad = true;
   }
 
-  // Return "true" if and only if the instruction I is either a non-simple
-  // load or a non-simple store.
-  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
+  // Returns true for a volatile instruction.
+  // For a load or store, returns true if its atomic ordering is stronger than
+  // AO; for any other instruction, returns true if it may read or write memory.
+  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
+    if (I->isVolatile())
+      return true;
     if (auto *LI = dyn_cast<LoadInst>(I))
-      return !LI->isSimple();
+      return isStrongerThan(LI->getOrdering(), AO);
     if (auto *SI = dyn_cast<StoreInst>(I))
-      return !SI->isSimple();
-    return false;
-  };
-
-  // Return "true" if I is not a load and not a store, but it does access
-  // memory.
-  auto isOtherMemAccess = [](Instruction *I) -> bool {
-    return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
+      return isStrongerThan(SI->getOrdering(), AO);
+    return I->mayReadOrWriteMemory();
   };
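// Example of the predicate above: an unordered atomic load as QueryInst is
// not "complex" relative to AtomicOrdering::Unordered (its ordering is not
// strictly stronger), so an atomic store no longer unconditionally clobbers
// it below; a monotonic or volatile query instruction still does trigger the
// conservative clobber.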
 
   // Walk backwards through the basic block, looking for dependencies.
@@ -500,8 +492,8 @@
       // atomic.
       // FIXME: This is overly conservative.
       if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
-        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
-            isOtherMemAccess(QueryInst))
+        if (!QueryInst ||
+            isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
           return MemDepResult::getClobber(LI);
         if (LI->getOrdering() != AtomicOrdering::Monotonic)
           return MemDepResult::getClobber(LI);
@@ -512,10 +504,10 @@
       // If we found a pointer, check if it could be the same as our pointer.
       AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
 
-      if (isLoad) {
-        if (R == AliasResult::NoAlias)
-          continue;
+      if (R == AliasResult::NoAlias)
+        continue;
 
+      if (isLoad) {
         // Must aliased loads are defs of each other.
         if (R == AliasResult::MustAlias)
           return MemDepResult::getDef(Inst);
@@ -532,10 +524,6 @@
         continue;
       }
 
-      // Stores don't depend on other no-aliased accesses.
-      if (R == AliasResult::NoAlias)
-        continue;
-
       // Stores don't alias loads from read-only memory.
       if (BatchAA.pointsToConstantMemory(LoadLoc))
         continue;
@@ -549,20 +537,25 @@
       // A Monotonic store is OK if the query inst is itself not atomic.
       // FIXME: This is overly conservative.
       if (!SI->isUnordered() && SI->isAtomic()) {
-        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
-            isOtherMemAccess(QueryInst))
+        if (!QueryInst ||
+            isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
           return MemDepResult::getClobber(SI);
-        if (SI->getOrdering() != AtomicOrdering::Monotonic)
-          return MemDepResult::getClobber(SI);
+        // OK, if we are here, the guard above guarantees that QueryInst is a
+        // non-atomic or unordered load/store. SI is atomic with monotonic or
+        // release semantics (a seq_cst store is really release semantics plus
+        // a total order over other seq_cst instructions; since QueryInst is
+        // not seq_cst, we can treat SI as having plain release semantics).
+        // Monotonic and release semantics allow reordering before the store,
+        // so it is safe to go on and check the aliasing; that check will
+        // prohibit reordering when the locations may or must alias.
       }
 
-      // FIXME: this is overly conservative.
       // While volatile access cannot be eliminated, they do not have to clobber
       // non-aliasing locations, as normal accesses can for example be reordered
       // with volatile accesses.
       if (SI->isVolatile())
-        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
-            isOtherMemAccess(QueryInst))
+        if (!QueryInst || QueryInst->isVolatile())
           return MemDepResult::getClobber(SI);
 
       // If alias analysis can tell that this store is guaranteed to not modify
@@ -743,8 +736,6 @@
     llvm::sort(Cache);
 
     ++NumCacheDirtyNonLocal;
-    // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
-    //     << Cache.size() << " cached: " << *QueryInst;
   } else {
     // Seed DirtyBlocks with each of the preds of QueryInst's block.
     BasicBlock *QueryBB = QueryCall->getParent();
@@ -1204,7 +1195,6 @@
     // If we do process a large number of blocks it becomes very expensive and
     // likely it isn't worth worrying about
     if (Result.size() > NumResultsLimit) {
-      Worklist.clear();
       // Sort it now (if needed) so that recursive invocations of
       // getNonLocalPointerDepFromBB and other routines that could reuse the
       // cache value will only see properly sorted cache arrays.
diff --git a/src/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/src/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
index a877b19..2ed3222 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
@@ -8,12 +8,10 @@
 
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsARM.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp b/src/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp
new file mode 100644
index 0000000..3d11cb8
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -0,0 +1,226 @@
+//===-- MemoryProfileInfo.cpp - memory profile info ------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utilities to analyze memory profile information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::memprof;
+
+#define DEBUG_TYPE "memory-profile-info"
+
+// Upper bound on accesses per byte for marking an allocation cold.
+cl::opt<float> MemProfAccessesPerByteColdThreshold(
+    "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden,
+    cl::desc("The threshold the accesses per byte must be under to consider "
+             "an allocation cold"));
+
+// Lower bound on lifetime to mark an allocation cold (in addition to accesses
+// per byte above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfMinLifetimeColdThreshold(
+    "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+    cl::desc("The minimum lifetime (s) for an allocation to be considered "
+             "cold"));
+
+AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount,
+                                           uint64_t MinSize,
+                                           uint64_t MinLifetime) {
+  if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold &&
+      // MinLifetime is expected to be in ms, so convert the threshold to ms.
+      MinLifetime >= MemProfMinLifetimeColdThreshold * 1000)
+    return AllocationType::Cold;
+  return AllocationType::NotCold;
+}
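// Worked example with the default thresholds above: MaxAccessCount = 8 and
// MinSize = 4 gives 8 / 4 = 2.0 accesses per byte (< 10.0), and MinLifetime =
// 250000 ms is >= 200 * 1000 ms, so the allocation is classified as
// AllocationType::Cold.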
+
+MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
+                                              LLVMContext &Ctx) {
+  std::vector<Metadata *> StackVals;
+  for (auto Id : CallStack) {
+    auto *StackValMD =
+        ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id));
+    StackVals.push_back(StackValMD);
+  }
+  return MDNode::get(Ctx, StackVals);
+}
+
+MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) {
+  assert(MIB->getNumOperands() == 2);
+  // The stack metadata is the first operand of each memprof MIB metadata.
+  return cast<MDNode>(MIB->getOperand(0));
+}
+
+AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
+  assert(MIB->getNumOperands() == 2);
+  // The allocation type is currently the second operand of each memprof
+  // MIB metadata. This will need to change as we add additional allocation
+  // types that can be applied based on the allocation profile data.
+  auto *MDS = dyn_cast<MDString>(MIB->getOperand(1));
+  assert(MDS);
+  if (MDS->getString().equals("cold"))
+    return AllocationType::Cold;
+  return AllocationType::NotCold;
+}
+
+static std::string getAllocTypeAttributeString(AllocationType Type) {
+  switch (Type) {
+  case AllocationType::NotCold:
+    return "notcold";
+  case AllocationType::Cold:
+    return "cold";
+  default:
+    assert(false && "Unexpected alloc type");
+  }
+  llvm_unreachable("invalid alloc type");
+}
+
+static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
+                                  AllocationType AllocType) {
+  auto AllocTypeString = getAllocTypeAttributeString(AllocType);
+  auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString);
+  CI->addFnAttr(A);
+}
+
+static bool hasSingleAllocType(uint8_t AllocTypes) {
+  const unsigned NumAllocTypes = countPopulation(AllocTypes);
+  assert(NumAllocTypes != 0);
+  return NumAllocTypes == 1;
+}
+
+void CallStackTrie::addCallStack(AllocationType AllocType,
+                                 ArrayRef<uint64_t> StackIds) {
+  bool First = true;
+  CallStackTrieNode *Curr = nullptr;
+  for (auto StackId : StackIds) {
+    // If this is the first stack frame, add or update alloc node.
+    if (First) {
+      First = false;
+      if (Alloc) {
+        assert(AllocStackId == StackId);
+        Alloc->AllocTypes |= static_cast<uint8_t>(AllocType);
+      } else {
+        AllocStackId = StackId;
+        Alloc = new CallStackTrieNode(AllocType);
+      }
+      Curr = Alloc;
+      continue;
+    }
+    // Update existing caller node if it exists.
+    auto Next = Curr->Callers.find(StackId);
+    if (Next != Curr->Callers.end()) {
+      Curr = Next->second;
+      Curr->AllocTypes |= static_cast<uint8_t>(AllocType);
+      continue;
+    }
+    // Otherwise add a new caller node.
+    auto *New = new CallStackTrieNode(AllocType);
+    Curr->Callers[StackId] = New;
+    Curr = New;
+  }
+  assert(Curr);
+}
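// Illustrative trie contents (assumed stack ids): addCallStack(Cold, {A, B, C})
// followed by addCallStack(NotCold, {A, B, D}) yields alloc node A and child B
// with AllocTypes = Cold | NotCold, and leaves C (Cold) and D (NotCold);
// buildMIBNodes below can then trim each context at its first node with a
// single alloc type (C and D here).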
+
+void CallStackTrie::addCallStack(MDNode *MIB) {
+  MDNode *StackMD = getMIBStackNode(MIB);
+  assert(StackMD);
+  std::vector<uint64_t> CallStack;
+  CallStack.reserve(StackMD->getNumOperands());
+  for (auto &MIBStackIter : StackMD->operands()) {
+    auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter);
+    assert(StackId);
+    CallStack.push_back(StackId->getZExtValue());
+  }
+  addCallStack(getMIBAllocType(MIB), CallStack);
+}
+
+static MDNode *createMIBNode(LLVMContext &Ctx,
+                             std::vector<uint64_t> &MIBCallStack,
+                             AllocationType AllocType) {
+  std::vector<Metadata *> MIBPayload(
+      {buildCallstackMetadata(MIBCallStack, Ctx)});
+  MIBPayload.push_back(
+      MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
+  return MDNode::get(Ctx, MIBPayload);
+}
+
+// Recursive helper to trim contexts and create metadata nodes.
+// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
+// caller makes it simpler to handle the many early returns in this method.
+bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
+                                  std::vector<uint64_t> &MIBCallStack,
+                                  std::vector<Metadata *> &MIBNodes,
+                                  bool CalleeHasAmbiguousCallerContext) {
+  // Trim context below the first node in a prefix with a single alloc type.
+  // Add an MIB record for the current call stack prefix.
+  if (hasSingleAllocType(Node->AllocTypes)) {
+    MIBNodes.push_back(
+        createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes));
+    return true;
+  }
+
+  // We don't have a single allocation type for all the contexts sharing this
+  // prefix, so recursively descend into the callers in the trie.
+  if (!Node->Callers.empty()) {
+    bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
+    bool AddedMIBNodesForAllCallerContexts = true;
+    for (auto &Caller : Node->Callers) {
+      MIBCallStack.push_back(Caller.first);
+      AddedMIBNodesForAllCallerContexts &=
+          buildMIBNodes(Caller.second, Ctx, MIBCallStack, MIBNodes,
+                        NodeHasAmbiguousCallerContext);
+      // Remove Caller.
+      MIBCallStack.pop_back();
+    }
+    if (AddedMIBNodesForAllCallerContexts)
+      return true;
+    // We expect that the callers should be forced to add MIBs to disambiguate
+    // the context in this case (see below).
+    assert(!NodeHasAmbiguousCallerContext);
+  }
+
+  // If we reached here, then this node does not have a single allocation type,
+  // and we didn't add metadata for a longer call stack prefix including any of
+  // Node's callers. That means we never hit a single allocation type along all
+  // call stacks with this prefix. This can happen due to recursion collapsing
+  // or the stack being deeper than tracked by the profiler runtime, leading to
+  // contexts with different allocation types being merged. In that case, we
+  // trim the context just below the deepest context split, which is this
+  // node if the callee has an ambiguous caller context (multiple callers),
+  // since the recursive calls above returned false. Conservatively give it
+  // non-cold allocation type.
+  if (!CalleeHasAmbiguousCallerContext)
+    return false;
+  MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold));
+  return true;
+}
+
+// Build and attach the minimal necessary MIB metadata. If the alloc has a
+// single allocation type, add a function attribute instead. Returns true if
+// memprof metadata was attached, false if not (attribute added instead).
+bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
+  auto &Ctx = CI->getContext();
+  if (hasSingleAllocType(Alloc->AllocTypes)) {
+    addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes);
+    return false;
+  }
+  std::vector<uint64_t> MIBCallStack;
+  MIBCallStack.push_back(AllocStackId);
+  std::vector<Metadata *> MIBNodes;
+  assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
+  buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
+                /*CalleeHasAmbiguousCallerContext=*/true);
+  assert(MIBCallStack.size() == 1 &&
+         "Should only be left with Alloc's location in stack");
+  CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
+  return true;
+}
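For context, a minimal sketch of how the CallStackTrie added above is meant to be driven (a hedged example: the stack ids and the call `CI` are hypothetical; types come from llvm/Analysis/MemoryProfileInfo.h):

    #include "llvm/Analysis/MemoryProfileInfo.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;
    using namespace llvm::memprof;

    // CI is assumed to be a call to an allocation function.
    void annotateAlloc(CallBase *CI) {
      CallStackTrie Trie;
      // Two profiled contexts share the allocation site (leaf id 1) but
      // diverge afterwards and observed different allocation behavior.
      Trie.addCallStack(AllocationType::Cold, {1, 2, 3});
      Trie.addCallStack(AllocationType::NotCold, {1, 2, 4});
      // The types differ across contexts, so this attaches !memprof metadata
      // with one MIB per disambiguated context and returns true.
      Trie.buildAndAttachMIBMetadata(CI);
    }

When every context agrees on one type, buildAndAttachMIBMetadata instead adds the string attribute produced by getAllocTypeAttributeString and returns false, matching the code above.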
diff --git a/src/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/src/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
index 57f431e..76371b8 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
@@ -36,8 +36,8 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/Use.h"
 #include "llvm/InitializePasses.h"
@@ -49,10 +49,10 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
-#include <cstdlib>
 #include <iterator>
 #include <memory>
 #include <utility>
@@ -130,6 +130,12 @@
   MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
       : MSSA(M), Walker(M->getWalker()) {}
 
+  void emitBasicBlockStartAnnot(const BasicBlock *BB,
+                                formatted_raw_ostream &OS) override {
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
+      OS << "; " << *MA << "\n";
+  }
+
   void emitInstructionAnnot(const Instruction *I,
                             formatted_raw_ostream &OS) override {
     if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
@@ -732,7 +738,7 @@
   struct generic_def_path_iterator
       : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
                                     std::forward_iterator_tag, T *> {
-    generic_def_path_iterator() {}
+    generic_def_path_iterator() = default;
     generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
 
     T &operator*() const { return curNode(); }
@@ -743,9 +749,9 @@
     }
 
     bool operator==(const generic_def_path_iterator &O) const {
-      if (N.hasValue() != O.N.hasValue())
+      if (N.has_value() != O.N.has_value())
         return false;
-      return !N.hasValue() || *N == *O.N;
+      return !N || *N == *O.N;
     }
 
   private:
@@ -1397,6 +1403,9 @@
       continue;
     }
 
+    if (MU->isOptimized())
+      continue;
+
     if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
       MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
       continue;
@@ -1585,10 +1594,6 @@
   SmallPtrSet<BasicBlock *, 16> Visited;
   renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
 
-  ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
-  CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
-  OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();
-
   // Mark the uses in unreachable blocks as live on entry, so that they go
   // somewhere.
   for (auto &BB : F)
@@ -2178,6 +2183,17 @@
   return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
 }
 
+void MemorySSA::ensureOptimizedUses() {
+  if (IsOptimized)
+    return;
+
+  BatchAAResults BatchAA(*AA);
+  ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BatchAA, DT);
+  CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+  OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses();
+  IsOptimized = true;
+}
+
 void MemoryAccess::print(raw_ostream &OS) const {
   switch (getValueID()) {
   case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
@@ -2350,6 +2366,7 @@
 
 bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
   auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+  MSSA.ensureOptimizedUses();
   if (DotCFGMSSA != "") {
     DOTFuncMSSAInfo CFGInfo(F, MSSA);
     WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
@@ -2382,6 +2399,7 @@
 PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
   auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+  MSSA.ensureOptimizedUses();
   if (DotCFGMSSA != "") {
     DOTFuncMSSAInfo CFGInfo(F, MSSA);
     WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
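A hedged usage sketch of the new lazy scheme (the consumer and loop are illustrative): MemorySSA construction no longer runs OptimizeUses eagerly; the printer passes above call ensureOptimizedUses() first, and other walk-heavy clients would do the same:

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/IR/InstIterator.h"

    using namespace llvm;

    // Walk-heavy consumer: request use optimization on demand.
    static void queryClobbers(Function &F, MemorySSA &MSSA) {
      MSSA.ensureOptimizedUses(); // pays the OptimizeUses cost at most once
      for (Instruction &I : instructions(F))
        if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I))
          (void)MSSA.getWalker()->getClobberingMemoryAccess(MA);
    }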
diff --git a/src/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/src/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 9c84188..9ad60f7 100644
--- a/src/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -10,22 +10,15 @@
 //
 //===----------------------------------------------------------------===//
 #include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
 #include <algorithm>
 
 #define DEBUG_TYPE "memoryssa"
@@ -243,6 +236,7 @@
 }
 
 void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) {
+  VisitedBlocks.clear();
   InsertedPHIs.clear();
   MU->setDefiningAccess(getPreviousDef(MU));
 
@@ -311,6 +305,13 @@
 // point to the correct new defs, to ensure we only have one variable, and no
 // disconnected stores.
 void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
+  // Don't bother updating dead code.
+  if (!MSSA->DT->isReachableFromEntry(MD->getBlock())) {
+    MD->setDefiningAccess(MSSA->getLiveOnEntryDef());
+    return;
+  }
+
+  VisitedBlocks.clear();
   InsertedPHIs.clear();
 
   // See if we had a local def, and if not, go hunting.
@@ -427,10 +428,10 @@
   if (NewPhiSize)
     tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
 
-  // Now that all fixups are done, rename all uses if we are asked. Skip
-  // renaming for defs in unreachable blocks.
+  // Now that all fixups are done, rename all uses if we are asked. The defs are
+  // guaranteed to be in reachable code due to the check at the method entry.
   BasicBlock *StartBlock = MD->getBlock();
-  if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) {
+  if (RenameUses) {
     SmallPtrSet<BasicBlock *, 16> Visited;
     // We are guaranteed there is a def in the block, because we just got it
     // handed to us in this function.
@@ -450,7 +451,7 @@
     }
     // Existing Phi blocks may need renaming too, if an access was previously
     // optimized and the inserted Defs "cover" the Optimized value.
-    for (auto &MP : ExistingPhis) {
+    for (const auto &MP : ExistingPhis) {
       MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP);
       if (Phi)
         MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
@@ -461,7 +462,7 @@
 void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
   SmallPtrSet<const BasicBlock *, 8> Seen;
   SmallVector<const BasicBlock *, 16> Worklist;
-  for (auto &Var : Vars) {
+  for (const auto &Var : Vars) {
     MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var);
     if (!NewDef)
       continue;
@@ -743,10 +744,10 @@
     cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
   };
 
-  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+  for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
     ProcessBlock(BB);
 
-  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+  for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
     if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
       if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
         FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
@@ -810,7 +811,7 @@
   SmallVector<CFGUpdate, 4> DeleteUpdates;
   SmallVector<CFGUpdate, 4> RevDeleteUpdates;
   SmallVector<CFGUpdate, 4> InsertUpdates;
-  for (auto &Update : Updates) {
+  for (const auto &Update : Updates) {
     if (Update.getKind() == DT.Insert)
       InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
     else {
@@ -957,7 +958,7 @@
   };
   SmallDenseMap<BasicBlock *, PredInfo> PredMap;
 
-  for (auto &Edge : Updates) {
+  for (const auto &Edge : Updates) {
     BasicBlock *BB = Edge.getTo();
     auto &AddedBlockSet = PredMap[BB].Added;
     AddedBlockSet.insert(Edge.getFrom());
@@ -1002,7 +1003,7 @@
 
   // First create MemoryPhis in all blocks that don't have one. Create in the
   // order found in Updates, not in PredMap, to get deterministic numbering.
-  for (auto &Edge : Updates) {
+  for (const auto &Edge : Updates) {
     BasicBlock *BB = Edge.getTo();
     if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
       InsertedPhis.push_back(MSSA->createMemoryPhi(BB));
@@ -1399,7 +1400,7 @@
 }
 
 void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
-  for (auto &VH : UpdatedPHIs)
+  for (const auto &VH : UpdatedPHIs)
     if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
       tryRemoveTrivialPhi(MPhi);
 }
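For orientation, a sketch of the update sequence these insertUse/insertDef fixes harden (the helper and its placement are illustrative, not from this patch):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Create the access with a null defining access and let insertDef compute
    // it. insertDef now clears VisitedBlocks itself and short-circuits for
    // unreachable blocks, so reusing one updater across insertions is safe.
    static void registerNewStore(MemorySSAUpdater &MSSAU, StoreInst *NewStore) {
      MemoryAccess *NewMA = MSSAU.createMemoryAccessInBB(
          NewStore, /*Definition=*/nullptr, NewStore->getParent(),
          MemorySSA::BeforeTerminator);
      MSSAU.insertDef(cast<MemoryDef>(NewMA), /*RenameUses=*/true);
    }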
diff --git a/src/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/src/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
index fab51d6..dc149f3 100644
--- a/src/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
@@ -22,7 +22,7 @@
     LLVMContext &Ctx, const std::string &ModelPath,
     const std::vector<TensorSpec> &InputSpecs,
     const std::vector<LoggedFeatureSpec> &OutputSpecs)
-    : MLModelRunner(Ctx, MLModelRunner::Kind::Development),
+    : MLModelRunner(Ctx, MLModelRunner::Kind::Development, InputSpecs.size()),
       OutputSpecs(OutputSpecs) {
   Evaluator = std::make_unique<TFModelEvaluator>(
       ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
@@ -32,6 +32,10 @@
     Evaluator.reset();
     return;
   }
+
+  for (size_t I = 0, E = InputSpecs.size(); I < E; ++I) {
+    setUpBufferForTensor(I, InputSpecs[I], Evaluator->getUntypedInput(I));
+  }
 }
 
 void *ModelUnderTrainingRunner::evaluateUntyped() {
@@ -43,24 +47,31 @@
   return LastEvaluationResult->getUntypedTensorValue(0);
 }
 
-void *ModelUnderTrainingRunner::getTensorUntyped(size_t Index) {
-  return Evaluator->getUntypedInput(Index);
+std::unique_ptr<ModelUnderTrainingRunner>
+ModelUnderTrainingRunner::createAndEnsureValid(
+    LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName,
+    const std::vector<TensorSpec> &InputSpecs,
+    StringRef OutputSpecsPathOverride) {
+  if (auto MaybeOutputSpecs = loadOutputSpecs(Ctx, DecisionName, ModelPath,
+                                              OutputSpecsPathOverride))
+    return createAndEnsureValid(Ctx, ModelPath, DecisionName, InputSpecs,
+                                *MaybeOutputSpecs);
+  Ctx.emitError("Could not load the policy model from the provided path");
+  return nullptr;
 }
 
 std::unique_ptr<ModelUnderTrainingRunner>
 ModelUnderTrainingRunner::createAndEnsureValid(
     LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName,
     const std::vector<TensorSpec> &InputSpecs,
-    StringRef OutputSpecsPathOverride) {
+    const std::vector<LoggedFeatureSpec> &OutputSpecs) {
   std::unique_ptr<ModelUnderTrainingRunner> MUTR;
-  if (auto MaybeOutputSpecs = loadOutputSpecs(Ctx, DecisionName, ModelPath,
-                                              OutputSpecsPathOverride))
-    MUTR.reset(new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs,
-                                            *MaybeOutputSpecs));
+  MUTR.reset(
+      new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs, OutputSpecs));
   if (MUTR && MUTR->isValid())
     return MUTR;
 
-  Ctx.emitError("Could not load the policy model from the provided path");
+  Ctx.emitError("Could not load or create model evaluator.");
   return nullptr;
 }
 
diff --git a/src/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 64fd5eb..756f92e 100644
--- a/src/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -15,8 +15,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ModuleDebugInfoPrinter.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
@@ -95,7 +95,7 @@
     O << '\n';
   }
 
-  for (auto GVU : Finder.global_variables()) {
+  for (auto *GVU : Finder.global_variables()) {
     const auto *GV = GVU->getVariable();
     O << "Global variable: " << GV->getName();
     printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
diff --git a/src/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 2880ca6..efe6058 100644
--- a/src/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -38,7 +38,6 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
@@ -165,7 +164,8 @@
     SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
     DominatorTree &DT) {
   switch (CI->getCalledFunction()->getIntrinsicID()) {
-  case Intrinsic::type_test: {
+  case Intrinsic::type_test:
+  case Intrinsic::public_type_test: {
     auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
     auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
     if (!TypeId)
@@ -368,7 +368,7 @@
         // We should have named any anonymous globals
         assert(CalledFunction->hasName());
         auto ScaledCount = PSI->getProfileCount(*CB, BFI);
-        auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
+        auto Hotness = ScaledCount ? getHotness(*ScaledCount, PSI)
                                    : CalleeInfo::HotnessType::Unknown;
         if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
           Hotness = CalleeInfo::HotnessType::Cold;
@@ -401,7 +401,7 @@
         // to enable importing for subsequent indirect call promotion and
         // inlining.
         if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
-          for (auto &Op : MD->operands()) {
+          for (const auto &Op : MD->operands()) {
             Function *Callee = mdconst::extract_or_null<Function>(Op);
             if (Callee)
               CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
@@ -413,7 +413,7 @@
         auto CandidateProfileData =
             ICallAnalysis.getPromotionCandidatesForInstruction(
                 &I, NumVals, TotalCount, NumCandidates);
-        for (auto &Candidate : CandidateProfileData)
+        for (const auto &Candidate : CandidateProfileData)
           CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
               .updateHotness(getHotness(Candidate.Count, PSI));
       }
@@ -452,7 +452,7 @@
     // If both load and store instructions reference the same variable
     // we won't be able to optimize it. Add all such reference edges
     // to RefEdges set.
-    for (auto &VI : StoreRefEdges)
+    for (const auto &VI : StoreRefEdges)
       if (LoadRefEdges.remove(VI))
         RefEdges.insert(VI);
 
@@ -460,11 +460,11 @@
     // All new reference edges inserted in the two loops below are either
     // read or write only. They will be grouped at the end of the RefEdges
     // vector, so we can use a single integer value to identify them.
-    for (auto &VI : LoadRefEdges)
+    for (const auto &VI : LoadRefEdges)
       RefEdges.insert(VI);
 
     unsigned FirstWORef = RefEdges.size();
-    for (auto &VI : StoreRefEdges)
+    for (const auto &VI : StoreRefEdges)
       RefEdges.insert(VI);
 
     Refs = RefEdges.takeVector();
@@ -490,8 +490,7 @@
                               HasIndirBranchToBlockAddress;
   GlobalValueSummary::GVFlags Flags(
       F.getLinkage(), F.getVisibility(), NotEligibleForImport,
-      /* Live = */ false, F.isDSOLocal(),
-      F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
+      /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable());
   FunctionSummary::FFlags FunFlags{
       F.hasFnAttribute(Attribute::ReadNone),
       F.hasFnAttribute(Attribute::ReadOnly),
@@ -612,8 +611,7 @@
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(
       V.getLinkage(), V.getVisibility(), NonRenamableLocal,
-      /* Live = */ false, V.isDSOLocal(),
-      V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
+      /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable());
 
   VTableFuncList VTableFuncs;
   // If splitting is not enabled, then we compute the summary information
@@ -649,16 +647,18 @@
   Index.addGlobalValueSummary(V, std::move(GVarSummary));
 }
 
-static void
-computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
-                    DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
+                                DenseSet<GlobalValue::GUID> &CantBePromoted) {
+  // Skip the summary for indirect function aliases, as the summary for the
+  // aliasee will not be emitted.
+  const GlobalObject *Aliasee = A.getAliaseeObject();
+  if (isa<GlobalIFunc>(Aliasee))
+    return;
   bool NonRenamableLocal = isNonRenamableLocal(A);
   GlobalValueSummary::GVFlags Flags(
       A.getLinkage(), A.getVisibility(), NonRenamableLocal,
-      /* Live = */ false, A.isDSOLocal(),
-      A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
+      /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable());
   auto AS = std::make_unique<AliasSummary>(Flags);
-  auto *Aliasee = A.getAliaseeObject();
   auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
   assert(AliaseeVI && "Alias expects aliasee summary to be available");
   assert(AliaseeVI.getSummaryList().size() == 1 &&
@@ -672,7 +672,7 @@
 // Set LiveRoot flag on entries matching the given value name.
 static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
   if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
-    for (auto &Summary : VI.getSummaryList())
+    for (const auto &Summary : VI.getSummaryList())
       Summary->setLive(true);
 }
 
@@ -733,8 +733,7 @@
               GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility,
               /* NotEligibleToImport = */ true,
               /* Live = */ true,
-              /* Local */ GV->isDSOLocal(),
-              GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
+              /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable());
           CantBePromoted.insert(GV->getGUID());
           // Create the appropriate summary type.
           if (Function *F = dyn_cast<Function>(GV)) {
@@ -781,7 +780,7 @@
 
   // Compute summaries for all functions defined in module, and save in the
   // index.
-  for (auto &F : M) {
+  for (const auto &F : M) {
     if (F.isDeclaration())
       continue;
 
@@ -816,6 +815,13 @@
   for (const GlobalAlias &A : M.aliases())
     computeAliasSummary(Index, A, CantBePromoted);
 
+  // Iterate through ifuncs and mark everything along their resolver paths
+  // as live.
+  for (const GlobalIFunc &I : M.ifuncs()) {
+    I.applyAlongResolverPath([&Index](const GlobalValue &GV) {
+      Index.getGlobalValueSummary(GV)->setLive(true);
+    });
+  }
+
   for (auto *V : LocalsUsed) {
     auto *Summary = Index.getGlobalValueSummary(*V);
     assert(Summary && "Missing summary for global value");
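The repeated GVFlags change in this file swaps a hand-rolled approximation for GlobalValue's own query; roughly (a sketch of the before/after predicates, not the exact upstream logic):

    #include "llvm/IR/GlobalValue.h"

    // Old approximation: omittable only when linkonce_odr with unnamed_addr.
    static bool oldCanBeOmitted(const llvm::GlobalValue &GV) {
      return GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr();
    }

    // New: defer to GlobalValue, which additionally rejects interposable
    // linkages and other cases where the symbol must remain visible.
    static bool newCanBeOmitted(const llvm::GlobalValue &GV) {
      return GV.canBeOmittedFromSymbolTable();
    }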
diff --git a/src/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/src/llvm-project/llvm/lib/Analysis/MustExecute.cpp
index 5ca72f5f..ac6590c 100644
--- a/src/llvm-project/llvm/lib/Analysis/MustExecute.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/MustExecute.cpp
@@ -16,14 +16,11 @@
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -84,7 +81,7 @@
   MW.clear();
   MayThrow = false;
   // Figure out the fact that at least one block may throw.
-  for (auto &BB : CurLoop->blocks())
+  for (const auto &BB : CurLoop->blocks())
     if (ICF.hasICF(&*BB)) {
       MayThrow = true;
       break;
@@ -143,7 +140,7 @@
     return false;
   auto DL = ExitBlock->getModule()->getDataLayout();
   auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
-  auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(),
+  auto *SimpleValOrNull = simplifyCmpInst(Cond->getPredicate(),
                                           IVStart, RHS,
                                           {DL, /*TLI*/ nullptr,
                                               DT, /*AC*/ nullptr, BI});
@@ -167,7 +164,7 @@
   if (BB == CurLoop->getHeader())
     return;
   SmallVector<const BasicBlock *, 4> WorkList;
-  for (auto *Pred : predecessors(BB)) {
+  for (const auto *Pred : predecessors(BB)) {
     Predecessors.insert(Pred);
     WorkList.push_back(Pred);
   }
@@ -183,7 +180,7 @@
     // @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll.
     // We can ignore backedges of all loops containing BB to get a slightly
     // more optimistic result.
-    for (auto *PredPred : predecessors(Pred))
+    for (const auto *PredPred : predecessors(Pred))
       if (Predecessors.insert(PredPred).second)
         WorkList.push_back(PredPred);
   }
@@ -210,7 +207,7 @@
   // 3) Exit blocks which are not taken on 1st iteration.
   // Memoize blocks we've already checked.
   SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors;
-  for (auto *Pred : Predecessors) {
+  for (const auto *Pred : Predecessors) {
     // Predecessor block may throw, so it has a side exit.
     if (blockMayThrow(Pred))
       return false;
@@ -220,7 +217,7 @@
     if (DT->dominates(BB, Pred))
       continue;
 
-    for (auto *Succ : successors(Pred))
+    for (const auto *Succ : successors(Pred))
       if (CheckedSuccessors.insert(Succ).second &&
           Succ != BB && !Predecessors.count(Succ))
         // By discharging conditions that are not executed on the 1st iteration,
@@ -288,7 +285,7 @@
   collectTransitivePredecessors(CurLoop, BB, Predecessors);
   // Find if there any instruction in either predecessor that could write
   // to memory.
-  for (auto *Pred : Predecessors)
+  for (const auto *Pred : Predecessors)
     if (MW.mayWriteToMemory(Pred))
       return false;
   return true;
@@ -416,7 +413,7 @@
 public:
   MustExecuteAnnotatedWriter(const Function &F,
                              DominatorTree &DT, LoopInfo &LI) {
-    for (auto &I: instructions(F)) {
+    for (const auto &I: instructions(F)) {
       Loop *L = LI.getLoopFor(I.getParent());
       while (L) {
         if (isMustExecuteIn(I, L, &DT)) {
@@ -428,8 +425,8 @@
   }
   MustExecuteAnnotatedWriter(const Module &M,
                              DominatorTree &DT, LoopInfo &LI) {
-    for (auto &F : M)
-    for (auto &I: instructions(F)) {
+    for (const auto &F : M)
+    for (const auto &I: instructions(F)) {
       Loop *L = LI.getLoopFor(I.getParent());
       while (L) {
         if (isMustExecuteIn(I, L, &DT)) {
@@ -494,9 +491,9 @@
 static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map,
                                    FnTy &&Fn, ArgsTy&&... args) {
   Optional<V> &OptVal = Map[Key];
-  if (!OptVal.hasValue())
+  if (!OptVal)
     OptVal = Fn(std::forward<ArgsTy>(args)...);
-  return OptVal.getValue();
+  return OptVal.value();
 }
 
 const BasicBlock *
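getOrCreateCachedOptional, touched above, is a small memoization idiom; a standalone sketch of the same pattern using std::optional, whose semantics the renamed Optional methods mirror (simplified here to a unary callable and std::map):

    #include <map>
    #include <optional>

    template <typename K, typename V, typename FnTy>
    V getOrCreateCached(K Key, std::map<K, std::optional<V>> &Cache, FnTy &&Fn) {
      std::optional<V> &Slot = Cache[Key];
      if (!Slot)        // contextual bool test, as in the diff above
        Slot = Fn(Key); // compute at most once per key
      return *Slot;
    }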
diff --git a/src/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp b/src/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp
index 7178120..1914b22 100644
--- a/src/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp
@@ -10,24 +10,14 @@
 // logs for the default policy, in 'development' mode, but never ask it to
 // 'run'.
 //===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
-#if defined(LLVM_HAVE_TF_API)
-
 #include "llvm/Analysis/NoInferenceModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
 
 using namespace llvm;
 
 NoInferenceModelRunner::NoInferenceModelRunner(
     LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs)
-    : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp) {
-  ValuesBuffer.reserve(Inputs.size());
+    : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp, Inputs.size()) {
+  size_t Index = 0;
   for (const auto &TS : Inputs)
-    ValuesBuffer.push_back(std::make_unique<char[]>(TS.getElementCount() *
-                                                    TS.getElementByteSize()));
+    setUpBufferForTensor(Index++, TS, nullptr);
 }
-
-void *NoInferenceModelRunner::getTensorUntyped(size_t Index) {
-  return ValuesBuffer[Index].get();
-}
-#endif // defined(LLVM_HAVE_TF_API)
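The deleted ValuesBuffer bookkeeping moved behind MLModelRunner::setUpBufferForTensor; what the runner-owned allocation amounted to, as a self-contained sketch (SpecLike stands in for TensorSpec):

    #include <cstddef>
    #include <memory>
    #include <vector>

    // SpecLike mimics the two TensorSpec sizing queries the old code used.
    struct SpecLike {
      size_t ElementCount;
      size_t ElementByteSize;
    };

    // One raw, owned buffer per input tensor, sized count * element size.
    static std::vector<std::unique_ptr<char[]>>
    allocateBuffers(const std::vector<SpecLike> &Inputs) {
      std::vector<std::unique_ptr<char[]>> Buffers;
      Buffers.reserve(Inputs.size());
      for (const auto &TS : Inputs)
        Buffers.push_back(
            std::make_unique<char[]>(TS.ElementCount * TS.ElementByteSize));
      return Buffers;
    }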
diff --git a/src/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 0826b30..6fe056d 100644
--- a/src/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -26,8 +26,6 @@
 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 
diff --git a/src/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/src/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
index 6f3d4d5..17b40f0 100644
--- a/src/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -47,7 +47,7 @@
 bool OptimizationRemarkEmitter::invalidate(
     Function &F, const PreservedAnalyses &PA,
     FunctionAnalysisManager::Invalidator &Inv) {
-  if (OwnedBFI.get()) {
+  if (OwnedBFI) {
     OwnedBFI.reset();
     BFI = nullptr;
   }
@@ -80,7 +80,7 @@
   computeHotness(OptDiag);
 
   // Only emit it if its hotness meets the threshold.
-  if (OptDiag.getHotness().getValueOr(0) <
+  if (OptDiag.getHotness().value_or(0) <
       F->getContext().getDiagnosticsHotnessThreshold()) {
     return;
   }
diff --git a/src/llvm-project/llvm/lib/Analysis/OverflowInstAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/OverflowInstAnalysis.cpp
index 87a85e6..8bfd664 100644
--- a/src/llvm-project/llvm/lib/Analysis/OverflowInstAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/OverflowInstAnalysis.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OverflowInstAnalysis.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PatternMatch.h"
 
diff --git a/src/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp b/src/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
index 02d0849..5b0fbca 100644
--- a/src/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
@@ -17,11 +17,14 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static cl::opt<bool> EnableAddPhiTranslation(
+    "gvn-add-phi-translation", cl::init(false), cl::Hidden,
+    cl::desc("Enable phi-translation of add instructions"));
+
 static bool CanPHITrans(Instruction *Inst) {
   if (isa<PHINode>(Inst) ||
       isa<GetElementPtrInst>(Inst))
@@ -35,9 +38,6 @@
       isa<ConstantInt>(Inst->getOperand(1)))
     return true;
 
-  //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
-  //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
-  //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
   return false;
 }
 
@@ -226,7 +226,7 @@
       return GEP;
 
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps[0],
+    if (Value *V = simplifyGEPInst(GEP->getSourceElementType(), GEPOps[0],
                                    ArrayRef<Value *>(GEPOps).slice(1),
                                    GEP->isInBounds(), {DL, TLI, DT, AC})) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
@@ -240,6 +240,7 @@
     for (User *U : APHIOp->users()) {
       if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
         if (GEPI->getType() == GEP->getType() &&
+            GEPI->getSourceElementType() == GEP->getSourceElementType() &&
             GEPI->getNumOperands() == GEPOps.size() &&
             GEPI->getParent()->getParent() == CurBB->getParent() &&
             (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
@@ -277,7 +278,7 @@
         }
 
     // See if the add simplifies away.
-    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
+    if (Value *Res = simplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
       // If we simplified the operands, the LHS is no longer an input, but Res
       // is.
       RemoveInstInputs(LHS, InstInputs);
@@ -413,14 +414,14 @@
     return Result;
   }
 
-#if 0
-  // FIXME: This code works, but it is unclear that we actually want to insert
-  // a big chain of computation in order to make a value available in a block.
-  // This needs to be evaluated carefully to consider its cost trade offs.
-
   // Handle add with a constant RHS.
-  if (Inst->getOpcode() == Instruction::Add &&
+  if (EnableAddPhiTranslation && Inst->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(Inst->getOperand(1))) {
+
+    // FIXME: This code works, but it is unclear that we actually want to insert
+    // a big chain of computation in order to make a value available in a block.
+    // This needs to be evaluated carefully to consider its cost trade offs.
+
     // PHI translate the LHS.
     Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
                                               CurBB, PredBB, DT, NewInsts);
@@ -434,7 +435,6 @@
     NewInsts.push_back(Res);
     return Res;
   }
-#endif
 
   return nullptr;
 }
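The PHITransAddr change above converts an `#if 0` block into code gated by a hidden, default-off cl::opt; the general pattern, with hypothetical names:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> EnableRiskyPath(
        "enable-risky-path", llvm::cl::init(false), llvm::cl::Hidden,
        llvm::cl::desc("Enable an experimental code path"));

    int transform(int X) {
      // Formerly '#if 0' code: now always compiled (so it cannot rot), still
      // off by default, and reachable via -enable-risky-path for experiments.
      if (EnableRiskyPath)
        return X * 2;
      return X;
    }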
diff --git a/src/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/src/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 268ed9d..64844f5 100644
--- a/src/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/InitializePasses.h"
@@ -125,7 +124,7 @@
       for (const auto &I : BB)
         if (isa<CallInst>(I) || isa<InvokeInst>(I))
           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
-            TotalCallCount += CallCount.getValue();
+            TotalCallCount += *CallCount;
     if (isHotCount(TotalCallCount))
       return true;
   }
@@ -154,7 +153,7 @@
       for (const auto &I : BB)
         if (isa<CallInst>(I) || isa<InvokeInst>(I))
           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
-            TotalCallCount += CallCount.getValue();
+            TotalCallCount += *CallCount;
     if (!isColdCount(TotalCallCount))
       return false;
   }
@@ -166,7 +165,7 @@
 
 bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const {
   assert(hasPartialSampleProfile() && "Expect partial sample profile");
-  return !F.getEntryCount().hasValue();
+  return !F.getEntryCount();
 }
 
 template <bool isHot>
@@ -188,7 +187,7 @@
       for (const auto &I : BB)
         if (isa<CallInst>(I) || isa<InvokeInst>(I))
           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
-            TotalCallCount += CallCount.getValue();
+            TotalCallCount += *CallCount;
     if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
       return true;
     if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount))
@@ -280,19 +279,19 @@
 }
 
 bool ProfileSummaryInfo::hasHugeWorkingSetSize() const {
-  return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
+  return HasHugeWorkingSetSize && HasHugeWorkingSetSize.value();
 }
 
 bool ProfileSummaryInfo::hasLargeWorkingSetSize() const {
-  return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue();
+  return HasLargeWorkingSetSize && HasLargeWorkingSetSize.value();
 }
 
 bool ProfileSummaryInfo::isHotCount(uint64_t C) const {
-  return HotCountThreshold && C >= HotCountThreshold.getValue();
+  return HotCountThreshold && C >= HotCountThreshold.value();
 }
 
 bool ProfileSummaryInfo::isColdCount(uint64_t C) const {
-  return ColdCountThreshold && C <= ColdCountThreshold.getValue();
+  return ColdCountThreshold && C <= ColdCountThreshold.value();
 }
 
 template <bool isHot>
@@ -300,9 +299,9 @@
                                                        uint64_t C) const {
   auto CountThreshold = computeThreshold(PercentileCutoff);
   if (isHot)
-    return CountThreshold && C >= CountThreshold.getValue();
+    return CountThreshold && C >= CountThreshold.value();
   else
-    return CountThreshold && C <= CountThreshold.getValue();
+    return CountThreshold && C <= CountThreshold.value();
 }
 
 bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff,
@@ -316,11 +315,11 @@
 }
 
 uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const {
-  return HotCountThreshold.getValueOr(UINT64_MAX);
+  return HotCountThreshold.value_or(UINT64_MAX);
 }
 
 uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
-  return ColdCountThreshold.getValueOr(0);
+  return ColdCountThreshold.value_or(0);
 }
 
 bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
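The getValueOr/getValue renames in this file are mechanical; the underlying threshold pattern, sketched with std::optional (which the renamed Optional API mirrors):

    #include <cstdint>
    #include <optional>

    struct Thresholds {
      std::optional<uint64_t> Hot; // unset until a profile summary is parsed
      // Unset threshold => nothing is considered hot.
      bool isHot(uint64_t C) const { return Hot && C >= Hot.value(); }
      // value_or supplies a conservative default when no threshold exists.
      uint64_t hotOrMax() const { return Hot.value_or(UINT64_MAX); }
    };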
diff --git a/src/llvm-project/llvm/lib/Analysis/PtrUseVisitor.cpp b/src/llvm-project/llvm/lib/Analysis/PtrUseVisitor.cpp
index 9a834ba..4930481 100644
--- a/src/llvm-project/llvm/lib/Analysis/PtrUseVisitor.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -14,7 +14,6 @@
 #include "llvm/Analysis/PtrUseVisitor.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include <algorithm>
 
 using namespace llvm;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/RegionInfo.cpp b/src/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
index 3ba0bb9..9be23a3 100644
--- a/src/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
@@ -10,6 +10,7 @@
 
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominanceFrontier.h"
 #include "llvm/InitializePasses.h"
 #ifndef NDEBUG
 #include "llvm/Analysis/RegionPrinter.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/RegionPass.cpp b/src/llvm-project/llvm/lib/Analysis/RegionPass.cpp
index 10c8569..ddef3be 100644
--- a/src/llvm-project/llvm/lib/Analysis/RegionPass.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/RegionPass.cpp
@@ -12,14 +12,16 @@
 // Most of this code has been COPIED from LoopPass.cpp
 //
 //===----------------------------------------------------------------------===//
+
 #include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionInfo.h"
 #include "llvm/IR/OptBisect.h"
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "regionpassmgr"
@@ -93,12 +95,12 @@
 
         TimeRegion PassTimer(getPassTimer(P));
 #ifdef EXPENSIVE_CHECKS
-        uint64_t RefHash = StructuralHash(F);
+        uint64_t RefHash = P->structuralHash(F);
 #endif
         LocalChanged = P->runOnRegion(CurrentRegion, *this);
 
 #ifdef EXPENSIVE_CHECKS
-        if (!LocalChanged && (RefHash != StructuralHash(F))) {
+        if (!LocalChanged && (RefHash != P->structuralHash(F))) {
           llvm::errs() << "Pass modifies its input and doesn't report it: "
                        << P->getPassName() << "\n";
           llvm_unreachable("Pass modifies its input and doesn't report it");
diff --git a/src/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp b/src/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
index 1fb5faa..fbd3d17 100644
--- a/src/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
@@ -10,15 +10,11 @@
 
 #include "llvm/Analysis/RegionPrinter.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #ifndef NDEBUG
 #include "llvm/IR/LegacyPassManager.h"
@@ -35,28 +31,20 @@
                   cl::init(false));
 
 namespace llvm {
-template<>
-struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
 
-  DOTGraphTraits (bool isSimple=false)
-    : DefaultDOTGraphTraits(isSimple) {}
+std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node,
+                                                       RegionNode *Graph) {
+  if (!Node->isSubRegion()) {
+    BasicBlock *BB = Node->getNodeAs<BasicBlock>();
 
-  std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
-
-    if (!Node->isSubRegion()) {
-      BasicBlock *BB = Node->getNodeAs<BasicBlock>();
-
-      if (isSimple())
-        return DOTGraphTraits<DOTFuncInfo *>
-          ::getSimpleNodeLabel(BB, nullptr);
-      else
-        return DOTGraphTraits<DOTFuncInfo *>
-          ::getCompleteNodeLabel(BB, nullptr);
-    }
-
-    return "Not implemented";
+    if (isSimple())
+      return DOTGraphTraits<DOTFuncInfo *>::getSimpleNodeLabel(BB, nullptr);
+    else
+      return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(BB, nullptr);
   }
-};
+
+  return "Not implemented";
+}
 
 template <>
 struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
@@ -138,7 +126,7 @@
     printRegionCluster(*G->getTopLevelRegion(), GW, 4);
   }
 };
-} //end namespace llvm
+} // end namespace llvm
 
 namespace {
 
@@ -149,48 +137,49 @@
 };
 
 struct RegionPrinter
-    : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
-                                   RegionInfoPassGraphTraits> {
+    : public DOTGraphTraitsPrinterWrapperPass<
+          RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> {
   static char ID;
   RegionPrinter()
-      : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
-                              RegionInfoPassGraphTraits>("reg", ID) {
+      : DOTGraphTraitsPrinterWrapperPass<RegionInfoPass, false, RegionInfo *,
+                                         RegionInfoPassGraphTraits>("reg", ID) {
     initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
   }
 };
 char RegionPrinter::ID = 0;
 
 struct RegionOnlyPrinter
-    : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
-                                   RegionInfoPassGraphTraits> {
+    : public DOTGraphTraitsPrinterWrapperPass<
+          RegionInfoPass, true, RegionInfo *, RegionInfoPassGraphTraits> {
   static char ID;
   RegionOnlyPrinter()
-      : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
-                              RegionInfoPassGraphTraits>("reg", ID) {
+      : DOTGraphTraitsPrinterWrapperPass<RegionInfoPass, true, RegionInfo *,
+                                         RegionInfoPassGraphTraits>("reg", ID) {
     initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
   }
 };
 char RegionOnlyPrinter::ID = 0;
 
 struct RegionViewer
-    : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
-                                  RegionInfoPassGraphTraits> {
+    : public DOTGraphTraitsViewerWrapperPass<
+          RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> {
   static char ID;
   RegionViewer()
-      : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
-                             RegionInfoPassGraphTraits>("reg", ID) {
+      : DOTGraphTraitsViewerWrapperPass<RegionInfoPass, false, RegionInfo *,
+                                        RegionInfoPassGraphTraits>("reg", ID) {
     initializeRegionViewerPass(*PassRegistry::getPassRegistry());
   }
 };
 char RegionViewer::ID = 0;
 
 struct RegionOnlyViewer
-    : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
-                                  RegionInfoPassGraphTraits> {
+    : public DOTGraphTraitsViewerWrapperPass<RegionInfoPass, true, RegionInfo *,
+                                             RegionInfoPassGraphTraits> {
   static char ID;
   RegionOnlyViewer()
-      : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
-                             RegionInfoPassGraphTraits>("regonly", ID) {
+      : DOTGraphTraitsViewerWrapperPass<RegionInfoPass, true, RegionInfo *,
+                                        RegionInfoPassGraphTraits>("regonly",
+                                                                   ID) {
     initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
   }
 };
diff --git a/src/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/src/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index 294bc38..afc3d7f 100644
--- a/src/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -14,9 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ReplayInlineAdvisor.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include <memory>
 
 using namespace llvm;
@@ -26,8 +26,9 @@
 ReplayInlineAdvisor::ReplayInlineAdvisor(
     Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
     std::unique_ptr<InlineAdvisor> OriginalAdvisor,
-    const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
-    : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
+    const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks,
+    InlineContext IC)
+    : InlineAdvisor(M, FAM, IC), OriginalAdvisor(std::move(OriginalAdvisor)),
       ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) {
 
   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
@@ -75,12 +76,15 @@
   HasReplayRemarks = true;
 }
 
-std::unique_ptr<InlineAdvisor> llvm::getReplayInlineAdvisor(
-    Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
-    std::unique_ptr<InlineAdvisor> OriginalAdvisor,
-    const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) {
+std::unique_ptr<InlineAdvisor>
+llvm::getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+                             LLVMContext &Context,
+                             std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+                             const ReplayInlinerSettings &ReplaySettings,
+                             bool EmitRemarks, InlineContext IC) {
   auto Advisor = std::make_unique<ReplayInlineAdvisor>(
-      M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks);
+      M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks,
+      IC);
   if (!Advisor->areReplayRemarksLoaded())
     Advisor.reset();
   return Advisor;
diff --git a/src/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/src/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 20661ef..2958a50 100644
--- a/src/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -79,7 +79,6 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionDivision.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -96,7 +95,6 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
@@ -104,7 +102,6 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
@@ -125,7 +122,6 @@
 #include <algorithm>
 #include <cassert>
 #include <climits>
-#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 #include <map>
@@ -146,17 +142,21 @@
 STATISTIC(NumBruteForceTripCountsComputed,
           "Number of loops with trip counts computed by force");
 
-static cl::opt<unsigned>
-MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
-                        cl::ZeroOrMore,
-                        cl::desc("Maximum number of iterations SCEV will "
-                                 "symbolically execute a constant "
-                                 "derived loop"),
-                        cl::init(100));
+#ifdef EXPENSIVE_CHECKS
+bool llvm::VerifySCEV = true;
+#else
+bool llvm::VerifySCEV = false;
+#endif
 
-// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
-static cl::opt<bool> VerifySCEV(
-    "verify-scev", cl::Hidden,
+static cl::opt<unsigned>
+    MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+                            cl::desc("Maximum number of iterations SCEV will "
+                                     "symbolically execute a constant "
+                                     "derived loop"),
+                            cl::init(100));
+
+static cl::opt<bool, true> VerifySCEVOpt(
+    "verify-scev", cl::Hidden, cl::location(VerifySCEV),
     cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
 static cl::opt<bool> VerifySCEVStrict(
     "verify-scev-strict", cl::Hidden,
@@ -231,6 +231,17 @@
     cl::desc("Use more powerful methods of sharpening expression ranges. May "
              "be costly in terms of compile time"));
 
+static cl::opt<unsigned> MaxPhiSCCAnalysisSize(
+    "scalar-evolution-max-scc-analysis-depth", cl::Hidden,
+    cl::desc("Maximum amount of nodes to process while searching SCEVUnknown "
+             "Phi strongly connected components"),
+    cl::init(8));
+
+static cl::opt<bool>
+    EnableFiniteLoopControl("scalar-evolution-finite-loop", cl::Hidden,
+                            cl::desc("Handle <= and >= in finite loops"),
+                            cl::init(true));
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -519,12 +530,13 @@
 }
 
 void SCEVUnknown::allUsesReplacedWith(Value *New) {
+  // Clear this SCEVUnknown from various maps.
+  SE->forgetMemoizedResults(this);
+
   // Remove this SCEVUnknown from the uniquing map.
   SE->UniqueSCEVs.RemoveNode(this);
 
-  // Update this SCEVUnknown to point to the new value. This is needed
-  // because there may still be outstanding SCEVs which still point to
-  // this SCEVUnknown.
+  // Replace the value pointer in case someone is still using this SCEVUnknown.
   setValPtr(New);
 }
 
@@ -1146,7 +1158,7 @@
     const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
       SmallVector<const SCEV *, 2> Operands;
       bool Changed = false;
-      for (auto *Op : Expr->operands()) {
+      for (const auto *Op : Expr->operands()) {
         Operands.push_back(visit(Op));
         Changed |= Op != Operands.back();
       }
@@ -1156,7 +1168,7 @@
     const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
       SmallVector<const SCEV *, 2> Operands;
       bool Changed = false;
-      for (auto *Op : Expr->operands()) {
+      for (const auto *Op : Expr->operands()) {
         Operands.push_back(visit(Op));
         Changed |= Op != Operands.back();
       }
@@ -1643,10 +1655,12 @@
 
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
-      if (AR->hasNoUnsignedWrap())
-        return getAddRecExpr(
-            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
-            getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+      if (AR->hasNoUnsignedWrap()) {
+        Start =
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+        Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+        return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
+      }
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -1688,11 +1702,10 @@
             // Cache knowledge of AR NUW, which is propagated to this AddRec.
             setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
             // Return the expression with the addrec on the outside.
-            return getAddRecExpr(
-                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getZeroExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+            Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+                                                             Depth + 1);
+            Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+            return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
           }
           // Similar to above, only this time treat the step value as signed.
           // This covers loops that count down.
@@ -1707,11 +1720,10 @@
             // Negative step causes unsigned wrap, but it still can't self-wrap.
             setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
             // Return the expression with the addrec on the outside.
-            return getAddRecExpr(
-                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getSignExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+            Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+                                                             Depth + 1);
+            Step = getSignExtendExpr(Step, Ty, Depth + 1);
+            return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
           }
         }
       }
@@ -1733,11 +1745,10 @@
           // issue.  It's not clear that the order of checks does matter, but
           // it's one of two possible causes for a change which was
           // reverted.  Be conservative for the moment.
-          return getAddRecExpr(
-                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getZeroExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+          Start =
+              getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+          Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+          return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
         }
         
         // For a negative step, we can extend the operands iff doing so only
@@ -1752,11 +1763,10 @@
             // still can't self-wrap.
             setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
             // Return the expression with the addrec on the outside.
-            return getAddRecExpr(
-                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getSignExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+            Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+                                                             Depth + 1);
+            Step = getSignExtendExpr(Step, Ty, Depth + 1);
+            return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
           }
         }
       }
@@ -1780,9 +1790,10 @@
 
       if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
         setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
-        return getAddRecExpr(
-            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
-            getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+        Start =
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+        Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+        return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
       }
     }
 
@@ -1984,10 +1995,12 @@
 
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
-      if (AR->hasNoSignedWrap())
-        return getAddRecExpr(
-            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
-            getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
+      if (AR->hasNoSignedWrap()) {
+        Start =
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+        Step = getSignExtendExpr(Step, Ty, Depth + 1);
+        return getAddRecExpr(Start, Step, L, SCEV::FlagNSW);
+      }
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -2030,11 +2043,10 @@
             // Cache knowledge of AR NSW, which is propagated to this AddRec.
             setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
             // Return the expression with the addrec on the outside.
-            return getAddRecExpr(
-                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getSignExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+            Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+                                                             Depth + 1);
+            Step = getSignExtendExpr(Step, Ty, Depth + 1);
+            return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
           }
           // Similar to above, only this time treat the step value as unsigned.
           // This covers loops that count up with an unsigned step.
@@ -2056,11 +2068,10 @@
             setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
 
             // Return the expression with the addrec on the outside.
-            return getAddRecExpr(
-                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
-                                                         Depth + 1),
-                getZeroExtendExpr(Step, Ty, Depth + 1), L,
-                AR->getNoWrapFlags());
+            Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+                                                             Depth + 1);
+            Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+            return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
           }
         }
       }
@@ -2072,9 +2083,10 @@
         // issue.  It's not clear that the order of checks does matter, but
         // it's one of two possible causes for a change which was
         // reverted.  Be conservative for the moment.
-        return getAddRecExpr(
-            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
-            getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+        Start =
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+        Step = getSignExtendExpr(Step, Ty, Depth + 1);
+        return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
       }
 
       // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
@@ -2096,9 +2108,10 @@
 
       if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
         setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
-        return getAddRecExpr(
-            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
-            getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+        Start =
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+        Step = getSignExtendExpr(Step, Ty, Depth + 1);
+        return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
       }
     }
 
@@ -2300,15 +2313,19 @@
 
   const SCEV *A = (this->*Extension)(
       (this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
-  const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0),
-                                     (this->*Extension)(RHS, WideTy, 0),
-                                     SCEV::FlagAnyWrap, 0);
+  const SCEV *LHSB = (this->*Extension)(LHS, WideTy, 0);
+  const SCEV *RHSB = (this->*Extension)(RHS, WideTy, 0);
+  const SCEV *B = (this->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0);
   return A == B;
 }
 
-std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
+Optional<SCEV::NoWrapFlags>
 ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
     const OverflowingBinaryOperator *OBO) {
+  // If the binop already has both no-wrap flags, nothing can be strengthened.
+  if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
+    return None;
+
   SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap;
 
   if (OBO->hasNoUnsignedWrap())
@@ -2318,13 +2335,10 @@
 
   bool Deduced = false;
 
-  if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
-    return {Flags, Deduced};
-
   if (OBO->getOpcode() != Instruction::Add &&
       OBO->getOpcode() != Instruction::Sub &&
       OBO->getOpcode() != Instruction::Mul)
-    return {Flags, Deduced};
+    return None;
 
   const SCEV *LHS = getSCEV(OBO->getOperand(0));
   const SCEV *RHS = getSCEV(OBO->getOperand(1));
@@ -2343,7 +2357,9 @@
     Deduced = true;
   }
 
-  return {Flags, Deduced};
+  if (Deduced)
+    return Flags;
+  return None;
 }
 
 // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
@@ -3106,12 +3122,13 @@
         // TODO: There are some cases where this transformation is not
         // profitable; for example, Add = (C0 + X) * Y + Z.  Maybe the scope of
         // this transformation should be narrowed down.
-        if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
-          return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
-                                       SCEV::FlagAnyWrap, Depth + 1),
-                            getMulExpr(LHSC, Add->getOperand(1),
-                                       SCEV::FlagAnyWrap, Depth + 1),
-                            SCEV::FlagAnyWrap, Depth + 1);
+        if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) {
+          const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0),
+                                       SCEV::FlagAnyWrap, Depth + 1);
+          const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1),
+                                       SCEV::FlagAnyWrap, Depth + 1);
+          return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
+        }
 
       if (Ops[0]->isAllOnesValue()) {
         // If we have a mul by -1 of an add, try distributing the -1 among the
@@ -3466,12 +3483,8 @@
       }
 
       // Fold if both operands are constant.
-      if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
-        Constant *LHSCV = LHSC->getValue();
-        Constant *RHSCV = RHSC->getValue();
-        return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
-                                                                   RHSCV)));
-      }
+      if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS))
+        return getConstant(LHSC->getAPInt().udiv(RHSC->getAPInt()));
     }
   }
 
@@ -4002,6 +4015,59 @@
 
 } // namespace
 
+/// Return true if \p S is poison, given that \p AssumedPoison is poison.
+static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
+  // The only way poison may be introduced in a SCEV expression is from a
+  // poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown,
+  // not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not*
+  // introduce poison -- they encode guaranteed, non-speculated knowledge.
+  //
+  // Additionally, all SCEV nodes propagate poison from inputs to outputs,
+  // with the notable exception of umin_seq, where only poison from the first
+  // operand is (unconditionally) propagated.
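+  //
+  // Illustrative examples (%a, %b are arbitrary SCEVUnknowns): for
+  // S = (%a + %b) and AssumedPoison = %a, poison in %a always propagates
+  // into S, so the implication holds. For S = (%b umin_seq %a) and
+  // AssumedPoison = %a, poison in %a is ignored whenever %b is zero, so
+  // the implication must be rejected.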
+  struct SCEVPoisonCollector {
+    bool LookThroughSeq;
+    SmallPtrSet<const SCEV *, 4> MaybePoison;
+    SCEVPoisonCollector(bool LookThroughSeq) : LookThroughSeq(LookThroughSeq) {}
+
+    bool follow(const SCEV *S) {
+      // TODO: We can always follow the first operand, but the SCEVTraversal
+      // API doesn't support this.
+      if (!LookThroughSeq && isa<SCEVSequentialMinMaxExpr>(S))
+        return false;
+
+      if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+        if (!isGuaranteedNotToBePoison(SU->getValue()))
+          MaybePoison.insert(S);
+      }
+      return true;
+    }
+    bool isDone() const { return false; }
+  };
+
+  // First collect all SCEVs that might cause AssumedPoison to be poison.
+  // We need to look through umin_seq here, because we want to find all SCEVs
+  // that *might* result in poison, not only those that are *required* to.
+  SCEVPoisonCollector PC1(/* LookThroughSeq */ true);
+  visitAll(AssumedPoison, PC1);
+
+  // AssumedPoison is never poison. As the assumption is false, the implication
+  // is true. Don't bother walking the other SCEV in this case.
+  if (PC1.MaybePoison.empty())
+    return true;
+
+  // Collect all SCEVs in S that, if poison, *will* result in S being poison
+  // as well. We cannot look through umin_seq here, as its argument only *may*
+  // make the result poison.
+  SCEVPoisonCollector PC2(/* LookThroughSeq */ false);
+  visitAll(S, PC2);
+
+  // Make sure that no matter which SCEV in PC1.MaybePoison is actually poison,
+  // it will also make S poison by being part of PC2.MaybePoison.
+  return all_of(PC1.MaybePoison,
+                [&](const SCEV *S) { return PC2.MaybePoison.contains(S); });
+}
+
 const SCEV *
 ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
                                          SmallVectorImpl<const SCEV *> &Ops) {
@@ -4010,11 +4076,6 @@
   assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
   if (Ops.size() == 1)
     return Ops[0];
-  if (Ops.size() == 2 &&
-      any_of(Ops, [](const SCEV *Op) { return isa<SCEVConstant>(Op); }))
-    return getMinMaxExpr(
-        SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
-        Ops);
 #ifndef NDEBUG
   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
@@ -4063,6 +4124,39 @@
       return getSequentialMinMaxExpr(Kind, Ops);
   }
 
+  const SCEV *SaturationPoint;
+  ICmpInst::Predicate Pred;
+  switch (Kind) {
+  case scSequentialUMinExpr:
+    SaturationPoint = getZero(Ops[0]->getType());
+    Pred = ICmpInst::ICMP_ULE;
+    break;
+  default:
+    llvm_unreachable("Not a sequential min/max type.");
+  }
+
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+    // We can replace %x umin_seq %y with %x umin %y if either:
+    //  * %y being poison implies %x is also poison.
+    //  * %x cannot be the saturating value (e.g. zero for umin).
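+    // For illustration: (1 + %n)<nuw> umin_seq %m can be folded to
+    // (1 + %n)<nuw> umin %m, since a <nuw> add of 1 is never zero and the
+    // sequential short-circuit can therefore never fire.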
+    if (::impliesPoison(Ops[i], Ops[i - 1]) ||
+        isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, Ops[i - 1],
+                                        SaturationPoint)) {
+      SmallVector<const SCEV *> SeqOps = {Ops[i - 1], Ops[i]};
+      Ops[i - 1] = getMinMaxExpr(
+          SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
+          SeqOps);
+      Ops.erase(Ops.begin() + i);
+      return getSequentialMinMaxExpr(Kind, Ops);
+    }
+    // Fold %x umin_seq %y to %x if %x ule %y.
+    // TODO: We might be able to prove the predicate for a later operand.
+    if (isKnownViaNonRecursiveReasoning(Pred, Ops[i - 1], Ops[i])) {
+      Ops.erase(Ops.begin() + i);
+      return getSequentialMinMaxExpr(Kind, Ops);
+    }
+  }
+
   // Okay, it looks like we really DO need an expr.  Check to see if we
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
@@ -4265,39 +4359,20 @@
   return FoundAddRec;
 }
 
-/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
-/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
-/// offset I, then return {S', I}, else return {\p S, nullptr}.
-static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
-  const auto *Add = dyn_cast<SCEVAddExpr>(S);
-  if (!Add)
-    return {S, nullptr};
-
-  if (Add->getNumOperands() != 2)
-    return {S, nullptr};
-
-  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
-  if (!ConstOp)
-    return {S, nullptr};
-
-  return {Add->getOperand(1), ConstOp->getValue()};
-}
-
 /// Return the set of Values for \p S; \p S can be represented by any value
 /// in the set.
-ScalarEvolution::ValueOffsetPairSetVector *
-ScalarEvolution::getSCEVValues(const SCEV *S) {
+ArrayRef<Value *> ScalarEvolution::getSCEVValues(const SCEV *S) {
   ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
   if (SI == ExprValueMap.end())
-    return nullptr;
+    return None;
 #ifndef NDEBUG
   if (VerifySCEVMap) {
     // Check there is no dangling Value in the set returned.
-    for (const auto &VE : SI->second)
-      assert(ValueExprMap.count(VE.first));
+    for (Value *V : SI->second)
+      assert(ValueExprMap.count(V));
   }
 #endif
-  return &SI->second;
+  return SI->second.getArrayRef();
 }
 
 /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
@@ -4306,20 +4381,11 @@
 void ScalarEvolution::eraseValueFromMap(Value *V) {
   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
   if (I != ValueExprMap.end()) {
-    const SCEV *S = I->second;
-    // Remove {V, 0} from the set of ExprValueMap[S]
-    if (auto *SV = getSCEVValues(S))
-      SV->remove({V, nullptr});
-
-    // Remove {V, Offset} from the set of ExprValueMap[Stripped]
-    const SCEV *Stripped;
-    ConstantInt *Offset;
-    std::tie(Stripped, Offset) = splitAddExpr(S);
-    if (Offset != nullptr) {
-      if (auto *SV = getSCEVValues(Stripped))
-        SV->remove({V, Offset});
-    }
-    ValueExprMap.erase(V);
+    auto EVIt = ExprValueMap.find(I->second);
+    bool Removed = EVIt->second.remove(V);
+    (void)Removed;
+    assert(Removed && "Value not in ExprValueMap?");
+    ValueExprMap.erase(I);
   }
 }
 
@@ -4330,7 +4396,7 @@
   auto It = ValueExprMap.find_as(V);
   if (It == ValueExprMap.end()) {
     ValueExprMap.insert({SCEVCallbackVH(V, this), S});
-    ExprValueMap[S].insert({V, nullptr});
+    ExprValueMap[S].insert(V);
   }
 }
 
@@ -4339,33 +4405,9 @@
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
-  const SCEV *S = getExistingSCEV(V);
-  if (S == nullptr) {
-    S = createSCEV(V);
-    // During PHI resolution, it is possible to create two SCEVs for the same
-    // V, so it is needed to double check whether V->S is inserted into
-    // ValueExprMap before insert S->{V, 0} into ExprValueMap.
-    std::pair<ValueExprMapType::iterator, bool> Pair =
-        ValueExprMap.insert({SCEVCallbackVH(V, this), S});
-    if (Pair.second) {
-      ExprValueMap[S].insert({V, nullptr});
-
-      // If S == Stripped + Offset, add Stripped -> {V, Offset} into
-      // ExprValueMap.
-      const SCEV *Stripped = S;
-      ConstantInt *Offset = nullptr;
-      std::tie(Stripped, Offset) = splitAddExpr(S);
-      // If stripped is SCEVUnknown, don't bother to save
-      // Stripped -> {V, offset}. It doesn't simplify and sometimes even
-      // increase the complexity of the expansion code.
-      // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
-      // because it may generate add/sub instead of GEP in SCEV expansion.
-      if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
-          !isa<GetElementPtrInst>(V))
-        ExprValueMap[Stripped].insert({V, Offset});
-    }
-  }
-  return S;
+  if (const SCEV *S = getExistingSCEV(V))
+    return S;
+  return createSCEVIter(V);
 }
 
 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
@@ -4620,7 +4662,7 @@
 
   // Find the max type first.
   Type *MaxType = nullptr;
-  for (auto *S : Ops)
+  for (const auto *S : Ops)
     if (MaxType)
       MaxType = getWiderType(MaxType, S->getType());
     else
@@ -4629,7 +4671,7 @@
 
   // Extend all ops to max type.
   SmallVector<const SCEV *, 2> PromotedOps;
-  for (auto *S : Ops)
+  for (const auto *S : Ops)
     PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
 
   // Generate umin.
@@ -4795,16 +4837,16 @@
         SelectInst *SI = cast<SelectInst>(I);
         Optional<const SCEV *> Res =
             compareWithBackedgeCondition(SI->getCondition());
-        if (Res.hasValue()) {
-          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
+        if (Res) {
+          bool IsOne = cast<SCEVConstant>(Res.value())->getValue()->isOne();
           Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
         }
         break;
       }
       default: {
         Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
-        if (Res.hasValue())
-          Result = Res.getValue();
+        if (Res)
+          Result = Res.value();
         break;
       }
       }
@@ -5067,6 +5109,9 @@
       // Instcombine turns add of signmask into xor as a strength reduction step.
       if (RHSC->getValue().isSignMask())
         return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
+    // Binary `xor` on i1 is a bit-wise `add`.
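+    // (Truth table, for illustration: 0^0=0, 0^1=1, 1^0=1, 1^1=0, which is
+    // exactly one-bit wrapping addition.)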
+    if (V->getType()->isIntegerTy(1))
+      return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
     return BinaryOp(Op);
 
   case Instruction::LShr:
@@ -5489,8 +5534,8 @@
     return true;
 
   auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
-    if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
-        !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
+    if (Expr1 != Expr2 && !Preds->implies(SE.getEqualPredicate(Expr1, Expr2)) &&
+        !Preds->implies(SE.getEqualPredicate(Expr2, Expr1)))
       return false;
     return true;
   };
@@ -5872,31 +5917,53 @@
   if (const SCEV *S = createNodeFromSelectLikePHI(PN))
     return S;
 
-  // If the PHI has a single incoming value, follow that value, unless the
-  // PHI's incoming blocks are in a different loop, in which case doing so
-  // risks breaking LCSSA form. Instcombine would normally zap these, but
-  // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
-    if (LI.replacementPreservesLCSSAForm(PN, V))
-      return getSCEV(V);
+  if (Value *V = simplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
+    return getSCEV(V);
 
   // If it's not a loop phi, we can't handle it yet.
   return getUnknown(PN);
 }
 
-const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
-                                                      Value *Cond,
-                                                      Value *TrueVal,
-                                                      Value *FalseVal) {
-  // Handle "constant" branch or select. This can occur for instance when a
-  // loop pass transforms an inner loop and moves on to process the outer loop.
-  if (auto *CI = dyn_cast<ConstantInt>(Cond))
-    return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+bool SCEVMinMaxExprContains(const SCEV *Root, const SCEV *OperandToFind,
+                            SCEVTypes RootKind) {
+  struct FindClosure {
+    const SCEV *OperandToFind;
+    const SCEVTypes RootKind; // Must be a sequential min/max expression.
+    const SCEVTypes NonSequentialRootKind; // Non-seq variant of RootKind.
 
+    bool Found = false;
+
+    bool canRecurseInto(SCEVTypes Kind) const {
+      // We can only recurse into SCEV expressions whose kind matches that of
+      // our root SCEV expression (or its non-sequential variant), and into
+      // zero-extensions.
+      return RootKind == Kind || NonSequentialRootKind == Kind ||
+             scZeroExtend == Kind;
+    }
+
+    FindClosure(const SCEV *OperandToFind, SCEVTypes RootKind)
+        : OperandToFind(OperandToFind), RootKind(RootKind),
+          NonSequentialRootKind(
+              SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(
+                  RootKind)) {}
+
+    bool follow(const SCEV *S) {
+      Found = S == OperandToFind;
+
+      return !isDone() && canRecurseInto(S->getSCEVType());
+    }
+
+    bool isDone() const { return Found; }
+  };
+
+  FindClosure FC(OperandToFind, RootKind);
+  visitAll(Root, FC);
+  return FC.Found;
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHIInstWithICmpInstCond(
+    Instruction *I, ICmpInst *Cond, Value *TrueVal, Value *FalseVal) {
   // Try to match some simple smax or umax patterns.
-  auto *ICI = dyn_cast<ICmpInst>(Cond);
-  if (!ICI)
-    return getUnknown(I);
+  auto *ICI = Cond;
 
   Value *LHS = ICI->getOperand(0);
   Value *RHS = ICI->getOperand(1);
@@ -5958,31 +6025,36 @@
     }
     break;
   case ICmpInst::ICMP_NE:
-    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
-    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
-        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
-      const SCEV *One = getOne(I->getType());
-      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
-      const SCEV *LA = getSCEV(TrueVal);
-      const SCEV *RA = getSCEV(FalseVal);
-      const SCEV *LDiff = getMinusSCEV(LA, LS);
-      const SCEV *RDiff = getMinusSCEV(RA, One);
-      if (LDiff == RDiff)
-        return getAddExpr(getUMaxExpr(One, LS), LDiff);
-    }
-    break;
+    // x != 0 ? x+y : C+y  ->  x == 0 ? C+y : x+y
+    std::swap(TrueVal, FalseVal);
+    LLVM_FALLTHROUGH;
   case ICmpInst::ICMP_EQ:
-    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
+    // x == 0 ? C+y : x+y  ->  umax(x, C)+y   iff C u<= 1
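+    // (e.g. with C = 1, for illustration: "x == 0 ? 1+y : x+y" becomes
+    // umax(x, 1)+y, the classic "n == 0 ? 1 : n" shape.)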
     if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
         isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
-      const SCEV *One = getOne(I->getType());
-      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
-      const SCEV *LA = getSCEV(TrueVal);
-      const SCEV *RA = getSCEV(FalseVal);
-      const SCEV *LDiff = getMinusSCEV(LA, One);
-      const SCEV *RDiff = getMinusSCEV(RA, LS);
-      if (LDiff == RDiff)
-        return getAddExpr(getUMaxExpr(One, LS), LDiff);
+      const SCEV *X = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+      const SCEV *TrueValExpr = getSCEV(TrueVal);    // C+y
+      const SCEV *FalseValExpr = getSCEV(FalseVal);  // x+y
+      const SCEV *Y = getMinusSCEV(FalseValExpr, X); // y = (x+y)-x
+      const SCEV *C = getMinusSCEV(TrueValExpr, Y);  // C = (C+y)-y
+      if (isa<SCEVConstant>(C) && cast<SCEVConstant>(C)->getAPInt().ule(1))
+        return getAddExpr(getUMaxExpr(X, C), Y);
+    }
+    // x == 0 ? 0 : umin    (..., x, ...)  ->  umin_seq(x, umin    (...))
+    // x == 0 ? 0 : umin_seq(..., x, ...)  ->  umin_seq(x, umin_seq(...))
+    // x == 0 ? 0 : umin    (..., umin_seq(..., x, ...), ...)
+    //                    ->  umin_seq(x, umin (..., umin_seq(...), ...))
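+    // (Illustrative IR sketch: %m = umin(%a, %x) with
+    // %r = select (icmp eq %x, 0), 0, %m is modeled as
+    // umin_seq(%x, umin(%a, %x)).)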
+    if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero() &&
+        isa<ConstantInt>(TrueVal) && cast<ConstantInt>(TrueVal)->isZero()) {
+      const SCEV *X = getSCEV(LHS);
+      while (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(X))
+        X = ZExt->getOperand();
+      if (getTypeSizeInBits(X->getType()) <= getTypeSizeInBits(I->getType())) {
+        const SCEV *FalseValExpr = getSCEV(FalseVal);
+        if (SCEVMinMaxExprContains(FalseValExpr, X, scSequentialUMinExpr))
+          return getUMinExpr(getNoopOrZeroExtend(X, I->getType()), FalseValExpr,
+                             /*Sequential=*/true);
+      }
     }
     break;
   default:
@@ -5992,12 +6064,95 @@
   return getUnknown(I);
 }
 
+static Optional<const SCEV *>
+createNodeForSelectViaUMinSeq(ScalarEvolution *SE, const SCEV *CondExpr,
+                              const SCEV *TrueExpr, const SCEV *FalseExpr) {
+  assert(CondExpr->getType()->isIntegerTy(1) &&
+         TrueExpr->getType() == FalseExpr->getType() &&
+         TrueExpr->getType()->isIntegerTy(1) &&
+         "Unexpected operands of a select.");
+
+  // i1 cond ? i1 x : i1 C  -->  C + (i1  cond ? (i1 x - i1 C) : i1 0)
+  //                        -->  C + (umin_seq  cond, x - C)
+  //
+  // i1 cond ? i1 C : i1 x  -->  C + (i1  cond ? i1 0 : (i1 x - i1 C))
+  //                        -->  C + (i1 ~cond ? (i1 x - i1 C) : i1 0)
+  //                        -->  C + (umin_seq ~cond, x - C)
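+  //
+  // Worked example (illustrative): "select i1 %c, i1 true, i1 %x" takes the
+  // second form above with C = 1, giving 1 + umin_seq(~%c, %x - 1).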
+
+  // FIXME: we currently give up unless one of the hands is a constant, but
+  // we really only need the *difference* between the hands to be constant.
+  if (!isa<SCEVConstant>(TrueExpr) && !isa<SCEVConstant>(FalseExpr))
+    return None;
+
+  const SCEV *X, *C;
+  if (isa<SCEVConstant>(TrueExpr)) {
+    CondExpr = SE->getNotSCEV(CondExpr);
+    X = FalseExpr;
+    C = TrueExpr;
+  } else {
+    X = TrueExpr;
+    C = FalseExpr;
+  }
+  return SE->getAddExpr(C, SE->getUMinExpr(CondExpr, SE->getMinusSCEV(X, C),
+                                           /*Sequential=*/true));
+}
+
+static Optional<const SCEV *> createNodeForSelectViaUMinSeq(ScalarEvolution *SE,
+                                                            Value *Cond,
+                                                            Value *TrueVal,
+                                                            Value *FalseVal) {
+  if (!isa<ConstantInt>(TrueVal) && !isa<ConstantInt>(FalseVal))
+    return None;
+
+  const auto *SECond = SE->getSCEV(Cond);
+  const auto *SETrue = SE->getSCEV(TrueVal);
+  const auto *SEFalse = SE->getSCEV(FalseVal);
+  return createNodeForSelectViaUMinSeq(SE, SECond, SETrue, SEFalse);
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHIViaUMinSeq(
+    Value *V, Value *Cond, Value *TrueVal, Value *FalseVal) {
+  assert(Cond->getType()->isIntegerTy(1) && "Select condition is not an i1?");
+  assert(TrueVal->getType() == FalseVal->getType() &&
+         V->getType() == TrueVal->getType() &&
+         "Types of select hands and of the result must match.");
+
+  // For now, only deal with i1-typed `select`s.
+  if (!V->getType()->isIntegerTy(1))
+    return getUnknown(V);
+
+  if (Optional<const SCEV *> S =
+          createNodeForSelectViaUMinSeq(this, Cond, TrueVal, FalseVal))
+    return *S;
+
+  return getUnknown(V);
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Value *V, Value *Cond,
+                                                      Value *TrueVal,
+                                                      Value *FalseVal) {
+  // Handle "constant" branch or select. This can occur for instance when a
+  // loop pass transforms an inner loop and moves on to process the outer loop.
+  if (auto *CI = dyn_cast<ConstantInt>(Cond))
+    return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
+      const SCEV *S = createNodeForSelectOrPHIInstWithICmpInstCond(
+          I, ICI, TrueVal, FalseVal);
+      if (!isa<SCEVUnknown>(S))
+        return S;
+    }
+  }
+
+  return createNodeForSelectOrPHIViaUMinSeq(V, Cond, TrueVal, FalseVal);
+}
+
 /// Expand GEP instructions into add and multiply operations. This allows them
 /// to be analyzed by regular SCEV code.
 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
-  // Don't attempt to analyze GEPs over unsized objects.
-  if (!GEP->getSourceElementType()->isSized())
-    return getUnknown(GEP);
+  assert(GEP->getSourceElementType()->isSized() &&
+         "GEP source element type must be sized");
 
   SmallVector<const SCEV *, 4> IndexExprs;
   for (Value *Index : GEP->indices())
@@ -6430,9 +6585,9 @@
 
     // Check if the IR explicitly contains !range metadata.
     Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
-    if (MDRange.hasValue())
-      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
-                                                            RangeType);
+    if (MDRange)
+      ConservativeResult =
+          ConservativeResult.intersectWith(MDRange.value(), RangeType);
 
     // Use facts about recurrences in the underlying IR.  Note that add
     // recurrences are AddRecExprs and thus don't hit this path.  This
@@ -6481,7 +6636,7 @@
       // Make sure that we do not run over cycled Phis.
       if (PendingPhiRanges.insert(Phi).second) {
         ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
-        for (auto &Op : Phi->operands()) {
+        for (const auto &Op : Phi->operands()) {
           auto OpRange = getRangeRef(getSCEV(Op), SignHint);
           RangeFromOps = RangeFromOps.unionWith(OpRange);
           // No point to continue if we already have a full set.
@@ -6496,6 +6651,13 @@
       }
     }
 
+    // vscale can't be equal to zero
+    if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue()))
+      if (II->getIntrinsicID() == Intrinsic::vscale) {
+        ConstantRange Disallowed = APInt::getZero(BitWidth);
+        ConservativeResult = ConservativeResult.difference(Disallowed);
+      }
+
     return setRange(U, SignHint, std::move(ConservativeResult));
   }
 
@@ -6719,7 +6881,7 @@
       FalseValue = *FalseVal;
 
       // Re-apply the cast we peeled off earlier
-      if (CastOp.hasValue())
+      if (CastOp)
         switch (*CastOp) {
         default:
           llvm_unreachable("Unknown SCEV cast type!");
@@ -6818,13 +6980,13 @@
       Ops.push_back(S);
   };
   if (auto *S2 = dyn_cast<SCEVCastExpr>(S))
-    for (auto *Op : S2->operands())
+    for (const auto *Op : S2->operands())
       InsertUnique(Op);
   else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S))
-    for (auto *Op : S2->operands())
+    for (const auto *Op : S2->operands())
       InsertUnique(Op);
   else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S))
-    for (auto *Op : S2->operands())
+    for (const auto *Op : S2->operands())
       InsertUnique(Op);
 }
 
@@ -6846,7 +7008,7 @@
     Worklist.push_back(S);
   };
 
-  for (auto *S : Ops)
+  for (const auto *S : Ops)
     pushOp(S);
 
   const Instruction *Bound = nullptr;
@@ -6858,7 +7020,7 @@
     } else {
       SmallVector<const SCEV *, 4> Ops;
       collectUniqueOps(S, Ops);
-      for (auto *Op : Ops)
+      for (const auto *Op : Ops)
         pushOp(Op);
     }
   }
@@ -6962,7 +7124,7 @@
   while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
     const Instruction *Poison = PoisonStack.pop_back_val();
 
-    for (auto *PoisonUser : Poison->users()) {
+    for (const auto *PoisonUser : Poison->users()) {
       if (propagatesPoison(cast<Operator>(PoisonUser))) {
         if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
           PoisonStack.push_back(cast<Instruction>(PoisonUser));
@@ -7020,6 +7182,241 @@
   return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
 }
 
+const SCEV *ScalarEvolution::createSCEVIter(Value *V) {
+  // Worklist item with a Value and a bool indicating whether all operands have
+  // been visited already.
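+  // (Illustrative walk: popping (V, false) discovers V's operands and pushes
+  // (V, true) back underneath them, so the SCEV for V is only created once
+  // SCEVs exist for all of its operands.)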
+  using PointerTy = PointerIntPair<Value *, 1, bool>;
+  SmallVector<PointerTy> Stack;
+
+  Stack.emplace_back(V, true);
+  Stack.emplace_back(V, false);
+  while (!Stack.empty()) {
+    auto E = Stack.pop_back_val();
+    Value *CurV = E.getPointer();
+
+    if (getExistingSCEV(CurV))
+      continue;
+
+    SmallVector<Value *> Ops;
+    const SCEV *CreatedSCEV = nullptr;
+    // If all operands have been visited already, create the SCEV.
+    if (E.getInt()) {
+      CreatedSCEV = createSCEV(CurV);
+    } else {
+      // Otherwise get the operands we need to create SCEVs for before creating
+      // the SCEV for CurV. If the SCEV for CurV can be constructed trivially,
+      // just use it.
+      CreatedSCEV = getOperandsToCreate(CurV, Ops);
+    }
+
+    if (CreatedSCEV) {
+      insertValueToMap(CurV, CreatedSCEV);
+    } else {
+      // Queue CurV for SCEV creation, followed by its operands, which need to
+      // be constructed first.
+      Stack.emplace_back(CurV, true);
+      for (Value *Op : Ops)
+        Stack.emplace_back(Op, false);
+    }
+  }
+
+  return getExistingSCEV(V);
+}
+
+const SCEV *
+ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
+  if (!isSCEVable(V->getType()))
+    return getUnknown(V);
+
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // Don't attempt to analyze instructions in blocks that aren't
+    // reachable. Such instructions don't matter, and they aren't required
+    // to obey basic rules for definitions dominating uses which this
+    // analysis depends on.
+    if (!DT.isReachableFromEntry(I->getParent()))
+      return getUnknown(PoisonValue::get(V->getType()));
+  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return getConstant(CI);
+  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+    if (!GA->isInterposable()) {
+      Ops.push_back(GA->getAliasee());
+      return nullptr;
+    }
+    return getUnknown(V);
+  } else if (!isa<ConstantExpr>(V))
+    return getUnknown(V);
+
+  Operator *U = cast<Operator>(V);
+  if (auto BO = MatchBinaryOp(U, DT)) {
+    bool IsConstArg = isa<ConstantInt>(BO->RHS);
+    switch (BO->Opcode) {
+    case Instruction::Add: {
+      // For additions and multiplications, traverse add/mul chains for which we
+      // can potentially create a single SCEV, to reduce the number of
+      // get{Add,Mul}Expr calls.
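+      // (For illustration: ((a + b) + c) + d collects {d, c, b, a} in one
+      // pass instead of recursing once per binary operator.)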
+      do {
+        if (BO->Op) {
+          if (BO->Op != V && getExistingSCEV(BO->Op)) {
+            Ops.push_back(BO->Op);
+            break;
+          }
+        }
+        Ops.push_back(BO->RHS);
+        auto NewBO = MatchBinaryOp(BO->LHS, DT);
+        if (!NewBO || (NewBO->Opcode != Instruction::Add &&
+                       NewBO->Opcode != Instruction::Sub)) {
+          Ops.push_back(BO->LHS);
+          break;
+        }
+        BO = NewBO;
+      } while (true);
+      return nullptr;
+    }
+
+    case Instruction::Mul: {
+      do {
+        if (BO->Op) {
+          if (BO->Op != V && getExistingSCEV(BO->Op)) {
+            Ops.push_back(BO->Op);
+            break;
+          }
+        }
+        Ops.push_back(BO->RHS);
+        auto NewBO = MatchBinaryOp(BO->LHS, DT);
+        if (!NewBO || NewBO->Opcode != Instruction::Mul) {
+          Ops.push_back(BO->LHS);
+          break;
+        }
+        BO = NewBO;
+      } while (true);
+      return nullptr;
+    }
+    case Instruction::Sub:
+    case Instruction::UDiv:
+    case Instruction::URem:
+      break;
+    case Instruction::AShr:
+    case Instruction::Shl:
+    case Instruction::Xor:
+      if (!IsConstArg)
+        return nullptr;
+      break;
+    case Instruction::And:
+    case Instruction::Or:
+      if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1))
+        return nullptr;
+      break;
+    case Instruction::LShr:
+      return getUnknown(V);
+    default:
+      llvm_unreachable("Unhandled binop");
+      break;
+    }
+
+    Ops.push_back(BO->LHS);
+    Ops.push_back(BO->RHS);
+    return nullptr;
+  }
+
+  switch (U->getOpcode()) {
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::PtrToInt:
+    Ops.push_back(U->getOperand(0));
+    return nullptr;
+
+  case Instruction::BitCast:
+    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) {
+      Ops.push_back(U->getOperand(0));
+      return nullptr;
+    }
+    return getUnknown(V);
+
+  case Instruction::SDiv:
+  case Instruction::SRem:
+    Ops.push_back(U->getOperand(0));
+    Ops.push_back(U->getOperand(1));
+    return nullptr;
+
+  case Instruction::GetElementPtr:
+    assert(cast<GEPOperator>(U)->getSourceElementType()->isSized() &&
+           "GEP source element type must be sized");
+    for (Value *Index : U->operands())
+      Ops.push_back(Index);
+    return nullptr;
+
+  case Instruction::IntToPtr:
+    return getUnknown(V);
+
+  case Instruction::PHI:
+    // Keep constructing SCEVs for phis recursively for now.
+    return nullptr;
+
+  case Instruction::Select: {
+    // Check if U is a select that can be simplified to a SCEVUnknown.
+    auto CanSimplifyToUnknown = [this, U]() {
+      if (U->getType()->isIntegerTy(1) || isa<ConstantInt>(U->getOperand(0)))
+        return false;
+
+      auto *ICI = dyn_cast<ICmpInst>(U->getOperand(0));
+      if (!ICI)
+        return false;
+      Value *LHS = ICI->getOperand(0);
+      Value *RHS = ICI->getOperand(1);
+      if (ICI->getPredicate() == CmpInst::ICMP_EQ ||
+          ICI->getPredicate() == CmpInst::ICMP_NE) {
+        if (!(isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()))
+          return true;
+      } else if (getTypeSizeInBits(LHS->getType()) >
+                 getTypeSizeInBits(U->getType()))
+        return true;
+      return false;
+    };
+    if (CanSimplifyToUnknown())
+      return getUnknown(U);
+
+    for (Value *Inc : U->operands())
+      Ops.push_back(Inc);
+    return nullptr;
+  }
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) {
+      Ops.push_back(RV);
+      return nullptr;
+    }
+
+    if (auto *II = dyn_cast<IntrinsicInst>(U)) {
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::abs:
+        Ops.push_back(II->getArgOperand(0));
+        return nullptr;
+      case Intrinsic::umax:
+      case Intrinsic::umin:
+      case Intrinsic::smax:
+      case Intrinsic::smin:
+      case Intrinsic::usub_sat:
+      case Intrinsic::uadd_sat:
+        Ops.push_back(II->getArgOperand(0));
+        Ops.push_back(II->getArgOperand(1));
+        return nullptr;
+      case Intrinsic::start_loop_iterations:
+      case Intrinsic::annotation:
+      case Intrinsic::ptr_annotation:
+        Ops.push_back(II->getArgOperand(0));
+        return nullptr;
+      default:
+        break;
+      }
+    }
+    break;
+  }
+
+  return nullptr;
+}
+
 const SCEV *ScalarEvolution::createSCEV(Value *V) {
   if (!isSCEVable(V->getType()))
     return getUnknown(V);
@@ -7030,7 +7427,7 @@
     // to obey basic rules for definitions dominating uses which this
     // analysis depends on.
     if (!DT.isReachableFromEntry(I->getParent()))
-      return getUnknown(UndefValue::get(V->getType()));
+      return getUnknown(PoisonValue::get(V->getType()));
   } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
     return getConstant(CI);
   else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
@@ -7038,6 +7435,9 @@
   else if (!isa<ConstantExpr>(V))
     return getUnknown(V);
 
+  const SCEV *LHS;
+  const SCEV *RHS;
+
   Operator *U = cast<Operator>(V);
   if (auto BO = MatchBinaryOp(U, DT)) {
     switch (BO->Opcode) {
@@ -7103,8 +7503,9 @@
 
           SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
           if (Flags != SCEV::FlagAnyWrap) {
-            MulOps.push_back(
-                getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
+            LHS = getSCEV(BO->LHS);
+            RHS = getSCEV(BO->RHS);
+            MulOps.push_back(getMulExpr(LHS, RHS, Flags));
             break;
           }
         }
@@ -7121,14 +7522,20 @@
       return getMulExpr(MulOps);
     }
     case Instruction::UDiv:
-      return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
+      LHS = getSCEV(BO->LHS);
+      RHS = getSCEV(BO->RHS);
+      return getUDivExpr(LHS, RHS);
     case Instruction::URem:
-      return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
+      LHS = getSCEV(BO->LHS);
+      RHS = getSCEV(BO->RHS);
+      return getURemExpr(LHS, RHS);
     case Instruction::Sub: {
       SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
       if (BO->Op)
         Flags = getNoWrapFlagsFromUB(BO->Op);
-      return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
+      LHS = getSCEV(BO->LHS);
+      RHS = getSCEV(BO->RHS);
+      return getMinusSCEV(LHS, RHS, Flags);
     }
     case Instruction::And:
       // For an expression like x&255 that merely masks off the high bits,
@@ -7180,6 +7587,12 @@
               MulCount);
         }
       }
+      // Binary `and` on i1 is a bit-wise `umin`.
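+      // (Truth table, for illustration: 0&0=0, 0&1=0, 1&0=0, 1&1=1, i.e.
+      // the unsigned minimum of two one-bit values.)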
+      if (BO->LHS->getType()->isIntegerTy(1)) {
+        LHS = getSCEV(BO->LHS);
+        RHS = getSCEV(BO->RHS);
+        return getUMinExpr(LHS, RHS);
+      }
       break;
 
     case Instruction::Or:
@@ -7199,6 +7612,12 @@
                             (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
         }
       }
+      // Binary `or` on i1 is a bit-wise `umax`.
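+      // (Truth table, for illustration: 0|0=0, 0|1=1, 1|0=1, 1|1=1, i.e.
+      // the unsigned maximum of two one-bit values.)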
+      if (BO->LHS->getType()->isIntegerTy(1)) {
+        LHS = getSCEV(BO->LHS);
+        RHS = getSCEV(BO->RHS);
+        return getUMaxExpr(LHS, RHS);
+      }
       break;
 
     case Instruction::Xor:
@@ -7266,9 +7685,9 @@
             Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW);
         }
 
-        Constant *X = ConstantInt::get(
+        ConstantInt *X = ConstantInt::get(
             getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
-        return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
+        return getMulExpr(getSCEV(BO->LHS), getConstant(X), Flags);
       }
       break;
 
@@ -7394,14 +7813,8 @@
     return createNodeForPHI(cast<PHINode>(U));
 
   case Instruction::Select:
-    // U can also be a select constant expr, which let fall through.  Since
-    // createNodeForSelect only works for a condition that is an `ICmpInst`, and
-    // constant expressions cannot have instructions as operands, we'd have
-    // returned getUnknown for a select constant expressions anyway.
-    if (isa<Instruction>(U))
-      return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
-                                      U->getOperand(1), U->getOperand(2));
-    break;
+    return createNodeForSelectOrPHI(U, U->getOperand(0), U->getOperand(1),
+                                    U->getOperand(2));
 
   case Instruction::Call:
   case Instruction::Invoke:
@@ -7415,17 +7828,21 @@
             getSCEV(II->getArgOperand(0)),
             /*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
       case Intrinsic::umax:
-        return getUMaxExpr(getSCEV(II->getArgOperand(0)),
-                           getSCEV(II->getArgOperand(1)));
+        LHS = getSCEV(II->getArgOperand(0));
+        RHS = getSCEV(II->getArgOperand(1));
+        return getUMaxExpr(LHS, RHS);
       case Intrinsic::umin:
-        return getUMinExpr(getSCEV(II->getArgOperand(0)),
-                           getSCEV(II->getArgOperand(1)));
+        LHS = getSCEV(II->getArgOperand(0));
+        RHS = getSCEV(II->getArgOperand(1));
+        return getUMinExpr(LHS, RHS);
       case Intrinsic::smax:
-        return getSMaxExpr(getSCEV(II->getArgOperand(0)),
-                           getSCEV(II->getArgOperand(1)));
+        LHS = getSCEV(II->getArgOperand(0));
+        RHS = getSCEV(II->getArgOperand(1));
+        return getSMaxExpr(LHS, RHS);
       case Intrinsic::smin:
-        return getSMinExpr(getSCEV(II->getArgOperand(0)),
-                           getSCEV(II->getArgOperand(1)));
+        LHS = getSCEV(II->getArgOperand(0));
+        RHS = getSCEV(II->getArgOperand(1));
+        return getSMinExpr(LHS, RHS);
       case Intrinsic::usub_sat: {
         const SCEV *X = getSCEV(II->getArgOperand(0));
         const SCEV *Y = getSCEV(II->getArgOperand(1));
@@ -7439,8 +7856,10 @@
         return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
       }
       case Intrinsic::start_loop_iterations:
-        // A start_loop_iterations is just equivalent to the first operand for
-        // SCEV purposes.
+      case Intrinsic::annotation:
+      case Intrinsic::ptr_annotation:
+        // A start_loop_iterations, llvm.annotation, or llvm.ptr.annotation is
+        // just equivalent to its first operand for SCEV purposes.
         return getSCEV(II->getArgOperand(0));
       default:
         break;
@@ -7640,7 +8059,7 @@
       Res = Multiple;
     Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple);
   }
-  return Res.getValueOr(1);
+  return Res.value_or(1);
 }
 
 unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
@@ -7708,7 +8127,7 @@
 
 const SCEV *
 ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
-                                                 SCEVUnionPredicate &Preds) {
+                                                 SmallVector<const SCEVPredicate *, 4> &Preds) {
   return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
 }
 
@@ -7870,7 +8289,6 @@
     if (LoopUsersItr != LoopUsers.end()) {
       ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(),
                 LoopUsersItr->second.end());
-      LoopUsers.erase(LoopUsersItr);
     }
 
     // Drop information about expressions based on loop-header PHIs.
@@ -7900,9 +8318,7 @@
 }
 
 void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
-  while (Loop *Parent = L->getParentLoop())
-    L = Parent;
-  forgetLoop(L);
+  forgetLoop(L->getOutermostLoop());
 }
 
 void ScalarEvolution::forgetValue(Value *V) {
@@ -7944,7 +8360,7 @@
 /// the relevant loop exiting block using getExact(ExitingBlock, SE).
 const SCEV *
 ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
-                                             SCEVUnionPredicate *Preds) const {
+                                             SmallVector<const SCEVPredicate *, 4> *Preds) const {
   // If any exits were not computable, the loop is not computable.
   if (!isComplete() || ExitNotTaken.empty())
     return SE->getCouldNotCompute();
@@ -7957,7 +8373,7 @@
   // All exiting blocks we have gathered dominate loop's latch, so exact trip
   // count is simply a minimum out of all these calculated exit counts.
   SmallVector<const SCEV *, 2> Ops;
-  for (auto &ENT : ExitNotTaken) {
+  for (const auto &ENT : ExitNotTaken) {
     const SCEV *BECount = ENT.ExactNotTaken;
     assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
     assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
@@ -7966,21 +8382,25 @@
 
     Ops.push_back(BECount);
 
-    if (Preds && !ENT.hasAlwaysTruePredicate())
-      Preds->add(ENT.Predicate.get());
+    if (Preds)
+      for (const auto *P : ENT.Predicates)
+        Preds->push_back(P);
 
     assert((Preds || ENT.hasAlwaysTruePredicate()) &&
            "Predicate should be always true!");
   }
 
-  return SE->getUMinFromMismatchedTypes(Ops);
+  // If an earlier exit is taken on the first iteration (exit count zero),
+  // then a later poison exit count must not propagate into the result. These
+  // are exactly the semantics provided by umin_seq.
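+  // (For illustration: with per-exit counts {0, poison}, umin_seq yields 0,
+  // whereas a plain umin would let the poison operand corrupt the result.)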
+  return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true);
 }
 
 /// Get the exact not taken count for this loop exit.
 const SCEV *
 ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
                                              ScalarEvolution *SE) const {
-  for (auto &ENT : ExitNotTaken)
+  for (const auto &ENT : ExitNotTaken)
     if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
       return ENT.ExactNotTaken;
 
@@ -7989,7 +8409,7 @@
 
 const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
     const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
-  for (auto &ENT : ExitNotTaken)
+  for (const auto &ENT : ExitNotTaken)
     if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
       return ENT.MaxNotTaken;
 
@@ -8048,8 +8468,8 @@
   assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
           isa<SCEVConstant>(MaxNotTaken)) &&
          "No point in having a non-constant max backedge taken count!");
-  for (auto *PredSet : PredSetList)
-    for (auto *P : *PredSet)
+  for (const auto *PredSet : PredSetList)
+    for (const auto *P : *PredSet)
       addPredicate(P);
   assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
          "Backedge count should be int");
@@ -8082,16 +8502,8 @@
       [&](const EdgeExitInfo &EEI) {
         BasicBlock *ExitBB = EEI.first;
         const ExitLimit &EL = EEI.second;
-        if (EL.Predicates.empty())
-          return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
-                                  nullptr);
-
-        std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
-        for (auto *Pred : EL.Predicates)
-          Predicate->add(Pred);
-
         return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
-                                std::move(Predicate));
+                                EL.Predicates);
       });
   assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
           isa<SCEVConstant>(ConstantMax)) &&
@@ -8385,11 +8797,6 @@
       BECount = getUMinFromMismatchedTypes(
           EL0.ExactNotTaken, EL1.ExactNotTaken,
           /*Sequential=*/!isa<BinaryOperator>(ExitCond));
-
-      // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
-      // it should have been simplified to zero (see the condition (3) above)
-      assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
-             BECount->isZero());
     }
     if (EL0.MaxNotTaken == getCouldNotCompute())
       MaxBECount = EL1.MaxNotTaken;
@@ -8470,7 +8877,8 @@
       ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
   // Simplify the operands before analyzing them.
   (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
-                             ControllingFiniteLoop);
+                             (EnableFiniteLoopControl ? ControllingFiniteLoop
+                                                     : false));
 
   // If we have a comparison of a chrec against a constant, try to use value
   // ranges to answer this query.
@@ -8683,7 +9091,7 @@
 
         // and the kind of shift should match the kind of shift we peeled
         // off, if any.
-        (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
+        (!PostShiftOpCode || *PostShiftOpCode == OpCodeOut);
   };
 
   PHINode *PN;
@@ -8871,13 +9279,6 @@
     Operands[i] = C;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], DL, TLI);
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
-  }
   return ConstantFoldInstOperands(I, Operands, DL, TLI);
 }
 
@@ -9121,67 +9522,44 @@
   }
   case scAddExpr: {
     const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
-    if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
-      if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
-        unsigned AS = PTy->getAddressSpace();
-        Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
-        C = ConstantExpr::getBitCast(C, DestPtrTy);
+    Constant *C = nullptr;
+    for (const SCEV *Op : SA->operands()) {
+      Constant *OpC = BuildConstantFromSCEV(Op);
+      if (!OpC)
+        return nullptr;
+      if (!C) {
+        C = OpC;
+        continue;
       }
-      for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
-        Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
-        if (!C2)
-          return nullptr;
-
-        // First pointer!
-        if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
-          unsigned AS = C2->getType()->getPointerAddressSpace();
-          std::swap(C, C2);
-          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
-          // The offsets have been converted to bytes.  We can add bytes to an
-          // i8* by GEP with the byte count in the first index.
-          C = ConstantExpr::getBitCast(C, DestPtrTy);
-        }
-
-        // Don't bother trying to sum two pointers. We probably can't
-        // statically compute a load that results from it anyway.
-        if (C2->getType()->isPointerTy())
-          return nullptr;
-
-        if (C->getType()->isPointerTy()) {
-          C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
-                                             C, C2);
-        } else {
-          C = ConstantExpr::getAdd(C, C2);
-        }
+      assert(!C->getType()->isPointerTy() &&
+             "Can only have one pointer, and it must be last");
+      if (auto *PT = dyn_cast<PointerType>(OpC->getType())) {
+        // The offsets have been converted to bytes.  We can add bytes to an
+        // i8* by GEP with the byte count in the first index.
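+        // (For illustration: folding the add {C, @g} produces
+        // getelementptr(i8, bitcast(@g to i8*), C).)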
+        Type *DestPtrTy =
+            Type::getInt8PtrTy(PT->getContext(), PT->getAddressSpace());
+        OpC = ConstantExpr::getBitCast(OpC, DestPtrTy);
+        C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
+                                           OpC, C);
+      } else {
+        C = ConstantExpr::getAdd(C, OpC);
       }
-      return C;
     }
-    return nullptr;
+    return C;
   }
   case scMulExpr: {
     const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
-    if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
-      // Don't bother with pointers at all.
-      if (C->getType()->isPointerTy())
+    Constant *C = nullptr;
+    for (const SCEV *Op : SM->operands()) {
+      assert(!Op->getType()->isPointerTy() && "Can't multiply pointers");
+      Constant *OpC = BuildConstantFromSCEV(Op);
+      if (!OpC)
         return nullptr;
-      for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
-        Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
-        if (!C2 || C2->getType()->isPointerTy())
-          return nullptr;
-        C = ConstantExpr::getMul(C, C2);
-      }
-      return C;
+      C = C ? ConstantExpr::getMul(C, OpC) : OpC;
     }
-    return nullptr;
+    return C;
   }
-  case scUDivExpr: {
-    const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
-    if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
-      if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
-        if (LHS->getType() == RHS->getType())
-          return ConstantExpr::getUDiv(LHS, RHS);
-    return nullptr;
-  }
+  case scUDivExpr:
   case scSMaxExpr:
   case scUMaxExpr:
   case scSMinExpr:
@@ -9297,15 +9675,7 @@
         if (MadeImprovement) {
           Constant *C = nullptr;
           const DataLayout &DL = getDataLayout();
-          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
-            C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                                Operands[1], DL, &TLI);
-          else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
-            if (!Load->isVolatile())
-              C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
-                                               DL);
-          } else
-            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
+          C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
           if (!C) return V;
           return getSCEV(C);
         }
@@ -9535,15 +9905,15 @@
 /// (b) if neither X nor Y exist, return None,
 /// (c) if exactly one of X and Y exists, return that value.
 static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
-  if (X.hasValue() && Y.hasValue()) {
+  if (X && Y) {
     unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
-    APInt XW = X->sextOrSelf(W);
-    APInt YW = Y->sextOrSelf(W);
+    APInt XW = X->sext(W);
+    APInt YW = Y->sext(W);
     return XW.slt(YW) ? *X : *Y;
   }
-  if (!X.hasValue() && !Y.hasValue())
+  if (!X && !Y)
     return None;
-  return X.hasValue() ? *X : *Y;
+  return X ? *X : *Y;
 }
 
 /// Helper function to truncate an optional APInt to a given BitWidth.
@@ -9558,7 +9928,7 @@
 /// equation are BW+1 bits wide (to avoid truncation when converting from
 /// the addrec to the equation).
 static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
-  if (!X.hasValue())
+  if (!X)
     return None;
   unsigned W = X->getBitWidth();
   if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
@@ -9585,13 +9955,13 @@
   APInt A, B, C, M;
   unsigned BitWidth;
   auto T = GetQuadraticEquation(AddRec);
-  if (!T.hasValue())
+  if (!T)
     return None;
 
   std::tie(A, B, C, M, BitWidth) = *T;
   LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
   Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
-  if (!X.hasValue())
+  if (!X)
     return None;
 
   ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
@@ -9627,7 +9997,7 @@
   APInt A, B, C, M;
   unsigned BitWidth;
   auto T = GetQuadraticEquation(AddRec);
-  if (!T.hasValue())
+  if (!T)
     return None;
 
   // Be careful about the return value: there can be two reasons for not
@@ -9672,7 +10042,7 @@
     // If SolveQuadraticEquationWrap returns None, it means that there can
     // be a solution, but the function failed to find it. We cannot treat it
     // as "no solution".
-    if (!SO.hasValue() || !UO.hasValue())
+    if (!SO || !UO)
       return { None, false };
 
     // Check the smaller value first to see if it leaves the range.
@@ -9690,8 +10060,8 @@
 
   std::tie(A, B, C, M, BitWidth) = *T;
   // Lower bound is inclusive, subtract 1 to represent the exiting value.
-  APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
-  APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
+  APInt Lower = Range.getLower().sext(A.getBitWidth()) - 1;
+  APInt Upper = Range.getUpper().sext(A.getBitWidth());
   auto SL = SolveForBoundary(Lower);
   auto SU = SolveForBoundary(Upper);
   // If any of the solutions was unknown, no meaningful conclusions can
@@ -9776,7 +10146,7 @@
     // value at this index.  When solving for "X*X != 5", for example, we
     // should not accept a root of 2.
     if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
-      const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
+      const auto *R = cast<SCEVConstant>(getConstant(*S));
       return ExitLimit(R, R, false, Predicates);
     }
     return getCouldNotCompute();
@@ -10187,8 +10557,8 @@
 
   // Domination relationship must be a linear order on collected loops.
 #ifndef NDEBUG
-  for (auto *L1 : LoopsUsed)
-    for (auto *L2 : LoopsUsed)
+  for (const auto *L1 : LoopsUsed)
+    for (const auto *L2 : LoopsUsed)
       assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
               DT.dominates(L2->getHeader(), L1->getHeader())) &&
              "Domination relationship is not a linear order");
@@ -10296,8 +10666,8 @@
     auto ResultSwapped =
         getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
 
-    assert(ResultSwapped.hasValue() && "should be able to analyze both!");
-    assert(ResultSwapped.getValue() != Result.getValue() &&
+    assert(ResultSwapped && "should be able to analyze both!");
+    assert(ResultSwapped.value() != Result.value() &&
            "monotonicity should flip as we flip the predicate");
   }
 #endif
@@ -10479,17 +10849,27 @@
     return false;
 
   if (Pred == CmpInst::ICMP_NE) {
-    if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
-        CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)))
+    auto SL = getSignedRange(LHS);
+    auto SR = getSignedRange(RHS);
+    if (CheckRanges(SL, SR))
+      return true;
+    auto UL = getUnsignedRange(LHS);
+    auto UR = getUnsignedRange(RHS);
+    if (CheckRanges(UL, UR))
       return true;
     auto *Diff = getMinusSCEV(LHS, RHS);
     return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff);
   }
 
-  if (CmpInst::isSigned(Pred))
-    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
+  if (CmpInst::isSigned(Pred)) {
+    auto SL = getSignedRange(LHS);
+    auto SR = getSignedRange(RHS);
+    return CheckRanges(SL, SR);
+  }
 
-  return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
+  auto UL = getUnsignedRange(LHS);
+  auto UR = getUnsignedRange(RHS);
+  return CheckRanges(UL, UR);
 }
 
 bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
@@ -10632,8 +11012,10 @@
                                              ICmpInst::Predicate Pred,
                                              const SCEV *LHS, const SCEV *RHS) {
   // Interpret a null as meaning no loop, where there is obviously no guard
-  // (interprocedural conditions notwithstanding).
-  if (!L) return true;
+  // (interprocedural conditions notwithstanding). Do not bother with
+  // unreachable loops.
+  if (!L || !DT.isReachableFromEntry(L->getHeader()))
+    return true;
 
   if (VerifyIR)
     assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
@@ -10690,12 +11072,6 @@
       return true;
   }
 
-  // If the loop is not reachable from the entry block, we risk running into an
-  // infinite loop as we walk up into the dom tree.  These loops do not matter
-  // anyway, so we just return a conservative answer when we see them.
-  if (!DT.isReachableFromEntry(L->getHeader()))
-    return false;
-
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
     return true;
 
@@ -10741,6 +11117,9 @@
                                                      ICmpInst::Predicate Pred,
                                                      const SCEV *LHS,
                                                      const SCEV *RHS) {
+  // Do not bother proving facts for unreachable code.
+  if (!DT.isReachableFromEntry(BB))
+    return true;
   if (VerifyIR)
     assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
            "This cannot be done on broken IR!");
@@ -10774,20 +11153,6 @@
       return true;
   }
 
-  // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
-  auto ProveViaGuard = [&](const BasicBlock *Block) {
-    if (isImpliedViaGuard(Block, Pred, LHS, RHS))
-      return true;
-    if (ProvingStrictComparison) {
-      auto ProofFn = [&](ICmpInst::Predicate P) {
-        return isImpliedViaGuard(Block, P, LHS, RHS);
-      };
-      if (SplitAndProve(ProofFn))
-        return true;
-    }
-    return false;
-  };
-
   // Try to prove (Pred, LHS, RHS) using isImpliedCond.
   auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
     const Instruction *CtxI = &BB->front();
@@ -10814,17 +11179,13 @@
     PredBB = BB->getSinglePredecessor();
   for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
        Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
-    if (ProveViaGuard(Pair.first))
-      return true;
-
-    const BranchInst *LoopEntryPredicate =
+    const BranchInst *BlockEntryPredicate =
         dyn_cast<BranchInst>(Pair.first->getTerminator());
-    if (!LoopEntryPredicate ||
-        LoopEntryPredicate->isUnconditional())
+    if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional())
       continue;
 
-    if (ProveViaCond(LoopEntryPredicate->getCondition(),
-                     LoopEntryPredicate->getSuccessor(0) != Pair.second))
+    if (ProveViaCond(BlockEntryPredicate->getCondition(),
+                     BlockEntryPredicate->getSuccessor(0) != Pair.second))
       return true;
   }
 
@@ -10840,6 +11201,15 @@
       return true;
   }
 
+  // Check conditions due to any @llvm.experimental.guard intrinsics.
+  auto *GuardDecl = F.getParent()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_guard));
+  if (GuardDecl)
+    for (const auto *GU : GuardDecl->users())
+      if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+        if (Guard->getFunction() == BB->getParent() && DT.dominates(Guard, BB))
+          if (ProveViaCond(Guard->getArgOperand(0), false))
+            return true;
   return false;
 }
 
@@ -12596,7 +12966,7 @@
 
   if (isQuadratic()) {
     if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
-      return SE.getConstant(S.getValue());
+      return SE.getConstant(*S);
   }
 
   return SE.getCouldNotCompute();
@@ -12636,6 +13006,15 @@
   });
 }
 
+// Return true when S contains a SCEVUnknown whose underlying value has been
+// erased (i.e. is now a nullptr).
+bool ScalarEvolution::containsErasedValue(const SCEV *S) const {
+  return SCEVExprContains(S, [](const SCEV *S) {
+    if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+      return SU->getValue() == nullptr;
+    return false;
+  });
+}
+
 /// Return the size of an element read or written by Inst.
 const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
   Type *Ty;
@@ -12820,12 +13199,13 @@
   L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
   OS << ": ";
 
-  SCEVUnionPredicate Pred;
-  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
+  SmallVector<const SCEVPredicate *, 4> Preds;
+  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
   if (!isa<SCEVCouldNotCompute>(PBT)) {
     OS << "Predicated backedge-taken count is " << *PBT << "\n";
     OS << " Predicates:\n";
-    Pred.print(OS, 4);
+    for (const auto *P : Preds)
+      P->print(OS, 4);
   } else {
     OS << "Unpredictable predicated backedge-taken count. ";
   }
@@ -12901,7 +13281,7 @@
           }
 
           bool First = true;
-          for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
+          for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
             if (First) {
               OS << "\t\t" "LoopDispositions: { ";
               First = false;
@@ -12913,7 +13293,7 @@
             OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
           }
 
-          for (auto *InnerL : depth_first(L)) {
+          for (const auto *InnerL : depth_first(L)) {
             if (InnerL == L)
               continue;
             if (First) {
@@ -12993,7 +13373,7 @@
 
     // This recurrence is variant w.r.t. L if any of its operands
     // are variant.
-    for (auto *Op : AR->operands())
+    for (const auto *Op : AR->operands())
       if (!isLoopInvariant(Op, L))
         return LoopVariant;
 
@@ -13008,7 +13388,7 @@
   case scSMinExpr:
   case scSequentialUMinExpr: {
     bool HasVarying = false;
-    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+    for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
       LoopDisposition D = getLoopDisposition(Op, L);
       if (D == LoopVariant)
         return LoopVariant;
@@ -13174,12 +13554,12 @@
     const SCEV *Curr = Worklist.pop_back_val();
     auto Users = SCEVUsers.find(Curr);
     if (Users != SCEVUsers.end())
-      for (auto *User : Users->second)
+      for (const auto *User : Users->second)
         if (ToForget.insert(User).second)
           Worklist.push_back(User);
   }
 
-  for (auto *S : ToForget)
+  for (const auto *S : ToForget)
     forgetMemoizedResultsImpl(S);
 
   for (auto I = PredicatedSCEVRewrites.begin();
@@ -13202,12 +13582,10 @@
 
   auto ExprIt = ExprValueMap.find(S);
   if (ExprIt != ExprValueMap.end()) {
-    for (auto &ValueAndOffset : ExprIt->second) {
-      if (ValueAndOffset.second == nullptr) {
-        auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
-        if (ValueIt != ValueExprMap.end())
-          ValueExprMap.erase(ValueIt);
-      }
+    for (Value *V : ExprIt->second) {
+      auto ValueIt = ValueExprMap.find_as(V);
+      if (ValueIt != ValueExprMap.end())
+        ValueExprMap.erase(ValueIt);
     }
     ExprValueMap.erase(ExprIt);
   }
@@ -13258,6 +13636,43 @@
   SCEVTraversal<FindUsedLoops>(F).visitAll(S);
 }
 
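+// Collect all basic blocks reachable from the entry of F, pruning conditional
+// branch successors that are provably never taken, either because the branch
+// condition is a constant or because SCEV constant ranges decide the compare.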
+void ScalarEvolution::getReachableBlocks(
+    SmallPtrSetImpl<BasicBlock *> &Reachable, Function &F) {
+  SmallVector<BasicBlock *> Worklist;
+  Worklist.push_back(&F.getEntryBlock());
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    if (!Reachable.insert(BB).second)
+      continue;
+
+    Value *Cond;
+    BasicBlock *TrueBB, *FalseBB;
+    if (match(BB->getTerminator(), m_Br(m_Value(Cond), m_BasicBlock(TrueBB),
+                                        m_BasicBlock(FalseBB)))) {
+      if (auto *C = dyn_cast<ConstantInt>(Cond)) {
+        Worklist.push_back(C->isOne() ? TrueBB : FalseBB);
+        continue;
+      }
+
+      if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
+        const SCEV *L = getSCEV(Cmp->getOperand(0));
+        const SCEV *R = getSCEV(Cmp->getOperand(1));
+        if (isKnownPredicateViaConstantRanges(Cmp->getPredicate(), L, R)) {
+          Worklist.push_back(TrueBB);
+          continue;
+        }
+        if (isKnownPredicateViaConstantRanges(Cmp->getInversePredicate(), L,
+                                              R)) {
+          Worklist.push_back(FalseBB);
+          continue;
+        }
+      }
+    }
+
+    append_range(Worklist, successors(BB));
+  }
+}
+
 void ScalarEvolution::verify() const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
   ScalarEvolution SE2(F, TLI, AC, DT, LI);
@@ -13282,13 +13697,44 @@
   };
 
   SCEVMapper SCM(SE2);
+  SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+  SE2.getReachableBlocks(ReachableBlocks, F);
+
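+  // Compute Old - New, or return nullptr when no meaningful delta can be
+  // derived: either operand contains undef, or (outside strict verification)
+  // the delta is non-constant.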
+  auto GetDelta = [&](const SCEV *Old, const SCEV *New) -> const SCEV * {
+    if (containsUndefs(Old) || containsUndefs(New)) {
+      // SCEV treats "undef" as an unknown but consistent value (i.e. it does
+      // not propagate undef aggressively).  This means we can (and do) fail
+      // verification in cases where a transform makes a value go from "undef"
+      // to "undef+1" (say).  The transform is fine, since in both cases the
+      // result is "undef", but SCEV thinks the value increased by 1.
+      return nullptr;
+    }
+
+    // Unless VerifySCEVStrict is set, we only compare constant deltas.
+    const SCEV *Delta = SE2.getMinusSCEV(Old, New);
+    if (!VerifySCEVStrict && !isa<SCEVConstant>(Delta))
+      return nullptr;
+
+    return Delta;
+  };
 
   while (!LoopStack.empty()) {
     auto *L = LoopStack.pop_back_val();
     llvm::append_range(LoopStack, *L);
 
-    auto *CurBECount = SCM.visit(
-        const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
+    // Only verify BECounts in reachable loops. For an unreachable loop,
+    // any BECount is legal.
+    if (!ReachableBlocks.contains(L->getHeader()))
+      continue;
+
+    // Only verify cached BECounts. Computing new BECounts may change the
+    // results of subsequent SCEV uses.
+    auto It = BackedgeTakenCounts.find(L);
+    if (It == BackedgeTakenCounts.end())
+      continue;
+
+    auto *CurBECount =
+        SCM.visit(It->second.getExact(L, const_cast<ScalarEvolution *>(this)));
     auto *NewBECount = SE2.getBackedgeTakenCount(L);
 
     if (CurBECount == SE2.getCouldNotCompute() ||
@@ -13301,16 +13747,6 @@
       continue;
     }
 
-    if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
-      // SCEV treats "undef" as an unknown but consistent value (i.e. it does
-      // not propagate undef aggressively).  This means we can (and do) fail
-      // verification in cases where a transform makes the trip count of a loop
-      // go from "undef" to "undef+1" (say).  The transform is fine, since in
-      // both cases the loop iterates "undef" times, but SCEV thinks we
-      // increased the trip count of the loop by 1 incorrectly.
-      continue;
-    }
-
     if (SE.getTypeSizeInBits(CurBECount->getType()) >
         SE.getTypeSizeInBits(NewBECount->getType()))
       NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
@@ -13318,10 +13754,8 @@
              SE.getTypeSizeInBits(NewBECount->getType()))
       CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
 
-    const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
-
-    // Unless VerifySCEVStrict is set, we only compare constant deltas.
-    if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
+    const SCEV *Delta = GetDelta(CurBECount, NewBECount);
+    if (Delta && !Delta->isZero()) {
       dbgs() << "Trip Count for " << *L << " Changed!\n";
       dbgs() << "Old: " << *CurBECount << "\n";
       dbgs() << "New: " << *NewBECount << "\n";
@@ -13335,12 +13769,10 @@
   SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
   while (!Worklist.empty()) {
     Loop *L = Worklist.pop_back_val();
-    if (ValidLoops.contains(L))
-      continue;
-    ValidLoops.insert(L);
-    Worklist.append(L->begin(), L->end());
+    if (ValidLoops.insert(L).second)
+      Worklist.append(L->begin(), L->end());
   }
-  for (auto &KV : ValueExprMap) {
+  for (const auto &KV : ValueExprMap) {
 #ifndef NDEBUG
     // Check for SCEV expressions referencing invalid/deleted loops.
     if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
@@ -13351,27 +13783,38 @@
 
     // Check that the value is also part of the reverse map.
     auto It = ExprValueMap.find(KV.second);
-    if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+    if (It == ExprValueMap.end() || !It->second.contains(KV.first)) {
       dbgs() << "Value " << *KV.first
              << " is in ValueExprMap but not in ExprValueMap\n";
       std::abort();
     }
+
+    if (auto *I = dyn_cast<Instruction>(&*KV.first)) {
+      if (!ReachableBlocks.contains(I->getParent()))
+        continue;
+      const SCEV *OldSCEV = SCM.visit(KV.second);
+      const SCEV *NewSCEV = SE2.getSCEV(I);
+      const SCEV *Delta = GetDelta(OldSCEV, NewSCEV);
+      if (Delta && !Delta->isZero()) {
+        dbgs() << "SCEV for value " << *I << " changed!\n"
+               << "Old: " << *OldSCEV << "\n"
+               << "New: " << *NewSCEV << "\n"
+               << "Delta: " << *Delta << "\n";
+        std::abort();
+      }
+    }
   }
 
   for (const auto &KV : ExprValueMap) {
-    for (const auto &ValueAndOffset : KV.second) {
-      if (ValueAndOffset.second != nullptr)
-        continue;
-
-      auto It = ValueExprMap.find_as(ValueAndOffset.first);
+    for (Value *V : KV.second) {
+      auto It = ValueExprMap.find_as(V);
       if (It == ValueExprMap.end()) {
-        dbgs() << "Value " << *ValueAndOffset.first
+        dbgs() << "Value " << *V
                << " is in ExprValueMap but not in ValueExprMap\n";
         std::abort();
       }
       if (It->second != KV.first) {
-        dbgs() << "Value " << *ValueAndOffset.first
-               << " mapped to " << *It->second
+        dbgs() << "Value " << *V << " mapped to " << *It->second
                << " rather than " << *KV.first << "\n";
         std::abort();
       }
@@ -13537,18 +13980,25 @@
 
 const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
                                                         const SCEV *RHS) {
+  return getComparePredicate(ICmpInst::ICMP_EQ, LHS, RHS);
+}
+
+const SCEVPredicate *
+ScalarEvolution::getComparePredicate(const ICmpInst::Predicate Pred,
+                                     const SCEV *LHS, const SCEV *RHS) {
   FoldingSetNodeID ID;
   assert(LHS->getType() == RHS->getType() &&
          "Type mismatch between LHS and RHS");
   // Unique this node based on the arguments
-  ID.AddInteger(SCEVPredicate::P_Equal);
+  ID.AddInteger(SCEVPredicate::P_Compare);
+  ID.AddInteger(Pred);
   ID.AddPointer(LHS);
   ID.AddPointer(RHS);
   void *IP = nullptr;
   if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
     return S;
-  SCEVEqualPredicate *Eq = new (SCEVAllocator)
-      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+  SCEVComparePredicate *Eq = new (SCEVAllocator)
+    SCEVComparePredicate(ID.Intern(SCEVAllocator), Pred, LHS, RHS);
   UniquePreds.InsertNode(Eq, IP);
   return Eq;
 }
@@ -13585,18 +14035,24 @@
   /// \p NewPreds such that the result will be an AddRecExpr.
   static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                              SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
-                             SCEVUnionPredicate *Pred) {
+                             const SCEVPredicate *Pred) {
     SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
     return Rewriter.visit(S);
   }
 
   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
     if (Pred) {
-      auto ExprPreds = Pred->getPredicatesForExpr(Expr);
-      for (auto *Pred : ExprPreds)
-        if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
-          if (IPred->getLHS() == Expr)
-            return IPred->getRHS();
+      if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) {
+        for (const auto *Pred : U->getPredicates())
+          if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred))
+            if (IPred->getLHS() == Expr &&
+                IPred->getPredicate() == ICmpInst::ICMP_EQ)
+              return IPred->getRHS();
+      } else if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) {
+        if (IPred->getLHS() == Expr &&
+            IPred->getPredicate() == ICmpInst::ICMP_EQ)
+          return IPred->getRHS();
+      }
     }
     return convertToAddRecWithPreds(Expr);
   }
@@ -13636,7 +14092,7 @@
 private:
   explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
                         SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
-                        SCEVUnionPredicate *Pred)
+                        const SCEVPredicate *Pred)
       : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
 
   bool addOverflowAssumption(const SCEVPredicate *P) {
@@ -13667,11 +14123,10 @@
     PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
     if (!PredicatedRewrite)
       return Expr;
-    for (auto *P : PredicatedRewrite->second){
+    for (const auto *P : PredicatedRewrite->second){
       // Wrap predicates from outer loops are not supported.
       if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
-        auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
-        if (L != AR->getLoop())
+        if (L != WP->getExpr()->getLoop())
           return Expr;
       }
       if (!addOverflowAssumption(P))
@@ -13681,14 +14136,15 @@
   }
 
   SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
-  SCEVUnionPredicate *Pred;
+  const SCEVPredicate *Pred;
   const Loop *L;
 };
 
 } // end anonymous namespace
 
-const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
-                                                   SCEVUnionPredicate &Preds) {
+const SCEV *
+ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
+                                       const SCEVPredicate &Preds) {
   return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
 }
 
@@ -13704,7 +14160,7 @@
 
   // Since the transformation was successful, we can now transfer the SCEV
   // predicates.
-  for (auto *P : TransformPreds)
+  for (const auto *P : TransformPreds)
     Preds.insert(P);
 
   return AddRec;
@@ -13715,28 +14171,36 @@
                              SCEVPredicateKind Kind)
     : FastID(ID), Kind(Kind) {}
 
-SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
-                                       const SCEV *LHS, const SCEV *RHS)
-    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
+SCEVComparePredicate::SCEVComparePredicate(const FoldingSetNodeIDRef ID,
+                                   const ICmpInst::Predicate Pred,
+                                   const SCEV *LHS, const SCEV *RHS)
+  : SCEVPredicate(ID, P_Compare), Pred(Pred), LHS(LHS), RHS(RHS) {
   assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
   assert(LHS != RHS && "LHS and RHS are the same SCEV");
 }
 
-bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
-  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
+bool SCEVComparePredicate::implies(const SCEVPredicate *N) const {
+  const auto *Op = dyn_cast<SCEVComparePredicate>(N);
 
   if (!Op)
     return false;
 
+  if (Pred != ICmpInst::ICMP_EQ)
+    return false;
+
   return Op->LHS == LHS && Op->RHS == RHS;
 }
 
-bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+bool SCEVComparePredicate::isAlwaysTrue() const { return false; }
 
-const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+void SCEVComparePredicate::print(raw_ostream &OS, unsigned Depth) const {
+  if (Pred == ICmpInst::ICMP_EQ)
+    OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+  else
+    OS.indent(Depth) << "Compare predicate: " << *LHS
+                     << " " << CmpInst::getPredicateName(Pred) << " "
+                     << *RHS << "\n";
 
-void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
-  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
 }
 
 SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
@@ -13744,7 +14208,7 @@
                                      IncrementWrapFlags Flags)
     : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}
 
-const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
+const SCEVAddRecExpr *SCEVWrapPredicate::getExpr() const { return AR; }
 
 bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
   const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
@@ -13793,68 +14257,51 @@
 }
 
 /// Union predicates don't get cached so create a dummy set ID for it.
-SCEVUnionPredicate::SCEVUnionPredicate()
-    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds)
+  : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {
+  for (const auto *P : Preds)
+    add(P);
+}
 
 bool SCEVUnionPredicate::isAlwaysTrue() const {
   return all_of(Preds,
                 [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
 }
 
-ArrayRef<const SCEVPredicate *>
-SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
-  auto I = SCEVToPreds.find(Expr);
-  if (I == SCEVToPreds.end())
-    return ArrayRef<const SCEVPredicate *>();
-  return I->second;
-}
-
 bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
   if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
     return all_of(Set->Preds,
                   [this](const SCEVPredicate *I) { return this->implies(I); });
 
-  auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
-  if (ScevPredsIt == SCEVToPreds.end())
-    return false;
-  auto &SCEVPreds = ScevPredsIt->second;
-
-  return any_of(SCEVPreds,
+  return any_of(Preds,
                 [N](const SCEVPredicate *I) { return I->implies(N); });
 }
 
-const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
-
 void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
-  for (auto Pred : Preds)
+  for (const auto *Pred : Preds)
     Pred->print(OS, Depth);
 }
 
 void SCEVUnionPredicate::add(const SCEVPredicate *N) {
   if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
-    for (auto Pred : Set->Preds)
+    for (const auto *Pred : Set->Preds)
       add(Pred);
     return;
   }
 
-  if (implies(N))
-    return;
-
-  const SCEV *Key = N->getExpr();
-  assert(Key && "Only SCEVUnionPredicate doesn't have an "
-                " associated expression!");
-
-  SCEVToPreds[Key].push_back(N);
   Preds.push_back(N);
 }
 
 PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                      Loop &L)
-    : SE(SE), L(L) {}
+    : SE(SE), L(L) {
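+  // Start with an empty union predicate; assumptions accumulate as SCEVs are
+  // rewritten through this PSE instance.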
+  SmallVector<const SCEVPredicate*, 4> Empty;
+  Preds = std::make_unique<SCEVUnionPredicate>(Empty);
+}
 
 void ScalarEvolution::registerUser(const SCEV *User,
                                    ArrayRef<const SCEV *> Ops) {
-  for (auto *Op : Ops)
+  for (const auto *Op : Ops)
     // We do not expect that forgetting cached data for SCEVConstants will ever
     // open any prospects for sharpening or introduce any correctness issues,
     // so we don't bother storing their dependencies.
@@ -13875,7 +14322,7 @@
   if (Entry.second)
     Expr = Entry.second;
 
-  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
+  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, *Preds);
   Entry = {Generation, NewSCEV};
 
   return NewSCEV;
@@ -13883,22 +14330,27 @@
 
 const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
   if (!BackedgeCount) {
-    SCEVUnionPredicate BackedgePred;
-    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
-    addPredicate(BackedgePred);
+    SmallVector<const SCEVPredicate *, 4> Preds;
+    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds);
+    for (const auto *P : Preds)
+      addPredicate(*P);
   }
   return BackedgeCount;
 }
 
 void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
-  if (Preds.implies(&Pred))
+  if (Preds->implies(&Pred))
     return;
-  Preds.add(&Pred);
+
+  auto &OldPreds = Preds->getPredicates();
+  SmallVector<const SCEVPredicate *, 4> NewPreds(OldPreds.begin(),
+                                                 OldPreds.end());
+  NewPreds.push_back(&Pred);
+  Preds = std::make_unique<SCEVUnionPredicate>(NewPreds);
   updateGeneration();
 }
 
-const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
-  return Preds;
+const SCEVPredicate &PredicatedScalarEvolution::getPredicate() const {
+  return *Preds;
 }
 
 void PredicatedScalarEvolution::updateGeneration() {
@@ -13906,7 +14358,7 @@
   if (++Generation == 0) {
     for (auto &II : RewriteMap) {
       const SCEV *Rewritten = II.second.second;
-      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
+      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, *Preds)};
     }
   }
 }
@@ -13951,18 +14403,18 @@
   if (!New)
     return nullptr;
 
-  for (auto *P : NewPreds)
-    Preds.add(P);
+  for (const auto *P : NewPreds)
+    addPredicate(*P);
 
-  updateGeneration();
   RewriteMap[SE.getSCEV(V)] = {Generation, New};
   return New;
 }
 
 PredicatedScalarEvolution::PredicatedScalarEvolution(
     const PredicatedScalarEvolution &Init)
-    : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
-      Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
+  : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L),
+    Preds(std::make_unique<SCEVUnionPredicate>(Init.Preds->getPredicates())),
+    Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
   for (auto I : Init.FlagsMap)
     FlagsMap.insert(I);
 }
@@ -14243,12 +14695,23 @@
         ExprsToRewrite.push_back(LHS);
     }
   };
-  // First, collect conditions from dominating branches. Starting at the loop
+
+  SmallVector<std::pair<Value *, bool>> Terms;
+  // First, collect information from assumptions dominating the loop.
+  for (auto &AssumeVH : AC.assumptions()) {
+    if (!AssumeVH)
+      continue;
+    auto *AssumeI = cast<CallInst>(AssumeVH);
+    if (!DT.dominates(AssumeI, L->getHeader()))
+      continue;
+    Terms.emplace_back(AssumeI->getOperand(0), true);
+  }
+
+  // Second, collect conditions from dominating branches. Starting at the loop
   // predecessor, climb up the predecessor chain as long as we can find
   // predecessors that have a unique successor leading to the original header.
   // TODO: share this logic with isLoopEntryGuardedByCond.
-  SmallVector<std::pair<Value *, bool>> Terms;
   for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
            L->getLoopPredecessor(), L->getHeader());
        Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -14280,8 +14743,9 @@
       if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
         auto Predicate =
             EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
-        CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
-                         getSCEV(Cmp->getOperand(1)), RewriteMap);
+        const auto *LHS = getSCEV(Cmp->getOperand(0));
+        const auto *RHS = getSCEV(Cmp->getOperand(1));
+        CollectCondition(Predicate, LHS, RHS, RewriteMap);
         continue;
       }
 
@@ -14294,18 +14758,6 @@
     }
   }
 
-  // Also collect information from assumptions dominating the loop.
-  for (auto &AssumeVH : AC.assumptions()) {
-    if (!AssumeVH)
-      continue;
-    auto *AssumeI = cast<CallInst>(AssumeVH);
-    auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
-    if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
-      continue;
-    CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
-                     getSCEV(Cmp->getOperand(1)), RewriteMap);
-  }
-
   if (RewriteMap.empty())
     return Expr;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index f4fa159..3d47dc6 100644
--- a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -20,6 +20,7 @@
 
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/InitializePasses.h"
 using namespace llvm;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index 64e908b..0619569 100644
--- a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -15,9 +15,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
 
diff --git a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index 209ae66..22dff5e 100644
--- a/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 using namespace llvm;
 
diff --git a/src/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/src/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index e847bf8..f510991 100644
--- a/src/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -36,7 +36,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/InitializePasses.h"
diff --git a/src/llvm-project/llvm/lib/Analysis/StackLifetime.cpp b/src/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
index 9056cc0..162fd75 100644
--- a/src/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/StackLifetime.cpp
@@ -19,17 +19,12 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
 #include <algorithm>
-#include <memory>
 #include <tuple>
 
 using namespace llvm;
@@ -75,7 +70,7 @@
   auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL);
   if (!AllocaSizeInBits)
     return nullptr;
-  int64_t AllocaSize = AllocaSizeInBits.getValue() / 8;
+  int64_t AllocaSize = *AllocaSizeInBits / 8;
 
   auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
   if (!Size)
@@ -187,7 +182,7 @@
 
       // Compute LiveIn by unioning together the LiveOut sets of all preds.
       BitVector LocalLiveIn;
-      for (auto *PredBB : predecessors(BB)) {
+      for (const auto *PredBB : predecessors(BB)) {
         LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
         // If a predecessor is unreachable, ignore it.
         if (I == BlockLiveness.end())
diff --git a/src/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 54f3605..9698af3 100644
--- a/src/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/StackLifetime.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -207,7 +206,7 @@
 
     O << "    allocas uses:\n";
     if (F) {
-      for (auto &I : instructions(F)) {
+      for (const auto &I : instructions(F)) {
         if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
           auto &AS = Allocas.find(AI)->second;
           O << "      " << AI->getName() << "["
@@ -384,9 +383,9 @@
   const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())),
                                     ToDiffTy(AccessSize));
   return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I)
-             .getValueOr(false) &&
+             .value_or(false) &&
          SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I)
-             .getValueOr(false);
+             .value_or(false);
 }
 
 /// The function analyzes all local uses of Ptr (alloca or argument) and
@@ -764,7 +763,7 @@
                                      uint32_t ParamNo) {
   assert(FS.isLive());
   assert(FS.isDSOLocal());
-  for (auto &PS : FS.paramAccesses())
+  for (const auto &PS : FS.paramAccesses())
     if (ParamNo == PS.ParamNo)
       return &PS.Use;
   return nullptr;
@@ -824,7 +823,7 @@
       Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits();
   StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy));
 
-  for (auto &F : SSDFA.run()) {
+  for (const auto &F : SSDFA.run()) {
     auto FI = F.second;
     auto &SrcF = Functions[F.first];
     for (auto &KV : FI.Allocas) {
@@ -923,7 +922,7 @@
     FunctionSummary::ParamAccess &Param = ParamAccesses.back();
 
     Param.Calls.reserve(PS.Calls.size());
-    for (auto &C : PS.Calls) {
+    for (const auto &C : PS.Calls) {
       // A parameter forwarded into another function by any or unknown offset
       // will make ParamAccess::Range FullSet anyway, so we can drop the
       // entire parameter like we did above.
@@ -979,7 +978,7 @@
   if (SSI.empty())
     return;
   const Module &M = *SSI.begin()->first->getParent();
-  for (auto &F : M.functions()) {
+  for (const auto &F : M.functions()) {
     if (!F.isDeclaration()) {
       SSI.find(&F)->second.print(O, F.getName(), &F);
       O << "    safe accesses:"
@@ -1095,7 +1094,7 @@
 bool llvm::needsParamAccessSummary(const Module &M) {
   if (StackSafetyRun)
     return true;
-  for (auto &F : M.functions())
+  for (const auto &F : M.functions())
     if (F.hasFnAttribute(Attribute::SanitizeMemTag))
       return true;
   return false;
@@ -1127,13 +1126,13 @@
         continue;
       if (FS->isLive() && FS->isDSOLocal()) {
         FunctionInfo<FunctionSummary> FI;
-        for (auto &PS : FS->paramAccesses()) {
+        for (const auto &PS : FS->paramAccesses()) {
           auto &US =
               FI.Params
                   .emplace(PS.ParamNo, FunctionSummary::ParamAccess::RangeWidth)
                   .first->second;
           US.Range = PS.Use;
-          for (auto &Call : PS.Calls) {
+          for (const auto &Call : PS.Calls) {
             assert(!Call.Offsets.isFullSet());
             FunctionSummary *S =
                 findCalleeFunctionSummary(Call.Callee, FS->modulePath());
@@ -1159,10 +1158,10 @@
   NumCombinedDataFlowNodes += Functions.size();
   StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA(
       FunctionSummary::ParamAccess::RangeWidth, std::move(Functions));
-  for (auto &KV : SSDFA.run()) {
+  for (const auto &KV : SSDFA.run()) {
     std::vector<FunctionSummary::ParamAccess> NewParams;
     NewParams.reserve(KV.second.Params.size());
-    for (auto &Param : KV.second.Params) {
+    for (const auto &Param : KV.second.Params) {
       // It's not needed as FullSet is processed the same as a missing value.
       if (Param.second.Range.isFullSet())
         continue;
diff --git a/src/llvm-project/llvm/lib/Analysis/StratifiedSets.h b/src/llvm-project/llvm/lib/Analysis/StratifiedSets.h
index 60ea245..883ebd2 100644
--- a/src/llvm-project/llvm/lib/Analysis/StratifiedSets.h
+++ b/src/llvm-project/llvm/lib/Analysis/StratifiedSets.h
@@ -340,10 +340,10 @@
     return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
   }
 
-  bool has(const T &Elem) const { return get(Elem).hasValue(); }
+  bool has(const T &Elem) const { return get(Elem).has_value(); }
 
   bool add(const T &Main) {
-    if (get(Main).hasValue())
+    if (get(Main))
       return false;
 
     auto NewIndex = getNewUnlinkedIndex();
@@ -560,7 +560,7 @@
 
   Optional<StratifiedIndex> indexOf(const T &Val) {
     auto MaybeVal = get(Val);
-    if (!MaybeVal.hasValue())
+    if (!MaybeVal)
       return None;
     auto *Info = *MaybeVal;
     auto &Link = linksAt(Info->Index);
diff --git a/src/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index ff833b5..3446e50 100644
--- a/src/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -116,18 +116,16 @@
 //   around from the latch.
 //
 //===----------------------------------------------------------------------===//
+
 #include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 
 #include <functional>
-#include <stack>
-#include <unordered_set>
 
 #define DEBUG_TYPE "sync-dependence"
 
@@ -257,7 +255,7 @@
                     [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
 }
 
-SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
+SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
 
 // divergence propagator for reducible CFGs
 struct DivergencePropagator {
diff --git a/src/llvm-project/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/src/llvm-project/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index a3edce7..29c41fd 100644
--- a/src/llvm-project/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,9 +14,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
 
 using namespace llvm;
@@ -57,7 +54,7 @@
     if (!OptProfCount)
       continue;
     auto Callee = CGT::edge_dest(E.second);
-    AdditionalCounts[Callee] += OptProfCount.getValue();
+    AdditionalCounts[Callee] += *OptProfCount;
   }
 
   // Update the counts for the nodes in the SCC.
@@ -70,7 +67,7 @@
     if (!OptProfCount)
       continue;
     auto Callee = CGT::edge_dest(E.second);
-    AddCount(Callee, OptProfCount.getValue());
+    AddCount(Callee, *OptProfCount);
   }
 }
 
diff --git a/src/llvm-project/llvm/lib/Analysis/TFUtils.cpp b/src/llvm-project/llvm/lib/Analysis/TFUtils.cpp
index 26bc639..682fc09 100644
--- a/src/llvm-project/llvm/lib/Analysis/TFUtils.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/TFUtils.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/JSON.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
@@ -49,19 +48,17 @@
 
 struct TFInitializer {
   TFInitializer() {
-    assert(!IsInitialized && "TFInitialized should be called only once");
     int Argc = 1;
     const char *Name = "";
     const char **NamePtr = &Name;
     TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
-    IsInitialized = true;
   }
-  bool IsInitialized = false;
 };
 
-llvm::ManagedStatic<TFInitializer> TFLibInitializer;
-
-bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
+bool ensureInitTF() {
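+  // A function-local static runs TF_InitMain exactly once (thread-safely),
+  // replacing the ManagedStatic-based initialization.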
+  static TFInitializer TFLibInitializer;
+  return true;
+}
 
 TFGraphPtr createTFGraph() {
   return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
@@ -82,6 +79,33 @@
     *OutStr = SE.SerializeAsString();
   }
 }
+
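+// Map the library-neutral TensorType enum onto the corresponding TF_DataType
+// constant from the TensorFlow C API.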
+int getTFTypeIndex(TensorType TType) {
+  switch (TType) {
+  case TensorType::Double:
+    return TF_DOUBLE;
+  case TensorType::Float:
+    return TF_FLOAT;
+  case TensorType::Int8:
+    return TF_INT8;
+  case TensorType::UInt8:
+    return TF_UINT8;
+  case TensorType::Int16:
+    return TF_INT16;
+  case TensorType::UInt16:
+    return TF_UINT16;
+  case TensorType::Int32:
+    return TF_INT32;
+  case TensorType::UInt32:
+    return TF_UINT32;
+  case TensorType::Int64:
+    return TF_INT64;
+  case TensorType::UInt64:
+    return TF_UINT64;
+  case TensorType::Invalid:
+    llvm_unreachable("Unknown tensor type");
+  }
+}
 } // namespace
 
 namespace llvm {
@@ -105,116 +129,6 @@
   std::vector<TF_Tensor *> Output;
 };
 
-size_t TensorSpec::getElementByteSize() const {
-  return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
-}
-
-TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
-                       const std::vector<int64_t> &Shape)
-    : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
-      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
-                                   std::multiplies<int64_t>())) {}
-
-Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
-                                           const json::Value &Value) {
-  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
-    std::string S;
-    llvm::raw_string_ostream OS(S);
-    OS << Value;
-    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
-    return None;
-  };
-  // FIXME: accept a Path as a parameter, and use it for error reporting.
-  json::Path::Root Root("tensor_spec");
-  json::ObjectMapper Mapper(Value, Root);
-  if (!Mapper)
-    return EmitError("Value is not a dict");
-
-  std::string TensorName;
-  int TensorPort = -1;
-  std::string TensorType;
-  std::vector<int64_t> TensorShape;
-
-  if (!Mapper.map<std::string>("name", TensorName))
-    return EmitError("'name' property not present or not a string");
-  if (!Mapper.map<std::string>("type", TensorType))
-    return EmitError("'type' property not present or not a string");
-  if (!Mapper.map<int>("port", TensorPort))
-    return EmitError("'port' property not present or not an int");
-  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
-    return EmitError("'shape' property not present or not an int array");
-
-#define PARSE_TYPE(T, E)                                                       \
-  if (TensorType == #T)                                                        \
-    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
-  TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
-#undef PARSE_TYPE
-  return None;
-}
-
-Optional<std::vector<LoggedFeatureSpec>>
-loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
-                StringRef ModelPath, StringRef SpecFileOverride) {
-  SmallVector<char, 128> OutputSpecsPath;
-  StringRef FileName = SpecFileOverride;
-  if (FileName.empty()) {
-    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
-    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
-  }
-
-  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
-  if (!BufferOrError) {
-    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
-                  BufferOrError.getError().message());
-    return None;
-  }
-  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
-  if (!ParsedJSONValues) {
-    Ctx.emitError("Could not parse specs file: " + FileName);
-    return None;
-  }
-  auto ValuesArray = ParsedJSONValues->getAsArray();
-  if (!ValuesArray) {
-    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
-                  "logging_name:<name>} dictionaries");
-    return None;
-  }
-  std::vector<LoggedFeatureSpec> Ret;
-  for (const auto &Value : *ValuesArray)
-    if (const auto *Obj = Value.getAsObject())
-      if (const auto *SpecPart = Obj->get("tensor_spec"))
-        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
-          if (auto LoggingName = Obj->getString("logging_name")) {
-            if (!TensorSpec->isElementType<int64_t>() &&
-                !TensorSpec->isElementType<int32_t>() &&
-                !TensorSpec->isElementType<float>()) {
-              Ctx.emitError(
-                  "Only int64, int32, and float tensors are supported. "
-                  "Found unsupported type for tensor named " +
-                  TensorSpec->name());
-              return None;
-            }
-            Ret.push_back({*TensorSpec, LoggingName->str()});
-          }
-
-  if (ValuesArray->size() != Ret.size()) {
-    Ctx.emitError(
-        "Unable to parse output spec. It should be a json file containing an "
-        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
-        "with a json object describing a TensorSpec; and a 'logging_name' key, "
-        "which is a string to use as name when logging this tensor in the "
-        "training log.");
-    return None;
-  }
-  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
-    Ctx.emitError("The first output spec must describe the decision tensor, "
-                  "and must have the logging_name " +
-                  StringRef(ExpectedDecisionName));
-    return None;
-  }
-  return Ret;
-}
-
 class TFModelEvaluatorImpl {
 public:
   TFModelEvaluatorImpl(StringRef SavedModelPath,
@@ -383,16 +297,29 @@
     errs() << TF_Message(Status.get());
     invalidate();
   }
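+  // Count the leading input specs that are actually present in the model;
+  // specs for unsupported features must come after all supported ones.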
+  size_t NrSupported = 0;
   for (size_t I = 0; I < InputSpecs.size(); ++I) {
     auto &InputSpec = InputSpecs[I];
     InputFeed[I] = {
         TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
         InputSpec.port()};
+    if (!InputFeed[I].oper) {
+      continue;
+    }
+    if (NrSupported++ != I) {
+      errs() << "Unsupported features must be placed at the end of the "
+                "InputSpecs\n";
+      invalidate();
+      return;
+    }
     if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
       return;
-    initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
+    initInput(I, static_cast<TF_DataType>(getTFTypeIndex(InputSpec.type())),
               InputSpec.shape());
   }
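+  // Drop the trailing unsupported features so evaluation only feeds the
+  // tensors the model actually declares.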
+  InputFeed.resize(NrSupported);
+  Input.resize(NrSupported);
+
   for (size_t I = 0; I < OutputSpecsSize; ++I) {
     auto OutputSpec = GetOutputSpecs(I);
     OutputFeed[I] = {
@@ -470,7 +397,9 @@
 }
 
 void *TFModelEvaluator::getUntypedInput(size_t Index) {
-  return TF_TensorData(Impl->getInput()[Index]);
+  if (Index < Impl->getInput().size())
+    return TF_TensorData(Impl->getInput()[Index]);
+  return nullptr;
 }
 
 TFModelEvaluator::EvaluationResult::EvaluationResult(
@@ -495,13 +424,6 @@
   return TF_TensorData(Impl->getOutput()[Index]);
 }
 
-#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
-  template <> int TensorSpec::getDataType<T>() { return E; }
-
-TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
-
-#undef TFUTILS_GETDATATYPE_IMPL
-
 TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
 TFModelEvaluator::~TFModelEvaluator() {}
 
diff --git a/src/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/src/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 22d6a5f..8ebdb65 100644
--- a/src/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -659,12 +659,12 @@
     TLI.setUnavailable(LibFunc_stpncpy);
   }
 
-  if (T.isPS4()) {
-    // PS4 does have memalign.
+  if (T.isPS()) {
+    // PS4/PS5 do have memalign.
     TLI.setAvailable(LibFunc_memalign);
 
-    // PS4 does not have new/delete with "unsigned int" size parameter;
-    // it only has the "unsigned long" versions.
+    // PS4/PS5 do not have new/delete with "unsigned int" size parameter;
+    // they only have the "unsigned long" versions.
     TLI.setUnavailable(LibFunc_ZdaPvj);
     TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
     TLI.setUnavailable(LibFunc_ZdlPvj);
@@ -1110,9 +1110,11 @@
   case LibFunc_system:
     return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
   case LibFunc___kmpc_alloc_shared:
+    return NumParams == 1 && FTy.getReturnType()->isPointerTy();
   case LibFunc_malloc:
   case LibFunc_vec_malloc:
-    return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
+    return NumParams == 1 && FTy.getParamType(0)->isIntegerTy(SizeTBits) &&
+           FTy.getReturnType()->isPointerTy();
   case LibFunc_memcmp:
     return NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
            FTy.getParamType(0)->isPointerTy() &&
@@ -1793,26 +1795,6 @@
     else
       return false;
   }
-
-  case LibFunc_rust_alloc:
-  case LibFunc_rust_alloc_zeroed:
-    return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
-            FTy.getParamType(0)->isIntegerTy() &&
-            FTy.getParamType(1)->isIntegerTy());
-
-  case LibFunc_rust_dealloc:
-    return (NumParams == 3 && FTy.getReturnType()->isVoidTy() &&
-            FTy.getParamType(0)->isPointerTy() &&
-            FTy.getParamType(1)->isIntegerTy() &&
-            FTy.getParamType(2)->isIntegerTy());
-
-  case LibFunc_rust_realloc:
-    return (NumParams == 4 && FTy.getReturnType()->isPointerTy() &&
-            FTy.getParamType(0)->isPointerTy() &&
-            FTy.getParamType(1)->isIntegerTy() &&
-            FTy.getParamType(2)->isIntegerTy() &&
-            FTy.getParamType(3)->isIntegerTy());
-
   case LibFunc::NumLibFuncs:
   case LibFunc::NotLibFunc:
     break;
diff --git a/src/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/src/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 25e9dee..143f03c 100644
--- a/src/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -11,7 +11,6 @@
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/TargetTransformInfoImpl.h"
 #include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -21,7 +20,6 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include <utility>
 
 using namespace llvm;
@@ -33,6 +31,11 @@
                                      cl::Hidden,
                                      cl::desc("Recognize reduction patterns."));
 
+static cl::opt<unsigned> CacheLineSize(
+    "cache-line-size", cl::init(0), cl::Hidden,
+    cl::desc("Override the target cache line size when specified "
+             "by the user."));
+
 namespace {
 /// No-op implementation of the TTI interface using the utility base
 /// classes.
@@ -55,14 +58,16 @@
 }
 
 IntrinsicCostAttributes::IntrinsicCostAttributes(
-    Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost)
+    Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
+    bool TypeBasedOnly)
     : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
       ScalarizationCost(ScalarizationCost) {
 
   if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
     FMF = FPMO->getFastMathFlags();
 
-  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
+  if (!TypeBasedOnly)
+    Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
   FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
   ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
 }
@@ -179,7 +184,7 @@
 TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
     : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
 
-TargetTransformInfo::~TargetTransformInfo() {}
+TargetTransformInfo::~TargetTransformInfo() = default;
 
 TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
     : TTIImpl(std::move(Arg.TTIImpl)) {}
@@ -291,11 +296,11 @@
 bool TargetTransformInfo::preferPredicateOverEpilogue(
     Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
     TargetLibraryInfo *TLI, DominatorTree *DT,
-    const LoopAccessInfo *LAI) const {
-  return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+    LoopVectorizationLegality *LVL) const {
+  return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
 }
 
-bool TargetTransformInfo::emitGetActiveLaneMask() const {
+PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
   return TTIImpl->emitGetActiveLaneMask();
 }
 
@@ -350,7 +355,8 @@
                                         Scale, AddrSpace, I);
 }
 
-bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+bool TargetTransformInfo::isLSRCostLess(const LSRCost &C1,
+                                        const LSRCost &C2) const {
   return TTIImpl->isLSRCostLess(C1, C2);
 }
 
@@ -398,11 +404,22 @@
   return TTIImpl->isLegalNTLoad(DataType, Alignment);
 }
 
+bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy,
+                                               ElementCount NumElements) const {
+  return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
+}
+
 bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
                                               Align Alignment) const {
   return TTIImpl->isLegalMaskedGather(DataType, Alignment);
 }
 
+bool TargetTransformInfo::isLegalAltInstr(
+    VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+    const SmallBitVector &OpcodeMask) const {
+  return TTIImpl->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
+}
+
 bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
                                                Align Alignment) const {
   return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
@@ -470,7 +487,7 @@
   return TTIImpl->isTypeLegal(Ty);
 }
 
-InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const {
+unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const {
   return TTIImpl->getRegUsageForType(Ty);
 }
 
@@ -507,6 +524,10 @@
   return TTIImpl->supportsEfficientVectorElementLoadStore();
 }
 
+bool TargetTransformInfo::supportsTailCalls() const {
+  return TTIImpl->supportsTailCalls();
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(
     bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
@@ -623,8 +644,9 @@
   return TTIImpl->getVScaleForTuning();
 }
 
-bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
-  return TTIImpl->shouldMaximizeVectorBandwidth();
+bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
+    TargetTransformInfo::RegisterKind K) const {
+  return TTIImpl->shouldMaximizeVectorBandwidth(K);
 }
 
 ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
@@ -637,6 +659,11 @@
   return TTIImpl->getMaximumVF(ElemWidth, Opcode);
 }
 
+unsigned TargetTransformInfo::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+                                                Type *ScalarValTy) const {
+  return TTIImpl->getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
+}
+
 bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
     const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
   return TTIImpl->shouldConsiderAddressTypePromotion(
@@ -644,7 +671,8 @@
 }
 
 unsigned TargetTransformInfo::getCacheLineSize() const {
-  return TTIImpl->getCacheLineSize();
+  return CacheLineSize.getNumOccurrences() > 0 ? CacheLineSize
+                                               : TTIImpl->getCacheLineSize();
 }
 
 llvm::Optional<unsigned>
@@ -742,12 +770,11 @@
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
-                                                    VectorType *Ty,
-                                                    ArrayRef<int> Mask,
-                                                    int Index,
-                                                    VectorType *SubTp) const {
-  InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
+InstructionCost TargetTransformInfo::getShuffleCost(
+    ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
+    VectorType *SubTp, ArrayRef<const Value *> Args) const {
+  InstructionCost Cost =
+      TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -973,18 +1000,21 @@
 
 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
     LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
-    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
+    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+    Optional<uint32_t> AtomicElementSize) const {
   return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
-                                            DestAddrSpace, SrcAlign, DestAlign);
+                                            DestAddrSpace, SrcAlign, DestAlign,
+                                            AtomicElementSize);
 }
 
 void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
     SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
     unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
-    unsigned SrcAlign, unsigned DestAlign) const {
-  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
-                                             SrcAddrSpace, DestAddrSpace,
-                                             SrcAlign, DestAlign);
+    unsigned SrcAlign, unsigned DestAlign,
+    Optional<uint32_t> AtomicCpySize) const {
+  TTIImpl->getMemcpyLoopResidualLoweringType(
+      OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
+      DestAlign, AtomicCpySize);
 }
 
 bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
@@ -1078,6 +1108,10 @@
   return TTIImpl->getGISelRematGlobalCost();
 }
 
+unsigned TargetTransformInfo::getMinTripCountTailFoldingThreshold() const {
+  return TTIImpl->getMinTripCountTailFoldingThreshold();
+}
+
 bool TargetTransformInfo::supportsScalableVectors() const {
   return TTIImpl->supportsScalableVectors();
 }
@@ -1155,7 +1189,7 @@
   }
 }
 
-TargetTransformInfo::Concept::~Concept() {}
+TargetTransformInfo::Concept::~Concept() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
 
diff --git a/src/llvm-project/llvm/lib/Analysis/TensorSpec.cpp b/src/llvm-project/llvm/lib/Analysis/TensorSpec.cpp
new file mode 100644
index 0000000..f6a5882
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Analysis/TensorSpec.cpp
@@ -0,0 +1,144 @@
+//===- TensorSpec.cpp - tensor type abstraction ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation file for the abstraction of a tensor type, and JSON loading
+// utils.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <numeric>
+
+using namespace llvm;
+
+namespace llvm {
+
+#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
+  template <> TensorType TensorSpec::getDataType<T>() { return TensorType::E; }
+
+SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_IMPL)
+
+#undef TFUTILS_GETDATATYPE_IMPL
+
+TensorSpec::TensorSpec(const std::string &Name, int Port, TensorType Type,
+                       size_t ElementSize, const std::vector<int64_t> &Shape)
+    : Name(Name), Port(Port), Type(Type), Shape(Shape),
+      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
+                                   std::multiplies<int64_t>())),
+      ElementSize(ElementSize) {}
+
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+                                           const json::Value &Value) {
+  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
+    std::string S;
+    llvm::raw_string_ostream OS(S);
+    OS << Value;
+    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
+    return None;
+  };
+  // FIXME: accept a Path as a parameter, and use it for error reporting.
+  json::Path::Root Root("tensor_spec");
+  json::ObjectMapper Mapper(Value, Root);
+  if (!Mapper)
+    return EmitError("Value is not a dict");
+
+  std::string TensorName;
+  int TensorPort = -1;
+  std::string TensorType;
+  std::vector<int64_t> TensorShape;
+
+  if (!Mapper.map<std::string>("name", TensorName))
+    return EmitError("'name' property not present or not a string");
+  if (!Mapper.map<std::string>("type", TensorType))
+    return EmitError("'type' property not present or not a string");
+  if (!Mapper.map<int>("port", TensorPort))
+    return EmitError("'port' property not present or not an int");
+  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
+    return EmitError("'shape' property not present or not an int array");
+
+#define PARSE_TYPE(T, E)                                                       \
+  if (TensorType == #T)                                                        \
+    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
+  SUPPORTED_TENSOR_TYPES(PARSE_TYPE)
+#undef PARSE_TYPE
+  return None;
+}
+
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+                StringRef ModelPath, StringRef SpecFileOverride) {
+  SmallVector<char, 128> OutputSpecsPath;
+  StringRef FileName = SpecFileOverride;
+  if (FileName.empty()) {
+    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
+    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
+  }
+
+  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
+  if (!BufferOrError) {
+    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
+                  BufferOrError.getError().message());
+    return None;
+  }
+  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
+  if (!ParsedJSONValues) {
+    Ctx.emitError("Could not parse specs file: " + FileName);
+    return None;
+  }
+  auto ValuesArray = ParsedJSONValues->getAsArray();
+  if (!ValuesArray) {
+    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
+                  "logging_name:<name>} dictionaries");
+    return None;
+  }
+  std::vector<LoggedFeatureSpec> Ret;
+  for (const auto &Value : *ValuesArray)
+    if (const auto *Obj = Value.getAsObject())
+      if (const auto *SpecPart = Obj->get("tensor_spec"))
+        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
+          if (auto LoggingName = Obj->getString("logging_name")) {
+            if (!TensorSpec->isElementType<int64_t>() &&
+                !TensorSpec->isElementType<int32_t>() &&
+                !TensorSpec->isElementType<float>()) {
+              Ctx.emitError(
+                  "Only int64, int32, and float tensors are supported. "
+                  "Found unsupported type for tensor named " +
+                  TensorSpec->name());
+              return None;
+            }
+            Ret.push_back({*TensorSpec, LoggingName->str()});
+          }
+
+  if (ValuesArray->size() != Ret.size()) {
+    Ctx.emitError(
+        "Unable to parse output spec. It should be a json file containing an "
+        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
+        "with a json object describing a TensorSpec; and a 'logging_name' key, "
+        "which is a string to use as name when logging this tensor in the "
+        "training log.");
+    return None;
+  }
+  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
+    Ctx.emitError("The first output spec must describe the decision tensor, "
+                  "and must have the logging_name " +
+                  StringRef(ExpectedDecisionName));
+    return None;
+  }
+  return Ret;
+}
+} // namespace llvm
diff --git a/src/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/src/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 627a78a..2160277 100644
--- a/src/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -112,7 +112,6 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/InitializePasses.h"
@@ -304,24 +303,27 @@
   /// given offset. Update the offset to be relative to the field type.
   TBAAStructTypeNode getField(uint64_t &Offset) const {
     bool NewFormat = isNewFormat();
+    const ArrayRef<MDOperand> Operands = Node->operands();
+    const unsigned NumOperands = Operands.size();
+
     if (NewFormat) {
       // New-format root and scalar type nodes have no fields.
-      if (Node->getNumOperands() < 6)
+      if (NumOperands < 6)
         return TBAAStructTypeNode();
     } else {
       // Parent can be omitted for the root node.
-      if (Node->getNumOperands() < 2)
+      if (NumOperands < 2)
         return TBAAStructTypeNode();
 
       // Fast path for a scalar type node and a struct type node with a single
       // field.
-      if (Node->getNumOperands() <= 3) {
-        uint64_t Cur = Node->getNumOperands() == 2
-                           ? 0
-                           : mdconst::extract<ConstantInt>(Node->getOperand(2))
-                                 ->getZExtValue();
+      if (NumOperands <= 3) {
+        uint64_t Cur =
+            NumOperands == 2
+                ? 0
+                : mdconst::extract<ConstantInt>(Operands[2])->getZExtValue();
         Offset -= Cur;
-        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+        MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
         if (!P)
           return TBAAStructTypeNode();
         return TBAAStructTypeNode(P);
@@ -333,10 +335,11 @@
     unsigned FirstFieldOpNo = NewFormat ? 3 : 1;
     unsigned NumOpsPerField = NewFormat ? 3 : 2;
     unsigned TheIdx = 0;
-    for (unsigned Idx = FirstFieldOpNo; Idx < Node->getNumOperands();
+
+    for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
          Idx += NumOpsPerField) {
-      uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
-                         ->getZExtValue();
+      uint64_t Cur =
+          mdconst::extract<ConstantInt>(Operands[Idx + 1])->getZExtValue();
       if (Cur > Offset) {
         assert(Idx >= FirstFieldOpNo + NumOpsPerField &&
                "TBAAStructTypeNode::getField should have an offset match!");
@@ -346,11 +349,11 @@
     }
     // Move along the last field.
     if (TheIdx == 0)
-      TheIdx = Node->getNumOperands() - NumOpsPerField;
-    uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
-                       ->getZExtValue();
+      TheIdx = NumOperands - NumOpsPerField;
+    uint64_t Cur =
+        mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
     Offset -= Cur;
-    MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+    MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
     if (!P)
       return TBAAStructTypeNode();
     return TBAAStructTypeNode(P);
@@ -808,7 +811,8 @@
     return nullptr;
 
   // Otherwise, create TBAA with the new Len
-  SmallVector<Metadata *, 4> NextNodes(MD->operands());
+  ArrayRef<MDOperand> MDOperands = MD->operands();
+  SmallVector<Metadata *, 4> NextNodes(MDOperands.begin(), MDOperands.end());
   ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]);
 
   // Don't create a new MDNode if it is the same length.
diff --git a/src/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp b/src/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 80051fd..e128187 100644
--- a/src/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -16,7 +16,6 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 
 using namespace llvm;
@@ -76,7 +75,9 @@
     SmallVectorImpl<DevirtCallSite> &DevirtCalls,
     SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
     DominatorTree &DT) {
-  assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
+  assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test ||
+         CI->getCalledFunction()->getIntrinsicID() ==
+             Intrinsic::public_type_test);
 
   const Module *M = CI->getParent()->getParent()->getParent();
 
diff --git a/src/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/src/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index 7573975..e6d2978 100644
--- a/src/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/VectorUtils.h"
 
 using namespace llvm;
diff --git a/src/llvm-project/llvm/lib/Analysis/ValueLatticeUtils.cpp b/src/llvm-project/llvm/lib/Analysis/ValueLatticeUtils.cpp
index 53638c3..2bcb4d5 100644
--- a/src/llvm-project/llvm/lib/Analysis/ValueLatticeUtils.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ValueLatticeUtils.cpp
@@ -29,12 +29,13 @@
       !GV->hasDefinitiveInitializer())
     return false;
   return all_of(GV->users(), [&](User *U) {
-    // Currently all users of a global variable have to be none-volatile loads
-    // or stores and the global cannot be stored itself.
+    // Currently all users of a global variable have to be non-volatile loads
+    // or stores of the global type, and the global cannot be stored itself.
     if (auto *Store = dyn_cast<StoreInst>(U))
-      return Store->getValueOperand() != GV && !Store->isVolatile();
+      return Store->getValueOperand() != GV && !Store->isVolatile() &&
+             Store->getValueOperand()->getType() == GV->getValueType();
     if (auto *Load = dyn_cast<LoadInst>(U))
-      return !Load->isVolatile();
+      return !Load->isVolatile() && Load->getType() == GV->getValueType();
 
     return false;
   });
diff --git a/src/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/src/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index c14bdb8..2dd671b 100644
--- a/src/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -70,10 +71,8 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
-#include <array>
 #include <cassert>
 #include <cstdint>
-#include <iterator>
 #include <utility>
 
 using namespace llvm;
@@ -86,13 +85,12 @@
 
 // According to the LangRef, branching on a poison condition is absolutely
 // immediate full UB.  However, historically we haven't implemented that
-// consistently as we have an important transformation (non-trivial unswitch)
-// which introduces instances of branch on poison/undef to otherwise well
-// defined programs.  This flag exists to let us test optimization benefit
-// of exploiting the specified behavior (in combination with enabling the
-// unswitch fix.)
+// consistently as we had an important transformation (non-trivial unswitch)
+// which introduced instances of branch on poison/undef to otherwise well
+// defined programs.  This issue has since been fixed, but the flag is
+// temporarily retained to easily diagnose potential regressions.
 static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
-                                        cl::Hidden, cl::init(false));
+                                        cl::Hidden, cl::init(true));
 
 
 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
@@ -275,13 +273,39 @@
   assert(LHS->getType()->isIntOrIntVectorTy() &&
          "LHS and RHS should be integers");
   // Look for an inverted mask: (X & ~M) op (Y & M).
-  Value *M;
-  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
-      match(RHS, m_c_And(m_Specific(M), m_Value())))
+  {
+    Value *M;
+    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+        match(RHS, m_c_And(m_Specific(M), m_Value())))
+      return true;
+    if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+        match(LHS, m_c_And(m_Specific(M), m_Value())))
+      return true;
+  }
+
+  // X op (Y & ~X)
+  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
+      match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value())))
     return true;
-  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
-      match(LHS, m_c_And(m_Specific(M), m_Value())))
+
+  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
+  // for constant Y.
+  Value *Y;
+  if (match(RHS,
+            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) ||
+      match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y))))
     return true;
+
+  // Look for: (A & B) op ~(A | B)
+  {
+    Value *A, *B;
+    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
+        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+      return true;
+    if (match(RHS, m_And(m_Value(A), m_Value(B))) &&
+        match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+      return true;
+  }
   IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
   KnownBits LHSKnown(IT->getBitWidth());
   KnownBits RHSKnown(IT->getBitWidth());
@@ -451,7 +475,12 @@
     }
   }
 
-  Known = KnownBits::mul(Known, Known2);
+  bool SelfMultiply = Op0 == Op1;
+  // TODO: SelfMultiply can be poison, but not undef.
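+  // (Each use of undef may observe a different value, so Op0 * Op0 with an
+  // undef Op0 need not behave like a perfect square.)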
+  if (SelfMultiply)
+    SelfMultiply &=
+        isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+  Known = KnownBits::mul(Known, Known2, SelfMultiply);
 
   // Only make use of no-wrap flags if we failed to compute the sign bit
   // directly.  This matters if the multiplication always overflows, in
@@ -656,7 +685,8 @@
   if (V->getType()->isPointerTy()) {
     if (RetainedKnowledge RK = getKnowledgeValidInContext(
             V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
-      Known.Zero.setLowBits(Log2_64(RK.ArgValue));
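+      // The known low zero bits only follow if the alignment is a power of
+      // two: an alignment of 2^K guarantees K low zero bits.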
+      if (isPowerOf2_64(RK.ArgValue))
+        Known.Zero.setLowBits(Log2_64(RK.ArgValue));
     }
   }
 
@@ -1041,7 +1071,7 @@
     // bits. This check is sunk down as far as possible to avoid the expensive
     // call to isKnownNonZero if the cheaper checks above fail.
     if (ShiftAmt == 0) {
-      if (!ShifterOperandIsNonZero.hasValue())
+      if (!ShifterOperandIsNonZero)
         ShifterOperandIsNonZero =
             isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
       if (*ShifterOperandIsNonZero)
@@ -1726,8 +1756,7 @@
           break;
         }
 
-        unsigned FirstZeroHighBit =
-            32 - countLeadingZeros(VScaleMax.getValue());
+        unsigned FirstZeroHighBit = 32 - countLeadingZeros(*VScaleMax);
         if (FirstZeroHighBit < BitWidth)
           Known.Zero.setBitsFrom(FirstZeroHighBit);
 
@@ -2007,6 +2036,63 @@
   assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
 }
 
+/// Try to detect a recurrence in which the value of the induction variable
+/// is always a power of two (or zero).
+static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
+                                   unsigned Depth, Query &Q) {
+  BinaryOperator *BO = nullptr;
+  Value *Start = nullptr, *Step = nullptr;
+  if (!matchSimpleRecurrence(PN, BO, Start, Step))
+    return false;
+
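+  // At this point PN is a recurrence PHI whose loop increment is
+  // BO = binop(PN, Step) (in some operand order), with initial value Start.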
+  // Initial value must be a power of two.
+  for (const Use &U : PN->operands()) {
+    if (U.get() == Start) {
+      // Initial value comes from a different BB, need to adjust context
+      // instruction for analysis.
+      Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
+      if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
+        return false;
+    }
+  }
+
+  // Except for Mul, the induction variable must be on the left side of the
+  // increment expression, otherwise its value can be arbitrary.
+  if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
+    return false;
+
+  Q.CxtI = BO->getParent()->getTerminator();
+  switch (BO->getOpcode()) {
+  case Instruction::Mul:
+    // Power of two is closed under multiplication.
+    return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
+            Q.IIQ.hasNoSignedWrap(BO)) &&
+           isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
+  case Instruction::SDiv:
+    // Start value must not be signmask for signed division, so simply being a
+    // power of two is not sufficient, and it has to be a constant.
+    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
+      return false;
+    LLVM_FALLTHROUGH;
+  case Instruction::UDiv:
+    // Divisor must be a power of two.
+    // If OrZero is false, we cannot guarantee that the induction variable is
+    // non-zero after the division (likewise for Shr below) unless the
+    // division is exact.
+    return (OrZero || Q.IIQ.isExact(BO)) &&
+           isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
+  case Instruction::Shl:
+    return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
+  case Instruction::AShr:
+    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
+      return false;
+    LLVM_FALLTHROUGH;
+  case Instruction::LShr:
+    return OrZero || Q.IIQ.isExact(BO);
+  default:
+    return false;
+  }
+}
+
 /// Return true if the given value is known to have exactly one
 /// bit set when defined. For vectors return true if every element is known to
 /// be a power of two when defined. Supports values with integer or pointer
@@ -2098,6 +2184,30 @@
     }
   }
 
+  // A PHI node is a power of two if all incoming values are powers of two, or
+  // if it is an induction variable whose value is a power of two at each step.
+  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+    Query RecQ = Q;
+
+    // Check if it is an induction variable and always power of two.
+    if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
+      return true;
+
+    // Recursively check all incoming values. Limit recursion to 2 levels, so
+    // that search complexity is limited to number of operands^2.
+    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+    return llvm::all_of(PN->operands(), [&](const Use &U) {
+      // Value is power of 2 if it is coming from PHI node itself by induction.
+      if (U.get() == PN)
+        return true;
+
+      // Change the context instruction to the incoming block where it is
+      // evaluated.
+      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
+      return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
+    });
+  }
+
   // An exact divide or right shift can only shift off zero bits, so the result
   // is a power of two only if the first operand is a power of two and not
   // copying a sign bit (sdiv int_min, 2).
@@ -2187,7 +2297,7 @@
     return false;
 
   unsigned NumUsesExplored = 0;
-  for (auto *U : V->users()) {
+  for (const auto *U : V->users()) {
     // Avoid massive lists
     if (NumUsesExplored >= DomConditionsMaxUses)
       break;
@@ -2228,7 +2338,7 @@
 
     SmallVector<const User *, 4> WorkList;
     SmallPtrSet<const User *, 4> Visited;
-    for (auto *CmpU : U->users()) {
+    for (const auto *CmpU : U->users()) {
       assert(WorkList.empty() && "Should be!");
       if (Visited.insert(CmpU).second)
         WorkList.push_back(CmpU);
@@ -2242,7 +2352,7 @@
         // TODO: Support similar logic of OR and EQ predicate?
         if (NonNullIfTrue)
           if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
-            for (auto *CurrU : Curr->users())
+            for (const auto *CurrU : Curr->users())
               if (Visited.insert(CurrU).second)
                 WorkList.push_back(CurrU);
             continue;
@@ -2588,6 +2698,9 @@
     if (isKnownNonZero(Op, Depth, Q) &&
         isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
       return true;
+  } else if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
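+    // The LangRef guarantees vscale is a positive (hence non-zero) value.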
+    if (II->getIntrinsicID() == Intrinsic::vscale)
+      return true;
   }
 
   KnownBits Known(BitWidth);
@@ -2885,6 +2998,24 @@
   return CLow->sle(*CHigh);
 }
 
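+/// Match a signed clamp such as smax(smin(X, CHigh), CLow) (or the commuted
+/// smin(smax(X, CLow), CHigh)) and set CLow/CHigh to the clamp bounds.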
+static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
+                                         const APInt *&CLow,
+                                         const APInt *&CHigh) {
+  assert((II->getIntrinsicID() == Intrinsic::smin ||
+          II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
+
+  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
+      !match(II->getArgOperand(1), m_APInt(CLow)) ||
+      !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
+    return false;
+
+  if (II->getIntrinsicID() == Intrinsic::smin)
+    std::swap(CLow, CHigh);
+  return CLow->sle(*CHigh);
+}
+
 /// For vector constants, loop over the elements and find the constant with the
 /// minimum number of sign bits. Return 0 if the value is not a vector constant
 /// or if any element was not analyzed; otherwise, return the count for the
@@ -3225,6 +3356,12 @@
 
           // Absolute value reduces number of sign bits by at most 1.
           return Tmp - 1;
+        case Intrinsic::smin:
+        case Intrinsic::smax: {
+          const APInt *CLow, *CHigh;
+          if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
+            return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+        }
         }
       }
     }
@@ -3358,9 +3495,6 @@
 /// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
 ///       that a value is not -0.0. It only guarantees that -0.0 may be treated
 ///       the same as +0.0 in floating-point ops.
-///
-/// NOTE: this function will need to be revisited when we support non-default
-/// rounding modes!
 bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
                                 unsigned Depth) {
   if (auto *CFP = dyn_cast<ConstantFP>(V))
@@ -3390,9 +3524,21 @@
     case Intrinsic::sqrt:
     case Intrinsic::canonicalize:
       return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+    case Intrinsic::experimental_constrained_sqrt: {
+      // NOTE: This rounding mode restriction may be too strict.
+      const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
+      if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
+        return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+      else
+        return false;
+    }
     // fabs(x) != -0.0
     case Intrinsic::fabs:
       return true;
+    // sitofp and uitofp turn into +0.0 for zero.
+    case Intrinsic::experimental_constrained_sitofp:
+    case Intrinsic::experimental_constrained_uitofp:
+      return true;
     }
   }
 
@@ -4032,69 +4178,83 @@
   return true;
 }
 
+// If V refers to an initialized global constant, set Slice either to
+// its initializer if the size of its elements equals ElementSize, or,
+// for ElementSize == 8, to its representation as an array of unsigned
+// char. Return true on success.
 bool llvm::getConstantDataArrayInfo(const Value *V,
                                     ConstantDataArraySlice &Slice,
                                     unsigned ElementSize, uint64_t Offset) {
   assert(V);
 
-  // Look through bitcast instructions and geps.
-  V = V->stripPointerCasts();
-
-  // If the value is a GEP instruction or constant expression, treat it as an
-  // offset.
-  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-    // The GEP operator should be based on a pointer to string constant, and is
-    // indexing into the string constant.
-    if (!isGEPBasedOnPointerToString(GEP, ElementSize))
-      return false;
-
-    // If the second index isn't a ConstantInt, then this is a variable index
-    // into the array.  If this occurs, we can't say anything meaningful about
-    // the string.
-    uint64_t StartIdx = 0;
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
-      StartIdx = CI->getZExtValue();
-    else
-      return false;
-    return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
-                                    StartIdx + Offset);
-  }
-
-  // The GEP instruction, constant or instruction, must reference a global
-  // variable that is a constant and is initialized. The referenced constant
-  // initializer is the array that we'll use for optimization.
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  // Drill down into the pointer expression V, ignoring any intervening
+  // casts, and determine the identity of the object it references along
+  // with the cumulative byte offset into it.
+  const GlobalVariable *GV =
+    dyn_cast<GlobalVariable>(getUnderlyingObject(V));
   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    // Fail if V is not based on a constant global object.
     return false;
 
-  const ConstantDataArray *Array;
-  ArrayType *ArrayTy;
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
+
+  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
+                                                 /*AllowNonInbounds*/ true))
+    // Fail if a constant offset could not be determined.
+    return false;
+
+  uint64_t StartIdx = Off.getLimitedValue();
+  if (StartIdx == UINT64_MAX)
+    // Fail if the constant offset is excessive.
+    return false;
+
+  Offset += StartIdx;
+
+  ConstantDataArray *Array = nullptr;
+  ArrayType *ArrayTy = nullptr;
+
   if (GV->getInitializer()->isNullValue()) {
     Type *GVTy = GV->getValueType();
-    if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
-      // A zeroinitializer for the array; there is no ConstantDataArray.
-      Array = nullptr;
-    } else {
-      const DataLayout &DL = GV->getParent()->getDataLayout();
-      uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
-      uint64_t Length = SizeInBytes / (ElementSize / 8);
-      if (Length <= Offset)
-        return false;
+    uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
+    uint64_t Length = SizeInBytes / (ElementSize / 8);
 
-      Slice.Array = nullptr;
-      Slice.Offset = 0;
-      Slice.Length = Length - Offset;
-      return true;
-    }
-  } else {
-    // This must be a ConstantDataArray.
-    Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
-    if (!Array)
-      return false;
-    ArrayTy = Array->getType();
+    Slice.Array = nullptr;
+    Slice.Offset = 0;
+    // Return an empty Slice for undersized constants to let callers
+    // transform even undefined library calls into simpler, well-defined
+    // expressions.  This is preferable to making the calls although it
+    // prevents sanitizers from detecting such calls.
+    Slice.Length = Length < Offset ? 0 : Length - Offset;
+    return true;
   }
-  if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
-    return false;
+
+  auto *Init = const_cast<Constant *>(GV->getInitializer());
+  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
+    Type *InitElTy = ArrayInit->getElementType();
+    if (InitElTy->isIntegerTy(ElementSize)) {
+      // If Init is an initializer for an array of the expected type
+      // and size, use it as is.
+      Array = ArrayInit;
+      ArrayTy = ArrayInit->getType();
+    }
+  }
+
+  if (!Array) {
+    if (ElementSize != 8)
+      // TODO: Handle conversions to larger integral types.
+      return false;
+
+    // Otherwise extract the portion of the initializer starting
+    // at Offset as an array of bytes, and reset Offset.
+    Init = ReadByteArrayFromGlobal(GV, Offset);
+    if (!Init)
+      return false;
+
+    Offset = 0;
+    Array = dyn_cast<ConstantDataArray>(Init);
+    ArrayTy = dyn_cast<ArrayType>(Init->getType());
+  }
 
   uint64_t NumElts = ArrayTy->getArrayNumElements();
   if (Offset > NumElts)
@@ -4106,9 +4266,10 @@
   return true;
 }
 
-/// This function computes the length of a null-terminated C string pointed to
-/// by V. If successful, it returns true and returns the string in Str.
-/// If unsuccessful, it returns false.
+/// Extract bytes from the initializer of the constant array V, which need
+/// not be a nul-terminated string.  On success, store the bytes in Str and
+/// return true.  When TrimAtNul is set, Str will contain only the bytes up
+/// to but not including the first nul.  Return false on failure.
 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
                                  uint64_t Offset, bool TrimAtNul) {
   ConstantDataArraySlice Slice;
@@ -4117,6 +4278,12 @@
 
   if (Slice.Array == nullptr) {
     if (TrimAtNul) {
+      // Return a nul-terminated string even for an empty Slice.  This is
+      // safe because all existing SimplifyLibcalls callers require string
+      // arguments and the behavior of the functions they fold is undefined
+      // otherwise.  Folding the calls this way is preferable to making
+      // the undefined library calls, even though it prevents sanitizers
+      // from reporting such calls.
       Str = StringRef();
       return true;
     }
@@ -4196,9 +4363,13 @@
     return 0;
 
   if (Slice.Array == nullptr)
+    // Zeroinitializer (including an empty one).
     return 1;
 
-  // Search for nul characters
+  // Search for the first nul character.  Return a conservative result even
+  // when there is no nul.  This is safe since otherwise the string function
+  // being folded, such as strlen, is undefined, and folding is preferable to
+  // making the undefined library call.
   unsigned NullIndex = 0;
   for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
     if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
@@ -4509,21 +4680,38 @@
     F.hasFnAttribute(Attribute::SanitizeHWAddress);
 }
 
-
-bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT,
                                         const TargetLibraryInfo *TLI) {
-  const Operator *Inst = dyn_cast<Operator>(V);
-  if (!Inst)
-    return false;
+  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
+                                                DT, TLI);
+}
 
-  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
-    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
-      if (C->canTrap())
-        return false;
+bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
+    unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
+    const DominatorTree *DT, const TargetLibraryInfo *TLI) {
+#ifndef NDEBUG
+  if (Inst->getOpcode() != Opcode) {
+    // Check that the operands are actually compatible with the Opcode override.
+    auto hasEqualReturnAndLeadingOperandTypes =
+        [](const Instruction *Inst, unsigned NumLeadingOperands) {
+          if (Inst->getNumOperands() < NumLeadingOperands)
+            return false;
+          const Type *ExpectedType = Inst->getType();
+          for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
+            if (Inst->getOperand(ItOp)->getType() != ExpectedType)
+              return false;
+          return true;
+        };
+    assert(!Instruction::isBinaryOp(Opcode) ||
+           hasEqualReturnAndLeadingOperandTypes(Inst, 2));
+    assert(!Instruction::isUnaryOp(Opcode) ||
+           hasEqualReturnAndLeadingOperandTypes(Inst, 1));
+  }
+#endif
 
-  switch (Inst->getOpcode()) {
+  switch (Opcode) {
   default:
     return true;
   case Instruction::UDiv:
@@ -4554,7 +4742,9 @@
     return false;
   }
   case Instruction::Load: {
-    const LoadInst *LI = cast<LoadInst>(Inst);
+    const LoadInst *LI = dyn_cast<LoadInst>(Inst);
+    if (!LI)
+      return false;
     if (mustSuppressSpeculation(*LI))
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
@@ -4563,7 +4753,9 @@
         TLI);
   }
   case Instruction::Call: {
-    auto *CI = cast<const CallInst>(Inst);
+    auto *CI = dyn_cast<const CallInst>(Inst);
+    if (!CI)
+      return false;
     const Function *Callee = CI->getCalledFunction();
 
     // The called function could have undefined behavior or side-effects, even
@@ -4595,8 +4787,20 @@
   }
 }
 
-bool llvm::mayBeMemoryDependent(const Instruction &I) {
-  return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
+  if (I.mayReadOrWriteMemory())
+    // Memory dependency possible
+    return true;
+  if (!isSafeToSpeculativelyExecute(&I))
+    // Can't move above a may-throw call or an infinite loop; nor, for an
+    // inalloca alloca, above a stacksave call.
+    return true;
+  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+    // 1) Can't reorder two inf-loop calls, even if readonly
+    // 2) Also can't reorder an inf-loop call below an instruction which isn't
+    //    safe to speculatively execute.  (Inverse of above)
+    return true;
+  return false;
 }
 
 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
@@ -4766,6 +4970,22 @@
                                                    AssumptionCache *AC,
                                                    const Instruction *CxtI,
                                                    const DominatorTree *DT) {
+  // X - (X % ?)
+  // The remainder of a value can't have greater magnitude than itself,
+  // so the subtraction can't overflow.
+
+  // X - (X -nuw ?)
+  // In the minimal case, this would simplify to "?", so there's no subtract
+  // at all. But if this analysis is used to peek through casts, for example,
+  // then determining no-overflow may allow other transforms.
+
+  // TODO: There are other patterns like this.
+  //       See simplifyICmpWithBinOpOnLHS() for candidates.
+  if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
+      match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
+    if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+      return OverflowResult::NeverOverflows;
+
   // Checking for conditions implied by dominating conditions may be expensive.
   // Limit it to usub_with_overflow calls for now.
   if (match(CxtI,
@@ -4789,6 +5009,19 @@
                                                  AssumptionCache *AC,
                                                  const Instruction *CxtI,
                                                  const DominatorTree *DT) {
+  // X - (X % ?)
+  // The remainder of a value can't have greater magnitude than itself,
+  // so the subtraction can't overflow.
+
+  // X - (X -nsw ?)
+  // In the minimal case, this would simplify to "?", so there's no subtract
+  // at all. But if this analysis is used to peek through casts, for example,
+  // then determining no-overflow may allow other transforms.
+  if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
+      match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
+    if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+      return OverflowResult::NeverOverflows;
+
   // If LHS and RHS each have at least two sign bits, the subtraction
   // cannot overflow.
   if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
@@ -4841,7 +5074,7 @@
       if (DT.dominates(NoWrapEdge, Result->getParent()))
         continue;
 
-      for (auto &RU : Result->uses())
+      for (const auto &RU : Result->uses())
         if (!DT.dominates(NoWrapEdge, RU))
           return false;
     }
@@ -5100,7 +5333,9 @@
   }
 
   if (auto *I = dyn_cast<LoadInst>(V))
-    if (I->getMetadata(LLVMContext::MD_noundef))
+    if (I->hasMetadata(LLVMContext::MD_noundef) ||
+        I->hasMetadata(LLVMContext::MD_dereferenceable) ||
+        I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
       return true;
 
   if (programUndefinedIfUndefOrPoison(V, PoisonOnly))
@@ -5125,10 +5360,10 @@
     auto *TI = Dominator->getBlock()->getTerminator();
 
     Value *Cond = nullptr;
-    if (auto BI = dyn_cast<BranchInst>(TI)) {
+    if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
       if (BI->isConditional())
         Cond = BI->getCondition();
-    } else if (auto SI = dyn_cast<SwitchInst>(TI)) {
+    } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
       Cond = SI->getCondition();
     }
 
@@ -5411,7 +5646,7 @@
     // whether a value is directly passed to an instruction that must take
     // well-defined operands.
 
-    for (auto &I : make_range(Begin, End)) {
+    for (const auto &I : make_range(Begin, End)) {
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       if (--ScanLimit == 0)
@@ -5442,7 +5677,7 @@
   Visited.insert(BB);
 
   while (true) {
-    for (auto &I : make_range(Begin, End)) {
+    for (const auto &I : make_range(Begin, End)) {
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       if (--ScanLimit == 0)
@@ -5763,20 +5998,6 @@
   if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
     return {SPF_UNKNOWN, SPNB_NA, false};
 
-  // Z = X -nsw Y
-  // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
-  // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
-  if (match(TrueVal, m_Zero()) &&
-      match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
-    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
-
-  // Z = X -nsw Y
-  // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
-  // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
-  if (match(FalseVal, m_Zero()) &&
-      match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
-    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
-
   const APInt *C1;
   if (!match(CmpRHS, m_APInt(C1)))
     return {SPF_UNKNOWN, SPNB_NA, false};
@@ -6323,7 +6544,6 @@
 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                             const Value *RHS, const DataLayout &DL,
                             unsigned Depth) {
-  assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
   if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
     return true;
 
@@ -6436,14 +6656,12 @@
 /// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
 /// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
 /// Otherwise, return None if we can't infer anything.
-static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
-                                 const ConstantInt *C1,
-                                 CmpInst::Predicate BPred,
-                                 const ConstantInt *C2) {
-  ConstantRange DomCR =
-      ConstantRange::makeExactICmpRegion(APred, C1->getValue());
-  ConstantRange CR = ConstantRange::makeExactICmpRegion(BPred, C2->getValue());
+static Optional<bool> isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
+                                                       const APInt &C1,
+                                                       CmpInst::Predicate BPred,
+                                                       const APInt &C2) {
+  ConstantRange DomCR = ConstantRange::makeExactICmpRegion(APred, C1);
+  ConstantRange CR = ConstantRange::makeExactICmpRegion(BPred, C2);
   ConstantRange Intersection = DomCR.intersectWith(CR);
   ConstantRange Difference = DomCR.difference(CR);
   if (Intersection.isEmptySet())
@@ -6481,14 +6699,9 @@
 
   // Can we infer anything when the LHS operands match and the RHS operands are
   // constants (not necessarily matching)?
-  if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
-    if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
-            APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
-      return Implication;
-    // No amount of additional analysis will infer the second condition, so
-    // early exit.
-    return None;
-  }
+  const APInt *AC, *BC;
+  if (ALHS == BLHS && match(ARHS, m_APInt(AC)) && match(BRHS, m_APInt(BC)))
+    return isImpliedCondMatchingImmOperands(APred, *AC, BPred, *BC);
 
   if (APred == BPred)
     return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth);
@@ -6541,14 +6754,8 @@
   if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
     return None;
 
-  Type *OpTy = LHS->getType();
-  assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!");
-
-  // FIXME: Extending the code below to handle vectors.
-  if (OpTy->isVectorTy())
-    return None;
-
-  assert(OpTy->isIntegerTy(1) && "implied by above");
+  assert(LHS->getType()->isIntOrIntVectorTy(1) &&
+         "Expected integer type only!");
 
   // Both LHS and RHS are icmps.
   const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
@@ -6576,11 +6783,38 @@
   if (LHS == RHS)
     return LHSIsTrue;
 
-  const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS);
-  if (RHSCmp)
+  if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS))
     return isImpliedCondition(LHS, RHSCmp->getPredicate(),
                               RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL,
                               LHSIsTrue, Depth);
+
+  if (Depth == MaxAnalysisRecursionDepth)
+    return None;
+
+  // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
+  // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
+  const Value *RHS1, *RHS2;
+  if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
+    if (Optional<bool> Imp =
+            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
+      if (*Imp == true)
+        return true;
+    if (Optional<bool> Imp =
+            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
+      if (*Imp == true)
+        return true;
+  }
+  if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
+    if (Optional<bool> Imp =
+            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
+      if (*Imp == false)
+        return false;
+    if (Optional<bool> Imp =
+            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
+      if (*Imp == false)
+        return false;
+  }
+
   return None;
 }
 
@@ -7072,66 +7306,25 @@
 
 Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
                                         const DataLayout &DL) {
-  Ptr1 = Ptr1->stripPointerCasts();
-  Ptr2 = Ptr2->stripPointerCasts();
+  APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0);
+  APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0);
+  Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true);
+  Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true);
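+  // Offset1 and Offset2 now hold the constant byte offsets of the original
+  // pointers from their stripped bases, so equal bases reduce to a simple
+  // difference of offsets.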
 
   // Handle the trivial case first.
-  if (Ptr1 == Ptr2) {
-    return 0;
-  }
+  if (Ptr1 == Ptr2)
+    return Offset2.getSExtValue() - Offset1.getSExtValue();
 
   const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
   const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
 
-  // If one pointer is a GEP see if the GEP is a constant offset from the base,
-  // as in "P" and "gep P, 1".
-  // Also do this iteratively to handle the the following case:
-  //   Ptr_t1 = GEP Ptr1, c1
-  //   Ptr_t2 = GEP Ptr_t1, c2
-  //   Ptr2 = GEP Ptr_t2, c3
-  // where we will return c1+c2+c3.
-  // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base
-  // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases
-  // are the same, and return the difference between offsets.
-  auto getOffsetFromBase = [&DL](const GEPOperator *GEP,
-                                 const Value *Ptr) -> Optional<int64_t> {
-    const GEPOperator *GEP_T = GEP;
-    int64_t OffsetVal = 0;
-    bool HasSameBase = false;
-    while (GEP_T) {
-      auto Offset = getOffsetFromIndex(GEP_T, 1, DL);
-      if (!Offset)
-        return None;
-      OffsetVal += *Offset;
-      auto Op0 = GEP_T->getOperand(0)->stripPointerCasts();
-      if (Op0 == Ptr) {
-        HasSameBase = true;
-        break;
-      }
-      GEP_T = dyn_cast<GEPOperator>(Op0);
-    }
-    if (!HasSameBase)
-      return None;
-    return OffsetVal;
-  };
-
-  if (GEP1) {
-    auto Offset = getOffsetFromBase(GEP1, Ptr2);
-    if (Offset)
-      return -*Offset;
-  }
-  if (GEP2) {
-    auto Offset = getOffsetFromBase(GEP2, Ptr1);
-    if (Offset)
-      return Offset;
-  }
-
   // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
   // base.  After that base, they may have some number of common (and
   // potentially variable) indices.  After that they handle some constant
   // offset, which determines their offset from each other.  At this point, we
   // handle no other case.
-  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) ||
+      GEP1->getSourceElementType() != GEP2->getSourceElementType())
     return None;
 
   // Skip any common indices and track the GEP types.
@@ -7140,9 +7333,10 @@
     if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
       break;
 
-  auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL);
-  auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL);
-  if (!Offset1 || !Offset2)
+  auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL);
+  auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL);
+  if (!IOffset1 || !IOffset2)
     return None;
-  return *Offset2 - *Offset1;
+  return *IOffset2 - *IOffset1 + Offset2.getSExtValue() -
+         Offset1.getSExtValue();
 }
diff --git a/src/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/src/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index 655c248..c4795a8 100644
--- a/src/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/src/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -40,7 +40,7 @@
 /// Return true if all of the intrinsic's arguments and return type are scalars
 /// for the scalar form of the intrinsic, and vectors for the vector form of the
 /// intrinsic (except operands that are marked as always being scalar by
-/// hasVectorInstrinsicScalarOpd).
+/// isVectorIntrinsicWithScalarOpAtArg).
 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   switch (ID) {
   case Intrinsic::abs:   // Begin integer bit-manipulation.
@@ -89,6 +89,8 @@
   case Intrinsic::fmuladd:
   case Intrinsic::powi:
   case Intrinsic::canonicalize:
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
     return true;
   default:
     return false;
@@ -96,8 +98,8 @@
 }
 
 /// Identifies if the vector form of the intrinsic has a scalar operand.
-bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
-                                        unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+                                              unsigned ScalarOpdIdx) {
   switch (ID) {
   case Intrinsic::abs:
   case Intrinsic::ctlz:
@@ -114,11 +116,14 @@
   }
 }
 
-bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
-                                                  unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
+                                                  unsigned OpdIdx) {
   switch (ID) {
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
+    return OpdIdx == 0;
   case Intrinsic::powi:
-    return (ScalarOpdIdx == 1);
+    return OpdIdx == 1;
   default:
     return false;
   }
@@ -496,6 +501,116 @@
   return true;
 }
 
+void llvm::processShuffleMasks(
+    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
+    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
+    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
+    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
+  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
+  // Try to perform better estimation of the permutation.
+  // 1. Split the source/destination vectors into real registers.
+  // 2. Do the mask analysis to identify which real registers are
+  // permuted.
+  int Sz = Mask.size();
+  unsigned SzDest = Sz / NumOfDestRegs;
+  unsigned SzSrc = Sz / NumOfSrcRegs;
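+  // E.g. for Mask.size() == 8 with NumOfSrcRegs == NumOfDestRegs == 2, each
+  // destination register covers 4 mask elements, each drawn from one of two
+  // 4-element source registers.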
+  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+    auto &RegMasks = Res[I];
+    RegMasks.assign(NumOfSrcRegs, {});
+    // Record which source register each element of this destination
+    // register comes from.
+    for (unsigned K = 0; K < SzDest; ++K) {
+      int Idx = I * SzDest + K;
+      if (Idx == Sz)
+        break;
+      if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
+        continue;
+      int SrcRegIdx = Mask[Idx] / SzSrc;
+      // Add a cost of PermuteTwoSrc for each new source register permute,
+      // if we have more than one source register.
+      if (RegMasks[SrcRegIdx].empty())
+        RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
+      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
+    }
+  }
+  // Process split mask.
+  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
+    auto &Dest = Res[I];
+    int NumSrcRegs =
+        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+    switch (NumSrcRegs) {
+    case 0:
+      // No input vectors were used!
+      NoInputAction();
+      break;
+    case 1: {
+      // Find the only non-empty source mask, i.e. the one with at least a
+      // single defined (non-undef) element.
+      auto *It =
+          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+      unsigned SrcReg = std::distance(Dest.begin(), It);
+      SingleInputAction(*It, SrcReg, I);
+      break;
+    }
+    default: {
+      // The first mask is a permutation of a single register. Since we have
+      // more than one input register to shuffle, we merge the masks of the
+      // first two registers and generate a shuffle of 2 registers rather than
+      // reordering the first register and then shuffling it with the second
+      // register. Next, generate the shuffles of the resulting register + the
+      // remaining registers from the list.
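+      // Merge SecondMask into FirstMask, biasing SecondMask's elements by VF
+      // so they select from the second input of a two-source shuffle.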
+      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
+                               ArrayRef<int> SecondMask) {
+        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
+          if (SecondMask[Idx] != UndefMaskElem) {
+            assert(FirstMask[Idx] == UndefMaskElem &&
+                   "Expected undefined mask element.");
+            FirstMask[Idx] = SecondMask[Idx] + VF;
+          }
+        }
+      };
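+      // After the two-register shuffle has been emitted, the combined mask
+      // selects from the shuffle result itself, so remap each defined element
+      // to an identity selection of its own lane.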
+      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
+        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+          if (Mask[Idx] != UndefMaskElem)
+            Mask[Idx] = Idx;
+        }
+      };
+      int SecondIdx;
+      do {
+        int FirstIdx = -1;
+        SecondIdx = -1;
+        MutableArrayRef<int> FirstMask, SecondMask;
+        for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+          SmallVectorImpl<int> &RegMask = Dest[I];
+          if (RegMask.empty())
+            continue;
+
+          if (FirstIdx == SecondIdx) {
+            FirstIdx = I;
+            FirstMask = RegMask;
+            continue;
+          }
+          SecondIdx = I;
+          SecondMask = RegMask;
+          CombineMasks(FirstMask, SecondMask);
+          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
+          NormalizeMask(FirstMask);
+          RegMask.clear();
+          SecondMask = FirstMask;
+          SecondIdx = FirstIdx;
+        }
+        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
+          CombineMasks(SecondMask, FirstMask);
+          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
+          Dest[FirstIdx].clear();
+          NormalizeMask(SecondMask);
+        }
+      } while (SecondIdx >= 0);
+      break;
+    }
+    }
+  }
+}
+
 MapVector<Instruction *, uint64_t>
 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                                const TargetTransformInfo *TTI) {
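
To make the decomposition above concrete: processShuffleMasks slices the flat mask into one sub-mask per (destination register, source register) pair, then dispatches each destination to NoInputAction, SingleInputAction, or ManyInputsAction according to how many sources feed it. A self-contained sketch of that first splitting step (plain C++, illustrative values, no LLVM types):

#include <cstdio>
#include <vector>

constexpr int UndefElem = -1; // stands in for UndefMaskElem

int main() {
  // An 8-element shuffle viewed as 2 source and 2 destination registers of 4.
  std::vector<int> Mask = {0, 5, 1, 4, /*dest 1:*/ 2, 3, UndefElem, 1};
  unsigned NumSrc = 2, NumDest = 2;
  unsigned SzDest = Mask.size() / NumDest, SzSrc = Mask.size() / NumSrc;

  for (unsigned I = 0; I < NumDest; ++I) {
    std::vector<std::vector<int>> RegMasks(NumSrc);
    for (unsigned K = 0; K < SzDest; ++K) {
      int Elt = Mask[I * SzDest + K];
      if (Elt == UndefElem)
        continue;
      unsigned SrcReg = Elt / SzSrc; // which source register feeds this lane
      if (RegMasks[SrcReg].empty())
        RegMasks[SrcReg].assign(SzDest, UndefElem);
      RegMasks[SrcReg][K] = Elt % SzSrc; // lane within that source register
    }
    // With this mask, dest 0 draws on both sources (the ManyInputsAction
    // case) while dest 1 draws on one (the SingleInputAction case).
    for (unsigned S = 0; S < NumSrc; ++S) {
      if (RegMasks[S].empty())
        continue;
      std::printf("dest %u <- src %u:", I, S);
      for (int E : RegMasks[S])
        std::printf(" %d", E);
      std::printf("\n");
    }
  }
}
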
@@ -543,9 +658,8 @@
     Value *Val = Worklist.pop_back_val();
     Value *Leader = ECs.getOrInsertLeaderValue(Val);
 
-    if (Visited.count(Val))
+    if (!Visited.insert(Val).second)
       continue;
-    Visited.insert(Val);
 
     // Non-instructions terminate a chain successfully.
     if (!isa<Instruction>(Val))
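
The Visited change in this hunk is the usual single-lookup idiom: a set's insert() returns an {iterator, inserted} pair, so one hash probe both tests membership and records the value, where count() followed by insert() probed twice. The same pattern sketched with std::unordered_set:

#include <cstdio>
#include <unordered_set>

int main() {
  std::unordered_set<int> Visited;
  for (int V : {1, 2, 1, 3, 2}) {
    if (!Visited.insert(V).second)
      continue; // already visited; this insert() was a no-op
    std::printf("first visit: %d\n", V); // prints 1, 2, 3
  }
}
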
@@ -648,7 +762,7 @@
     return;
   }
 
-  for (auto &AccGroupListOp : AccGroups->operands()) {
+  for (const auto &AccGroupListOp : AccGroups->operands()) {
     auto *Item = cast<MDNode>(AccGroupListOp.get());
     assert(isValidAsAccessGroup(Item) && "List item must be an access group");
     List.insert(Item);
@@ -1383,12 +1497,12 @@
   SmallVector<StringRef, 8> ListAttr;
   S.split(ListAttr, ",");
 
-  for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
+  for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
 #ifndef NDEBUG
     LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
     Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
-    assert(Info.hasValue() && "Invalid name for a VFABI variant.");
-    assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
+    assert(Info && "Invalid name for a VFABI variant.");
+    assert(CI.getModule()->getFunction(Info.value().VectorName) &&
            "Vector function is missing.");
 #endif
     VariantMappings.push_back(std::string(S));
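
The assertions above move off Optional<T>'s old accessors (hasValue() replaced by an explicit bool test, getValue() by value()), part of LLVM's convergence on the std::optional spelling. The same idioms, sketched with std::optional itself:

#include <cassert>
#include <optional>
#include <string>

int main() {
  std::optional<std::string> Info("vec");
  assert(Info && "explicit bool test, formerly Info.hasValue()");
  std::string Name = Info.value();   // formerly Info.getValue()
  std::optional<int> Width;
  int W = Width.value_or(0);         // analogue of the old getValueOr()
  (void)Name;
  (void)W;
}
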
diff --git a/src/llvm-project/llvm/lib/Analysis/models/gen-regalloc-eviction-test-model.py b/src/llvm-project/llvm/lib/Analysis/models/gen-regalloc-eviction-test-model.py
index 1cb2492..476163d 100644
--- a/src/llvm-project/llvm/lib/Analysis/models/gen-regalloc-eviction-test-model.py
+++ b/src/llvm-project/llvm/lib/Analysis/models/gen-regalloc-eviction-test-model.py
@@ -22,19 +22,7 @@
     }
 ]
 """
-PER_REGISTER_INT64_FEATURE_LIST = [
-    'mask', 'is_hint', 'is_local', 'is_free', 'max_stage', 'min_stage'
-]
-PER_REGISTER_FLOAT32_FEATURE_LIST = ['nr_urgent',
-    'weighed_reads_by_max', 'weighed_writes_by_max',
-    'weighed_read_writes_by_max', 'weighed_indvars_by_max',
-    'hint_weights_by_max', 'start_bb_freq_by_max', 'end_bb_freq_by_max',
-    'hottest_bb_freq_by_max', 'liverange_size', 'use_def_density',
-    'nr_defs_and_uses', 'nr_broken_hints', 'nr_rematerializable'
-]
-PER_REGISTER_FEATURE_LIST = PER_REGISTER_FLOAT32_FEATURE_LIST + \
-    PER_REGISTER_INT64_FEATURE_LIST
-CONTEXT_FEATURE_LIST = ('progress', 'discount', 'reward', 'step_type')
+PER_REGISTER_FEATURE_LIST = ['mask']
 NUM_REGISTERS = 33
 
 
@@ -42,19 +30,7 @@
   """Returns (time_step_spec, action_spec) for LLVM register allocation."""
   inputs = dict(
       (key, tf.TensorSpec(dtype=tf.int64, shape=(NUM_REGISTERS), name=key))
-      for key in PER_REGISTER_INT64_FEATURE_LIST)
-  inputs.update(
-      dict((key,
-            tf.TensorSpec(dtype=tf.float32, shape=(NUM_REGISTERS), name=key))
-           for key in PER_REGISTER_FLOAT32_FEATURE_LIST))
-  inputs['progress'] = tf.TensorSpec(
-      dtype=tf.float32, shape=(), name='progress')
-  inputs.update(
-      dict((key, tf.TensorSpec(dtype=tf.float32, shape=(), name=key))
-           for key in ['discount', 'reward']))
-  inputs.update(
-      dict((key, tf.TensorSpec(dtype=tf.int32, shape=(), name=key))
-           for key in ['step_type']))
+      for key in PER_REGISTER_FEATURE_LIST)
   return inputs
 
 
@@ -70,17 +46,7 @@
   module.var = tf.Variable(0, dtype=tf.int64)
 
   def action(*inputs):
-    s1 = tf.reduce_sum([
-        tf.cast(inputs[0][key], tf.float32) for key in PER_REGISTER_FEATURE_LIST
-    ],
-        axis=0)
-    s2 = tf.reduce_sum(
-        [tf.cast(inputs[0][key], tf.float32) for key in CONTEXT_FEATURE_LIST])
-    # Add a large number so s won't be 0.
-    s = s1 + s2 + 123456789.123456789
-    # Equals to mask feature.
-    mask_alias = tf.not_equal(s * tf.cast(inputs[0]['mask'], tf.float32), 0)
-    result = tf.math.argmax(mask_alias, axis=-1) + module.var
+    result = tf.math.argmax(inputs[0]['mask'], axis=-1) + module.var
     return {POLICY_DECISION_LABEL: result}
   module.action = tf.function()(action)
   action = {