Importing rustc-1.38.0

Bug: 146571186
Change-Id: Idd23b6282b5f216d22a897103cac97284f84b416
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
index 801f27b..20ea0ef 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
@@ -1,9 +1,8 @@
 ;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
 ;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ;
 ;===------------------------------------------------------------------------===;
 ;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 8b3480f..c9524da 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1,15 +1,15 @@
 //===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -147,8 +147,7 @@
     : MCTargetAsmParser(Options, STI, MII) {
     // Check for 64-bit vs. 32-bit pointer mode.
     const Triple &TheTriple = STI.getTargetTriple();
-    IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
-               TheTriple.getArch() == Triple::ppc64le);
+    IsPPC64 = TheTriple.isPPC64();
     IsDarwin = TheTriple.isMacOSX();
     // Initialize the set of available features.
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1129,7 +1128,7 @@
   }
 }
 
-static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
                                          unsigned VariantID = 0);
 
 bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1148,7 +1147,7 @@
   case Match_MissingFeature:
     return Error(IDLoc, "instruction use requires an option to be enabled");
   case Match_MnemonicFail: {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = PPCMnemonicSpellCheck(
         ((PPCOperand &)*Operands[0]).getToken(), FBS);
     return Error(IDLoc, "invalid instruction" + Suggestion,
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/CMakeLists.txt b/src/llvm-project/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3130d10..affe295 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -19,6 +19,7 @@
   PPCAsmPrinter.cpp
   PPCBranchSelector.cpp
   PPCBranchCoalescing.cpp
+  PPCCallingConv.cpp
   PPCCCState.cpp
   PPCCTRLoops.cpp
   PPCHazardRecognizers.cpp
@@ -31,6 +32,7 @@
   PPCLoopPreIncPrep.cpp
   PPCMCInstLower.cpp
   PPCMachineFunctionInfo.cpp
+  PPCMachineScheduler.cpp
   PPCMIPeephole.cpp
   PPCRegisterInfo.cpp
   PPCQPXLoadSplat.cpp
@@ -50,6 +52,5 @@
 
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
-add_subdirectory(InstPrinter)
 add_subdirectory(MCTargetDesc)
 add_subdirectory(TargetInfo)
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index ea3e7ea..5d9de6d 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -1,9 +1,8 @@
 ;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
 ;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ;
 ;===------------------------------------------------------------------------===;
 ;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 26869f2..7a8af57 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -1,13 +1,13 @@
 //===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
@@ -61,6 +61,14 @@
                                          createPPCLEDisassembler);
 }
 
+static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  int32_t Offset = SignExtend32<24>(Imm);
+  Inst.addOperand(MCOperand::createImm(Offset));
+  return MCDisassembler::Success;
+}
+
 // FIXME: These can be generated by TableGen from the existing register
 // encoding values!
 
@@ -78,12 +86,6 @@
   return decodeRegisterClass(Inst, RegNo, CRRegs);
 }
 
-static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
-                                            uint64_t Address,
-                                            const void *Decoder) {
-  return decodeRegisterClass(Inst, RegNo, CRRegs);
-}
-
 static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
deleted file mode 100644
index ab30a11..0000000
--- a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMPowerPCAsmPrinter
-  PPCInstPrinter.cpp
-  )
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
deleted file mode 100644
index 7c691de..0000000
--- a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCAsmPrinter
-parent = PowerPC
-required_libraries = MC Support
-add_to_library_groups = PowerPC
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/LLVMBuild.txt
index fd5fa56..34c2957 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/LLVMBuild.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/LLVMBuild.txt
@@ -1,9 +1,8 @@
 ;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===;
 ;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ;
 ;===------------------------------------------------------------------------===;
 ;
@@ -16,7 +15,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser Disassembler MCTargetDesc TargetInfo
 
 [component_0]
 type = TargetGroup
@@ -31,5 +30,5 @@
 type = Library
 name = PowerPCCodeGen
 parent = PowerPC
-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils
 add_to_library_groups = PowerPC
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index 3cea65e..5a2f605 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMPowerPCDesc
   PPCAsmBackend.cpp
+  PPCInstPrinter.cpp
   PPCMCTargetDesc.cpp
   PPCMCAsmInfo.cpp
   PPCMCCodeEmitter.cpp
@@ -7,4 +8,5 @@
   PPCPredicates.cpp
   PPCMachObjectWriter.cpp
   PPCELFObjectWriter.cpp
+  PPCXCOFFObjectWriter.cpp
   )
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
index d3a567d..2dcd3de 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -1,9 +1,8 @@
 ;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
 ;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ;
 ;===------------------------------------------------------------------------===;
 ;
@@ -19,5 +18,5 @@
 type = Library
 name = PowerPCDesc
 parent = PowerPC
-required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
+required_libraries = MC PowerPCInfo Support
 add_to_library_groups = PowerPC
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index a405dd7..8778e91 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,6 +28,7 @@
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case FK_NONE:
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -52,6 +52,8 @@
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case FK_NONE:
+    return 0;
   case FK_Data_1:
     return 1;
   case FK_Data_2:
@@ -74,10 +76,12 @@
 namespace {
 
 class PPCAsmBackend : public MCAsmBackend {
-  const Target &TheTarget;
+protected:
+  Triple TT;
 public:
-  PPCAsmBackend(const Target &T, support::endianness Endian)
-      : MCAsmBackend(Endian), TheTarget(T) {}
+  PPCAsmBackend(const Target &T, const Triple &TT)
+      : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+        TT(TT) {}
 
   unsigned getNumFixupKinds() const override {
     return PPC::NumTargetFixupKinds;
@@ -136,9 +140,11 @@
 
   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                              const MCValue &Target) override {
-    switch ((PPC::Fixups)Fixup.getKind()) {
+    switch ((unsigned)Fixup.getKind()) {
     default:
       return false;
+    case FK_NONE:
+      return true;
     case PPC::fixup_ppc_br24:
     case PPC::fixup_ppc_br24abs:
       // If the target symbol has a local entry point we must not attempt
@@ -187,59 +193,76 @@
 
     return true;
   }
-
-  unsigned getPointerSize() const {
-    StringRef Name = TheTarget.getName();
-    if (Name == "ppc64" || Name == "ppc64le") return 8;
-    assert(Name == "ppc32" && "Unknown target name!");
-    return 4;
-  }
 };
 } // end anonymous namespace
 
 
 // FIXME: This should be in a separate file.
 namespace {
-  class DarwinPPCAsmBackend : public PPCAsmBackend {
-  public:
-    DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, support::big) { }
 
-    std::unique_ptr<MCObjectTargetWriter>
-    createObjectTargetWriter() const override {
-      bool is64 = getPointerSize() == 8;
-      return createPPCMachObjectWriter(
-          /*Is64Bit=*/is64,
-          (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
-          MachO::CPU_SUBTYPE_POWERPC_ALL);
-    }
-  };
+class DarwinPPCAsmBackend : public PPCAsmBackend {
+public:
+  DarwinPPCAsmBackend(const Target &T, const Triple &TT)
+      : PPCAsmBackend(T, TT) {}
 
-  class ELFPPCAsmBackend : public PPCAsmBackend {
-    uint8_t OSABI;
-  public:
-    ELFPPCAsmBackend(const Target &T, support::endianness Endian,
-                     uint8_t OSABI)
-        : PPCAsmBackend(T, Endian), OSABI(OSABI) {}
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    bool Is64 = TT.isPPC64();
+    return createPPCMachObjectWriter(
+        /*Is64Bit=*/Is64,
+        (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+        MachO::CPU_SUBTYPE_POWERPC_ALL);
+  }
+};
 
-    std::unique_ptr<MCObjectTargetWriter>
-    createObjectTargetWriter() const override {
-      bool is64 = getPointerSize() == 8;
-      return createPPCELFObjectWriter(is64, OSABI);
-    }
-  };
+class ELFPPCAsmBackend : public PPCAsmBackend {
+public:
+  ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+    bool Is64 = TT.isPPC64();
+    return createPPCELFObjectWriter(Is64, OSABI);
+  }
+
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+};
+
+class XCOFFPPCAsmBackend : public PPCAsmBackend {
+public:
+  XCOFFPPCAsmBackend(const Target &T, const Triple &TT)
+      : PPCAsmBackend(T, TT) {}
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    return createPPCXCOFFObjectWriter(TT.isArch64Bit());
+  }
+};
 
 } // end anonymous namespace
 
+Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
+  if (TT.isPPC64()) {
+    if (Name == "R_PPC64_NONE")
+      return FK_NONE;
+  } else {
+    if (Name == "R_PPC_NONE")
+      return FK_NONE;
+  }
+  return MCAsmBackend::getFixupKind(Name);
+}
+
 MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
                                         const MCSubtargetInfo &STI,
                                         const MCRegisterInfo &MRI,
                                         const MCTargetOptions &Options) {
   const Triple &TT = STI.getTargetTriple();
   if (TT.isOSDarwin())
-    return new DarwinPPCAsmBackend(T);
+    return new DarwinPPCAsmBackend(T, TT);
 
-  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
-  bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
-  return new ELFPPCAsmBackend(
-      T, IsLittleEndian ? support::little : support::big, OSABI);
+  if (TT.isOSBinFormatXCOFF())
+    return new XCOFFPPCAsmBackend(T, TT);
+
+  return new ELFPPCAsmBackend(T, TT);
 }
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a3caf9a..042ddf4 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -134,6 +133,9 @@
   } else {
     switch ((unsigned)Fixup.getKind()) {
       default: llvm_unreachable("invalid fixup kind!");
+    case FK_NONE:
+      Type = ELF::R_PPC_NONE;
+      break;
     case PPC::fixup_ppc_br24abs:
       Type = ELF::R_PPC_ADDR24;
       break;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index dce44399..8454897 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
similarity index 94%
rename from src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
rename to src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index fc29e4e..0e64ae5 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPCInstrInfo.h"
@@ -382,8 +381,11 @@
 
   // Branches can take an immediate operand.  This is used by the branch
   // selection pass to print .+8, an eight byte displacement from the PC.
-  O << ".+";
-  printAbsBranchOperand(MI, OpNo, O);
+  O << ".";
+  int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+  if (Imm >= 0)
+    O << "+";
+  O << Imm;
 }
 
 void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
@@ -442,13 +444,22 @@
   // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
   // come at the _end_ of the expression.
   const MCOperand &Op = MI->getOperand(OpNo);
-  const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
-  O << refExp.getSymbol().getName();
+  const MCSymbolRefExpr *RefExp = nullptr;
+  const MCConstantExpr *ConstExp = nullptr;
+  if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Op.getExpr())) {
+    RefExp = cast<MCSymbolRefExpr>(BinExpr->getLHS());
+    ConstExp = cast<MCConstantExpr>(BinExpr->getRHS());
+  } else
+    RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
+
+  O << RefExp->getSymbol().getName();
   O << '(';
   printOperand(MI, OpNo+1, O);
   O << ')';
-  if (refExp.getKind() != MCSymbolRefExpr::VK_None)
-    O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
+  if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+    O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
+  if (ConstExp != nullptr)
+    O << '+' << ConstExp->getValue();
 }
 
 /// showRegistersWithPercentPrefix - Check if this register name should be
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
similarity index 90%
rename from src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
rename to src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 351ccef..725ae2a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -1,9 +1,8 @@
 //===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
-#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
 
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCInstPrinter.h"
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index fb7bf23..5f0005e 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,3 +81,9 @@
   UseIntegratedAssembler = true;
 }
 
+void PPCXCOFFMCAsmInfo::anchor() {}
+
+PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
+  assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+  CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+}
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index e252ac9..42cb62a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,13 +1,12 @@
 //===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the declaration of the MCAsmInfoDarwin class.
+// This file contains the declarations of the PowerPC MCAsmInfo classes.
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,6 +15,7 @@
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
 #include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoXCOFF.h"
 
 namespace llvm {
 class Triple;
@@ -34,6 +34,13 @@
   explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
 };
 
+class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
+  virtual void anchor();
+
+public:
+  explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
+};
+
 } // namespace llvm
 
 #endif
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 8c15ade..676efc5 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -217,7 +216,7 @@
   Fixups.push_back(MCFixup::create(0, MO.getExpr(),
                                    (MCFixupKind)PPC::fixup_ppc_nofixup));
   const Triple &TT = STI.getTargetTriple();
-  bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+  bool isPPC64 = TT.isPPC64();
   return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
 }
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index a4bcff4..1324faa 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,9 +98,10 @@
   unsigned getInstSizeInBytes(const MCInst &MI) const;
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // namespace llvm
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 32e6a0b..d467f5c 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 8bb4791..449e2c3 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -1,9 +1,8 @@
 //===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a1e4e07..90c3c8d 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCAsmInfo.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -47,9 +48,9 @@
 #define GET_REGINFO_MC_DESC
 #include "PPCGenRegisterInfo.inc"
 
-// Pin the vtable to this file.
 PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
 
+// Pin the vtable to this file.
 PPCTargetStreamer::~PPCTargetStreamer() = default;
 
 static MCInstrInfo *createPPCMCInstrInfo() {
@@ -82,6 +83,8 @@
   MCAsmInfo *MAI;
   if (TheTriple.isOSDarwin())
     MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+  else if (TheTriple.isOSBinFormatXCOFF())
+    MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
   else
     MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
 
@@ -182,16 +185,33 @@
 
   void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
     auto *Symbol = cast<MCSymbolELF>(S);
+
     // When encoding an assignment to set symbol A to symbol B, also copy
     // the st_other bits encoding the local entry point offset.
-    if (Value->getKind() != MCExpr::SymbolRef)
-      return;
-    const auto &RhsSym = cast<MCSymbolELF>(
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
-    unsigned Other = Symbol->getOther();
+    if (copyLocalEntry(Symbol, Value))
+      UpdateOther.insert(Symbol);
+    else
+      UpdateOther.erase(Symbol);
+  }
+
+  void finish() override {
+    for (auto *Sym : UpdateOther)
+      copyLocalEntry(Sym, Sym->getVariableValue());
+  }
+
+private:
+  SmallPtrSet<MCSymbolELF *, 32> UpdateOther;
+
+  bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) {
+    auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
+    if (!Ref)
+      return false;
+    const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+    unsigned Other = D->getOther();
     Other &= ~ELF::STO_PPC64_LOCAL_MASK;
     Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
-    Symbol->setOther(Other);
+    D->setOther(Other);
+    return true;
   }
 };
 
@@ -217,6 +237,27 @@
   }
 };
 
+class PPCTargetXCOFFStreamer : public PPCTargetStreamer {
+public:
+  PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
+  void emitTCEntry(const MCSymbol &S) override {
+    report_fatal_error("TOC entries not supported yet.");
+  }
+
+  void emitMachine(StringRef CPU) override {
+    llvm_unreachable("Machine pseudo-ops are invalid for XCOFF.");
+  }
+
+  void emitAbiVersion(int AbiVersion) override {
+    llvm_unreachable("ABI-version pseudo-ops are invalid for XCOFF.");
+  }
+
+  void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+    llvm_unreachable("Local-entry pseudo-ops are invalid for XCOFF.");
+  }
+};
+
 } // end anonymous namespace
 
 static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -231,6 +272,8 @@
   const Triple &TT = STI.getTargetTriple();
   if (TT.isOSBinFormatELF())
     return new PPCTargetELFStreamer(S);
+  if (TT.isOSBinFormatXCOFF())
+    return new PPCTargetXCOFFStreamer(S);
   return new PPCTargetMachOStreamer(S);
 }
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index d6e450c..74b67bd 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@
 class StringRef;
 class raw_pwrite_stream;
 
-Target &getThePPC32Target();
-Target &getThePPC64Target();
-Target &getThePPC64LETarget();
-
 MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCRegisterInfo &MRI,
                                       MCContext &Ctx);
@@ -56,6 +51,9 @@
 std::unique_ptr<MCObjectTargetWriter>
 createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype);
 
+/// Construct a PPC XCOFF object writer.
+std::unique_ptr<MCObjectTargetWriter> createPPCXCOFFObjectWriter(bool Is64Bit);
+
 /// Returns true iff Val consists of one contiguous run of 1s with any number of
 /// 0s on either side.  The 1s are allowed to wrap from LSB to MSB, so
 /// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is not,
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index ff6cf58..4cf7fd1 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index c2987b6..284e52c 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -1,9 +1,8 @@
 //===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 481ba3f..d686a8e 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -1,9 +1,8 @@
 //===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
new file mode 100644
index 0000000..9c66128
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -0,0 +1,29 @@
+//===-- PPCXCOFFObjectWriter.cpp - PowerPC XCOFF Writer -------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+
+public:
+  PPCXCOFFObjectWriter(bool Is64Bit);
+};
+} // end anonymous namespace
+
+PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
+    : MCXCOFFObjectTargetWriter(Is64Bit) {}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
+  return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+}
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td b/src/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
index 17c3796..2a10322 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1,22 +1,21 @@
-//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-===//
+//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the resources required by P9 instructions. This is part
-// P9 processor model used for instruction scheduling. This file should contain
-// all of the instructions that may be used on Power 9. This is not just
-// instructions that are new on Power 9 but also instructions that were
+// This file defines the resources required by P9 instructions. This is part of
+// the P9 processor model used for instruction scheduling. This file should
+// contain all the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
 // available on earlier architectures and are still used in Power 9.
 //
 // The makeup of the P9 CPU is modeled as follows:
 //   - Each CPU is made up of two superslices.
 //   - Each superslice is made up of two slices. Therefore, there are 4 slices
-//      for each CPU.
+//   for each CPU.
 //   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
 //   - Each CPU has:
 //     - One CY (Crypto) unit P9_CY_*
@@ -33,9 +32,8 @@
 
 // Two cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "VADDU(B|H|W|D)M$"),
     (instregex "VAND(C)?$"),
@@ -85,9 +83,9 @@
 )>;
 
 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
+// single slice. However, since it is Restricted, it requires all 3 dispatches
 // (DISP) for that superslice.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "TABORT(D|W)C(I)?$"),
     (instregex "MTFSB(0|1)$"),
@@ -103,7 +101,7 @@
 )>;
 
 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "XSMAX(C|J)?DP$"),
     (instregex "XSMIN(C|J)?DP$"),
@@ -120,11 +118,11 @@
 )>;
 
 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "S(L|R)D$"),
     (instregex "SRAD(I)?$"),
-    (instregex "EXTSWSLI$"),
+    (instregex "EXTSWSLI_32_64$"),
     (instregex "MFV(S)?RD$"),
     (instregex "MTVSRD$"),
     (instregex "MTVSRW(A|Z)$"),
@@ -160,6 +158,7 @@
     XSNEGDP,
     XSCPSGNDP,
     MFVSRWZ,
+    EXTSWSLI,
     SRADI_32,
     RLDIC,
     RFEBB,
@@ -171,9 +170,9 @@
 )>;
 
 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
-//  slingle slice. However, since it is Restricted it requires all 3 dispatches
-//  (DISP) for that superslice.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// single slice. However, since it is Restricted, it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "RLDC(L|R)$"),
     (instregex "RLWIMI(8)?$"),
@@ -200,9 +199,8 @@
 
 // Three cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "M(T|F)VSCR$"),
     (instregex "VCMPNEZ(B|H|W)$"),
@@ -285,10 +283,9 @@
 )>;
 
 // 7 cycle DP vector operation that uses an entire superslice.
-//  Uses both DP units (the even DPE and odd DPO units), two pipelines
-//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
+// EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     VADDFP,
     VCTSXS,
@@ -395,18 +392,17 @@
     VSUMSWS
 )>;
 
-
 // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-//  dispatch units for the superslice.
-def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
-    (instregex "MADD(HD|HDU|LD)$"),
+    (instregex "MADD(HD|HDU|LD|LD8)$"),
     (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
 )>;
 
 // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-//  dispatch units for the superslice.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FRSP,
     (instregex "FRI(N|P|Z|M)(D|S)$"),
@@ -448,26 +444,26 @@
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations can be done in parallel.
-//  The DP is restricted so we need a full 5 dispatches.
+// These operations can be done in parallel. The DP is restricted so we need a
+// full 4 dispatches.
 def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FSEL(D|S)o$")
 )>;
 
 // 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
 def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "MUL(H|L)(D|W)(U)?o$")
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations must be done sequentially.
-//  The DP is restricted so we need a full 5 dispatches.
+// These operations must be done sequentially. The DP is restricted so we need a
+// full 4 dispatches.
 def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FRI(N|P|Z|M)(D|S)o$"),
     (instregex "FRE(S)?o$"),
@@ -483,8 +479,8 @@
     FRSPo
 )>;
 
-// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
       (instrs
     XSADDDP,
     XSADDSP,
@@ -520,9 +516,9 @@
 )>;
 
 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "LVS(L|R)$"),
     (instregex "VSPLTIS(W|H|B)$"),
@@ -628,9 +624,9 @@
 )>;
 
 // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDSRo,
     XSADDQP,
@@ -652,17 +648,17 @@
 )>;
 
 // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCTSQo
 )>;
 
 // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSMADDQP,
     XSMADDQPO,
@@ -677,39 +673,39 @@
 )>;
 
 // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCFSQo
 )>;
 
 // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSDIVQP,
     XSDIVQPO
 )>;
 
 // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSSQRTQP,
     XSSQRTQPO
 )>;
 
 // 6 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LXVL(L)?")
 )>;
 
 // 5 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LVE(B|H|W)X$"),
     (instregex "LVX(L)?"),
@@ -728,7 +724,7 @@
 )>;
 
 // 4 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "DCB(F|T|ST)(EP)?$"),
     (instregex "DCBZ(L)?(EP)?$"),
@@ -757,8 +753,8 @@
 )>;
 
 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
-//  superslice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWZX,
     LFDX,
@@ -768,7 +764,7 @@
 // Cracked Load Instructions.
 // Load instructions that can be done in parallel.
 def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C],
       (instrs
     SLBIA,
     SLBIE,
@@ -782,17 +778,26 @@
 // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
 // operations can be run in parallel.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
       (instrs
-    (instregex "L(W|H)ZU(X)?(8)?$"),
+    (instregex "L(W|H)ZU(X)?(8)?$")
+)>;
+
+// Cracked TEND Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C],
+      (instrs
     TEND
 )>;
 
+
 // Cracked Store Instruction
 // Consecutive Store and ALU instructions. The store is restricted and requires
 // three dispatches.
 def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(B|H|W|D)CX$")
 )>;
@@ -800,16 +805,16 @@
 // Cracked Load Instruction.
 // Two consecutive load operations for a total of 8 cycles.
 def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LDMX
 )>;
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "LHA(X)?(8)?$"),
     (instregex "CP_PASTE(8)?o$"),
@@ -819,20 +824,19 @@
 
 // Cracked Restricted Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 6 dispatches are required as this is both cracked and restricted.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWAX
 )>;
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSIWAX,
     LIWAX
@@ -844,7 +848,7 @@
 // their latencies are added.
 // Full 6 dispatches are required as this is a restricted instruction.
 def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFSX,
     LFS
@@ -852,10 +856,9 @@
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSSP,
     LXSSPX,
@@ -866,7 +869,7 @@
 // Cracked 3-Way Load Instruction
 // Load with two ALU operations that depend on each other
 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
       (instrs
     (instregex "LHAU(X)?(8)?$"),
     LWAUX
@@ -874,12 +877,11 @@
 
 // Cracked Load that requires the PM resource.
 // Since the Load and the PM cannot be done at the same time the latencies are
-//  added. Requires 8 cycles.
-// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
-//  as well as 3 dispatches for the PM. The Load requires the remaining 2
-//  dispatches.
+// added. Requires 8 cycles. Since the PM requires the full superslice we need
+// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
+// requires the remaining 1 dispatch.
 def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LXVH8X,
     LXVDSX,
@@ -887,8 +889,8 @@
 )>;
 
 // Single slice Restricted store operation. The restricted operation requires
-//  all three dispatches for the superslice.
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// all three dispatches for the superslice.
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "STF(S|D|IWX|SX|DX)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
@@ -905,10 +907,9 @@
 )>;
 
 // Vector Store Instruction
-// Requires the whole superslice and therefore requires all three dispatches
+// Requires the whole superslice and therefore requires one dispatch
 // as well as both the Even and Odd exec pipelines.
-def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "STVE(B|H|W)X$"),
     (instregex "STVX(L)?$"),
@@ -916,18 +917,18 @@
 )>;
 
 // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "MTCTR(8)?(loop)?$"),
     (instregex "MTLR(8)?$")
 )>;
 
 // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "M(T|F)VRSAVE(v)?$"),
     (instregex "M(T|F)PMR$"),
@@ -938,10 +939,9 @@
 )>;
 
 // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVW,
     DIVWU,
@@ -949,10 +949,9 @@
 )>;
 
 // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVWE,
     DIVD,
@@ -964,29 +963,28 @@
 )>;
 
 // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVDE,
     DIVDEU
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     (instregex "DIVW(U)?(O)?o$")
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDo,
     DIVDUo,
@@ -995,10 +993,10 @@
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 42.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 42.
 def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDEo,
     DIVDEUo
@@ -1008,11 +1006,11 @@
 
 // Cracked, restricted, ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 6 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches. ALU ops are
+// 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     MTCRF,
     MTCRF8
@@ -1020,11 +1018,11 @@
 
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches. ALU ops are
+// 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "ADDC(8)?o$"),
     (instregex "SUBFC(8)?o$")
@@ -1036,7 +1034,7 @@
 // One of the ALU ops is restricted the other is not so we have a total of
 // 5 dispatches.
 def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "F(N)?ABS(D|S)o$"),
     (instregex "FCPSGN(D|S)o$"),
@@ -1046,22 +1044,22 @@
 
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     MCRFS
 )>;
 
 // Cracked Restricted ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 6 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MTFSF(b|o)?$"),
     (instregex "MTFSFI(o)?$")
@@ -1071,7 +1069,7 @@
 // The two ops cannot be done in parallel.
 // One of the ALU ops is restricted and takes 3 dispatches.
 def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "RLD(I)?C(R|L)o$"),
     (instregex "RLW(IMI|INM|NM)(8)?o$"),
@@ -1086,7 +1084,7 @@
 // The two ops cannot be done in parallel.
 // Both of the ALU ops are restricted and take 3 dispatches.
 def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFFS(L|CE|o)?$")
 )>;
@@ -1095,143 +1093,141 @@
 // total of 6 cycles. All of the ALU operations are also restricted so each
 // takes 3 dispatches for a total of 9.
 def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFCR(8)?$")
 )>;
 
 // Cracked instruction made of two ALU ops.
 // The two ops cannot be done in parallel.
-def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
-    (instregex "EXTSWSLIo$"),
+    (instregex "EXTSWSLI_32_64o$"),
     (instregex "SRAD(I)?o$"),
+    EXTSWSLIo,
     SLDo,
     SRDo,
     RLDICo
 )>;
 
 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIV
 )>;
 
 // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVo
 )>;
 
 // 36 Cycle DP Instruction.
 // Instruction can be done on a single slice.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTDP
 )>;
 
 // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRT
 )>;
 
 // 36 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTDP
 )>;
 
 // 27 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTSP
 )>;
 
 // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTo
 )>;
 
 // 26 Cycle DP Instruction.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTSP
 )>;
 
 // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRTS
 )>;
 
 // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTSo
 )>;
 
-// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVDP
 )>;
 
 // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIVS
 )>;
 
 // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVSo
 )>;
 
-// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVSP
 )>;
 
 // 24 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-//  superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVSP
 )>;
 
 // 33 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-//  superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVDP
 )>;
 
 // Instruction cracked into three pieces. One Load and two ALU operations.
 // The Load and one of the ALU ops cannot be run at the same time and so the
-//  latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
+// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
 // Both the load and the ALU that depends on it are restricted and so they take
-//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// a total of 6 dispatches. The final dispatch comes from the second ALU op.
 // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
 def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(SU|SUX)$")
 )>;
@@ -1240,7 +1236,7 @@
 // the store and so it can be run at the same time as the store. The store is
 // also restricted.
 def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "STF(S|D)U(X)?$"),
     (instregex "ST(B|H|W|D)U(X)?(8)?$")
@@ -1249,20 +1245,19 @@
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
 // the load and so it can be run at the same time as the load.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
       (instrs
     (instregex "LBZU(X)?(8)?$"),
     (instregex "LDU(X)?$")
 )>;
 
-
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
-//  the load and so it can be run at the same time as the load. The load is also
-//  restricted. 3 dispatches are from the restricted load while the other two
-//  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
-//  is required for the ALU.
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the other one
+// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+// is required for the ALU.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(DU|DUX)$")
 )>;
@@ -1270,9 +1265,9 @@
 // Crypto Instructions
 
 // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "VPMSUM(B|H|W|D)$"),
     (instregex "V(N)?CIPHER(LAST)?$"),
@@ -1282,14 +1277,14 @@
 // Branch Instructions
 
 // Two Cycle Branch
-def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BR_2C, DISP_BR_1C],
       (instrs
   (instregex "BCCCTR(L)?(8)?$"),
   (instregex "BCCL(A|R|RL)?$"),
   (instregex "BCCTR(L)?(8)?(n)?$"),
   (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
   (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
-  (instregex "BL(_TLS)?$"),
+  (instregex "BL(_TLS|_NOP)?$"),
   (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
   (instregex "BLA(8|8_NOP)?$"),
   (instregex "BLR(8|L)?$"),
@@ -1313,8 +1308,7 @@
 
 // Five Cycle Branch with a 2 Cycle ALU Op
 // Operations must be done consecutively and not in parallel.
-def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
       (instrs
     ADDPCIS
 )>;
@@ -1324,17 +1318,15 @@
 // Atomic Load
 def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
               IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
+              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "L(D|W)AT$")
 )>;
 
 // Atomic Store
 def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(D|W)AT$")
 )>;
@@ -1406,6 +1398,7 @@
   MBAR,
   MSYNC,
   SLBSYNC,
+  SLBFEEo,
   NAP,
   STOP,
   TRAP,
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPC.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPC.h
index bfc613a..c6951ab 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPC.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPC.h
@@ -1,9 +1,8 @@
 //===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,6 @@
 #define LLVM_LIB_TARGET_POWERPC_PPC_H
 
 #include "llvm/Support/CodeGen.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
@@ -57,12 +55,26 @@
                                          MCOperand &OutMO, AsmPrinter &AP,
                                          bool isDarwin);
 
+  void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+  void initializePPCLoopPreIncPrepPass(PassRegistry&);
+  void initializePPCTOCRegDepsPass(PassRegistry&);
+  void initializePPCEarlyReturnPass(PassRegistry&);
+  void initializePPCVSXCopyPass(PassRegistry&);
   void initializePPCVSXFMAMutatePass(PassRegistry&);
+  void initializePPCVSXSwapRemovalPass(PassRegistry&);
+  void initializePPCReduceCRLogicalsPass(PassRegistry&);
+  void initializePPCBSelPass(PassRegistry&);
+  void initializePPCBranchCoalescingPass(PassRegistry&);
+  void initializePPCQPXLoadSplatPass(PassRegistry&);
   void initializePPCBoolRetToIntPass(PassRegistry&);
   void initializePPCExpandISELPass(PassRegistry &);
   void initializePPCPreEmitPeepholePass(PassRegistry &);
   void initializePPCTLSDynamicCallPass(PassRegistry &);
   void initializePPCMIPeepholePass(PassRegistry&);
+
   extern char &PPCVSXFMAMutateID;
 
   namespace PPCII {
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index 98e6e98..8e94a2a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -1,9 +1,8 @@
 //===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,6 +135,9 @@
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                         "Enable VSX instructions",
                                         [FeatureAltivec]>;
+def FeatureTwoConstNR :
+  SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+                   "Requires two constant Newton-Raphson computation">;
 def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
                                         "Enable POWER8 Altivec instructions",
                                         [FeatureAltivec]>;
@@ -162,8 +164,12 @@
                                   "Enable Hardware Transactional Memory instructions">;
 def FeatureMFTB   : SubtargetFeature<"", "FeatureMFTB", "true",
                                         "Implement mftb using the mfspr instruction">;
-def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
-                                     "Target supports add/load integer fusion.">;
+def FeaturePPCPreRASched:
+  SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
+                   "Use PowerPC pre-RA scheduling strategy">;
+def FeaturePPCPostRASched:
+  SubtargetFeature<"ppc-postra-sched", "UsePPCPostRASchedStrategy", "true",
+                   "Use PowerPC post-RA scheduling strategy">;
 def FeatureFloat128 :
   SubtargetFeature<"float128", "HasFloat128", "true",
                    "Enable the __float128 data type for IEEE-754R Binary128.",
@@ -191,6 +197,13 @@
                                         "Enable POWER9 vector instructions",
                                         [FeatureISA3_0, FeatureP8Vector,
                                          FeatureP9Altivec]>;
+// A separate feature for this even though it is equivalent to P9Vector
+// because this is a feature of the implementation rather than the architecture
+// and may go away with future CPU's.
+def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
+                                                 "VectorsUseTwoUnits",
+                                                 "true",
+                                                 "Vectors use two units">;
 
 // Since new processors generally contain a superset of features of those that
 // came before them, the idea is to make implementations of new processors
@@ -215,15 +228,15 @@
        FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
        Feature64Bit /*, Feature64BitRegs */,
        FeatureBPERMD, FeatureExtDiv,
-       FeatureMFTB, DeprecatedDST];
+       FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
   list<SubtargetFeature> Power8SpecificFeatures =
       [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
-       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
-       FeatureFusion];
+       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
   list<SubtargetFeature> Power8FeatureList =
       !listconcat(Power7FeatureList, Power8SpecificFeatures);
   list<SubtargetFeature> Power9SpecificFeatures =
-      [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+      [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
+       FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
   list<SubtargetFeature> Power9FeatureList =
       !listconcat(Power8FeatureList, Power9SpecificFeatures);
 }
@@ -279,10 +292,9 @@
 
 def getAltVSXFMAOpcode : InstrMapping {
   let FilterClass = "AltVSXFMARel";
-  // Instructions with the same BaseName and Interpretation64Bit values
-  // form a row.
+  // Instructions with the same BaseName value form a row.
   let RowFields = ["BaseName"];
-  // Instructions with the same RC value form a column.
+  // Instructions with the same IsVSXFMAAlt value form a column.
   let ColFields = ["IsVSXFMAAlt"];
   // The key column are the (default) addend-killing instructions.
   let KeyCol = ["0"];
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 04aa3c9..269b84b 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
@@ -26,6 +25,7 @@
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
@@ -95,69 +95,103 @@
     return AsmPrinter::doInitialization(M);
   }
 
-    void EmitInstruction(const MachineInstr *MI) override;
+  void EmitInstruction(const MachineInstr *MI) override;
 
-    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+  /// This function is for PrintAsmOperand and PrintAsmMemoryOperand,
+  /// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only.
+  /// The \p MI would be INLINEASM ONLY.
+  void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
 
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O) override;
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O) override;
+  void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &O) override;
 
-    void EmitEndOfAsmFile(Module &M) override;
+  void EmitEndOfAsmFile(Module &M) override;
 
-    void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
-    void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
-    void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
-    bool runOnMachineFunction(MachineFunction &MF) override {
-      Subtarget = &MF.getSubtarget<PPCSubtarget>();
-      bool Changed = AsmPrinter::runOnMachineFunction(MF);
-      emitXRayTable();
-      return Changed;
-    }
-  };
+  void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+  void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
+  void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    Subtarget = &MF.getSubtarget<PPCSubtarget>();
+    bool Changed = AsmPrinter::runOnMachineFunction(MF);
+    emitXRayTable();
+    return Changed;
+  }
+};
 
-  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
-  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCLinuxAsmPrinter(TargetMachine &TM,
-                                std::unique_ptr<MCStreamer> Streamer)
-        : PPCAsmPrinter(TM, std::move(Streamer)) {}
+/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+public:
+  explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+                              std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
-    StringRef getPassName() const override {
-      return "Linux PPC Assembly Printer";
-    }
+  StringRef getPassName() const override {
+    return "Linux PPC Assembly Printer";
+  }
 
-    bool doFinalization(Module &M) override;
-    void EmitStartOfAsmFile(Module &M) override;
+  bool doFinalization(Module &M) override;
+  void EmitStartOfAsmFile(Module &M) override;
 
-    void EmitFunctionEntryLabel() override;
+  void EmitFunctionEntryLabel() override;
 
-    void EmitFunctionBodyStart() override;
-    void EmitFunctionBodyEnd() override;
-    void EmitInstruction(const MachineInstr *MI) override;
-  };
+  void EmitFunctionBodyStart() override;
+  void EmitFunctionBodyEnd() override;
+  void EmitInstruction(const MachineInstr *MI) override;
+};
 
-  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
-  /// OS X
-  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCDarwinAsmPrinter(TargetMachine &TM,
-                                 std::unique_ptr<MCStreamer> Streamer)
-        : PPCAsmPrinter(TM, std::move(Streamer)) {}
+/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+/// OS X
+class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+public:
+  explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+                               std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
-    StringRef getPassName() const override {
-      return "Darwin PPC Assembly Printer";
-    }
+  StringRef getPassName() const override {
+    return "Darwin PPC Assembly Printer";
+  }
 
-    bool doFinalization(Module &M) override;
-    void EmitStartOfAsmFile(Module &M) override;
-  };
+  bool doFinalization(Module &M) override;
+  void EmitStartOfAsmFile(Module &M) override;
+};
+
+class PPCAIXAsmPrinter : public PPCAsmPrinter {
+public:
+  PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
+
+  StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+};
 
 } // end anonymous namespace
 
+void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+                                       raw_ostream &O) {
+  // Computing the address of a global symbol, not calling it.
+  const GlobalValue *GV = MO.getGlobal();
+  MCSymbol *SymToPrint;
+
+  // External or weakly linked global variables need non-lazily-resolved stubs
+  if (Subtarget->hasLazyResolverStub(GV)) {
+    SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+        MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+            SymToPrint);
+    if (!StubSym.getPointer())
+      StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+                                                   !GV->hasInternalLinkage());
+  } else {
+    SymToPrint = getSymbol(GV);
+  }
+
+  SymToPrint->print(O, MAI);
+
+  printOffset(MO.getOffset(), O);
+}
+
 void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                  raw_ostream &O) {
   const DataLayout &DL = getDataLayout();
@@ -165,10 +199,8 @@
 
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
-    unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
-                                                     MO.getReg(), OpNo);
-
-    const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+    // The MI is INLINEASM ONLY and UseVSXReg is always false.
+    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
 
     // Linux assembler (Others?) does not take register mnemonics.
     // FIXME - What about special registers used in mfspr/mtspr?
@@ -192,26 +224,7 @@
     GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress: {
-    // Computing the address of a global symbol, not calling it.
-    const GlobalValue *GV = MO.getGlobal();
-    MCSymbol *SymToPrint;
-
-    // External or weakly linked global variables need non-lazily-resolved stubs
-    if (Subtarget->hasLazyResolverStub(GV)) {
-      SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      MachineModuleInfoImpl::StubValueTy &StubSym =
-          MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
-              SymToPrint);
-      if (!StubSym.getPointer())
-        StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
-                                                     !GV->hasInternalLinkage());
-    } else {
-      SymToPrint = getSymbol(GV);
-    }
-
-    SymToPrint->print(O, MAI);
-
-    printOffset(MO.getOffset(), O);
+    PrintSymbolOperand(MO, O);
     return;
   }
 
@@ -224,7 +237,6 @@
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned AsmVariant,
                                     const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -233,9 +245,7 @@
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
-    case 'c': // Don't print "$" before a global var name or constant.
-      break; // PPC never has a prefix.
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     case 'L': // Write second word of DImode reference.
       // Verify that this operand has two consecutive registers.
       if (!MI->getOperand(OpNo).isReg() ||
@@ -277,7 +287,6 @@
 // assembler operand.
 
 bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                          unsigned AsmVariant,
                                           const char *ExtraCode,
                                           raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
@@ -460,6 +469,7 @@
   StringRef Name = "__tls_get_addr";
   MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
   MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+  const Module *M = MF->getFunction().getParent();
 
   assert(MI->getOperand(0).isReg() &&
          ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
@@ -473,8 +483,14 @@
   if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
       isPositionIndependent())
     Kind = MCSymbolRefExpr::VK_PLT;
-  const MCSymbolRefExpr *TlsRef =
+  const MCExpr *TlsRef =
     MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
+
+  // Add 32768 offset to the symbol so we follow up the latest GOT/PLT ABI.
+  if (Kind == MCSymbolRefExpr::VK_PLT && Subtarget->isSecurePlt() &&
+      M->getPICLevel() == PICLevel::BigPIC)
+    TlsRef = MCBinaryExpr::createAdd(
+        TlsRef, MCConstantExpr::create(32768, OutContext), OutContext);
   const MachineOperand &MO = MI->getOperand(2);
   const GlobalValue *GValue = MO.getGlobal();
   MCSymbol *MOSymbol = getSymbol(GValue);
@@ -576,34 +592,30 @@
     // Into: lwz %rt, .L0$poff - .L0$pb(%ri)
     //       add %rd, %rt, %ri
     // or into (if secure plt mode is on):
-    //       addis r30, r30, .LTOC - .L0$pb@ha
-    //       addi r30, r30, .LTOC - .L0$pb@l
+    //       addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
+    //       addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
     // Get the offset from the GOT Base Register to the GOT
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
     if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
       unsigned PICR = TmpInst.getOperand(0).getReg();
-      MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC"));
+      MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
+          M->getPICLevel() == PICLevel::SmallPIC ? "_GLOBAL_OFFSET_TABLE_"
+                                                 : ".LTOC");
       const MCExpr *PB =
-        MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
-                                OutContext);
+          MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
 
-      const MCExpr *LTOCDeltaExpr =
-        MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext),
-                                PB, OutContext);
+      const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
+          MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
 
-      const MCExpr *LTOCDeltaHi =
-        PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext);
-      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
-                                   .addReg(PICR)
-                                   .addReg(PICR)
-                                   .addExpr(LTOCDeltaHi));
+      const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+      EmitToStreamer(
+          *OutStreamer,
+          MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
 
-      const MCExpr *LTOCDeltaLo =
-        PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext);
-      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
-                                   .addReg(PICR)
-                                   .addReg(PICR)
-                                   .addExpr(LTOCDeltaLo));
+      const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+      EmitToStreamer(
+          *OutStreamer,
+          MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
       return;
     } else {
       MCSymbol *PICOffset =
@@ -854,8 +866,10 @@
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *Exp =
-      MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
-                              OutContext);
+        MCSymbolRefExpr::create(MOSymbol,
+                                isPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO
+                                        : MCSymbolRefExpr::VK_PPC_GOT_TPREL,
+                                OutContext);
     TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
@@ -1640,6 +1654,9 @@
                         std::unique_ptr<MCStreamer> &&Streamer) {
   if (tm.getTargetTriple().isMacOSX())
     return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+  if (tm.getTargetTriple().isOSAIX())
+    return new PPCAIXAsmPrinter(tm, std::move(Streamer));
+
   return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
 }
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 55e105d..104cf2b 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -1,9 +1,8 @@
 //===- PPCBoolRetToInt.cpp ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index bbb977f..5e9a661 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -1,9 +1,8 @@
 //===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -34,10 +33,6 @@
 STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
 STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
 
-namespace llvm {
-    void initializePPCBranchCoalescingPass(PassRegistry&);
-}
-
 //===----------------------------------------------------------------------===//
 //                               PPCBranchCoalescing
 //===----------------------------------------------------------------------===//
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 0d1bb92..793d690 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,9 +1,8 @@
 //===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,16 +25,13 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "ppc-branch-select"
 
 STATISTIC(NumExpanded, "Number of branches expanded to long format");
 
-namespace llvm {
-  void initializePPCBSelPass(PassRegistry&);
-}
-
 namespace {
   struct PPCBSel : public MachineFunctionPass {
     static char ID;
@@ -48,6 +44,17 @@
     // size that is due to potential padding.
     std::vector<std::pair<unsigned, unsigned>> BlockSizes;
 
+    // The first block number which has imprecise instruction address.
+    int FirstImpreciseBlock = -1;
+
+    unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);
+    unsigned ComputeBlockSizes(MachineFunction &Fn);
+    void modifyAdjustment(MachineFunction &Fn);
+    int computeBranchSize(MachineFunction &Fn,
+                          const MachineBasicBlock *Src,
+                          const MachineBasicBlock *Dest,
+                          unsigned BrOffset);
+
     bool runOnMachineFunction(MachineFunction &Fn) override;
 
     MachineFunctionProperties getRequiredProperties() const override {
@@ -70,43 +77,47 @@
   return new PPCBSel();
 }
 
-bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
-  const PPCInstrInfo *TII =
-      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
-  // Give the blocks of the function a dense, in-order, numbering.
-  Fn.RenumberBlocks();
-  BlockSizes.resize(Fn.getNumBlockIDs());
+/// In order to make MBB aligned, we need to add an adjustment value to the
+/// original Offset.
+unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
+                                         unsigned Offset) {
+  unsigned Align = MBB.getAlignment();
+  if (!Align)
+    return 0;
 
-  auto GetAlignmentAdjustment =
-    [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
-    unsigned Align = MBB.getAlignment();
-    if (!Align)
-      return 0;
+  unsigned AlignAmt = 1 << Align;
+  unsigned ParentAlign = MBB.getParent()->getAlignment();
 
-    unsigned AlignAmt = 1 << Align;
-    unsigned ParentAlign = MBB.getParent()->getAlignment();
+  if (Align <= ParentAlign)
+    return OffsetToAlignment(Offset, AlignAmt);
 
-    if (Align <= ParentAlign)
-      return OffsetToAlignment(Offset, AlignAmt);
+  // The alignment of this MBB is larger than the function's alignment, so we
+  // can't tell whether or not it will insert nops. Assume that it will.
+  if (FirstImpreciseBlock < 0)
+    FirstImpreciseBlock = MBB.getNumber();
+  return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+}
 
-    // The alignment of this MBB is larger than the function's alignment, so we
-    // can't tell whether or not it will insert nops. Assume that it will.
-    return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
-  };
-
-  // We need to be careful about the offset of the first block in the function
-  // because it might not have the function's alignment. This happens because,
-  // under the ELFv2 ABI, for functions which require a TOC pointer, we add a
-  // two-instruction sequence to the start of the function.
-  // Note: This needs to be synchronized with the check in
-  // PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+/// We need to be careful about the offset of the first block in the function
+/// because it might not have the function's alignment. This happens because,
+/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a
+/// two-instruction sequence to the start of the function.
+/// Note: This needs to be synchronized with the check in
+/// PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+static inline unsigned GetInitialOffset(MachineFunction &Fn) {
   unsigned InitialOffset = 0;
   if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
       !Fn.getRegInfo().use_empty(PPC::X2))
     InitialOffset = 8;
+  return InitialOffset;
+}
 
-  // Measure each MBB and compute a size for the entire function.
-  unsigned FuncSize = InitialOffset;
+/// Measure each MBB and compute a size for the entire function.
+unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
+  const PPCInstrInfo *TII =
+      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+  unsigned FuncSize = GetInitialOffset(Fn);
+
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
        ++MFI) {
     MachineBasicBlock *MBB = &*MFI;
@@ -124,13 +135,145 @@
     }
 
     unsigned BlockSize = 0;
-    for (MachineInstr &MI : *MBB)
+    for (MachineInstr &MI : *MBB) {
       BlockSize += TII->getInstSizeInBytes(MI);
+      if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
+        FirstImpreciseBlock = MBB->getNumber();
+    }
 
     BlockSizes[MBB->getNumber()].first = BlockSize;
     FuncSize += BlockSize;
   }
 
+  return FuncSize;
+}
+
+/// Modify the basic block align adjustment.
+void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
+  unsigned Offset = GetInitialOffset(Fn);
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock *MBB = &*MFI;
+
+    if (MBB->getNumber() > 0) {
+      auto &BS = BlockSizes[MBB->getNumber()-1];
+      BS.first -= BS.second;
+      Offset -= BS.second;
+
+      unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+
+      BS.first += AlignExtra;
+      BS.second = AlignExtra;
+
+      Offset += AlignExtra;
+    }
+
+    Offset += BlockSizes[MBB->getNumber()].first;
+  }
+}
+
+/// Determine the offset from the branch in Src block to the Dest block.
+/// BrOffset is the offset of the branch instruction inside Src block.
+int PPCBSel::computeBranchSize(MachineFunction &Fn,
+                               const MachineBasicBlock *Src,
+                               const MachineBasicBlock *Dest,
+                               unsigned BrOffset) {
+  int BranchSize;
+  unsigned MaxAlign = 2;
+  bool NeedExtraAdjustment = false;
+  if (Dest->getNumber() <= Src->getNumber()) {
+    // If this is a backwards branch, the delta is the offset from the
+    // start of this block to this branch, plus the sizes of all blocks
+    // from this block to the dest.
+    BranchSize = BrOffset;
+    MaxAlign = std::max(MaxAlign, Src->getAlignment());
+
+    int DestBlock = Dest->getNumber();
+    BranchSize += BlockSizes[DestBlock].first;
+    for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
+      BranchSize += BlockSizes[i].first;
+      MaxAlign = std::max(MaxAlign,
+                          Fn.getBlockNumbered(i)->getAlignment());
+    }
+
+    NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                          (DestBlock >= FirstImpreciseBlock);
+  } else {
+    // Otherwise, add the size of the blocks between this block and the
+    // dest to the number of bytes left in this block.
+    unsigned StartBlock = Src->getNumber();
+    BranchSize = BlockSizes[StartBlock].first - BrOffset;
+
+    MaxAlign = std::max(MaxAlign, Dest->getAlignment());
+    for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
+      BranchSize += BlockSizes[i].first;
+      MaxAlign = std::max(MaxAlign,
+                          Fn.getBlockNumbered(i)->getAlignment());
+    }
+
+    NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                          (Src->getNumber() >= FirstImpreciseBlock);
+  }
+
+  // We tend to over estimate code size due to large alignment and
+  // inline assembly. Usually it causes larger computed branch offset.
+  // But sometimes it may also causes smaller computed branch offset
+  // than actual branch offset. If the offset is close to the limit of
+  // encoding, it may cause problem at run time.
+  // Following is a simplified example.
+  //
+  //              actual        estimated
+  //              address        address
+  //    ...
+  //   bne Far      100            10c
+  //   .p2align 4
+  //   Near:        110            110
+  //    ...
+  //   Far:        8108           8108
+  //
+  //   Actual offset:    0x8108 - 0x100 = 0x8008
+  //   Computed offset:  0x8108 - 0x10c = 0x7ffc
+  //
+  // This example also shows when we can get the largest gap between
+  // estimated offset and actual offset. If there is an aligned block
+  // ABB between branch and target, assume its alignment is <align>
+  // bits. Now consider the accumulated function size FSIZE till the end
+  // of previous block PBB. If the estimated FSIZE is multiple of
+  // 2^<align>, we don't need any padding for the estimated address of
+  // ABB. If actual FSIZE at the end of PBB is 4 bytes more than
+  // multiple of 2^<align>, then we need (2^<align> - 4) bytes of
+  // padding. It also means the actual branch offset is (2^<align> - 4)
+  // larger than computed offset. Other actual FSIZE needs less padding
+  // bytes, so causes smaller gap between actual and computed offset.
+  //
+  // On the other hand, if the inline asm or large alignment occurs
+  // between the branch block and destination block, the estimated address
+  // can be <delta> larger than actual address. If padding bytes are
+  // needed for a later aligned block, the actual number of padding bytes
+  // is at most <delta> more than estimated padding bytes. So the actual
+  // aligned block address is less than or equal to the estimated aligned
+  // block address. So the actual branch offset is less than or equal to
+  // computed branch offset.
+  //
+  // The computed offset is at most ((1 << alignment) - 4) bytes smaller
+  // than actual offset. So we add this number to the offset for safety.
+  if (NeedExtraAdjustment)
+    BranchSize += (1 << MaxAlign) - 4;
+
+  return BranchSize;
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+  const PPCInstrInfo *TII =
+      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+  // Give the blocks of the function a dense, in-order, numbering.
+  Fn.RenumberBlocks();
+  BlockSizes.resize(Fn.getNumBlockIDs());
+  FirstImpreciseBlock = -1;
+
+  // Measure each MBB and compute a size for the entire function.
+  unsigned FuncSize = ComputeBlockSizes(Fn);
+
   // If the entire function is smaller than the displacement of a branch field,
   // we know we don't need to shrink any branches in this function.  This is a
   // common case.
@@ -178,23 +321,7 @@
 
         // Determine the offset from the current branch to the destination
         // block.
-        int BranchSize;
-        if (Dest->getNumber() <= MBB.getNumber()) {
-          // If this is a backwards branch, the delta is the offset from the
-          // start of this block to this branch, plus the sizes of all blocks
-          // from this block to the dest.
-          BranchSize = MBBStartOffset;
-
-          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
-            BranchSize += BlockSizes[i].first;
-        } else {
-          // Otherwise, add the size of the blocks between this block and the
-          // dest to the number of bytes left in this block.
-          BranchSize = -MBBStartOffset;
-
-          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
-            BranchSize += BlockSizes[i].first;
-        }
+        int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);
 
         // If this branch is in range, ignore it.
         if (isInt<16>(BranchSize)) {
@@ -253,26 +380,7 @@
     if (MadeChange) {
       // If we're going to iterate again, make sure we've updated our
       // padding-based contributions to the block sizes.
-      unsigned Offset = InitialOffset;
-      for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
-           ++MFI) {
-        MachineBasicBlock *MBB = &*MFI;
-
-        if (MBB->getNumber() > 0) {
-          auto &BS = BlockSizes[MBB->getNumber()-1];
-          BS.first -= BS.second;
-          Offset -= BS.second;
-
-          unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
-
-          BS.first += AlignExtra;
-          BS.second = AlignExtra;
-
-          Offset += AlignExtra;
-        }
-
-        Offset += BlockSizes[MBB->getNumber()].first;
-      }
+      modifyAdjustment(Fn);
     }
 
     EverMadeChange |= MadeChange;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
index 5510a95..5116f0d 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.cpp
@@ -1,9 +1,8 @@
 //===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.h
index 9be9f11..e349959 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCCState.h
@@ -1,9 +1,8 @@
 //===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6b9e2383..2b8d9b8 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,9 +1,8 @@
 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -72,70 +71,7 @@
 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
 #endif
 
-// The latency of mtctr is only justified if there are more than 4
-// comparisons that will be removed as a result.
-static cl::opt<unsigned>
-SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
-                      cl::desc("Loops with a constant trip count smaller than "
-                               "this value will not use the count register."));
-
-STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
-
-namespace llvm {
-  void initializePPCCTRLoopsPass(PassRegistry&);
-#ifndef NDEBUG
-  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
-#endif
-}
-
 namespace {
-  struct PPCCTRLoops : public FunctionPass {
-
-#ifndef NDEBUG
-    static int Counter;
-#endif
-
-  public:
-    static char ID;
-
-    PPCCTRLoops() : FunctionPass(ID) {
-      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool runOnFunction(Function &F) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<LoopInfoWrapperPass>();
-      AU.addPreserved<LoopInfoWrapperPass>();
-      AU.addRequired<DominatorTreeWrapperPass>();
-      AU.addPreserved<DominatorTreeWrapperPass>();
-      AU.addRequired<ScalarEvolutionWrapperPass>();
-      AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetTransformInfoWrapperPass>();
-    }
-
-  private:
-    bool mightUseCTR(BasicBlock *BB);
-    bool convertToCTRLoop(Loop *L);
-
-  private:
-    const PPCTargetMachine *TM;
-    const PPCSubtarget *STI;
-    const PPCTargetLowering *TLI;
-    const DataLayout *DL;
-    const TargetLibraryInfo *LibInfo;
-    const TargetTransformInfo *TTI;
-    LoopInfo *LI;
-    ScalarEvolution *SE;
-    DominatorTree *DT;
-    bool PreserveLCSSA;
-    TargetSchedModel SchedModel;
-  };
-
-  char PPCCTRLoops::ID = 0;
-#ifndef NDEBUG
-  int PPCCTRLoops::Counter = 0;
-#endif
 
 #ifndef NDEBUG
   struct PPCCTRLoopsVerify : public MachineFunctionPass {
@@ -161,16 +97,6 @@
 #endif // NDEBUG
 } // end anonymous namespace
 
-INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
-                    false, false)
-
-FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
-
 #ifndef NDEBUG
 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                       "PowerPC CTR Loops Verify", false, false)
@@ -183,511 +109,6 @@
 }
 #endif // NDEBUG
 
-bool PPCCTRLoops::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
-  if (!TPC)
-    return false;
-
-  TM = &TPC->getTM<PPCTargetMachine>();
-  STI = TM->getSubtargetImpl(F);
-  TLI = STI->getTargetLowering();
-
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  DL = &F.getParent()->getDataLayout();
-  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
-  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
-  bool MadeChange = false;
-
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
-       I != E; ++I) {
-    Loop *L = *I;
-    if (!L->getParentLoop())
-      MadeChange |= convertToCTRLoop(L);
-  }
-
-  return MadeChange;
-}
-
-static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
-  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
-    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
-
-  return false;
-}
-
-// Determining the address of a TLS variable results in a function call in
-// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
-  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-  if (!GV) {
-    // Recurse to check for constants that refer to TLS global variables.
-    if (const auto *CV = dyn_cast<Constant>(MemAddr))
-      for (const auto &CO : CV->operands())
-        if (memAddrUsesCTR(TM, CO))
-          return true;
-
-    return false;
-  }
-
-  if (!GV->isThreadLocal())
-    return false;
-  TLSModel::Model Model = TM.getTLSModel(GV);
-  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
-}
-
-// Loop through the inline asm constraints and look for something that clobbers
-// ctr.
-static bool asmClobbersCTR(InlineAsm *IA) {
-  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
-  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
-    InlineAsm::ConstraintInfo &C = CIV[i];
-    if (C.Type != InlineAsm::isInput)
-      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
-        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
-          return true;
-  }
-  return false;
-}
-
-bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
-  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
-       J != JE; ++J) {
-    if (CallInst *CI = dyn_cast<CallInst>(J)) {
-      // Inline ASM is okay, unless it clobbers the ctr register.
-      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
-        if (asmClobbersCTR(IA))
-          return true;
-        continue;
-      }
-
-      if (Function *F = CI->getCalledFunction()) {
-        // Most intrinsics don't become function calls, but some might.
-        // sin, cos, exp and log are always calls.
-        unsigned Opcode = 0;
-        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
-          switch (F->getIntrinsicID()) {
-          default: continue;
-          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
-          // we're definitely using CTR.
-          case Intrinsic::ppc_is_decremented_ctr_nonzero:
-          case Intrinsic::ppc_mtctr:
-            return true;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
-                       !defined(setjmp_undefined_for_msvc)
-#  pragma push_macro("setjmp")
-#  undef setjmp
-#  define setjmp_undefined_for_msvc
-#endif
-
-          case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-#  pragma pop_macro("setjmp")
-#  undef setjmp_undefined_for_msvc
-#endif
-
-          case Intrinsic::longjmp:
-
-          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
-          // because, although it does clobber the counter register, the
-          // control can't then return to inside the loop unless there is also
-          // an eh_sjlj_setjmp.
-          case Intrinsic::eh_sjlj_setjmp:
-
-          case Intrinsic::memcpy:
-          case Intrinsic::memmove:
-          case Intrinsic::memset:
-          case Intrinsic::powi:
-          case Intrinsic::log:
-          case Intrinsic::log2:
-          case Intrinsic::log10:
-          case Intrinsic::exp:
-          case Intrinsic::exp2:
-          case Intrinsic::pow:
-          case Intrinsic::sin:
-          case Intrinsic::cos:
-            return true;
-          case Intrinsic::copysign:
-            if (CI->getArgOperand(0)->getType()->getScalarType()->
-                isPPC_FP128Ty())
-              return true;
-            else
-              continue; // ISD::FCOPYSIGN is never a library call.
-          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
-          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
-          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
-          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
-          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
-          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
-          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
-          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
-          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
-          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
-          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
-          }
-        }
-
-        // PowerPC does not use [US]DIVREM or other library calls for
-        // operations on regular types which are not otherwise library calls
-        // (i.e. soft float or atomics). If adapting for targets that do,
-        // additional care is required here.
-
-        LibFunc Func;
-        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
-            LibInfo->getLibFunc(F->getName(), Func) &&
-            LibInfo->hasOptimizedCodeGen(Func)) {
-          // Non-read-only functions are never treated as intrinsics.
-          if (!CI->onlyReadsMemory())
-            return true;
-
-          // Conversion happens only for FP calls.
-          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
-            return true;
-
-          switch (Func) {
-          default: return true;
-          case LibFunc_copysign:
-          case LibFunc_copysignf:
-            continue; // ISD::FCOPYSIGN is never a library call.
-          case LibFunc_copysignl:
-            return true;
-          case LibFunc_fabs:
-          case LibFunc_fabsf:
-          case LibFunc_fabsl:
-            continue; // ISD::FABS is never a library call.
-          case LibFunc_sqrt:
-          case LibFunc_sqrtf:
-          case LibFunc_sqrtl:
-            Opcode = ISD::FSQRT; break;
-          case LibFunc_floor:
-          case LibFunc_floorf:
-          case LibFunc_floorl:
-            Opcode = ISD::FFLOOR; break;
-          case LibFunc_nearbyint:
-          case LibFunc_nearbyintf:
-          case LibFunc_nearbyintl:
-            Opcode = ISD::FNEARBYINT; break;
-          case LibFunc_ceil:
-          case LibFunc_ceilf:
-          case LibFunc_ceill:
-            Opcode = ISD::FCEIL; break;
-          case LibFunc_rint:
-          case LibFunc_rintf:
-          case LibFunc_rintl:
-            Opcode = ISD::FRINT; break;
-          case LibFunc_round:
-          case LibFunc_roundf:
-          case LibFunc_roundl:
-            Opcode = ISD::FROUND; break;
-          case LibFunc_trunc:
-          case LibFunc_truncf:
-          case LibFunc_truncl:
-            Opcode = ISD::FTRUNC; break;
-          case LibFunc_fmin:
-          case LibFunc_fminf:
-          case LibFunc_fminl:
-            Opcode = ISD::FMINNUM; break;
-          case LibFunc_fmax:
-          case LibFunc_fmaxf:
-          case LibFunc_fmaxl:
-            Opcode = ISD::FMAXNUM; break;
-          }
-        }
-
-        if (Opcode) {
-          EVT EVTy =
-              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
-
-          if (EVTy == MVT::Other)
-            return true;
-
-          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
-            continue;
-          else if (EVTy.isVector() &&
-                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
-            continue;
-
-          return true;
-        }
-      }
-
-      return true;
-    } else if (isa<BinaryOperator>(J) &&
-               J->getType()->getScalarType()->isPPC_FP128Ty()) {
-      // Most operations on ppc_f128 values become calls.
-      return true;
-    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
-               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
-      CastInst *CI = cast<CastInst>(J);
-      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
-          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
-          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
-          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
-        return true;
-    } else if (isLargeIntegerTy(!TM->isPPC64(),
-                                J->getType()->getScalarType()) &&
-               (J->getOpcode() == Instruction::UDiv ||
-                J->getOpcode() == Instruction::SDiv ||
-                J->getOpcode() == Instruction::URem ||
-                J->getOpcode() == Instruction::SRem)) {
-      return true;
-    } else if (!TM->isPPC64() &&
-               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
-               (J->getOpcode() == Instruction::Shl ||
-                J->getOpcode() == Instruction::AShr ||
-                J->getOpcode() == Instruction::LShr)) {
-      // Only on PPC32, for 128-bit integers (specifically not 64-bit
-      // integers), these might be runtime calls.
-      return true;
-    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
-      // On PowerPC, indirect jumps use the counter register.
-      return true;
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
-      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
-        return true;
-    }
-
-    // FREM is always a call.
-    if (J->getOpcode() == Instruction::FRem)
-      return true;
-
-    if (STI->useSoftFloat()) {
-      switch(J->getOpcode()) {
-      case Instruction::FAdd:
-      case Instruction::FSub:
-      case Instruction::FMul:
-      case Instruction::FDiv:
-      case Instruction::FPTrunc:
-      case Instruction::FPExt:
-      case Instruction::FPToUI:
-      case Instruction::FPToSI:
-      case Instruction::UIToFP:
-      case Instruction::SIToFP:
-      case Instruction::FCmp:
-        return true;
-      }
-    }
-
-    for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(*TM, Operand))
-        return true;
-  }
-
-  return false;
-}
-bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
-  bool MadeChange = false;
-
-  // Do not convert small short loops to CTR loop.
-  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
-  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
-    SmallPtrSet<const Value *, 32> EphValues;
-    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
-        *L->getHeader()->getParent());
-    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
-    CodeMetrics Metrics;
-    for (BasicBlock *BB : L->blocks())
-      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
-    // 6 is an approximate latency for the mtctr instruction.
-    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
-      return false;
-  }
-
-  // Process nested loops first.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
-    MadeChange |= convertToCTRLoop(*I);
-    LLVM_DEBUG(dbgs() << "Nested loop converted\n");
-  }
-
-  // If a nested loop has been converted, then we can't convert this loop.
-  if (MadeChange)
-    return MadeChange;
-
-  // Bail out if the loop has irreducible control flow.
-  LoopBlocksRPO RPOT(L);
-  RPOT.perform(LI);
-  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
-    return false;
-
-#ifndef NDEBUG
-  // Stop trying after reaching the limit (if any).
-  int Limit = CTRLoopLimit;
-  if (Limit >= 0) {
-    if (Counter >= CTRLoopLimit)
-      return false;
-    Counter++;
-  }
-#endif
-
-  // We don't want to spill/restore the counter register, and so we don't
-  // want to use the counter register if the loop contains calls.
-  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
-       I != IE; ++I)
-    if (mightUseCTR(*I))
-      return MadeChange;
-
-  SmallVector<BasicBlock*, 4> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
-
-  // If there is an exit edge known to be frequently taken,
-  // we should not transform this loop.
-  for (auto &BB : ExitingBlocks) {
-    Instruction *TI = BB->getTerminator();
-    if (!TI) continue;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      uint64_t TrueWeight = 0, FalseWeight = 0;
-      if (!BI->isConditional() ||
-          !BI->extractProfMetadata(TrueWeight, FalseWeight))
-        continue;
-
-      // If the exit path is more frequent than the loop path,
-      // we return here without further analysis for this loop.
-      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
-      if (( TrueIsExit && FalseWeight < TrueWeight) ||
-          (!TrueIsExit && FalseWeight > TrueWeight))
-        return MadeChange;
-    }
-  }
-
-  BasicBlock *CountedExitBlock = nullptr;
-  const SCEV *ExitCount = nullptr;
-  BranchInst *CountedExitBranch = nullptr;
-  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
-       IE = ExitingBlocks.end(); I != IE; ++I) {
-    const SCEV *EC = SE->getExitCount(L, *I);
-    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
-                      << (*I)->getName() << ": " << *EC << "\n");
-    if (isa<SCEVCouldNotCompute>(EC))
-      continue;
-    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
-      if (ConstEC->getValue()->isZero())
-        continue;
-    } else if (!SE->isLoopInvariant(EC, L))
-      continue;
-
-    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
-      continue;
-
-    // If this exiting block is contained in a nested loop, it is not eligible
-    // for insertion of the branch-and-decrement since the inner loop would
-    // end up messing up the value in the CTR.
-    if (LI->getLoopFor(*I) != L)
-      continue;
-
-    // We now have a loop-invariant count of loop iterations (which is not the
-    // constant zero) for which we know that this loop will not exit via this
-    // existing block.
-
-    // We need to make sure that this block will run on every loop iteration.
-    // For this to be true, we must dominate all blocks with backedges. Such
-    // blocks are in-loop predecessors to the header block.
-    bool NotAlways = false;
-    for (pred_iterator PI = pred_begin(L->getHeader()),
-         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
-      if (!L->contains(*PI))
-        continue;
-
-      if (!DT->dominates(*I, *PI)) {
-        NotAlways = true;
-        break;
-      }
-    }
-
-    if (NotAlways)
-      continue;
-
-    // Make sure this blocks ends with a conditional branch.
-    Instruction *TI = (*I)->getTerminator();
-    if (!TI)
-      continue;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      if (!BI->isConditional())
-        continue;
-
-      CountedExitBranch = BI;
-    } else
-      continue;
-
-    // Note that this block may not be the loop latch block, even if the loop
-    // has a latch block.
-    CountedExitBlock = *I;
-    ExitCount = EC;
-    break;
-  }
-
-  if (!CountedExitBlock)
-    return MadeChange;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-
-  // If we don't have a preheader, then insert one. If we already have a
-  // preheader, then we can use it (except if the preheader contains a use of
-  // the CTR register because some such uses might be reordered by the
-  // selection DAG after the mtctr instruction).
-  if (!Preheader || mightUseCTR(Preheader))
-    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
-  if (!Preheader)
-    return MadeChange;
-
-  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
-                    << "\n");
-
-  // Insert the count into the preheader and replace the condition used by the
-  // selected branch.
-  MadeChange = true;
-
-  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
-  LLVMContext &C = SE->getContext();
-  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
-  if (!ExitCount->getType()->isPointerTy() &&
-      ExitCount->getType() != CountType)
-    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
-  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
-  Value *ECValue =
-      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
-
-  IRBuilder<> CountBuilder(Preheader->getTerminator());
-  Module *M = Preheader->getParent()->getParent();
-  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
-                                               CountType);
-  CountBuilder.CreateCall(MTCTRFunc, ECValue);
-
-  IRBuilder<> CondBuilder(CountedExitBranch);
-  Value *DecFunc =
-    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
-  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
-  Value *OldCond = CountedExitBranch->getCondition();
-  CountedExitBranch->setCondition(NewCond);
-
-  // The false branch must exit the loop.
-  if (!L->contains(CountedExitBranch->getSuccessor(0)))
-    CountedExitBranch->swapSuccessors();
-
-  // The old condition may be dead now, and may have even created a dead PHI
-  // (the original induction variable).
-  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
-  // Run through the basic blocks of the loop and see if any of them have dead
-  // PHIs that can be removed.
-  for (auto I : L->blocks())
-    DeleteDeadPHIs(I);
-
-  ++NumCTRLoops;
-  return MadeChange;
-}
-
 #ifndef NDEBUG
 static bool clobbersCTR(const MachineInstr &MI) {
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
new file mode 100644
index 0000000..77cdf5c
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -0,0 +1,162 @@
+//===-- PPCCallingConv.h - --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPCCallingConv.h"
+#include "PPCSubtarget.h"
+#include "PPCCCState.h"
+using namespace llvm;
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the " \
+                   "stackmap and patchpoint intrinsics.");
+  // gracefully fallback to PPC C calling convention on Release builds.
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                       CCValAssign::LocInfo &LocInfo,
+                                       ISD::ArgFlagsTy &ArgFlags,
+                                       CCState &State) {
+  return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
+                                              CCValAssign::LocInfo &LocInfo,
+                                              ISD::ArgFlagsTy &ArgFlags,
+                                              CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+  // Skip one register if the first unallocated register has an even register
+  // number and there are still argument registers available which have not been
+  // allocated yet. RegNum is actually an index into ArgRegs, which means we
+  // need to skip a register if RegNum is odd.
+  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+
+  // Always return false here, as this function only makes sure that the first
+  // unallocated register has an odd register number and does not actually
+  // allocate a register for the current argument.
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
+    unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+  int RegsLeft = NumArgRegs - RegNum;
+
+  // Skip if there is not enough registers left for long double type (4 gpr regs
+  // in soft float mode) and put long double argument on the stack.
+  if (RegNum != NumArgRegs && RegsLeft < 4) {
+    for (int i = 0; i < RegsLeft; i++) {
+      State.AllocateReg(ArgRegs[RegNum + i]);
+    }
+  }
+
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                                MVT &LocVT,
+                                                CCValAssign::LocInfo &LocInfo,
+                                                ISD::ArgFlagsTy &ArgFlags,
+                                                CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+    PPC::F8
+  };
+
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+  // If there is only one Floating-point register left we need to put both f64
+  // values of a split ppc_fp128 value on the stack.
+  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+
+  // Always return false here, as this function only makes sure that the two f64
+  // values a ppc_fp128 value is split into are both passed in registers or both
+  // passed on the stack and does not actually allocate a register for the
+  // current argument.
+  return false;
+}
+
+// Split F64 arguments into two 32-bit consecutive registers.
+static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
+                                        MVT &LocVT,
+                                        CCValAssign::LocInfo &LocInfo,
+                                        ISD::ArgFlagsTy &ArgFlags,
+                                        CCState &State) {
+  static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
+  static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
+
+  // Try to get the first register.
+  unsigned Reg = State.AllocateReg(HiRegList);
+  if (!Reg)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  unsigned T = State.AllocateReg(LoRegList[i]);
+  (void)T;
+  assert(T == LoRegList[i] && "Could not allocate register");
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+// Same as above, but for return values, so only allocate for R3 and R4
+static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
+                               MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags,
+                               CCState &State) {
+  static const MCPhysReg HiRegList[] = { PPC::R3 };
+  static const MCPhysReg LoRegList[] = { PPC::R4 };
+
+  // Try to get the first register.
+  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+  if (!Reg)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+#include "PPCGenCallingConv.inc"
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.h
index eb904a8..03d9be0 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -1,9 +1,8 @@
 //=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,14 +19,27 @@
 
 namespace llvm {
 
-inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
-                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
-                                CCState &) {
-  llvm_unreachable("The AnyReg calling convention is only supported by the " \
-                   "stackmap and patchpoint intrinsics.");
-  // gracefully fallback to PPC C calling convention on Release builds.
-  return false;
-}
+bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT,
+               CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+               CCState &State);
+bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT,
+                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                    CCState &State);
+bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT,
+                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                   CCState &State);
+bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                      CCState &State);
+bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State);
 
 } // End llvm namespace
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 22842d5..369b9ce 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,9 +1,8 @@
 //===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,7 @@
 ]>;
 
 // Return-value convention for PowerPC coldcc.
+let Entry = 1 in
 def RetCC_PPC_Cold : CallingConv<[
   // Use the same return registers as RetCC_PPC, but limited to only
   // one return value. The remaining return values will be saved to
@@ -70,6 +70,7 @@
 ]>;
 
 // Return-value convention for PowerPC
+let Entry = 1 in
 def RetCC_PPC : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
 
@@ -90,7 +91,7 @@
   CCIfSubtarget<"hasSPE()",
        CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
   CCIfSubtarget<"hasSPE()",
-       CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+       CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>,
 
   // For P9, f128 are passed in vector registers.
   CCIfType<[f128],
@@ -126,6 +127,7 @@
 // Simple calling convention for 64-bit ELF PowerPC fast isel.
 // Only handle ints and floats.  All ints are promoted to i64.
 // Vector types and quadword ints are not handled.
+let Entry = 1 in
 def CC_PPC64_ELF_FIS : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
 
@@ -141,6 +143,7 @@
 // All small ints are promoted to i64.  Vector types, quadword ints,
 // and multiple register returns are "supported" to avoid compile
 // errors, but none are handled by the fast selector.
+let Entry = 1 in
 def RetCC_PPC64_ELF_FIS : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
 
@@ -179,6 +182,9 @@
   CCIfType<[i32],
   CCIfSplit<CCIfNotSubtarget<"useSoftFloat()", 
                             CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
+  CCIfType<[f64],
+  CCIfSubtarget<"hasSPE()",
+                CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
   CCIfSplit<CCIfSubtarget<"useSoftFloat()",
                           CCIfOrigArgWasPPCF128<CCCustom<
                           "CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
@@ -199,7 +205,7 @@
                             CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
   CCIfType<[f64],
            CCIfSubtarget<"hasSPE()",
-                         CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+                         CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>,
   CCIfType<[f32],
            CCIfSubtarget<"hasSPE()",
                          CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
@@ -228,12 +234,14 @@
 // This calling convention puts vector arguments always on the stack. It is used
 // to assign vector arguments which belong to the variable portion of the
 // parameter list of a variable argument function.
+let Entry = 1 in
 def CC_PPC32_SVR4_VarArg : CallingConv<[
   CCDelegateTo<CC_PPC32_SVR4_Common>
 ]>;
 
 // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
 // put vector arguments in vector registers before putting them on the stack.
+let Entry = 1 in
 def CC_PPC32_SVR4 : CallingConv<[
   // QPX vectors mirror the scalar FP convention.
   CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
@@ -265,6 +273,7 @@
 // The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
 // not passed by value.
  
+let Entry = 1 in
 def CC_PPC32_SVR4_ByVal : CallingConv<[
   CCIfByVal<CCPassByVal<4, 4>>,
   
@@ -300,6 +309,13 @@
 
 def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
 
+def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+                                     R21, R22, R23, R24, R25, R26, R27, R28,
+                                     R29, R30, R31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
                                         X21, X22, X23, X24, X25, X26, X27, X28,
                                         X29, X30, X31, F14, F15, F16, F17, F18,
@@ -316,6 +332,13 @@
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                    )>;
 
+def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+                                     X21, X22, X23, X24, X25, X26, X27, X28,
+                                     X29, X30, X31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 // CSRs that are handled by prologue, epilogue.
 def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
 
@@ -343,15 +366,22 @@
 // and value may be altered by inter-library calls.
 // Do not include r12 as it is used as a scratch register.
 // Do not include return registers r3, f1, v2.
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
-                                          (sequence "R%u", 14, 31),
-                                          F0, (sequence "F%u", 2, 31),
-                                          (sequence "CR%u", 0, 7))>;
+def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
+                                                (sequence "R%u", 14, 31),
+                                                (sequence "CR%u", 0, 7))>;
+
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                          F0, (sequence "F%u", 2, 31))>;
+
 
 def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
                                             (sequence "V%u", 0, 1),
                                             (sequence "V%u", 3, 31))>;
 
+def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                            (sequence "S%u", 4, 10),
+                                            (sequence "S%u", 14, 31))>;
+
 def CSR_SVR64_ColdCC : CalleeSavedRegs<(add  (sequence "X%u", 4, 10),
                                              (sequence "X%u", 14, 31),
                                              F0, (sequence "F%u", 2, 31),
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ac931f7..aa5d830 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -1,9 +1,8 @@
 //===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@
 STATISTIC(NumBCLR, "Number of early conditional returns");
 STATISTIC(NumBLR,  "Number of early returns");
 
-namespace llvm {
-  void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
 namespace {
   // PPCEarlyReturn pass - For simple functions without epilogue code, move
   // returns up, and create conditional returns, to avoid unnecessary
@@ -184,11 +179,11 @@
       // nothing to do.
       if (MF.size() < 2)
         return Changed;
-
-      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+      
+      // We can't use a range-based for loop due to clobbering the iterator.
+      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
         MachineBasicBlock &B = *I++;
-        if (processBlock(B))
-          Changed = true;
+        Changed |= processBlock(B);
       }
 
       return Changed;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index a03e691..e8ef451 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -1,9 +1,8 @@
 //===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 3b2d92d..264d6b5 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1,9 +1,8 @@
 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -152,6 +151,14 @@
     bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
       return RC->getID() == PPC::VSSRCRegClassID;
     }
+    unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
+                               unsigned SrcReg, unsigned Flag = 0,
+                               unsigned SubReg = 0) {
+      unsigned TmpReg = createResultReg(ToRC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
+      return TmpReg;
+    }
     bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                     bool isZExt, unsigned DestReg,
                     const PPC::Predicate Pred);
@@ -187,7 +194,6 @@
                          unsigned &NumBytes,
                          bool IsVarArg);
     bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
-    LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
 
   private:
   #include "PPCGenFastISel.inc"
@@ -196,23 +202,6 @@
 
 } // end anonymous namespace
 
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
-  if (Flag == 1)
-    return CC_PPC32_SVR4;
-  else if (Flag == 2)
-    return CC_PPC32_SVR4_ByVal;
-  else if (Flag == 3)
-    return CC_PPC32_SVR4_VarArg;
-  else if (Flag == 4)
-    return RetCC_PPC_Cold;
-  else
-    return RetCC_PPC;
-}
-
 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
   switch (Pred) {
     // These are not representable with any single compare.
@@ -874,7 +863,10 @@
 
   unsigned CmpOpc;
   bool NeedsExt = false;
-  auto RC = MRI.getRegClass(SrcReg1);
+
+  auto RC1 = MRI.getRegClass(SrcReg1);
+  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
+
   switch (SrcVT.SimpleTy) {
     default: return false;
     case MVT::f32:
@@ -893,12 +885,10 @@
         }
       } else {
         CmpOpc = PPC::FCMPUS;
-        if (isVSSRCRegClass(RC)) {
-          unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
-          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                  TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
-          SrcReg1 = TmpReg;
-        }
+        if (isVSSRCRegClass(RC1))
+          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
+        if (RC2 && isVSSRCRegClass(RC2))
+          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
       }
       break;
     case MVT::f64:
@@ -915,7 +905,7 @@
             CmpOpc = PPC::EFDCMPGT;
             break;
         }
-      } else if (isVSFRCRegClass(RC)) {
+      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
         CmpOpc = PPC::XSCMPUDP;
       } else {
         CmpOpc = PPC::FCMPUD;
@@ -997,12 +987,17 @@
 
   // Round the result to single precision.
   unsigned DestReg;
-
+  auto RC = MRI.getRegClass(SrcReg);
   if (PPCSubTarget->hasSPE()) {
     DestReg = createResultReg(&PPC::SPE4RCRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
       TII.get(PPC::EFSCFD), DestReg)
       .addReg(SrcReg);
+  } else if (isVSFRCRegClass(RC)) {
+    DestReg = createResultReg(&PPC::VSSRCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+      TII.get(PPC::XSRSP), DestReg)
+      .addReg(SrcReg);
   } else {
     DestReg = createResultReg(&PPC::F4RCRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1217,21 +1212,19 @@
   if (SrcReg == 0)
     return false;
 
-  // Convert f32 to f64 if necessary.  This is just a meaningless copy
-  // to get the register class right.
+  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
+  // meaningless copy to get the register class right.
   const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
-  if (InRC == &PPC::F4RCRegClass) {
-    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY), TmpReg)
-      .addReg(SrcReg);
-    SrcReg = TmpReg;
-  }
+  if (InRC == &PPC::F4RCRegClass)
+    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
+  else if (InRC == &PPC::VSSRCRegClass)
+    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
 
   // Determine the opcode for the conversion, which takes place
-  // entirely within FPRs.
+  // entirely within FPRs or VSRs.
   unsigned DestReg;
   unsigned Opc;
+  auto RC = MRI.getRegClass(SrcReg);
 
   if (PPCSubTarget->hasSPE()) {
     DestReg = createResultReg(&PPC::GPRCRegClass);
@@ -1239,6 +1232,12 @@
       Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
     else
       Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+  } else if (isVSFRCRegClass(RC)) {
+    DestReg = createResultReg(&PPC::VSFRCRegClass);
+    if (DstVT == MVT::i32) 
+      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
+    else
+      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
   } else {
     DestReg = createResultReg(&PPC::F8RCRegClass);
     if (DstVT == MVT::i32)
@@ -1520,11 +1519,7 @@
 
     if (RetVT == CopyVT) {
       const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
-      ResultReg = createResultReg(CpyRC);
-
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), ResultReg)
-        .addReg(SourcePhysReg);
+      ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
 
     // If necessary, round the floating result to single precision.
     } else if (CopyVT == MVT::f64) {
@@ -1537,12 +1532,9 @@
     // used along the fast-isel path (not lowered), and downstream logic
     // also doesn't like a direct subreg copy on a physical reg.)
     } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
-      ResultReg = createResultReg(&PPC::GPRCRegClass);
       // Convert physical register from G8RC to GPRC.
       SourcePhysReg -= PPC::X0 - PPC::R0;
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), ResultReg)
-        .addReg(SourcePhysReg);
+      ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
     }
 
     assert(ResultReg && "ResultReg unset!");
@@ -1894,13 +1886,8 @@
     return false;
 
   // The only interesting case is when we need to switch register classes.
-  if (SrcVT == MVT::i64) {
-    unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(SrcReg, 0, PPC::sub_32);
-    SrcReg = ResultReg;
-  }
+  if (SrcVT == MVT::i64)
+    SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
 
   updateValueMap(I, SrcReg);
   return true;
@@ -1977,6 +1964,13 @@
     case Instruction::Sub:
       return SelectBinaryIntOp(I, ISD::SUB);
     case Instruction::Call:
+      // On AIX, call lowering uses the DAG-ISEL path currently so that the
+      // callee of the direct function call instruction will be mapped to the
+      // symbol for the function's entry point, which is distinct from the
+      // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+      // name is the C-linkage name of the source level function.
+      if (TM.getTargetTriple().isOSAIX())
+        break;
       return selectCall(I);
     case Instruction::Ret:
       return SelectRet(I);
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 8263954..ebfb1ef 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,7 +29,6 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "framelowering"
-STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
 
@@ -73,10 +71,10 @@
 }
 
 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
-  if (STI.isDarwinABI() || STI.isPPC64())
+  if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
 
-  // SVR4 ABI:
+  // 32-bit SVR4 ABI:
   return 8;
 }
 
@@ -446,12 +444,27 @@
   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
 }
 
+/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
+/// call frame size. Update the MachineFunction object with the stack size.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+                                                bool UseEstimate) const {
+  unsigned NewMaxCallFrameSize = 0;
+  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+                                            &NewMaxCallFrameSize);
+  MF.getFrameInfo().setStackSize(FrameSize);
+  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+  return FrameSize;
+}
+
 /// determineFrameLayout - Determine the size of the frame and maximum call
 /// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
-                                                bool UpdateMF,
-                                                bool UseEstimate) const {
-  MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+                                       bool UseEstimate,
+                                       unsigned *NewMaxCallFrameSize) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
 
   // Get the number of bytes to allocate from the FrameInfo
   unsigned FrameSize =
@@ -469,6 +482,7 @@
   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                        !MFI.adjustsStack() &&       // No calls.
                        !MustSaveLR(MF, LR) &&       // No need to save LR.
+                       !FI->mustSaveTOC() &&        // No need to save TOC.
                        !RegInfo->hasBasePointer(MF); // No special alignment.
 
   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
@@ -477,10 +491,7 @@
 
   // Check whether we can skip adjusting the stack pointer (by using red zone)
   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
-    NumNoNeedForFrame++;
     // No need for frame
-    if (UpdateMF)
-      MFI.setStackSize(0);
     return 0;
   }
 
@@ -496,9 +507,9 @@
   if (MFI.hasVarSizedObjects())
     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
 
-  // Update maximum call frame size.
-  if (UpdateMF)
-    MFI.setMaxCallFrameSize(maxCallFrameSize);
+  // Update the new max call frame size if the caller passes in a valid pointer.
+  if (NewMaxCallFrameSize)
+    *NewMaxCallFrameSize = maxCallFrameSize;
 
   // Include call frame size in total.
   FrameSize += maxCallFrameSize;
@@ -506,10 +517,6 @@
   // Make sure the frame is aligned.
   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
 
-  // Update frame info.
-  if (UpdateMF)
-    MFI.setStackSize(FrameSize);
-
   return FrameSize;
 }
 
@@ -690,7 +697,7 @@
   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   MachineFunction &MF = *(MBB->getParent());
   bool HasBP = RegInfo->hasBasePointer(MF);
-  unsigned FrameSize = determineFrameLayout(MF, false);
+  unsigned FrameSize = determineFrameLayout(MF);
   int NegFrameSize = -FrameSize;
   bool IsLargeFrame = !isInt<16>(NegFrameSize);
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -713,6 +720,50 @@
   return findScratchRegister(TmpMBB, true);
 }
 
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+  // Abort if there is no register info or function info.
+  if (!RegInfo || !FI)
+    return false;
+
+  // Only move the stack update on ELFv2 ABI and PPC64.
+  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+    return false;
+
+  // Check the frame size first and return false if it does not fit the
+  // requirements.
+  // We need a non-zero frame size as well as a frame that will fit in the red
+  // zone. This is because by moving the stack pointer update we are now storing
+  // to the red zone until the stack pointer is updated. If we get an interrupt
+  // inside the prologue but before the stack update we now have a number of
+  // stores to the red zone and those stores must all fit.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned FrameSize = MFI.getStackSize();
+  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+    return false;
+
+  // Frame pointers and base pointers complicate matters so don't do anything
+  // if we have them. For example having a frame pointer will sometimes require
+  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+  // difficult.
+  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+    return false;
+
+  // Calls to fast_cc functions use different rules for passing parameters on
+  // the stack from the ABI and using PIC base in the function imposes
+  // similar restrictions to using the base pointer. It is not generally safe
+  // to move the stack pointer update in these situations.
+  if (FI->hasFastCall() || FI->usesPICBase())
+    return false;
+
+  // Finally we can move the stack update if we do not require register
+  // scavenging. Register scavenging can introduce more spills and so
+  // may make the frame size larger than we have computed.
+  return !RegInfo->requiresFrameIndexScavenging(MF);
+}
+
 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -748,7 +799,7 @@
   MBBI = MBB.begin();
 
   // Work out frame sizes.
-  unsigned FrameSize = determineFrameLayout(MF);
+  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
   int NegFrameSize = -FrameSize;
   if (!isInt<32>(NegFrameSize))
     llvm_unreachable("Unhandled stack size!");
@@ -759,6 +810,7 @@
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  bool MustSaveTOC = FI->mustSaveTOC();
   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
   bool MustSaveCR = !MustSaveCRs.empty();
   // Do we have a frame pointer and/or base pointer for this function?
@@ -770,6 +822,7 @@
   unsigned BPReg       = RegInfo->getBaseRegister(MF);
   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
+  unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
   unsigned ScratchReg  = 0;
   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
@@ -855,6 +908,45 @@
   assert((isPPC64 || !MustSaveCR) &&
          "Prologue CR saving supported only in 64-bit mode");
 
+  // Check if we can move the stack update instruction (stdu) down the prologue
+  // past the callee saves. Hopefully this will avoid the situation where the
+  // saves are waiting for the update on the store with update to complete.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  bool MovingStackUpdateDown = false;
+
+  // Check if we can move the stack update.
+  if (stackUpdateCanBeMoved(MF)) {
+    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+    for (CalleeSavedInfo CSI : Info) {
+      int FrIdx = CSI.getFrameIdx();
+      // If the frame index is not negative the callee saved info belongs to a
+      // stack object that is not a fixed stack object. We ignore non-fixed
+      // stack objects because we won't move the stack update pointer past them.
+      if (FrIdx >= 0)
+        continue;
+
+      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+        StackUpdateLoc++;
+        MovingStackUpdateDown = true;
+      } else {
+        // We need all of the Frame Indices to meet these conditions.
+        // If they do not, abort the whole operation.
+        StackUpdateLoc = MBBI;
+        MovingStackUpdateDown = false;
+        break;
+      }
+    }
+
+    // If the operation was not aborted then update the object offset.
+    if (MovingStackUpdateDown) {
+      for (CalleeSavedInfo CSI : Info) {
+        int FrIdx = CSI.getFrameIdx();
+        if (FrIdx < 0)
+          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+      }
+    }
+  }
+
   // If we need to spill the CR and the LR but we don't have two separate
   // registers available, we must spill them one at a time
   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -918,7 +1010,7 @@
   }
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, StoreInst)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
       .addReg(ScratchReg, getKillRegState(true))
       .addImm(LROffset)
       .addReg(SPReg);
@@ -986,7 +1078,7 @@
     HasSTUX = true;
 
   } else if (!isLargeFrame) {
-    BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
       .addReg(SPReg)
       .addImm(NegFrameSize)
       .addReg(SPReg);
@@ -1004,6 +1096,16 @@
     HasSTUX = true;
   }
 
+  // Save the TOC register after the stack pointer update if a prologue TOC
+  // save is required for the function.
+  if (MustSaveTOC) {
+    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+      .addReg(TOCReg, getKillRegState(true))
+      .addImm(TOCSaveOffset)
+      .addReg(SPReg);
+  }
+
   if (!HasRedZone) {
     assert(!isPPC64 && "A red zone is always available on PPC64");
     if (HasSTUX) {
@@ -1205,6 +1307,9 @@
       if (PPC::CRBITRCRegClass.contains(Reg))
         continue;
 
+      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+        continue;
+
       // For SVR4, don't emit a move for the CR spill slot if we haven't
       // spilled CRs.
       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1234,6 +1339,12 @@
           .addCFIIndex(CFIRegister);
       } else {
         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+        // We have changed the object offset above but we do not want to change
+        // the actual offsets in the CFI instruction so we have to undo the
+        // offset change here.
+        if (MovingStackUpdateDown)
+          Offset -= NegFrameSize;
+
         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1380,6 +1491,32 @@
   unsigned RBReg = SPReg;
   unsigned SPAdd = 0;
 
+  // Check if we can move the stack update instruction up the epilogue
+  // past the callee saves. This will allow the move to LR instruction
+  // to be executed before the restores of the callee saves which means
+  // that the callee saves can hide the latency from the MTLR instruction.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  if (stackUpdateCanBeMoved(MF)) {
+    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+    for (CalleeSavedInfo CSI : Info) {
+      int FrIdx = CSI.getFrameIdx();
+      // If the frame index is not negative the callee saved info belongs to a
+      // stack object that is not a fixed stack object. We ignore non-fixed
+      // stack objects because we won't move the update of the stack pointer
+      // past them.
+      if (FrIdx >= 0)
+        continue;
+
+      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+        StackUpdateLoc--;
+      else {
+        // Abort the operation as we can't update all CSR restores.
+        StackUpdateLoc = MBBI;
+        break;
+      }
+    }
+  }
+
   if (FrameSize) {
     // In the prologue, the loaded (or persistent) stack pointer value is
     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1409,7 +1546,7 @@
       }
     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
       if (HasRedZone) {
-        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
           .addReg(SPReg)
           .addImm(FrameSize);
       } else {
@@ -1433,7 +1570,7 @@
             .addReg(FPReg);
         RBReg = FPReg;
       }
-      BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
         .addImm(0)
         .addReg(SPReg);
     }
@@ -1466,7 +1603,7 @@
   // a base register anyway, because it may happen to be R0.
   bool LoadedLR = false;
   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
-    BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
       .addImm(LROffset+SPAdd)
       .addReg(RBReg);
     LoadedLR = true;
@@ -1538,7 +1675,7 @@
         .addReg(TempReg, getKillRegState(i == e-1));
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
 
   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
   // call optimization
@@ -1732,6 +1869,9 @@
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
+    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
+            (Reg != PPC::X2 && Reg != PPC::R2)) &&
+           "Not expecting to try to spill R2 in a function that must save TOC");
     if (PPC::GPRCRegClass.contains(Reg) ||
         PPC::SPE4RCRegClass.contains(Reg)) {
       HasGPSaveArea = true;
@@ -1947,7 +2087,7 @@
   // the 16-bit immediate. We don't know the complete frame size here
   // because we've not yet computed callee-saved register spills or the
   // needed alignment padding.
-  unsigned StackSize = determineFrameLayout(MF, false, true);
+  unsigned StackSize = determineFrameLayout(MF, true);
   MachineFrameInfo &MFI = MF.getFrameInfo();
   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
@@ -2041,6 +2181,8 @@
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
@@ -2071,6 +2213,10 @@
       continue;
     }
 
+    // The actual spill will happen in the prologue.
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     // Insert the spill to the stack frame.
     if (IsCRField) {
       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
@@ -2198,6 +2344,8 @@
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   bool CR2Spilled = false;
   bool CR3Spilled = false;
   bool CR4Spilled = false;
@@ -2220,6 +2368,9 @@
     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
       continue;
 
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     if (Reg == PPC::CR2) {
       CR2Spilled = true;
       // The spill slot is associated only with CR2, which is the
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 69bd148..d116e9f 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 
-#include "PPC.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -73,12 +71,29 @@
    */
   void createTailCallBranchInstr(MachineBasicBlock &MBB) const;
 
+  /**
+    * Check if the conditions are correct to allow for the stack update
+    * to be moved past the CSR save/restore code.
+    */
+  bool stackUpdateCanBeMoved(MachineFunction &MF) const;
+
 public:
   PPCFrameLowering(const PPCSubtarget &STI);
 
-  unsigned determineFrameLayout(MachineFunction &MF,
-                                bool UpdateMF = true,
-                                bool UseEstimate = false) const;
+  /**
+   * Determine the frame layout and update the machine function.
+   */
+  unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+                                         bool UseEstimate = false) const;
+
+  /**
+   * Determine the frame layout but do not update the machine function.
+   * The MachineFunction object can be const in this case as it is not
+   * modified.
+   */
+  unsigned determineFrameLayout(const MachineFunction &MF,
+                                bool UseEstimate = false,
+                                unsigned *NewMaxCallFrameSize = nullptr) const;
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 5f6966c..391ebcc 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -1,9 +1,8 @@
 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCHazardRecognizers.h"
-#include "PPC.h"
 #include "PPCInstrInfo.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
index 4b50214..5b32147 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -1,9 +1,8 @@
 //===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 31acd0f..543cac0 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -219,13 +218,6 @@
     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                      const SDLoc &dl);
 
-    /// SelectAddrImm - Returns true if the address N can be represented by
-    /// a base register plus a signed 16-bit displacement [r+imm].
-    bool SelectAddrImm(SDValue N, SDValue &Disp,
-                       SDValue &Base) {
-      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
-    }
-
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
     /// immediate field.  Note that the operand at this point is already the
     /// result of a prior SelectAddressRegImm call.
@@ -239,26 +231,61 @@
       return false;
     }
 
-    /// SelectAddrIdx - Given the specified addressed, check to see if it can be
-    /// represented as an indexed [r+r] operation.  Returns false if it can
-    /// be represented by [r+imm], which are preferred.
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is D.
+    /// The last parameter \p 0 means associated D form has no requirement for 16
+    /// bit signed displacement.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
-      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
     }
 
-    /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+    /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is DS.
+    /// The last parameter \p 4 means associated DS form 16 bit signed
+    /// displacement must be a multiple of 4.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
+    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+    }
+
+    /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is DQ.
+    /// The last parameter \p 16 means associated DQ form 16 bit signed
+    /// displacement must be a multiple of 16.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
+    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
+    }
+
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
     }
+    
+    /// SelectAddrImm - Returns true if the address N can be represented by
+    /// a base register plus a signed 16-bit displacement [r+imm].
+    /// The last parameter \p 0 means D form has no requirement for 16 bit signed
+    /// displacement.
+    bool SelectAddrImm(SDValue N, SDValue &Disp,
+                       SDValue &Base) {
+      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+    }
 
     /// SelectAddrImmX4 - Returns true if the address N can be represented by
-    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
-    /// Suitable for use by STD and friends.
+    /// a base register plus a signed 16-bit displacement that is a multiple of
+    /// 4 (last parameter). Suitable for use by STD and friends.
     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
     }
 
+    /// SelectAddrImmX16 - Returns true if the address N can be represented by
+    /// a base register plus a signed 16-bit displacement that is a multiple of
+    /// 16(last parameter). Suitable for use by STXV and friends.
     bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
     }
@@ -412,7 +439,8 @@
     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
       if (PPCSubTarget->isTargetELF()) {
         GlobalBaseReg = PPC::R30;
-        if (M->getPICLevel() == PICLevel::SmallPIC) {
+        if (!PPCSubTarget->isSecurePlt() &&
+            M->getPICLevel() == PICLevel::SmallPIC) {
           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
@@ -2373,7 +2401,7 @@
 
   // Here we try to match complex bit permutations into a set of
   // rotate-and-shift/shift/and/or instructions, using a set of heuristics
-  // known to produce optimial code for common cases (like i32 byte swapping).
+  // known to produce optimal code for common cases (like i32 byte swapping).
   SDNode *Select(SDNode *N) {
     Memoizer.clear();
     auto Result =
@@ -4214,12 +4242,12 @@
 
   // Without this setb optimization, the outer SELECT_CC will be manually
   // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
-  // transforms pseduo instruction to isel instruction. When there are more than
+  // transforms pseudo instruction to isel instruction. When there are more than
   // one use for result like zext/sext, with current optimization we only see
   // isel is replaced by setb but can't see any significant gain. Since
   // setb has longer latency than original isel, we should avoid this. Another
   // point is that setb requires comparison always kept, it can break the
-  // oppotunity to get the comparison away if we have in future.
+  // opportunity to get the comparison away if we have in future.
   if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
     return false;
 
@@ -4354,13 +4382,23 @@
     if (trySETCC(N))
       return;
     break;
-
-  case PPCISD::CALL: {
-    const Module *M = MF->getFunction().getParent();
-
+  // These nodes will be transformed into GETtlsADDR32 node, which
+  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+  case PPCISD::ADDI_TLSLD_L_ADDR:
+  case PPCISD::ADDI_TLSGD_L_ADDR: {
+    const Module *Mod = MF->getFunction().getParent();
     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
         !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
-        M->getPICLevel() == PICLevel::SmallPIC)
+        Mod->getPICLevel() == PICLevel::SmallPIC)
+      break;
+    // Attach global base pointer on GETtlsADDR32 node in order to
+    // generate secure plt code for TLS symbols.
+    getGlobalBaseReg();
+  } break;
+  case PPCISD::CALL: {
+    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
+        !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
+        !PPCSubTarget->isTargetELF())
       break;
 
     SDValue Op = N->getOperand(1);
@@ -5305,7 +5343,7 @@
     SDValue V = Queue.pop_back_val();
 
     for (const SDValue &O : V.getNode()->ops()) {
-      unsigned b;
+      unsigned b = 0;
       uint64_t M = 0, A = 0;
       SDValue OLHS, ORHS;
       if (O.getOpcode() == ISD::OR) {
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 39608cb..24d5007 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,6 +44,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -70,8 +70,10 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Casting.h"
@@ -111,6 +113,9 @@
 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 
+static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
+cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
+
 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
 cl::desc("enable quad precision float support on ppc"), cl::Hidden);
 
@@ -119,6 +124,8 @@
 
 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
 
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
+
 // FIXME: Remove this once the bug has been fixed!
 extern cl::opt<bool> ANDIGlueBug;
 
@@ -550,7 +557,18 @@
       // add/sub are legal for all supported vector VT's.
       setOperationAction(ISD::ADD, VT, Legal);
       setOperationAction(ISD::SUB, VT, Legal);
-      setOperationAction(ISD::ABS, VT, Custom);
+
+      // For v2i64, these are only valid with P8Vector. This is corrected after
+      // the loop.
+      setOperationAction(ISD::SMAX, VT, Legal);
+      setOperationAction(ISD::SMIN, VT, Legal);
+      setOperationAction(ISD::UMAX, VT, Legal);
+      setOperationAction(ISD::UMIN, VT, Legal);
+
+      if (Subtarget.hasVSX()) {
+        setOperationAction(ISD::FMAXNUM, VT, Legal);
+        setOperationAction(ISD::FMINNUM, VT, Legal);
+      }
 
       // Vector instructions introduced in P8
       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -635,11 +653,28 @@
         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
       }
     }
+    if (!Subtarget.hasP8Vector()) {
+      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
+      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
+      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
+      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
+    }
+
+    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
+      setOperationAction(ISD::ABS, VT, Custom);
 
     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
     // with merges, splats, etc.
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 
+    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
+    // are cheap, so handle them before they get expanded to scalar.
+    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+
     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
@@ -804,6 +839,8 @@
       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
 
       if (Subtarget.hasDirectMove())
         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
@@ -866,6 +903,7 @@
         setOperationAction(ISD::FPOWI, MVT::f128, Expand);
         setOperationAction(ISD::FREM, MVT::f128, Expand);
       }
+      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
 
     }
 
@@ -1060,6 +1098,7 @@
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   if (Subtarget.hasFPCVT())
@@ -1232,22 +1271,6 @@
   return Align;
 }
 
-unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
-                                                          CallingConv:: ID CC,
-                                                          EVT VT) const {
-  if (Subtarget.hasSPE() && VT == MVT::f64)
-    return 2;
-  return PPCTargetLowering::getNumRegisters(Context, VT);
-}
-
-MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
-                                                     CallingConv:: ID CC,
-                                                     EVT VT) const {
-  if (Subtarget.hasSPE() && VT == MVT::f64)
-    return MVT::i32;
-  return PPCTargetLowering::getRegisterType(Context, VT);
-}
-
 bool PPCTargetLowering::useSoftFloat() const {
   return Subtarget.useSoftFloat();
 }
@@ -1256,6 +1279,10 @@
   return Subtarget.hasSPE();
 }
 
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  return VT.isScalarInteger();
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
@@ -1365,7 +1392,11 @@
   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
+  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
+  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
+  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
+  case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
   }
   return nullptr;
 }
@@ -2202,16 +2233,43 @@
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+                                               SDValue &Index,
+                                               SelectionDAG &DAG) const {
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+      UI != E; ++UI) {
+    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+      if (Memop->getMemoryVT() == MVT::f64) {
+          Base = N.getOperand(0);
+          Index = N.getOperand(1);
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation.  Returns false if it
-/// can be more efficiently represented with [r+imm].
+/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
+/// non-zero and N can be represented by a base register plus a signed 16-bit
+/// displacement, make a more precise judgement by checking (displacement % \p
+/// EncodingAlignment).
 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
-                                            SDValue &Index,
-                                            SelectionDAG &DAG) const {
+                                            SDValue &Index, SelectionDAG &DAG,
+                                            unsigned EncodingAlignment) const {
   int16_t imm = 0;
   if (N.getOpcode() == ISD::ADD) {
-    if (isIntS16Immediate(N.getOperand(1), imm))
-      return false;    // r+i
+    // Is there any SPE load/store (f64), which can't handle 16bit offset?
+    // SPE load/store can only handle 8-bit offsets.
+    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+        return true;
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!EncodingAlignment || !(imm % EncodingAlignment)))
+      return false; // r+i
     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
       return false;    // r+i
 
@@ -2219,8 +2277,9 @@
     Index = N.getOperand(1);
     return true;
   } else if (N.getOpcode() == ISD::OR) {
-    if (isIntS16Immediate(N.getOperand(1), imm))
-      return false;    // r+i can fold it if we can.
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!EncodingAlignment || !(imm % EncodingAlignment)))
+      return false; // r+i can fold it if we can.
 
     // If this is an or of disjoint bitfields, we can codegen this as an add
     // (for better address arithmetic) if the LHS and RHS of the OR are provably
@@ -2284,22 +2343,22 @@
 
 /// Returns true if the address N can be represented by a base register plus
 /// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.  If \p Alignment is non-zero, only accept
+/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
 /// displacements that are multiples of that value.
 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                             SDValue &Base,
                                             SelectionDAG &DAG,
-                                            unsigned Alignment) const {
+                                            unsigned EncodingAlignment) const {
   // FIXME dl should come from parent load or store, not from address
   SDLoc dl(N);
   // If this can be more profitably realized as r+r, fail.
-  if (SelectAddressRegReg(N, Disp, Base, DAG))
+  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
     return false;
 
   if (N.getOpcode() == ISD::ADD) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Alignment || (imm % Alignment) == 0)) {
+        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2323,7 +2382,7 @@
   } else if (N.getOpcode() == ISD::OR) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Alignment || (imm % Alignment) == 0)) {
+        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
       // If this is an or of disjoint bitfields, we can codegen this as an add
       // (for better address arithmetic) if the LHS and RHS of the OR are
       // provably disjoint.
@@ -2349,7 +2408,8 @@
     // If this address fits entirely in a 16-bit sext immediate field, codegen
     // this as "d, 0"
     int16_t Imm;
-    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
+    if (isIntS16Immediate(CN, Imm) &&
+        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                              CN->getValueType(0));
@@ -2359,7 +2419,7 @@
     // Handle 32-bit sext immediates with LIS + addr mode.
     if ((CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
-        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
+        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
       int Addr = (int)CN->getZExtValue();
 
       // Otherwise, break this down into an LIS + disp.
@@ -2416,24 +2476,45 @@
 
 /// Returns true if we should use a direct load into vector instruction
 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
-static bool usePartialVectorLoads(SDNode *N) {
-  if (!N->hasOneUse())
-    return false;
+static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
 
   // If there are any other uses other than scalar to vector, then we should
   // keep it as a scalar load -> direct move pattern to prevent multiple
-  // loads.  Currently, only check for i64 since we have lxsd/lfd to do this
-  // efficiently, but no update equivalent.
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
-    EVT MemVT = LD->getMemoryVT();
-    if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
-      SDNode *User = *(LD->use_begin());
-      if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
-        return true;
-    }
+  // loads.
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+  if (!LD)
+    return false;
+
+  EVT MemVT = LD->getMemoryVT();
+  if (!MemVT.isSimple())
+    return false;
+  switch(MemVT.getSimpleVT().SimpleTy) {
+  case MVT::i64:
+    break;
+  case MVT::i32:
+    if (!ST.hasP8Vector())
+      return false;
+    break;
+  case MVT::i16:
+  case MVT::i8:
+    if (!ST.hasP9Vector())
+      return false;
+    break;
+  default:
+    return false;
   }
 
-  return false;
+  SDValue LoadedVal(N, 0);
+  if (!LoadedVal.hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
+       UI != UE; ++UI)
+    if (UI.getUse().get().getResNo() == 0 &&
+        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+      return false;
+
+  return true;
 }
 
 /// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -2464,7 +2545,7 @@
   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
   // instructions because we can fold these into a more efficient instruction
   // instead, (such as LXSD).
-  if (isLoad && usePartialVectorLoads(N)) {
+  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
     return false;
   }
 
@@ -2745,7 +2826,8 @@
   const Module *M = DAG.getMachineFunction().getFunction().getParent();
   PICLevel::Level picLevel = M->getPICLevel();
 
-  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+  const TargetMachine &TM = getTargetMachine();
+  TLSModel::Model Model = TM.getTLSModel(GV);
 
   if (Model == TLSModel::LocalExec) {
     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -2769,8 +2851,14 @@
       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                            PtrVT, GOTReg, TGA);
-    } else
-      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+    } else {
+      if (!TM.isPositionIndependent())
+        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+      else if (picLevel == PICLevel::SmallPIC)
+        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+      else
+        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                    PtrVT, TGA, GOTPtr);
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
@@ -3147,101 +3235,6 @@
                       MachinePointerInfo(SV, nextOffset));
 }
 
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
-  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  return true;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                             MVT &LocVT,
-                                             CCValAssign::LocInfo &LocInfo,
-                                             ISD::ArgFlagsTy &ArgFlags,
-                                             CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
-    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
-  };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
-  // Skip one register if the first unallocated register has an even register
-  // number and there are still argument registers available which have not been
-  // allocated yet. RegNum is actually an index into ArgRegs, which means we
-  // need to skip a register if RegNum is odd.
-  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
-    State.AllocateReg(ArgRegs[RegNum]);
-  }
-
-  // Always return false here, as this function only makes sure that the first
-  // unallocated register has an odd register number and does not actually
-  // allocate a register for the current argument.
-  return false;
-}
-
-bool
-llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                                  MVT &LocVT,
-                                                  CCValAssign::LocInfo &LocInfo,
-                                                  ISD::ArgFlagsTy &ArgFlags,
-                                                  CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
-    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
-  };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-  int RegsLeft = NumArgRegs - RegNum;
-
-  // Skip if there is not enough registers left for long double type (4 gpr regs
-  // in soft float mode) and put long double argument on the stack.
-  if (RegNum != NumArgRegs && RegsLeft < 4) {
-    for (int i = 0; i < RegsLeft; i++) {
-      State.AllocateReg(ArgRegs[RegNum + i]);
-    }
-  }
-
-  return false;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                               MVT &LocVT,
-                                               CCValAssign::LocInfo &LocInfo,
-                                               ISD::ArgFlagsTy &ArgFlags,
-                                               CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
-    PPC::F8
-  };
-
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
-  // If there is only one Floating-point register left we need to put both f64
-  // values of a split ppc_fp128 value on the stack.
-  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
-    State.AllocateReg(ArgRegs[RegNum]);
-  }
-
-  // Always return false here, as this function only makes sure that the two f64
-  // values a ppc_fp128 value is split into are both passed in registers or both
-  // passed on the stack and does not actually allocate a register for the
-  // current argument.
-  return false;
-}
-
 /// FPR - The set of FP registers that should be allocated for arguments,
 /// on Darwin.
 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
@@ -3449,7 +3442,7 @@
   // Reserve space for the linkage area on the stack.
   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
-  if (useSoftFloat() || hasSPE())
+  if (useSoftFloat())
     CCInfo.PreAnalyzeFormalArguments(Ins);
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3482,7 +3475,8 @@
           if (Subtarget.hasVSX())
             RC = &PPC::VSFRCRegClass;
           else if (Subtarget.hasSPE())
-            RC = &PPC::SPERCRegClass;
+            // SPE passes doubles in GPR pairs.
+            RC = &PPC::GPRCRegClass;
           else
             RC = &PPC::F8RCRegClass;
           break;
@@ -3506,13 +3500,26 @@
           break;
       }
 
-      // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
-                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
-
-      if (ValVT == MVT::i1)
-        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+      SDValue ArgValue;
+      // Transform the arguments stored in physical registers into
+      // virtual ones.
+      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
+        assert(i + 1 < e && "No second half of double precision argument");
+        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
+        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
+        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
+        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
+        if (!Subtarget.isLittleEndian())
+          std::swap (ArgValueLo, ArgValueHi);
+        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
+                               ArgValueHi);
+      } else {
+        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
+        if (ValVT == MVT::i1)
+          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+      }
 
       InVals.push_back(ArgValue);
     } else {
@@ -4448,24 +4455,27 @@
 static bool
 callsShareTOCBase(const Function *Caller, SDValue Callee,
                     const TargetMachine &TM) {
-  // If !G, Callee can be an external symbol.
-  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-  if (!G)
-    return false;
+   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+   // don't have enough information to determine if the caller and callee share
+   // the same TOC base, so we have to pessimistically assume they don't for
+   // correctness.
+   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+   if (!G)
+     return false;
 
+   const GlobalValue *GV = G->getGlobal();
   // The medium and large code models are expected to provide a sufficiently
   // large TOC to provide all data addressing needs of a module with a
   // single TOC. Since each module will be addressed with a single TOC then we
   // only need to check that caller and callee don't cross dso boundaries.
   if (CodeModel::Medium == TM.getCodeModel() ||
       CodeModel::Large == TM.getCodeModel())
-    return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
+    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
 
   // Otherwise we need to ensure callee and caller are in the same section,
   // since the linker may allocate multiple TOCs, and we don't know which
   // sections will belong to the same TOC base.
 
-  const GlobalValue *GV = G->getGlobal();
   if (!GV->isStrongDefinitionForLinker())
     return false;
 
@@ -4917,6 +4927,7 @@
   bool isPPC64 = Subtarget.isPPC64();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
   bool isELFv2ABI = Subtarget.isELFv2ABI();
+  bool isAIXABI = Subtarget.isAIXABI();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
   NodeTys.push_back(MVT::Other);   // Returns a chain
@@ -4943,17 +4954,18 @@
   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
 
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+  // every direct call is) turn it into a TargetGlobalAddress /
+  // TargetExternalSymbol node so that legalize doesn't hack it.
   if (isFunctionGlobalAddress(Callee)) {
     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+
     // A call to a TLS address is actually an indirect call to a
     // thread-specific pointer.
     unsigned OpFlags = 0;
     if (UsePlt)
       OpFlags = PPCII::MO_PLT;
 
-    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
-    // every direct call is) turn it into a TargetGlobalAddress /
-    // TargetExternalSymbol node so that legalize doesn't hack it.
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                         Callee.getValueType(), 0, OpFlags);
     needIndirectCall = false;
@@ -5095,17 +5107,18 @@
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                   RegsToPass[i].second.getValueType()));
 
-  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
-  // into the call.
-  // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
-  if (isSVR4ABI && isPPC64) {
+  // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
+  // live into the call.
+  // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
+  if ((isSVR4ABI && isPPC64) || isAIXABI) {
     setUsesTOCBasePtr(DAG);
 
-    // We cannot add X2 as an operand here for PATCHPOINT, because there is no
-    // way to mark dependencies as implicit here. We will add the X2 dependency
-    // in EmitInstrWithCustomInserter.
-    if (!isPatchPoint) 
-      Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+    // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+    // no way to mark dependencies as implicit here.
+    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+    if (!isPatchPoint)
+      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
+                                            : PPC::R2, PtrVT));
   }
 
   return CallOpc;
@@ -5129,10 +5142,27 @@
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    SDValue Val = DAG.getCopyFromReg(Chain, dl,
-                                     VA.getLocReg(), VA.getLocVT(), InFlag);
-    Chain = Val.getValue(1);
-    InFlag = Val.getValue(2);
+    SDValue Val;
+
+    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+                                      InFlag);
+      Chain = Lo.getValue(1);
+      InFlag = Lo.getValue(2);
+      VA = RVLocs[++i]; // skip ahead to next loc
+      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+                                      InFlag);
+      Chain = Hi.getValue(1);
+      InFlag = Hi.getValue(2);
+      if (!Subtarget.isLittleEndian())
+        std::swap (Lo, Hi);
+      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
+    } else {
+      Val = DAG.getCopyFromReg(Chain, dl,
+                               VA.getLocReg(), VA.getLocVT(), InFlag);
+      Chain = Val.getValue(1);
+      InFlag = Val.getValue(2);
+    }
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -5206,18 +5236,24 @@
   }
 
   // Add a NOP immediately after the branch instruction when using the 64-bit
-  // SVR4 ABI. At link time, if caller and callee are in a different module and
+  // SVR4 or the AIX ABI.
+  // At link time, if caller and callee are in a different module and
   // thus have a different TOC, the call will be replaced with a call to a stub
   // function which saves the current TOC, loads the TOC of the callee and
   // branches to the callee. The NOP will be replaced with a load instruction
   // which restores the TOC of the caller from the TOC save slot of the current
   // stack frame. If caller and callee belong to the same module (and have the
-  // same TOC), the NOP will remain unchanged.
+  // same TOC), the NOP will remain unchanged, or become some other NOP.
 
   MachineFunction &MF = DAG.getMachineFunction();
-  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
-      !isPatchPoint) {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  if (!isTailCall && !isPatchPoint &&
+      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
+       Subtarget.isAIXABI())) {
     if (CallOpc == PPCISD::BCTRL) {
+      if (Subtarget.isAIXABI())
+        report_fatal_error("Indirect call on AIX is not implemented.");
+
       // This is a call through a function pointer.
       // Restore the caller TOC from the save area into R2.
       // See PrepareCall() for more information about calls through function
@@ -5229,7 +5265,6 @@
       // allocated and an unnecessary move instruction being generated.
       CallOpc = PPCISD::BCTRL_LOAD_TOC;
 
-      EVT PtrVT = getPointerTy(DAG.getDataLayout());
       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
@@ -5245,6 +5280,19 @@
     }
   }
 
+  if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
+    // On AIX, direct function calls reference the symbol for the function's
+    // entry point, which is named by inserting a "." before the function's
+    // C-linkage name.
+    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+    auto &Context = DAG.getMachineFunction().getMMI().getContext();
+    MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
+                                            Twine(G->getGlobal()->getName()));
+    Callee = DAG.getMCSymbol(S, PtrVT);
+    // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
+    Ops[1] = Callee;
+  }
+
   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
   InFlag = Chain.getValue(1);
 
@@ -5314,16 +5362,20 @@
       !isTailCall)
     Callee = LowerGlobalAddress(Callee, DAG);
 
-  if (Subtarget.isSVR4ABI()) {
-    if (Subtarget.isPPC64())
-      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-    else
-      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-  }
+  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+    return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isSVR4ABI())
+    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isAIXABI())
+    return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
+                         isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                         dl, DAG, InVals, CS);
 
   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
@@ -5444,12 +5496,15 @@
 
   bool seenFloatArg = false;
   // Walk the register/memloc assignments, inserting copies/loads.
-  for (unsigned i = 0, j = 0, e = ArgLocs.size();
+  // i - Tracks the index into the list of registers allocated for the call
+  // RealArgIdx - Tracks the index into the list of actual function arguments
+  // j - Tracks the index into the list of byval arguments
+  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
        i != e;
-       ++i) {
+       ++i, ++RealArgIdx) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = OutVals[i];
-    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    SDValue Arg = OutVals[RealArgIdx];
+    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
 
     if (Flags.isByVal()) {
       // Argument is an aggregate which is passed by value, thus we need to
@@ -5498,7 +5553,17 @@
     if (VA.isRegLoc()) {
       seenFloatArg |= VA.getLocVT().isFloatingPoint();
       // Put argument in a physical register.
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
+        bool IsLE = Subtarget.isLittleEndian();
+        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
+        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
+        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
+                             SVal.getValue(0)));
+      } else
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else {
       // Put argument in the parameter list area of the current stack frame.
       assert(VA.isMemLoc());
@@ -6613,6 +6678,128 @@
                     NumBytes, Ins, InVals, CS);
 }
 
+
+SDValue PPCTargetLowering::LowerCall_AIX(
+    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
+    bool isTailCall, bool isPatchPoint,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    ImmutableCallSite CS) const {
+
+  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
+         "Unimplemented calling convention!");
+  if (isVarArg || isPatchPoint)
+    report_fatal_error("This call type is unimplemented on AIX.");
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  bool isPPC64 = PtrVT == MVT::i64;
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+  unsigned NumOps = Outs.size();
+
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, parameter list area.
+  // On XCOFF, we start with 24/48, which is reserved space for
+  // [SP][CR][LR][2 x reserved][TOC].
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if the callee
+  // is variadic.
+  // Because we cannot tell if this is needed on the caller side, we have to
+  // conservatively assume that it is needed.  As such, make sure we have at
+  // least enough stack space for the caller to store the 8 GPRs.
+  unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog
+  // inserter pass.
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  SDValue CallSeqStart = Chain;
+
+  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10
+  };
+  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10
+  };
+
+  const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
+                                   : array_lengthof(GPR_32);
+  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+  unsigned GPR_idx = 0;
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  if (isTailCall)
+    report_fatal_error("Handling of tail call is unimplemented!");
+  int SPDiff = 0;
+
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Arg = OutVals[i];
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+    // Promote integers if needed.
+    if (Arg.getValueType() == MVT::i1 ||
+        (isPPC64 && Arg.getValueType() == MVT::i32)) {
+      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+    }
+
+    // Note: "by value" is code for passing a structure by value, not
+    // basic types.
+    if (Flags.isByVal())
+      report_fatal_error("Passing structure by value is unimplemented!");
+
+    switch (Arg.getSimpleValueType().SimpleTy) {
+    default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
+    case MVT::i32:
+    case MVT::i64:
+      if (GPR_idx != NumGPRs)
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+      else
+        report_fatal_error("Handling of placing parameters on the stack is "
+                           "unimplemented!");
+      break;
+    case MVT::f32:
+    case MVT::f64:
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+    case MVT::v2f64:
+    case MVT::v2i64:
+    case MVT::v1i128:
+    case MVT::f128:
+    case MVT::v4f64:
+    case MVT::v4i1:
+      report_fatal_error("Handling of this parameter type is unimplemented!");
+    }
+  }
+
+  if (!isFunctionGlobalAddress(Callee) &&
+      !isa<ExternalSymbolSDNode>(Callee))
+    report_fatal_error("Handling of indirect call is unimplemented!");
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (auto Reg : RegsToPass) {
+    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+                    /* unused except on PPC64 ELFv1 */ false, DAG,
+                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+                    NumBytes, Ins, InVals, CS);
+}
+
 bool
 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                   MachineFunction &MF, bool isVarArg,
@@ -6644,11 +6831,11 @@
   SmallVector<SDValue, 4> RetOps(1, Chain);
 
   // Copy the result values into the output registers.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    SDValue Arg = OutVals[i];
+    SDValue Arg = OutVals[RealResIdx];
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -6663,8 +6850,21 @@
       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     }
-
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+      bool isLittleEndian = Subtarget.isLittleEndian();
+      // Legalize ret f64 -> ret 2 x i32.
+      SDValue SVal =
+          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
+      Flag = Chain.getValue(1);
+      VA = RVLocs[++i]; // skip ahead to next loc
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+    } else
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
@@ -6890,6 +7090,61 @@
                      Op.getOperand(0));
 }
 
+SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
+                                               SelectionDAG &DAG) const {
+
+  // Implements a vector truncate that fits in a vector register as a shuffle.
+  // We want to legalize vector truncates down to where the source fits in
+  // a vector register (and target is therefore smaller than vector register
+  // size).  At that point legalization will try to custom lower the sub-legal
+  // result and get here - where we can contain the truncate as a single target
+  // operation.
+
+  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
+  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
+  //
+  // We will implement it for big-endian ordering as this (where x denotes
+  // undefined):
+  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
+  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
+  //
+  // The same operation in little-endian ordering will be:
+  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
+  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
+
+  assert(Op.getValueType().isVector() && "Vector type expected.");
+
+  SDLoc DL(Op);
+  SDValue N1 = Op.getOperand(0);
+  unsigned SrcSize = N1.getValueType().getSizeInBits();
+  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
+  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+
+  EVT TrgVT = Op.getValueType();
+  unsigned TrgNumElts = TrgVT.getVectorNumElements();
+  EVT EltVT = TrgVT.getVectorElementType();
+  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+  // First list the elements we want to keep.
+  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
+  SmallVector<int, 16> ShuffV;
+  if (Subtarget.isLittleEndian())
+    for (unsigned i = 0; i < TrgNumElts; ++i)
+      ShuffV.push_back(i * SizeMult);
+  else
+    for (unsigned i = 1; i <= TrgNumElts; ++i)
+      ShuffV.push_back(i * SizeMult - 1);
+
+  // Populate the remaining elements with undefs.
+  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
+    // Indices >= WideNumElts select from the second (undef) shuffle operand.
+    ShuffV.push_back(i + WideNumElts);
+
+  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
+  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+}
+
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
 /// possible.
 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -9604,10 +9859,63 @@
     BifID = Intrinsic::ppc_altivec_vmaxsh;
   else if (VT == MVT::v16i8)
     BifID = Intrinsic::ppc_altivec_vmaxsb;
-  
+
   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
 }
 
+// Custom lowering for fpext vf32 to v2f64
+SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+
+  assert(Op.getOpcode() == ISD::FP_EXTEND &&
+         "Should only be called for ISD::FP_EXTEND");
+
+  // We only want to custom lower an extend from v2f32 to v2f64.
+  if (Op.getValueType() != MVT::v2f64 ||
+      Op.getOperand(0).getValueType() != MVT::v2f32)
+    return SDValue();
+
+  SDLoc dl(Op);
+  SDValue Op0 = Op.getOperand(0);
+
+  switch (Op0.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::FADD:
+  case ISD::FMUL:
+  case ISD::FSUB: {
+    SDValue NewLoad[2];
+    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
+      // Ensure both input are loads.
+      SDValue LdOp = Op0.getOperand(i);
+      if (LdOp.getOpcode() != ISD::LOAD)
+        return SDValue();
+      // Generate new load node.
+      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
+      SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+      NewLoad[i] =
+        DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+                                DAG.getVTList(MVT::v4f32, MVT::Other),
+                                LoadOps, LD->getMemoryVT(),
+                                LD->getMemOperand());
+    }
+    SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
+                              NewLoad[0], NewLoad[1],
+                              Op0.getNode()->getFlags());
+    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Op0);
+    SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue NewLd =
+      DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+                              DAG.getVTList(MVT::v4f32, MVT::Other),
+                              LoadOps, LD->getMemoryVT(), LD->getMemOperand());
+    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+  }
+  }
+  llvm_unreachable("ERROR:Should return for all cases within swtich.");
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9661,6 +9969,7 @@
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
   case ISD::ABS:                return LowerABS(Op, DAG);
+  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
 
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
@@ -9701,7 +10010,7 @@
   }
   case ISD::INTRINSIC_W_CHAIN: {
     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
-        Intrinsic::ppc_is_decremented_ctr_nonzero)
+        Intrinsic::loop_decrement)
       break;
 
     assert(N->getValueType(0) == MVT::i1 &&
@@ -9737,6 +10046,14 @@
       return;
     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
+  case ISD::TRUNCATE: {
+    EVT TrgVT = N->getValueType(0);
+    if (TrgVT.isVector() &&
+        isOperationCustom(N->getOpcode(), TrgVT) &&
+        N->getOperand(0).getValueType().getSizeInBits() <= 128)
+      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+    return;
+  }
   case ISD::BITCAST:
     // Don't handle bitcast here.
     return;
@@ -9822,10 +10139,10 @@
   MachineFunction *F = BB->getParent();
   MachineFunction::iterator It = ++BB->getIterator();
 
-  unsigned dest = MI.getOperand(0).getReg();
-  unsigned ptrA = MI.getOperand(1).getReg();
-  unsigned ptrB = MI.getOperand(2).getReg();
-  unsigned incr = MI.getOperand(3).getReg();
+  Register dest = MI.getOperand(0).getReg();
+  Register ptrA = MI.getOperand(1).getReg();
+  Register ptrB = MI.getOperand(2).getReg();
+  Register incr = MI.getOperand(3).getReg();
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -9841,7 +10158,7 @@
   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   MachineRegisterInfo &RegInfo = F->getRegInfo();
-  unsigned TmpReg = (!BinOpcode) ? incr :
+  Register TmpReg = (!BinOpcode) ? incr :
     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass);
 
@@ -9949,20 +10266,20 @@
       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
 
-  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
-  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned ShiftReg =
+  Register PtrReg = RegInfo.createVirtualRegister(RC);
+  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+  Register ShiftReg =
       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
-  unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Ptr1Reg;
-  unsigned TmpReg =
+  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+  Register Ptr1Reg;
+  Register TmpReg =
       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
 
   //  thisMBB:
@@ -10764,23 +11081,23 @@
         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
 
-    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
-    unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned ShiftReg =
+    Register PtrReg = RegInfo.createVirtualRegister(RC);
+    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+    Register ShiftReg =
         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
-    unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Ptr1Reg;
-    unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+    Register Ptr1Reg;
+    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
+    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
     //  thisMBB:
     //   ...
     //   fallthrough --> loopMBB
@@ -10968,7 +11285,147 @@
     MachineRegisterInfo &RegInfo = F->getRegInfo();
     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
-    return BB;
+    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+            MI.getOperand(0).getReg())
+        .addReg(CRReg);
+  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
+    DebugLoc Dl = MI.getDebugLoc();
+    unsigned Imm = MI.getOperand(1).getImm();
+    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
+    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+            MI.getOperand(0).getReg())
+        .addReg(PPC::CR0EQ);
+  } else if (MI.getOpcode() == PPC::SETRNDi) {
+    DebugLoc dl = MI.getDebugLoc();
+    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+    // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
+    // the following settings:
+    //   00 Round to nearest
+    //   01 Round to 0
+    //   10 Round to +inf
+    //   11 Round to -inf
+
+    // When the operand is immediate, using the two least significant bits of
+    // the immediate to set the bits 62:63 of FPSCR.
+    unsigned Mode = MI.getOperand(1).getImm();
+    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
+      .addImm(31);
+
+    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
+      .addImm(30);
+  } else if (MI.getOpcode() == PPC::SETRND) {
+    DebugLoc dl = MI.getDebugLoc();
+
+    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
+    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
+    // If the target doesn't have DirectMove, we should use stack to do the
+    // conversion, because the target doesn't have the instructions like mtvsrd
+    // or mfvsrd to do this conversion directly.
+    auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
+      if (Subtarget.hasDirectMove()) {
+        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
+          .addReg(SrcReg);
+      } else {
+        // Use stack to do the register copy.
+        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
+        MachineRegisterInfo &RegInfo = F->getRegInfo();
+        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
+        if (RC == &PPC::F8RCRegClass) {
+          // Copy register from F8RCRegClass to G8RCRegclass.
+          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
+                 "Unsupported RegClass.");
+
+          StoreOp = PPC::STFD;
+          LoadOp = PPC::LD;
+        } else {
+          // Copy register from G8RCRegClass to F8RCRegclass.
+          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
+                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
+                 "Unsupported RegClass.");
+        }
+
+        MachineFrameInfo &MFI = F->getFrameInfo();
+        int FrameIdx = MFI.CreateStackObject(8, 8, false);
+
+        MachineMemOperand *MMOStore = F->getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+          MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+          MFI.getObjectAlignment(FrameIdx));
+
+        // Store the SrcReg into the stack.
+        BuildMI(*BB, MI, dl, TII->get(StoreOp))
+          .addReg(SrcReg)
+          .addImm(0)
+          .addFrameIndex(FrameIdx)
+          .addMemOperand(MMOStore);
+
+        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+          MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+          MFI.getObjectAlignment(FrameIdx));
+
+        // Load from the stack where SrcReg is stored, and save to DestReg,
+        // so we have done the RegClass conversion from RegClass::SrcReg to
+        // RegClass::DestReg.
+        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
+          .addImm(0)
+          .addFrameIndex(FrameIdx)
+          .addMemOperand(MMOLoad);
+      }
+    };
+
+    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+    // When the operand is gprc register, use two least significant bits of the
+    // register and mtfsf instruction to set the bits 62:63 of FPSCR.
+    //
+    // copy OldFPSCRTmpReg, OldFPSCRReg
+    // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
+    // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
+    // copy NewFPSCRReg, NewFPSCRTmpReg
+    // mtfsf 255, NewFPSCRReg
+    MachineOperand SrcOp = MI.getOperand(1);
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
+
+    unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+    unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+    // The first operand of INSERT_SUBREG should be a register which has
+    // subregisters, we only care about its RegClass, so we should use an
+    // IMPLICIT_DEF register.
+    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
+    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
+      .addReg(ImDefReg)
+      .add(SrcOp)
+      .addImm(1);
+
+    unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
+      .addReg(OldFPSCRTmpReg)
+      .addReg(ExtSrcReg)
+      .addImm(0)
+      .addImm(62);
+
+    unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
+
+    // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
+    // bits of FPSCR.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
+      .addImm(255)
+      .addReg(NewFPSCRReg)
+      .addImm(0)
+      .addImm(0);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
@@ -11006,7 +11463,9 @@
     if (RefinementSteps == ReciprocalEstimate::Unspecified)
       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
 
-    UseOneConstNR = true;
+    // The Newton-Raphson computation with a single constant does not provide
+    // enough accuracy on some CPUs.
+    UseOneConstNR = !Subtarget.needsTwoConstNR();
     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
   return SDValue();
@@ -12062,9 +12521,14 @@
          "Should be called with a BUILD_VECTOR node");
 
   SDLoc dl(N);
+
+  // Return early for non byte-sized type, as they can't be consecutive.
+  if (!N->getValueType(0).getVectorElementType().isByteSized())
+    return SDValue();
+
   bool InputsAreConsecutiveLoads = true;
   bool InputsAreReverseConsecutive = true;
-  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
   SDValue FirstInput = N->getOperand(0);
   bool IsRoundOfExtLoad = false;
 
@@ -12332,9 +12796,8 @@
   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
   if (!Ext1Op || !Ext2Op)
     return SDValue();
-  if (Ext1.getValueType() != MVT::i32 ||
-      Ext2.getValueType() != MVT::i32)
-  if (Ext1.getOperand(0) != Ext2.getOperand(0))
+  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
+      Ext1.getOperand(0) != Ext2.getOperand(0))
     return SDValue();
 
   int FirstElem = Ext1Op->getZExtValue();
@@ -12664,6 +13127,8 @@
     return combineSRA(N, DCI);
   case ISD::SRL:
     return combineSRL(N, DCI);
+  case ISD::MUL:
+    return combineMUL(N, DCI);
   case PPCISD::SHL:
     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
         return N->getOperand(0);
@@ -13246,7 +13711,7 @@
 
     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero) {
+          Intrinsic::loop_decrement) {
 
       // We now need to make the intrinsic dead (it cannot be instruction
       // selected).
@@ -13272,14 +13737,14 @@
     if (LHS.getOpcode() == ISD::AND &&
         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+          Intrinsic::loop_decrement &&
         isa<ConstantSDNode>(LHS.getOperand(1)) &&
         !isNullConstant(LHS.getOperand(1)))
       LHS = LHS.getOperand(0);
 
     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+          Intrinsic::loop_decrement &&
         isa<ConstantSDNode>(RHS)) {
       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
              "Counter decrement comparison is not EQ or NE");
@@ -13355,9 +13820,9 @@
   }
   case ISD::BUILD_VECTOR:
     return DAGCombineBuildVector(N, DCI);
-  case ISD::ABS: 
+  case ISD::ABS:
     return combineABS(N, DCI);
-  case ISD::VSELECT: 
+  case ISD::VSELECT:
     return combineVSelect(N, DCI);
   }
 
@@ -13453,6 +13918,15 @@
     if (!ML)
       break;
 
+    if (!DisableInnermostLoopAlign32) {
+      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
+      // so that we can decrease cache misses and branch-prediction misses.
+      // Actual alignment of the loop will depend on the hotness check and other
+      // logic in alignBlocks.
+      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
+        return 5;
+    }
+
     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
 
     // For small loops (between 5 and 8 instructions), align to a 32-byte
@@ -13502,7 +13976,7 @@
     return C_RegisterClass;
   } else if (Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "ws" ||
-             Constraint == "wi") {
+             Constraint == "wi" || Constraint == "ww") {
     return C_RegisterClass; // VSX registers.
   }
   return TargetLowering::getConstraintType(Constraint);
@@ -13530,10 +14004,12 @@
             StringRef(constraint) == "wf") &&
            type->isVectorTy())
     return CW_Register;
-  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
-    return CW_Register;
   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
     return CW_Register; // just hold 64-bit integers data.
+  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+    return CW_Register;
+  else if (StringRef(constraint) == "ww" && type->isFloatTy())
+    return CW_Register;
 
   switch (*constraint) {
   default:
@@ -13619,7 +14095,7 @@
              Constraint == "wf" || Constraint == "wi") &&
              Subtarget.hasVSX()) {
     return std::make_pair(0U, &PPC::VSRCRegClass);
-  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
     if (VT == MVT::f32 && Subtarget.hasP8Vector())
       return std::make_pair(0U, &PPC::VSSRCRegClass);
     else
@@ -13865,7 +14341,7 @@
   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
     return true;
 
-  // JumpTable and BlockAddress are accessed as got-indirect. 
+  // JumpTable and BlockAddress are accessed as got-indirect.
   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
     return true;
 
@@ -14082,18 +14558,16 @@
 /// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
-                                           unsigned DstAlign, unsigned SrcAlign,
-                                           bool IsMemset, bool ZeroMemset,
-                                           bool MemcpyStrSrc,
-                                           MachineFunction &MF) const {
+EVT PPCTargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
-    const Function &F = MF.getFunction();
     // When expanding a memset, require at least two QPX instructions to cover
     // the cost of loading the value to be stored from the constant pool.
     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
-        !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
       return MVT::v4f64;
     }
 
@@ -14178,6 +14652,7 @@
 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned,
                                                        unsigned,
+                                                       MachineMemOperand::Flags,
                                                        bool *Fast) const {
   if (DisablePPCUnaligned)
     return false;
@@ -14324,7 +14799,7 @@
     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
       .addReg(*I);
 
-    // Insert the copy-back instructions right before the terminator
+    // Insert the copy-back instructions right before the terminator.
     for (auto *Exit : Exits)
       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
               TII->get(TargetOpcode::COPY), *I)
@@ -14345,7 +14820,8 @@
     return TargetLowering::insertSSPDeclarations(M);
 }
 
-bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                     bool ForCodeSize) const {
   if (!VT.isSimple() || !Subtarget.hasVSX())
     return false;
 
@@ -14585,6 +15061,89 @@
   return SDValue();
 }
 
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
+  if (!ConstOpOrElement)
+    return SDValue();
+
+  // An imul is usually smaller than the alternative sequence for legal type.
+  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+      isOperationLegal(ISD::MUL, N->getValueType(0)))
+    return SDValue();
+
+  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
+    switch (this->Subtarget.getDarwinDirective()) {
+    default:
+      // TODO: enhance the condition for subtarget before pwr8
+      return false;
+    case PPC::DIR_PWR8:
+      //  type        mul     add    shl
+      // scalar        4       1      1
+      // vector        7       2      2
+      return true;
+    case PPC::DIR_PWR9:
+      //  type        mul     add    shl
+      // scalar        5       2      2
+      // vector        7       2      2
+
+      // The cycle ratios of the related operations are shown in the table above.
+      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
+      // scalar and vector type. For 2 instrs patterns, add/sub + shl
+      // are 4, it is always profitable; but for 3 instrs patterns
+      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
+      // So we should only do it for vector type.
+      return IsAddOne && IsNeg ? VT.isVector() : true;
+    }
+  };
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
+  bool IsNeg = MulAmt.isNegative();
+  APInt MulAmtAbs = MulAmt.abs();
+
+  if ((MulAmtAbs - 1).isPowerOf2()) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+
+    if (!IsProfitable(IsNeg, true, VT))
+      return SDValue();
+
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 =
+        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
+    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+
+    if (!IsNeg)
+      return Res;
+
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+  } else if ((MulAmtAbs + 1).isPowerOf2()) {
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+
+    if (!IsProfitable(IsNeg, false, VT))
+      return SDValue();
+
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 =
+        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
+
+    if (!IsNeg)
+      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
+    else
+      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+
+  } else {
+    return SDValue();
+  }
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
   if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 30acd60..97422c6 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
 //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 
-#include "PPC.h"
 #include "PPCInstrInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -41,7 +39,7 @@
     // the enum. The order of elements in this enum matters!
     // Values that are added after this entry:
     //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
-    // are considerd memory opcodes and are treated differently than entries
+    // are considered memory opcodes and are treated differently than entries
     // that come before it. For example, ADD or MUL should be placed before
     // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
     // after it.
@@ -161,7 +159,7 @@
 
       /// CALL - A direct function call.
       /// CALL_NOP is a call with the special NOP which follows 64-bit
-      /// SVR4 calls.
+      /// SVR4 calls and 32-bit/64-bit AIX calls.
       CALL, CALL_NOP,
 
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -193,9 +191,18 @@
       /// Direct move from a GPR to a VSX register (zero)
       MTVSRZ,
 
-      /// Direct move of 2 consective GPR to a VSX register.
+      /// Direct move of 2 consecutive GPR to a VSX register.
       BUILD_FP128,
 
+      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+      /// unsupported for this target.
+      /// Merge 2 GPRs to a single SPE register.
+      BUILD_SPE64,
+
+      /// Extract SPE register component, second argument is high or low.
+      EXTRACT_SPE,
+
       /// Extract a subvector from signed integer vector and convert to FP.
       /// It is primarily used to convert a (widened) illegal integer vector
       /// type to a legal floating point vector type.
@@ -265,11 +272,11 @@
       CR6UNSET,
 
       /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
-      /// on PPC32.
+      /// for non-position independent code on PPC32.
       PPC32_GOT,
 
       /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-      /// local dynamic TLS on PPC32.
+      /// local dynamic TLS and position independent code on PPC32.
       PPC32_PICGOT,
 
       /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
@@ -405,6 +412,9 @@
       /// representation.
       QBFLT,
 
+      /// Custom extend v4f32 to v2f64.
+      FP_EXTEND_LH,
+
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -446,6 +456,10 @@
       /// an xxswapd.
       LXVD2X,
 
+      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+      /// v2f32 value into the lower half of a VSR register.
+      LD_VSX_LH,
+
       /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
       /// Maps directly to an stxvd2x instruction that will be preceded by
       /// an xxswapd.
@@ -620,6 +634,8 @@
       return true;
     }
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }
@@ -653,18 +669,27 @@
                                    ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be more efficiently represented as [r+imm].
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
-    /// can be represented as an indexed [r+r] operation.  Returns false if it
-    /// can be more efficiently represented with [r+imm].
+    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+    /// is non-zero, only accept displacement which is not suitable for [r+imm].
+    /// Returns false if it can be represented by [r+imm], which are preferred.
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
-                             SelectionDAG &DAG) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment = 0) const;
 
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
-    /// is not better represented as reg+reg.  If Aligned is true, only accept
-    /// displacements suitable for STD and friends, i.e. multiples of 4.
+    /// is not better represented as reg+reg. If \p EncodingAlignment is
+    /// non-zero, only accept displacements suitable for instruction encoding
+    /// requirement, i.e. multiples of 4 for DS form.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG, unsigned Alignment) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment) const;
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
@@ -833,14 +858,14 @@
     EVT
     getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                        MachineFunction &MF) const override;
+                        const AttributeList &FuncAttributes) const override;
 
     /// Is unaligned memory access allowed for the given type, and is it fast
     /// relative to software emulation.
-    bool allowsMisalignedMemoryAccesses(EVT VT,
-                                        unsigned AddrSpace,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AddrSpace, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -888,7 +913,8 @@
     bool useLoadStackGuardNode() const override;
     void insertSSPDeclarations(Module &M) const override;
 
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
 
     unsigned getJumpTableEncoding() const override;
     bool isJumpTableRelative() const override;
@@ -898,14 +924,6 @@
                                                unsigned JTI,
                                                MCContext &Ctx) const override;
 
-    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
-                                           CallingConv:: ID CC,
-                                           EVT VT) const override;
-
-    MVT getRegisterTypeForCallingConv(LLVMContext &Context,
-                                      CallingConv:: ID CC,
-                                      EVT VT) const override;
-
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
@@ -953,6 +971,8 @@
     SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                  const SDLoc &dl) const;
 
+    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 
@@ -1019,6 +1039,7 @@
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1106,6 +1127,15 @@
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &InVals,
                              ImmutableCallSite CS) const;
+    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+                          CallingConv::ID CallConv, bool isVarArg,
+                          bool isTailCall, bool isPatchPoint,
+                          const SmallVectorImpl<ISD::OutputArg> &Outs,
+                          const SmallVectorImpl<SDValue> &OutVals,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals,
+                          ImmutableCallSite CS) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1119,6 +1149,7 @@
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -1137,8 +1168,6 @@
                              int &RefinementSteps) const override;
     unsigned combineRepeatedFPDivisors() const override;
 
-    CCAssignFn *useFastISelCCs(unsigned Flag) const;
-
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
@@ -1169,30 +1198,6 @@
 
   } // end namespace PPC
 
-  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                  CCValAssign::LocInfo &LocInfo,
-                                  ISD::ArgFlagsTy &ArgFlags,
-                                  CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State);
-
-  bool
-  CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                                 MVT &LocVT,
-                                                 CCValAssign::LocInfo &LocInfo,
-                                                 ISD::ArgFlagsTy &ArgFlags,
-                                                 CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                           MVT &LocVT,
-                                           CCValAssign::LocInfo &LocInfo,
-                                           ISD::ArgFlagsTy &ArgFlags,
-                                           CCState &State);
-
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 2ce6ad3..d598567 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,9 +1,8 @@
 //===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -168,7 +167,7 @@
     XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
                               (ins memrix:$src),
                               "bctrl\n\tld 2, $src", IIC_BrB,
-                              [(PPCbctrl_load_toc ixaddr:$src)]>,
+                              [(PPCbctrl_load_toc iaddrX4:$src)]>,
     Requires<[In64BitMode]>;
 }
 
@@ -193,6 +192,12 @@
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
           (BL8_NOP texternalsym:$dst)>;
 
+// Calls for AIX
+def : Pat<(PPCcall (i64 mcsym:$dst)),
+          (BL8 mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
+          (BL8_NOP mcsym:$dst)>;
+
 // Atomic operations
 // FIXME: some of these might be used with constant operands. This will result
 // in constant materialization instructions that may be redundant. We currently
@@ -383,7 +388,7 @@
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let hasSideEffects = 1, Defs = [CTR8] in {
-let Pattern = [(int_ppc_mtctr i64:$rS)] in
+let Pattern = [(int_set_loop_iterations i64:$rS)] in
 def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
                                "mtctr $rS", IIC_SprMTSPR>,
                  PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -720,10 +725,17 @@
                          "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
                          [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
 
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
-                          "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
-                          [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
-                          isPPC64, Requires<[IsISA3_0]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
+                                (ins gprc:$rS, u6imm:$SH),
+                                "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+                                [(set i64:$rA,
+                                      (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+                                isPPC64, Requires<[IsISA3_0]>;
+
+defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+                           "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+                           []>, isPPC64, Requires<[IsISA3_0]>;
 
 // For fast-isel:
 let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,13 +785,21 @@
 let Predicates = [IsISA3_0] in {
 def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
                        "maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+def MADDHDU : VAForm_1a<49, 
+                       (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
                        "maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
-                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
+def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
+                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+                       [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
+                       isPPC64;
 def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
                        "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+  def MADDLD8 : VAForm_1a<51, 
+                       (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+                       [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
+                       isPPC64;
   def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
                        "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
 }
@@ -911,7 +931,7 @@
 def LWA  : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
                     "lwa $rD, $src", IIC_LdStLWA,
                     [(set i64:$rD,
-                          (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+                          (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
                     PPC970_DGroup_Cracked;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -920,7 +940,7 @@
                         PPC970_DGroup_Cracked;
 def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
                         "lwax $rD, $src", IIC_LdStLHA,
-                        [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+                        [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
                         PPC970_DGroup_Cracked;
 // For fast-isel:
 let isCodeGenOnly = 1, mayLoad = 1 in {
@@ -1022,7 +1042,7 @@
 let PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                     "ld $rD, $src", IIC_LdStLD,
-                    [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+                    [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
 // The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1045,7 +1065,7 @@
 
 def LDX  : XForm_1_memOp<31,  21, (outs g8rc:$rD), (ins memrr:$src),
                         "ldx $rD, $src", IIC_LdStLD,
-                        [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+                        [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
 def LDBRX : XForm_1_memOp<31,  532, (outs g8rc:$rD), (ins memrr:$src),
                           "ldbrx $rD, $src", IIC_LdStLoad,
                           [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
@@ -1214,10 +1234,10 @@
 // Normal 8-byte stores.
 def STD  : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
                     "std $rS, $dst", IIC_LdStSTD,
-                    [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+                    [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
 def STDX  : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
                           "stdx $rS, $dst", IIC_LdStSTD,
-                          [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+                          [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
                           PPC970_DGroup_Cracked;
 def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
                           "stdbrx $rS, $dst", IIC_LdStStore,
@@ -1433,11 +1453,11 @@
           (STDX $rS, xoaddr:$dst)>;
 
 // 64-bits atomic loads and stores
-def : Pat<(atomic_load_64 ixaddr:$src), (LD  memrix:$src)>;
-def : Pat<(atomic_load_64 xaddr:$src),  (LDX memrr:$src)>;
+def : Pat<(atomic_load_64 iaddrX4:$src), (LD  memrix:$src)>;
+def : Pat<(atomic_load_64 xaddrX4:$src),  (LDX memrr:$src)>;
 
-def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD  g8rc:$val, memrix:$ptr)>;
-def : Pat<(atomic_store_64 xaddr:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD  g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddrX4:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
 
 let Predicates = [IsISA3_0] in {
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 69b19e4..8176c51 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -822,7 +821,9 @@
 def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
 def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
 
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+    isReMaterializable = 1 in {
+
 def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
                       "vxor $vD, $vD, $vD", IIC_VecFP,
                       [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
@@ -899,6 +900,32 @@
 def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
 def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
 
+// Max/Min
+def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMAXUB $src1, $src2))>;
+def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMAXSB $src1, $src2))>;
+def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMAXUH $src1, $src2))>;
+def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMAXSH $src1, $src2))>;
+def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMAXUW $src1, $src2))>;
+def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMAXSW $src1, $src2))>;
+def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMINUB $src1, $src2))>;
+def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMINSB $src1, $src2))>;
+def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMINUH $src1, $src2))>;
+def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMINSH $src1, $src2))>;
+def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMINUW $src1, $src2))>;
+def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMINSW $src1, $src2))>;
+
 // Shuffles.
 
 // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
index cf71b1c..323f7e3 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -1,9 +1,8 @@
 //===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 2fe765d..a48eb16 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,9 +1,8 @@
 //===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,14 +37,6 @@
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
 
-  /// Indicate that the VSX instruction is to use VSX numbering/encoding.
-  /// Since ISA 3.0, there are scalar instructions that use the upper
-  /// half of the VSX register set only. Rather than adding further complexity
-  /// to the register class set, the VSX registers just include the Altivec
-  /// registers and this flag decides the numbering to be used for them.
-  bits<1> UseVSXReg = 0;
-  let TSFlags{6}   = UseVSXReg;
-
   // Indicate that this instruction is of type X-Form Load or Store
   bits<1> XFormMemOp = 0;
   let TSFlags{7}  = XFormMemOp;
@@ -74,7 +65,6 @@
 class PPC970_Unit_VPERM    { bits<3> PPC970_Unit = 6;   }
 class PPC970_Unit_BRU      { bits<3> PPC970_Unit = 7;   }
 
-class UseVSXReg { bits<1> UseVSXReg = 1; }
 class XFormMemOp { bits<1> XFormMemOp = 1; }
 
 // Two joined instructions; used to emit two adjacent instructions as one.
@@ -730,6 +720,7 @@
   : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
 }
 
+// [PO RT /// RB XO RC]
 class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
@@ -1193,9 +1184,9 @@
   let Inst{11-15} = DCMX{4-0};
   let Inst{16-20} = XB{4-0};
   let Inst{21-24} = xo1;
-  let Inst{25}    = DCMX{5};
+  let Inst{25}    = DCMX{6};
   let Inst{26-28} = xo2;
-  let Inst{29}    = DCMX{6};
+  let Inst{29}    = DCMX{5};
   let Inst{30}    = XB{5};
   let Inst{31}    = XT{5};
 }
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 0efe797..104b57a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory  -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,55 +20,53 @@
 }]>;
 
 let hasSideEffects = 1 in {
-def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+def TCHECK_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>;
+def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>;
 }
 
 
 let Predicates = [HasHTM] in {
 
+let Defs = [CR0] in {
 def TBEGIN : XForm_htm0 <31, 654,
-                         (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+                         (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
 
 def TEND : XForm_htm1 <31, 686,
-                       (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+                       (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
 
 def TABORT : XForm_base_r3xo <31, 910,
-                              (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+                              (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
                               []>, isDOT {
   let RST = 0;
   let B = 0;
 }
 
 def TABORTWC : XForm_base_r3xo <31, 782,
-                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                                 "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                 isDOT;
 
 def TABORTWCI : XForm_base_r3xo <31, 846,
-                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                                  "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                  isDOT;
 
 def TABORTDC : XForm_base_r3xo <31, 814,
-                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                                 "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                 isDOT;
 
 def TABORTDCI : XForm_base_r3xo <31, 878,
-                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                                  "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                  isDOT;
 
 def TSR : XForm_htm2 <31, 750,
-                      (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+                      (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
                       isDOT;
 
-def TCHECK : XForm_htm3 <31, 718,
-                        (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
-
-
 def TRECLAIM : XForm_base_r3xo <31, 942,
-                                (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+                                (outs), (ins gprc:$A), "treclaim. $A",
                                 IIC_SprMTSPR, []>,
                                 isDOT {
   let RST = 0;
@@ -77,13 +74,17 @@
 }
 
 def TRECHKPT : XForm_base_r3xo <31, 1006,
-                                (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+                                (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
                                 isDOT {
   let RST = 0;
   let A = 0;
   let B = 0;
 }
 
+}
+
+def TCHECK : XForm_htm3 <31, 718,
+                        (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
 // Builtins
 
 // All HTM instructions, with the exception of tcheck, set CR0 with the
@@ -94,15 +95,11 @@
 // tbegin builtin API which defines a return value of 1 as success.
 
 def : Pat<(int_ppc_tbegin i32:$R),
-           (XORI
-             (EXTRACT_SUBREG (
-               TBEGIN (HTM_get_imm imm:$R)), sub_eq),
-            1)>;
+           (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>;
 
 def : Pat<(int_ppc_tend i32:$R),
           (TEND (HTM_get_imm imm:$R))>;
 
-
 def : Pat<(int_ppc_tabort i32:$R),
           (TABORT $R)>;
 
@@ -167,6 +164,8 @@
           (TSR 0)>;
 
 def : Pat<(i64 (int_ppc_ttest)),
-          (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+          (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                      (TABORTWCI 0, (LI 0), 0), sub_32)),
+                   36, 28)>;
 
 } // [HasHTM]
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d754ce2..a787bdd 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -333,6 +332,17 @@
   case PPC::ADDIStocHA:
   case PPC::ADDItocL:
   case PPC::LOAD_STACK_GUARD:
+  case PPC::XXLXORz:
+  case PPC::XXLXORspz:
+  case PPC::XXLXORdpz:
+  case PPC::V_SET0B:
+  case PPC::V_SET0H:
+  case PPC::V_SET0:
+  case PPC::V_SETALLONESB:
+  case PPC::V_SETALLONESH:
+  case PPC::V_SETALLONES:
+  case PPC::CRSET:
+  case PPC::CRUNSET:
     return true;
   }
   return false;
@@ -381,9 +391,9 @@
   // Swap op1/op2
   assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
          "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
-  unsigned Reg0 = MI.getOperand(0).getReg();
-  unsigned Reg1 = MI.getOperand(1).getReg();
-  unsigned Reg2 = MI.getOperand(2).getReg();
+  Register Reg0 = MI.getOperand(0).getReg();
+  Register Reg1 = MI.getOperand(1).getReg();
+  Register Reg2 = MI.getOperand(2).getReg();
   unsigned SubReg1 = MI.getOperand(1).getSubReg();
   unsigned SubReg2 = MI.getOperand(2).getSubReg();
   bool Reg1IsKill = MI.getOperand(1).isKill();
@@ -411,7 +421,7 @@
 
   if (NewMI) {
     // Create a new instruction.
-    unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
+    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
     bool Reg0IsDead = MI.getOperand(0).isDead();
     return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
         .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
@@ -942,12 +952,16 @@
     return;
   } else if (PPC::G8RCRegClass.contains(SrcReg) &&
              PPC::VSFRCRegClass.contains(DestReg)) {
+    assert(Subtarget.hasDirectMove() &&
+           "Subtarget doesn't support directmove, don't know how to copy.");
     BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
     NumGPRtoVSRSpill++;
     getKillRegState(KillSrc);
     return;
   } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
              PPC::G8RCRegClass.contains(DestReg)) {
+    assert(Subtarget.hasDirectMove() &&
+           "Subtarget doesn't support directmove, don't know how to copy.");
     BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
     getKillRegState(KillSrc);
     return;
@@ -963,7 +977,6 @@
     return;
   }
 
-
   unsigned Opc;
   if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::OR;
@@ -996,6 +1009,8 @@
     Opc = PPC::QVFMRb;
   else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::CROR;
+  else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::OR;
   else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::EVOR;
   else
@@ -1066,6 +1081,10 @@
       OpcodeIndex = SOK_Float8Spill;
     } else if (PPC::F4RCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_Float4Spill;
+    } else if (PPC::SPERCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPESpill;
+    } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPE4Spill;
     } else if (PPC::CRRCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_CRSpill;
     } else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1152,6 +1171,10 @@
       OpcodeIndex = SOK_Float8Spill;
     } else if (PPC::F4RCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_Float4Spill;
+    } else if (PPC::SPERCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPESpill;
+    } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPE4Spill;
     } else if (PPC::CRRCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_CRSpill;
     } else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1632,6 +1655,7 @@
   if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
     return false;
 
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
   // The record forms set the condition register based on a signed comparison
   // with zero (so says the ISA manual). This is not as straightforward as it
   // seems, however, because this is always a 64-bit comparison on PPC64, even
@@ -1645,6 +1669,11 @@
   bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
   bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
 
+  // Look through copies unless that gets us to a physical register.
+  unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+  if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+    SrcReg = ActualSrc;
+
   // Get the unique definition of SrcReg.
   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   if (!MI) return false;
@@ -1745,7 +1774,6 @@
       return false;
 
     PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
-    PPC::Predicate NewPred = Pred;
     unsigned PredCond = PPC::getPredicateCondition(Pred);
     unsigned PredHint = PPC::getPredicateHint(Pred);
     int16_t Immed = (int16_t)Value;
@@ -1755,25 +1783,23 @@
     if (Immed == -1 && PredCond == PPC::PRED_GT)
       // We convert "greater than -1" into "greater than or equal to 0",
       // since we are assuming signed comparison by !equalityOnly
-      NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
     else if (Immed == -1 && PredCond == PPC::PRED_LE)
       // We convert "less than or equal to -1" into "less than 0".
-      NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
     else if (Immed == 1 && PredCond == PPC::PRED_LT)
       // We convert "less than 1" into "less than or equal to 0".
-      NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
     else if (Immed == 1 && PredCond == PPC::PRED_GE)
       // We convert "greater than or equal to 1" into "greater than 0".
-      NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
     else
       return false;
 
-    PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
-                                            NewPred));
+    PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred));
   }
 
   // Search for Sub.
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
   --I;
 
   // Get ready to iterate backward from CmpInstr.
@@ -1992,7 +2018,7 @@
 unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
-  if (Opcode == PPC::INLINEASM) {
+  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -2358,13 +2384,6 @@
         MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
         It++;
         unsigned Reg = MI.getOperand(i).getReg();
-        // MachineInstr::readsRegister only returns true if the machine
-        // instruction reads the exact register or its super-register. It
-        // does not consider uses of sub-registers which seems like strange
-        // behaviour. Nonetheless, if we end up with a 64-bit register here,
-        // get the corresponding 32-bit register to check.
-        if (PPC::G8RCRegClass.contains(Reg))
-          Reg = Reg - PPC::X0 + PPC::R0;
 
         // Is this register defined by some form of add-immediate (including
         // load-immediate) within this basic block?
@@ -2381,7 +2400,7 @@
               return &*It;
             }
             break;
-          } else if (It->readsRegister(Reg, &getRegisterInfo())) 
+          } else if (It->readsRegister(Reg, &getRegisterInfo()))
             // If we see another use of this reg between the def and the MI,
             // we want to flat it so the def isn't deleted.
             SeenIntermediateUse = true;
@@ -2424,6 +2443,83 @@
   return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
 }
 
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+                                     unsigned RegNo) const {
+  const MachineRegisterInfo &MRI =
+      StartMI.getParent()->getParent()->getRegInfo();
+  if (MRI.isSSA())
+    return;
+
+  // Instructions between [StartMI, EndMI] should be in same basic block.
+  assert((StartMI.getParent() == EndMI.getParent()) &&
+         "Instructions are not in same basic block");
+
+  bool IsKillSet = false;
+
+  auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
+    MachineOperand &MO = MI.getOperand(Index);
+    if (MO.isReg() && MO.isUse() && MO.isKill() &&
+        getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
+      MO.setIsKill(false);
+  };
+
+  // Set killed flag for EndMI.
+  // No need to do anything if EndMI defines RegNo.
+  int UseIndex =
+      EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+  if (UseIndex != -1) {
+    EndMI.getOperand(UseIndex).setIsKill(true);
+    IsKillSet = true;
+    // Clear killed flag for other EndMI operands related to RegNo. In some
+    // upexpected cases, killed may be set multiple times for same register
+    // operand in same MI.
+    for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+      if (i != UseIndex)
+        clearOperandKillInfo(EndMI, i);
+  }
+
+  // Walking the inst in reverse order (EndMI -> StartMI].
+  MachineBasicBlock::reverse_iterator It = EndMI;
+  MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+  // EndMI has been handled above, skip it here.
+  It++;
+  MachineOperand *MO = nullptr;
+  for (; It != E; ++It) {
+    // Skip insturctions which could not be a def/use of RegNo.
+    if (It->isDebugInstr() || It->isPosition())
+      continue;
+
+    // Clear killed flag for all It operands related to RegNo. In some
+    // upexpected cases, killed may be set multiple times for same register
+    // operand in same MI.
+    for (int i = 0, e = It->getNumOperands(); i != e; ++i)
+        clearOperandKillInfo(*It, i);
+
+    // If killed is not set, set killed for its last use or set dead for its def
+    // if no use found.
+    if (!IsKillSet) {
+      if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
+        // Use found, set it killed.
+        IsKillSet = true;
+        MO->setIsKill(true);
+        continue;
+      } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
+                                                  &getRegisterInfo()))) {
+        // No use found, set dead for its def.
+        assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+        MO->setIsDead(true);
+        break;
+      }
+    }
+
+    if ((&*It) == &StartMI)
+      break;
+  }
+  // Ensure RegMo liveness is killed after EndMI.
+  assert((IsKillSet || (MO && MO->isDead())) &&
+         "RegNo should be killed or dead");
+}
+
 // If this instruction has an immediate form and one of its operands is a
 // result of a load-immediate or an add-immediate, convert it to
 // the immediate form if the constant is in range.
@@ -2440,8 +2536,9 @@
     return false;
   assert(ForwardingOperand < MI.getNumOperands() &&
          "The forwarding operand needs to be valid at this point");
-  bool KillFwdDefMI = !SeenIntermediateUse &&
-    MI.getOperand(ForwardingOperand).isKill();
+  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
+  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
+  unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
   if (KilledDef && KillFwdDefMI)
     *KilledDef = DefMI;
 
@@ -2450,8 +2547,9 @@
   // If this is a reg+reg instruction that has a reg+imm form,
   // and one of the operands is produced by an add-immediate,
   // try to convert it.
-  if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
-                                               *DefMI, KillFwdDefMI))
+  if (HasImmForm &&
+      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
+                                 KillFwdDefMI))
     return true;
 
   if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
@@ -2466,7 +2564,7 @@
   // If this is a reg+reg instruction that has a reg+imm form,
   // and one of the operands is produced by LI, convert it now.
   if (HasImmForm)
-    return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
+    return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
 
   bool ReplaceWithLI = false;
   bool Is64BitLI = false;
@@ -2486,6 +2584,8 @@
   case PPC::CMPLDI: {
     // Doing this post-RA would require dataflow analysis to reliably find uses
     // of the CR register set by the compare.
+    // No need to fixup killed/dead flag since this transformation is only valid
+    // before RA.
     if (PostRA)
       return false;
     // If a compare-immediate is fed by an immediate and is itself an input of
@@ -2662,6 +2762,14 @@
     if (KilledDef && SetCR)
       *KilledDef = nullptr;
     replaceInstrWithLI(MI, LII);
+
+    // Fixup killed/dead flag after transformation.
+    // Pattern:
+    // ForwardingOperandReg = LI imm1
+    // y = op2 imm2, ForwardingOperandReg(killed)
+    if (IsForwardingOperandKilled)
+      fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+
     LLVM_DEBUG(dbgs() << "With:\n");
     LLVM_DEBUG(MI.dump());
     return true;
@@ -2669,10 +2777,6 @@
   return false;
 }
 
-static bool isVFReg(unsigned Reg) {
-  return PPC::VFRCRegClass.contains(Reg);
-}
-
 bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
                                    ImmInstrInfo &III, bool PostRA) const {
   unsigned Opc = MI.getOpcode();
@@ -3007,7 +3111,7 @@
       break;
     case PPC::LXSSPX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::LXSSP;
         else {
           III.ImmOpcode = PPC::LFS;
@@ -3021,7 +3125,7 @@
       break;
     case PPC::LXSDX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::LXSD;
         else {
           III.ImmOpcode = PPC::LFD;
@@ -3039,7 +3143,7 @@
       break;
     case PPC::STXSSPX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::STXSSP;
         else {
           III.ImmOpcode = PPC::STFS;
@@ -3053,7 +3157,7 @@
       break;
     case PPC::STXSDX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::STXSD;
         else {
           III.ImmOpcode = PPC::STFD;
@@ -3110,7 +3214,7 @@
   }
 }
 
-// Check if the 'MI' that has the index OpNoForForwarding 
+// Check if the 'MI' that has the index OpNoForForwarding
 // meets the requirement described in the ImmInstrInfo.
 bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
                                                const ImmInstrInfo &III,
@@ -3156,7 +3260,7 @@
                                                MachineOperand *&RegMO) const {
   unsigned Opc = DefMI.getOpcode();
   if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
-    return false; 
+    return false;
 
   assert(DefMI.getNumOperands() >= 3 &&
          "Add inst must have at least three operands");
@@ -3169,11 +3273,10 @@
   return isAnImmediateOperand(*ImmMO);
 }
 
-bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
-                                             const MachineInstr &DefMI,
-                                             const MachineInstr &MI,
-                                             bool KillDefMI
-                                             ) const {
+bool PPCInstrInfo::isRegElgibleForForwarding(
+    const MachineOperand &RegMO, const MachineInstr &DefMI,
+    const MachineInstr &MI, bool KillDefMI,
+    bool &IsFwdFeederRegKilled) const {
   // x = addi y, imm
   // ...
   // z = lfdx 0, x   -> z = lfd imm(y)
@@ -3184,14 +3287,7 @@
   if (MRI.isSSA())
     return false;
 
-  // MachineInstr::readsRegister only returns true if the machine
-  // instruction reads the exact register or its super-register. It
-  // does not consider uses of sub-registers which seems like strange
-  // behaviour. Nonetheless, if we end up with a 64-bit register here,
-  // get the corresponding 32-bit register to check.
   unsigned Reg = RegMO.getReg();
-  if (PPC::G8RCRegClass.contains(Reg))
-    Reg = Reg - PPC::X0 + PPC::R0;
 
   // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
   MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3200,15 +3296,17 @@
   for (; It != E; ++It) {
     if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
       return false;
+    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+      IsFwdFeederRegKilled = true;
     // Made it to DefMI without encountering a clobber.
     if ((&*It) == &DefMI)
       break;
   }
   assert((&*It) == &DefMI && "DefMI is missing");
 
-  // If DefMI also uses the register to be forwarded, we can only forward it
+  // If DefMI also defines the register to be forwarded, we can only forward it
   // if DefMI is being erased.
-  if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
     return KillDefMI;
 
   return true;
@@ -3271,11 +3369,9 @@
 // is the literal zero, attempt to forward the source of the add-immediate to
 // the corresponding D-Form instruction with the displacement coming from
 // the immediate being added.
-bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
-                                              const ImmInstrInfo &III,
-                                              unsigned OpNoForForwarding,
-                                              MachineInstr &DefMI,
-                                              bool KillDefMI) const {
+bool PPCInstrInfo::transformToImmFormFedByAdd(
+    MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
+    MachineInstr &DefMI, bool KillDefMI) const {
   //         RegMO ImmMO
   //           |    |
   // x = addi reg, imm  <----- DefMI
@@ -3300,10 +3396,19 @@
   if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
     return false;
 
+  bool IsFwdFeederRegKilled = false;
   // Check if the RegMO can be forwarded to MI.
-  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
+                                 IsFwdFeederRegKilled))
     return false;
 
+  // Get killed info in case fixup needed after transformation.
+  unsigned ForwardKilledOperandReg = ~0U;
+  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  bool PostRA = !MRI.isSSA();
+  if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
+    ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
   // We know that, the MI and DefMI both meet the pattern, and
   // the Imm also meet the requirement with the new Imm-form.
   // It is safe to do the transformation now.
@@ -3327,7 +3432,7 @@
     // Otherwise, it is Constant Pool Index(CPI) or Global,
     // which is relocation in fact. We need to replace the special zero
     // register with ImmMO.
-    // Before that, we need to fixup the target flags for imm. 
+    // Before that, we need to fixup the target flags for imm.
     // For some reason, we miss to set the flag for the ImmMO if it is CPI.
     if (DefMI.getOpcode() == PPC::ADDItocL)
       ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
@@ -3354,6 +3459,22 @@
   // Update the opcode.
   MI.setDesc(get(III.ImmOpcode));
 
+  // Fix up killed/dead flag after transformation.
+  // Pattern 1:
+  // x = ADD KilledFwdFeederReg, imm
+  // n = opn KilledFwdFeederReg(killed), regn
+  // y = XOP 0, x
+  // Pattern 2:
+  // x = ADD reg(killed), imm
+  // y = XOP 0, x
+  if (IsFwdFeederRegKilled || RegMO->isKill())
+    fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+  // Pattern 3:
+  // ForwardKilledOperandReg = ADD reg, imm
+  // y = XOP 0, ForwardKilledOperandReg(killed)
+  if (ForwardKilledOperandReg != ~0U)
+    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+
   LLVM_DEBUG(dbgs() << "With:\n");
   LLVM_DEBUG(MI.dump());
 
@@ -3363,6 +3484,7 @@
 bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
                                              const ImmInstrInfo &III,
                                              unsigned ConstantOpNo,
+                                             MachineInstr &DefMI,
                                              int64_t Imm) const {
   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   bool PostRA = !MRI.isSSA();
@@ -3401,6 +3523,11 @@
       return false;
   }
 
+  // Get killed info in case fixup needed after transformation.
+  unsigned ForwardKilledOperandReg = ~0U;
+  if (PostRA && MI.getOperand(ConstantOpNo).isKill())
+    ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
+
   unsigned Opc = MI.getOpcode();
   bool SpecialShift32 =
     Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
@@ -3483,6 +3610,13 @@
       }
     }
   }
+
+  // Fix up killed/dead flag after transformation.
+  // Pattern:
+  // ForwardKilledOperandReg = LI imm
+  // y = XOP reg, ForwardKilledOperandReg(killed)
+  if (ForwardKilledOperandReg != ~0U)
+    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
   return true;
 }
 
@@ -3784,3 +3918,133 @@
   }
   return false;
 }
+
+bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
+  return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
+}
+
+bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                               MachineInstr *&CmpInst) const {
+  MachineBasicBlock *LoopEnd = L.getBottomBlock();
+  MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+  // We really "analyze" only CTR loops right now.
+  if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
+    IndVarInst = nullptr;
+    CmpInst = &*I;
+    return false;
+  }
+  return true;
+}
+
+MachineInstr *
+PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+
+  unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
+
+  // The loop set-up instruction should be in preheader
+  for (auto &I : PreHeader.instrs())
+    if (I.getOpcode() == LOOPi)
+      return &I;
+  return nullptr;
+}
+
+unsigned PPCInstrInfo::reduceLoopCount(
+    MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+    MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+    SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+    unsigned MaxIter) const {
+  // We expect a hardware loop currently. This means that IndVar is set
+  // to null, and the compare is the ENDLOOP instruction.
+  assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
+  MachineFunction *MF = MBB.getParent();
+  DebugLoc DL = Cmp.getDebugLoc();
+  MachineInstr *Loop = findLoopInstr(PreHeader);
+  if (!Loop)
+    return 0;
+  unsigned LoopCountReg = Loop->getOperand(0).getReg();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+
+  if (!LoopCount)
+    return 0;
+  // If the loop trip count is a compile-time value, then just change the
+  // value.
+  if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
+    int64_t Offset = LoopCount->getOperand(1).getImm();
+    if (Offset <= 1) {
+      LoopCount->eraseFromParent();
+      Loop->eraseFromParent();
+      return 0;
+    }
+    LoopCount->getOperand(1).setImm(Offset - 1);
+    return Offset - 1;
+  }
+
+  // The loop trip count is a run-time value.
+  // We need to subtract one from the trip count,
+  // and insert branch later to check if we're done with the loop.
+
+  // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
+  // so we don't need to generate any thing here.
+  Cond.push_back(MachineOperand::CreateImm(0));
+  Cond.push_back(MachineOperand::CreateReg(
+      Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
+  return LoopCountReg;
+}
+
+// Return true if get the base operand, byte offset of an instruction and the
+// memory width. Width is the size of memory that is being loaded/stored.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+  const MachineInstr &LdSt,
+  const MachineOperand *&BaseReg,
+  int64_t &Offset,
+  unsigned &Width,
+  const TargetRegisterInfo *TRI) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+  // Handle only loads/stores with base register followed by immediate offset.
+  if (LdSt.getNumExplicitOperands() != 3)
+    return false;
+  if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+    return false;
+
+  if (!LdSt.hasOneMemOperand())
+    return false;
+
+  Width = (*LdSt.memoperands_begin())->getSize();
+  Offset = LdSt.getOperand(1).getImm();
+  BaseReg = &LdSt.getOperand(2);
+  return true;
+}
+
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
+  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, offset from the base register and width. Width
+  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4).  If
+  // base registers are identical, and the offset of a lower memory access +
+  // the width doesn't overlap the offset of a higher memory access,
+  // then the memory accesses are different.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  int64_t OffsetA = 0, OffsetB = 0;
+  unsigned int WidthA = 0, WidthB = 0;
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+      int LowOffset = std::min(OffsetA, OffsetB);
+      int HighOffset = std::max(OffsetA, OffsetB);
+      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowOffset + LowWidth <= HighOffset)
+        return true;
+    }
+  }
+  return false;
+}
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7ed558b..70fb757e 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 
-#include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
@@ -66,9 +64,6 @@
   /// Shift count to bypass PPC970 flags
   NewDef_Shift = 6,
 
-  /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
-  /// register (v0-v31).
-  UseVSXReg = 0x1 << NewDef_Shift,
   /// This instruction is an X-Form memory operation.
   XFormMemOp = 0x1 << (NewDef_Shift+1)
 };
@@ -129,12 +124,12 @@
   // If the inst has imm-form and one of its operand is produced by a LI,
   // put the imm into the inst directly and remove the LI if possible.
   bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
-                                 unsigned ConstantOpNo, int64_t Imm) const;
+                                 unsigned ConstantOpNo, MachineInstr &DefMI,
+                                 int64_t Imm) const;
   // If the inst has imm-form and one of its operand is produced by an
   // add-immediate, try to transform it when possible.
   bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
-                                  unsigned ConstantOpNo,
-                                  MachineInstr &DefMI,
+                                  unsigned ConstantOpNo, MachineInstr &DefMI,
                                   bool KillDefMI) const;
   // Try to find that, if the instruction 'MI' contains any operand that
   // could be forwarded from some inst that feeds it. If yes, return the
@@ -159,8 +154,8 @@
                                  int64_t &Imm) const;
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
-                                 const MachineInstr &MI,
-                                 bool KillDefMI) const;
+                                 const MachineInstr &MI, bool KillDefMI,
+                                 bool &IsFwdFeederRegKilled) const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
   virtual void anchor();
@@ -362,6 +357,22 @@
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
 
+
+  /// Return true if get the base operand, byte offset of an instruction and
+  /// the memory width. Width is the size of memory that is being
+  /// loaded/stored (e.g. 1, 2, 4, 8).
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
+                                    int64_t &Offset, unsigned &Width,
+                                    const TargetRegisterInfo *TRI) const;
+
+  /// Return true if two MIs access different memory addresses and false
+  /// otherwise
+  bool
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
@@ -412,6 +423,18 @@
 
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
+
+  /// Fixup killed/dead flag for register \p RegNo between instructions [\p
+  /// StartMI, \p EndMI]. Some PostRA transformations may violate register
+  /// killed/dead flags semantics, this function can be called to fix up. Before
+  /// calling this function,
+  /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
+  /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
+  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI.
+  /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same
+  ///    basic block.
+  void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+                         unsigned RegNo) const;
   void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
   void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
                                   int64_t Imm) const;
@@ -429,14 +452,55 @@
   /// operands).
   static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
                                       unsigned OpNo) {
-    if (Desc.TSFlags & PPCII::UseVSXReg) {
-      if (isVRRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::V0);
-      else if (isVFRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::VF0);
+    int16_t regClass = Desc.OpInfo[OpNo].RegClass;
+    switch (regClass) {
+      // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31,
+      // VSX32-VSX63 during encoding/disassembling
+      case PPC::VSSRCRegClassID:
+      case PPC::VSFRCRegClassID:
+        if (isVFRegister(Reg))
+          return PPC::VSX32 + (Reg - PPC::VF0);
+        break;
+      // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31,
+      // VSX32-VSX63 during encoding/disassembling
+      case PPC::VSRCRegClassID:
+        if (isVRRegister(Reg))
+          return PPC::VSX32 + (Reg - PPC::V0);
+        break;
+      // Other RegClass doesn't need mapping
+      default:
+        break;
     }
     return Reg;
   }
+
+  /// Check \p Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
+  bool isBDNZ(unsigned Opcode) const;
+
+  /// Find the hardware loop instruction used to set-up the specified loop.
+  /// On PPC, we have two instructions used to set-up the hardware loop
+  /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
+  /// instructions to indicate the end of a loop.
+  MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
+
+  /// Analyze the loop code to find the loop induction variable and compare used
+  /// to compute the number of iterations. Currently, we analyze loop that are
+  /// controlled using hardware loops.  In this case, the induction variable
+  /// instruction is null.  For all other cases, this function returns true,
+  /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
+  /// return new values when we can analyze the readonly loop \p L, otherwise,
+  /// nothing got changed
+  bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                   MachineInstr *&CmpInst) const override;
+  /// Generate code to reduce the loop iteration by one and check if the loop
+  /// is finished.  Return the value/register of the new loop count.  We need
+  /// this function when peeling off one or more iterations of a loop. This
+  /// function assumes the last iteration is peeled first.
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
+                           MachineInstr *IndVar, MachineInstr &Cmp,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           SmallVectorImpl<MachineInstr *> &PrevInsts,
+                           unsigned Iter, unsigned MaxIter) const override;
 };
 
 }
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index dd3f1ac..c313337 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -231,6 +230,18 @@
                               SDTCisSameAs<1,2>]>,
                            []>;
 
+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
+                           SDTypeProfile<1, 2,
+                             [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
+                             SDTCisVT<1,i32>]>,
+                           []>;
+
+def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
+                            SDTypeProfile<1, 2,
+                              [SDTCisVT<0, i32>, SDTCisVT<1, f64>,
+                              SDTCisPtrTy<2>]>,
+                              []>;
+
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
                            [SDNPHasChain, SDNPOutGlue]>;
@@ -458,6 +469,17 @@
   return !isOffsetMultipleOf(N, 16);
 }]>;
 
+// PatFrag for binary operation whose operands are both non-constant
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+  PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+    int16_t Imm;
+    return !isIntS16Immediate(N->getOperand(0), Imm)
+             && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
@@ -546,10 +568,6 @@
 def crrc : RegisterOperand<CRRC> {
   let ParserMatchClass = PPCRegCRRCAsmOperand;
 }
-def crrc0 : RegisterOperand<CRRC0> {
-  let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-
 def PPCRegSPERCAsmOperand : AsmOperandClass {
   let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
 }
@@ -737,7 +755,9 @@
 def calltarget : Operand<iPTR> {
   let PrintMethod = "printBranchOperand";
   let EncoderMethod = "getDirectBrEncoding";
+  let DecoderMethod = "DecodePCRel24BranchTarget";
   let ParserMatchClass = PPCDirectBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
 }
 def abscalltarget : Operand<iPTR> {
   let PrintMethod = "printAbsBranchOperand";
@@ -881,11 +901,24 @@
 }
 
 // Define PowerPC specific addressing mode.
-def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",    [], []>;
-def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
+
+// d-form
+def iaddr    : ComplexPattern<iPTR, 2, "SelectAddrImm",     [], []>;  // "stb"
+// ds-form
+def iaddrX4  : ComplexPattern<iPTR, 2, "SelectAddrImmX4",   [], []>;  // "std"
+// dq-form
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>;  // "stxv"
+
+// Below forms are all x-form addressing mode, use three different ones so we
+// can make a accurate check for x-form instructions in ISEL.
+// x-form addressing mode whose associated diplacement form is D.
+def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",     [], []>;    // "stbx"
+// x-form addressing mode whose associated diplacement form is DS.
+def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4",    [], []>;  // "stdx"
+// x-form addressing mode whose associated diplacement form is DQ.
+def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16",   [], []>; // "stxvx"
+
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4",  [], []>;  // "std"
-def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>; // "stxv"
 
 // The address in a single register. This is used with the SjLj
 // pseudo-instructions.
@@ -1309,6 +1342,15 @@
   }
 }
 
+// Set the float rounding mode.
+let Uses = [RM], Defs = [RM] in { 
+def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
+                    "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
+
+def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
+                    "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+}
+
 let Defs = [LR] in
   def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
@@ -1435,6 +1477,9 @@
       def BCLn : BForm_4<16, 4, 0, 1, (outs),
                          (ins crbitrc:$bi, condbrtarget:$dst),
                          "bcl 4, $bi, $dst">;
+      def BL_NOP  : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                           (outs), (ins calltarget:$func),
+                                           "bl $func\n\tnop", IIC_BrB, []>;
     }
   }
   let Uses = [CTR, RM] in {
@@ -2512,6 +2557,7 @@
                       [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
 
 let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
               "creqv $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 1)]>;
@@ -2519,6 +2565,7 @@
 def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
               "crxor $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 0)]>;
+}
 
 let Defs = [CR1EQ], CRD = 6 in {
 def CR6SET  : XLForm_1_ext<19, 289, (outs), (ins),
@@ -2566,7 +2613,7 @@
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
-let Pattern = [(int_ppc_mtctr i32:$rS)] in
+let Pattern = [(int_set_loop_iterations i32:$rS)] in
 def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                               "mtctr $rS", IIC_SprMTSPR>,
                 PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -2993,9 +3040,16 @@
 // Calls
 def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
           (BL tglobaladdr:$dst)>;
+
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
           (BL texternalsym:$dst)>;
 
+// Calls for AIX only
+def : Pat<(PPCcall (i32 mcsym:$dst)),
+          (BL mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
+          (BL_NOP mcsym:$dst)>;
+
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
 
@@ -4071,6 +4125,10 @@
 
 def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
 
+let Defs = [CR0] in
+def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+                         "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+
 def TLBIA : XForm_0<31, 370, (outs), (ins),
                         "tlbia", IIC_SprTLBIA, []>;
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index ef589ad..d67041d 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,8 @@
 //===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
 // 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // 
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 9f5891a..935c304 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -1,9 +1,8 @@
 //=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -512,7 +511,7 @@
 
 def EVMERGEHI      : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
                                "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
-def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
                                "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
 def EVMERGEHILO    : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
                                "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
@@ -887,4 +886,14 @@
           (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
           (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+
+def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)),
+          (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>;
+
+def : Pat<(i32 (PPCextract_spe f64:$rA, 1)),
+          (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>;
+def : Pat<(i32 (PPCextract_spe f64:$rA, 0)),
+          (i32 (EXTRACT_SUBREG $rA, sub_32))>;
+
 }
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0f07338..07f38a6 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,8 @@
 //===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
 // 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // 
 //===----------------------------------------------------------------------===//
 //
@@ -54,6 +53,15 @@
 def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
   let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
 }
+
+def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
+  SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
+  SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+]>;
+
 // Little-endian-specific nodes.
 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -85,6 +93,10 @@
 def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
 def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
 
+def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                     string asmstr, InstrItinClass itin, Intrinsic Int,
                     ValueType OutTy, ValueType InTy> {
@@ -124,7 +136,6 @@
 
 let Predicates = [HasVSX] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let UseVSXReg = 1 in {
 let hasSideEffects = 0 in { // VSX instructions don't have side effects.
 let Uses = [RM] in {
 
@@ -841,12 +852,12 @@
                        "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
                        [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
   } // isCommutable
-  let isCodeGenOnly = 1 in
-  def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+
+  let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+      isReMaterializable = 1 in {
+    def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
                        "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                        [(set v4i32:$XT, (v4i32 immAllZerosV))]>;
-
-  let isCodeGenOnly = 1 in {
     def XXLXORdpz : XX3Form_SetZero<60, 154,
                          (outs vsfrc:$XT), (ins),
                          "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
@@ -895,11 +906,10 @@
                              (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
   let isCodeGenOnly = 1 in
   def XXSPLTWs : XX2Form_2<60, 164,
-                       (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+                       (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
 
 } // hasSideEffects
-} // UseVSXReg = 1
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.
@@ -961,6 +971,10 @@
 
 def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
           (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+                     (and v4i32:$B, v4i32:$C))),
+          (v4i32 (XXSEL $A, $B, $C))>;
+
 let Predicates = [IsBigEndian] in {
 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
           (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -1063,6 +1077,8 @@
 def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
           (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
 
+def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+
 // Loads.
 let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
   def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1176,6 +1192,15 @@
 def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
           (XXSEL $vC, $vB, $vA)>;
 
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMINDP $src1, $src2))>;
+
 let Predicates = [IsLittleEndian] in {
 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
           (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1248,7 +1273,7 @@
 def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
 let Predicates = [HasP8Vector] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-  let isCommutable = 1, UseVSXReg = 1 in {
+  let isCommutable = 1 in {
     def XXLEQV : XX3Form<60, 186,
                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                          "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1258,12 +1283,11 @@
                           "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
                           [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                     v4i32:$XB)))]>;
-  } // isCommutable, UseVSXReg
+  } // isCommutable
 
   def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
             (XXLEQV $A, $B)>;
 
-  let UseVSXReg = 1 in {
   def XXLORC : XX3Form<60, 170,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1312,7 +1336,6 @@
                        "#STIWX",
                       [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
   } // mayStore
-  } // UseVSXReg = 1
 
   def : Pat<(f64 (extloadf32 xoaddr:$src)),
             (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
@@ -1342,7 +1365,6 @@
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
             (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
-  let UseVSXReg = 1 in {
   // VSX Elementary Scalar FP arithmetic (SP)
   let isCommutable = 1 in {
     def XSADDSP : XX3Form<60, 0,
@@ -1354,7 +1376,10 @@
                           "xsmulsp $XT, $XA, $XB", IIC_VecFP,
                           [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
   } // isCommutable
-
+  def XSSUBSP : XX3Form<60, 8,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
   def XSDIVSP : XX3Form<60, 24,
                         (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                         "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
@@ -1374,10 +1399,6 @@
                            (outs vssrc:$XT), (ins vssrc:$XB),
                            "xsrsqrtesp $XT, $XB", IIC_VecFP,
                            [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
-  def XSSUBSP : XX3Form<60, 8,
-                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
-                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
 
   // FMA Instructions
   let BaseName = "XSMADDASP" in {
@@ -1470,7 +1491,6 @@
                           "xscvdpspn $XT, $XB", IIC_VecFP, []>;
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
-  } // UseVSXReg = 1
 
   let Predicates = [IsLittleEndian] in {
   def : Pat<DWToSPExtractConv.El0SS1,
@@ -1514,10 +1534,22 @@
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
             (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
 
+  def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
 } // AddedComplexity = 400
 } // HasP8Vector
 
-let UseVSXReg = 1, AddedComplexity = 400 in {
+let AddedComplexity = 400 in {
 let Predicates = [HasDirectMove] in {
   // VSX direct move instructions
   def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1525,7 +1557,7 @@
                               [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
       Requires<[In64BitMode]>;
   let isCodeGenOnly = 1 in
-  def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+  def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
                              "mfvsrd $rA, $XT", IIC_VecGeneral,
                              []>,
       Requires<[In64BitMode]>;
@@ -1557,7 +1589,7 @@
                               []>, Requires<[In64BitMode]>;
 
 } // IsISA3_0, HasDirectMove
-} // UseVSXReg = 1
+} // AddedComplexity = 400
 
 // We want to parse this from asm, but we don't want to emit this as it would
 // be emitted with a VSX reg. So leave Emit = 0 here.
@@ -2415,7 +2447,6 @@
                          list<dag> pattern>
     : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
 
-  let UseVSXReg = 1 in {
   // [PO T XO B XO BX /]
   class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                         list<dag> pattern>
@@ -2434,7 +2465,6 @@
                   InstrItinClass itin, list<dag> pattern>
     : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
               !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
-  } // UseVSXReg = 1
 
   // [PO VRT VRA VRB XO /]
   class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -2482,69 +2512,70 @@
   let isCommutable = 1 in {
   def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp",
                                    [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
-  def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
-                                  [(set f128:$vT,
-                                  (int_ppc_addf128_round_to_odd
-                                  f128:$vA, f128:$vB))]>;
   def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp",
                                    [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
-  def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
-                                  [(set f128:$vT,
-                                  (int_ppc_mulf128_round_to_odd
-                                  f128:$vA, f128:$vB))]>;
   }
-
   def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" ,
                                    [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
-  def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
-                                  [(set f128:$vT,
-                                  (int_ppc_subf128_round_to_odd
-                                  f128:$vA, f128:$vB))]>;
   def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp",
                                    [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
-  def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
-                                  [(set f128:$vT,
-                                  (int_ppc_divf128_round_to_odd
-                                  f128:$vA, f128:$vB))]>;
-
   // Square-Root
   def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp",
                                    [(set f128:$vT, (fsqrt f128:$vB))]>;
-  def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
-                                  [(set f128:$vT,
-                                  (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
-
   // (Negative) Multiply-{Add/Subtract}
   def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
                                     [(set f128:$vT,
                                           (fma f128:$vA, f128:$vB,
                                                f128:$vTi))]>;
+  def XSMSUBQP  : X_VT5_VA5_VB5_FMA   <63, 420, "xsmsubqp"  ,
+                                       [(set f128:$vT,
+                                             (fma f128:$vA, f128:$vB,
+                                                  (fneg f128:$vTi)))]>;
+  def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+                                     [(set f128:$vT,
+                                           (fneg (fma f128:$vA, f128:$vB,
+                                                      f128:$vTi)))]>;
+  def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+                                     [(set f128:$vT,
+                                           (fneg (fma f128:$vA, f128:$vB,
+                                                      (fneg f128:$vTi))))]>;
+
+  let isCommutable = 1 in {
+  def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
+                                  [(set f128:$vT,
+                                  (int_ppc_addf128_round_to_odd
+                                  f128:$vA, f128:$vB))]>;
+  def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
+                                  [(set f128:$vT,
+                                  (int_ppc_mulf128_round_to_odd
+                                  f128:$vA, f128:$vB))]>;
+  }
+  def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
+                                  [(set f128:$vT,
+                                  (int_ppc_subf128_round_to_odd
+                                  f128:$vA, f128:$vB))]>;
+  def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
+                                  [(set f128:$vT,
+                                  (int_ppc_divf128_round_to_odd
+                                  f128:$vA, f128:$vB))]>;
+  def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
+                                  [(set f128:$vT,
+                                  (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
+
 
   def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
                                       [(set f128:$vT,
                                       (int_ppc_fmaf128_round_to_odd
                                       f128:$vA,f128:$vB,f128:$vTi))]>;
 
-  def XSMSUBQP  : X_VT5_VA5_VB5_FMA   <63, 420, "xsmsubqp"  ,
-                                       [(set f128:$vT,
-                                             (fma f128:$vA, f128:$vB,
-                                                  (fneg f128:$vTi)))]>;
   def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
                                       [(set f128:$vT,
                                       (int_ppc_fmaf128_round_to_odd
                                       f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
-  def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
-                                     [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      f128:$vTi)))]>;
   def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
                                       [(set f128:$vT,
                                       (fneg (int_ppc_fmaf128_round_to_odd
                                       f128:$vA, f128:$vB, f128:$vTi)))]>;
-  def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
-                                     [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      (fneg f128:$vTi))))]>;
   def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
                                       [(set f128:$vT,
                                       (fneg (int_ppc_fmaf128_round_to_odd
@@ -2572,8 +2603,7 @@
   // DP/QP Compare Exponents
   def XSCMPEXPDP : XX3Form_1<60, 59,
                              (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
-                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
-                   UseVSXReg;
+                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
   def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
 
   // DP Compare ==, >=, >, !=
@@ -2631,7 +2661,6 @@
   def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
             (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
 
-  let UseVSXReg = 1 in {
   //===--------------------------------------------------------------------===//
   // Round to Floating-Point Integer Instructions
 
@@ -2648,8 +2677,6 @@
                                  [(set v4f32:$XT,
                                      (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
 
-  } // UseVSXReg = 1
-
   // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
   // separate pattern so that it can convert the input register class from
   // VRRC(v8i16) to VSRC.
@@ -2691,7 +2718,7 @@
   // Insert Exponent DP/QP
   // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
   def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
-                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
+                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
   // vB NOTE: only vB.dword[0] is used, that's why we don't use
   //          X_VT5_VA5_VB5 form
   def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2712,7 +2739,6 @@
                            (v2i64 (XSXEXPQP $vA)), sub_64)))>;
 
   // Vector Insert Word
-  let UseVSXReg = 1 in {
   // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
   def XXINSERTW   :
     XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2726,7 +2752,6 @@
   def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
                                   (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
                                   "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
-  } // UseVSXReg = 1
 
   // Vector Insert Exponent DP/SP
   def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -2759,20 +2784,17 @@
   //===--------------------------------------------------------------------===//
 
   // Test Data Class SP/DP/QP
-  let UseVSXReg = 1 in {
   def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
   def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
-  } // UseVSXReg = 1
   def XSTSTDCQP : X_BF3_DCMX7_RS5  <63, 708,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
                               "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
 
   // Vector Test Data Class SP/DP
-  let UseVSXReg = 1 in {
   def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
                               (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                               "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
@@ -2783,7 +2805,6 @@
                               "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
                               [(set v2i64: $XT,
                                (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
-  } // UseVSXReg = 1
 
   //===--------------------------------------------------------------------===//
 
@@ -2824,7 +2845,7 @@
 
   // Vector Splat Immediate Byte
   def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
-                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
+                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
 
   //===--------------------------------------------------------------------===//
   // Vector/Scalar Load/Store Instructions
@@ -2834,7 +2855,7 @@
   let mayLoad = 1, mayStore = 0 in {
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
-                            "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
+                            "lxv $XT, $src", IIC_LdStLFD, []>;
   // Load DWord
   def LXSD  : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
                        "lxsd $vD, $src", IIC_LdStLFD, []>;
@@ -2847,7 +2868,7 @@
   class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
-              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
+              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
 
   // Load as Integer Byte/Halfword & Zero Indexed
   def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
@@ -2861,16 +2882,14 @@
 
   // Load Vector Indexed
   def LXVX    : X_XT6_RA5_RB5<31, 268, "lxvx"   , vsrc,
-                [(set v2f64:$XT, (load xaddr:$src))]>;
+                [(set v2f64:$XT, (load xaddrX16:$src))]>;
   // Load Vector (Left-justified) with Length
   def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
                    "lxvl $XT, $src, $rB", IIC_LdStLoad,
-                   [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>,
-                    UseVSXReg;
+                   [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
   def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
                    "lxvll $XT, $src, $rB", IIC_LdStLoad,
-                   [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>,
-                    UseVSXReg;
+                   [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
 
   // Load Vector Word & Splat Indexed
   def LXVWSX  : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
@@ -2881,7 +2900,7 @@
   let mayStore = 1, mayLoad = 0 in {
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
-                             "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
+                             "stxv $XT, $dst", IIC_LdStSTFD, []>;
   // Store DWord
   def STXSD  : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxsd $vS, $dst", IIC_LdStSTFD, []>;
@@ -2893,7 +2912,7 @@
   class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
-              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
+              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
 
   // Store as Integer Byte/Halfword Indexed
   def STXSIBX  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsfrc,
@@ -2901,8 +2920,8 @@
   def STXSIHX  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsfrc,
                                [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
   let isCodeGenOnly = 1 in {
-    def STXSIBXv  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vrrc, []>;
-    def STXSIHXv  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vrrc, []>;
+    def STXSIBXv  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsrc, []>;
+    def STXSIHXv  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsrc, []>;
   }
 
   // Store Vector Halfword*8/Byte*16 Indexed
@@ -2911,21 +2930,19 @@
 
   // Store Vector Indexed
   def STXVX    : X_XS6_RA5_RB5<31,  396, "stxvx"   , vsrc,
-                 [(store v2f64:$XT, xaddr:$dst)]>;
+                 [(store v2f64:$XT, xaddrX16:$dst)]>;
 
   // Store Vector (Left-justified) with Length
   def STXVL : XX1Form_memOp<31, 397, (outs),
                             (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                             "stxvl $XT, $dst, $rB", IIC_LdStLoad,
                             [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
-                              i64:$rB)]>,
-                            UseVSXReg;
+                              i64:$rB)]>;
   def STXVLL : XX1Form_memOp<31, 429, (outs),
                             (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                             "stxvll $XT, $dst, $rB", IIC_LdStLoad,
                             [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
-                              i64:$rB)]>,
-                            UseVSXReg;
+                              i64:$rB)]>;
   } // mayStore
 
   let Predicates = [IsLittleEndian] in {
@@ -3045,24 +3062,24 @@
   } // IsLittleEndian, HasP9Vector
 
   // D-Form Load/Store
-  def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(f128  (quadwOffsetLoad iqaddr:$src)),
+  def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(f128  (quadwOffsetLoad iaddrX16:$src)),
             (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
 
-  def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore  f128:$rS, iqaddr:$dst),
+  def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore  f128:$rS, iaddrX16:$dst),
             (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+  def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
             (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
             (STXV $rS, memrix16:$dst)>;
 
 
@@ -3159,109 +3176,109 @@
   let Predicates = [IsBigEndian, HasP9Vector] in {
   // Scalar stores of i8
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
-            (STXSIBXv $S, xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
 
   // Scalar stores of i16
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
-            (STXSIHXv $S, xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   } // IsBigEndian, HasP9Vector
 
   let Predicates = [IsLittleEndian, HasP9Vector] in {
   // Scalar stores of i8
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
-            (STXSIBXv $S, xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
 
   // Scalar stores of i16
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
-            (STXSIHXv $S, xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   } // IsLittleEndian, HasP9Vector
 
 
@@ -3273,53 +3290,97 @@
 
   def DFLOADf32  : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
                           "#DFLOADf32",
-                          [(set f32:$XT, (load ixaddr:$src))]>;
+                          [(set f32:$XT, (load iaddrX4:$src))]>;
   def DFLOADf64  : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
                           "#DFLOADf64",
-                          [(set f64:$XT, (load ixaddr:$src))]>;
+                          [(set f64:$XT, (load iaddrX4:$src))]>;
   def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
                           "#DFSTOREf32",
-                          [(store f32:$XT, ixaddr:$dst)]>;
+                          [(store f32:$XT, iaddrX4:$dst)]>;
   def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
                           "#DFSTOREf64",
-                          [(store f64:$XT, ixaddr:$dst)]>;
+                          [(store f64:$XT, iaddrX4:$dst)]>;
 
-  def : Pat<(f64 (extloadf32 ixaddr:$src)),
-            (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
-  def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
-            (f32 (DFLOADf32 ixaddr:$src))>;
+  def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+            (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+            (f32 (DFLOADf32 iaddrX4:$src))>;
 
+  def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+            (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+  def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+            (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
 
   let AddedComplexity = 400 in {
   // The following pseudoinstructions are used to ensure the utilization
   // of all 64 VSX registers.
     let Predicates = [IsLittleEndian, HasP9Vector] in {
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
                 (v2i64 (XXPERMDIs
-                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+                (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
                 (v2i64 (XXPERMDIs
-		(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+		(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
 
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
                 (v2f64 (XXPERMDIs
-                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+                (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
                 (v2f64 (XXPERMDIs
-                (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
-    }
+                (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                            iaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+    } // IsLittleEndian, HasP9Vector
 
     let Predicates = [IsBigEndian, HasP9Vector] in {
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
-                (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
-                (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
 
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
-                (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
-                (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-    }
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+    } // IsBigEndian, HasP9Vector
   }
 
   let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3455,14 +3516,14 @@
   } // IsLittleEndian, HasP9Vector
 
   // Convert (Un)Signed DWord in memory -> QP
-  def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
-            (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
-  def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
-            (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
-  def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
-            (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
-  def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
-            (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+  def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+            (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+  def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+            (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+  def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+            (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+  def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+            (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
 
   // Convert Unsigned HWord in memory -> QP
   def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
@@ -3483,13 +3544,13 @@
   // Instructions for store(fptosi).
   // The 8-byte version is repeated here due to availability of D-Form STXSD.
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
             (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
-                    xaddr:$dst)>;
+                    xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
             (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
-                   ixaddr:$dst)>;
+                   iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
             (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
@@ -3500,11 +3561,11 @@
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
             (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8),
-            (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
+              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+            (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+            (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3514,13 +3575,13 @@
 
   // Instructions for store(fptoui).
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
             (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
-                    xaddr:$dst)>;
+                    xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
             (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
-                   ixaddr:$dst)>;
+                   iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
             (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
@@ -3531,11 +3592,11 @@
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
             (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8),
-            (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
+              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+            (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+            (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
@@ -3668,13 +3729,13 @@
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToLongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A)))));
 }
 def FltToULongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToULongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A)))));
 }
 def FltToLong {
   dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
@@ -3704,13 +3765,13 @@
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
 }
 def DblToIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A)))));
 }
 def DblToUIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
 }
 def DblToUIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A)))));
 }
 def DblToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3834,8 +3895,38 @@
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
                               (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+    // The store instructions store the second word from the left.
+    // So to align element zero, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 2 and 3.
+    foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+   }
+
   // Big endian, available on all targets with VSX
   let Predicates = [IsBigEndian, HasVSX] in {
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3871,8 +3962,38 @@
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
                               (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+    // The store instructions store the second word from the left.
+    // So to align element 3, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 0 and 1.
+    foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+   }
+
   let Predicates = [IsLittleEndian, HasVSX] in {
   // Little endian, available on all targets with VSX
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3969,17 +4090,17 @@
               (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
     def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
               (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                                (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+                                (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
     def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
               (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                                (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+                                (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
     def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
               (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
-                                              (DFLOADf32 ixaddr:$A),
+                                              (DFLOADf32 iaddrX4:$A),
                                               VSFRC)), 0))>;
     def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
               (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
-                                              (DFLOADf32 ixaddr:$A),
+                                              (DFLOADf32 iaddrX4:$A),
                                               VSFRC)), 0))>;
   }
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 0b57dd9..4d45d96 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -1,9 +1,8 @@
 //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,12 +64,6 @@
 
 STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
 
-namespace llvm {
-
-  void initializePPCLoopPreIncPrepPass(PassRegistry&);
-
-} // end namespace llvm
-
 namespace {
 
   class PPCLoopPreIncPrep : public FunctionPass {
@@ -338,7 +331,7 @@
   // iteration space), insert a new preheader for the loop.
   if (!LoopPredecessor ||
       !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
-    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
     if (LoopPredecessor)
       MadeChange = true;
   }
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index e731c0b..027e6bd 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,16 +110,16 @@
     RefKind = MCSymbolRefExpr::VK_PLT;
 
   const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+  const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
   const TargetMachine &TM = Printer.TM;
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
-  // -msecure-plt option works only in PIC mode. If secure plt mode
-  // is on add 32768 to symbol.
+  // If -msecure-plt -fPIC, add 32768 to symbol.
   if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
+      M->getPICLevel() == PICLevel::BigPIC &&
       MO.getTargetFlags() == PPCII::MO_PLT)
-    Expr = MCBinaryExpr::createAdd(Expr,
-                                   MCConstantExpr::create(32768, Ctx),
-                                   Ctx);
+    Expr =
+        MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx);
 
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::createAdd(Expr,
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 0068df19..4462463 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1,9 +1,8 @@
 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -22,9 +21,12 @@
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -38,6 +40,7 @@
 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
 STATISTIC(MultiTOCSaves,
           "Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
@@ -48,6 +51,10 @@
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+          "Number of pairs of rotate left, clear left/right collapsed");
+STATISTIC(NumEXTSWAndSLDICombined,
+          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -83,6 +90,9 @@
 
 private:
   MachineDominatorTree *MDT;
+  MachinePostDominatorTree *MPDT;
+  MachineBlockFrequencyInfo *MBFI;
+  uint64_t EntryFreq;
 
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
@@ -93,6 +103,8 @@
   // Perform peepholes.
   bool eliminateRedundantCompare(void);
   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
+  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
+  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                       MachineInstr *MI);
 
@@ -100,7 +112,11 @@
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    AU.addRequired<MachineBlockFrequencyInfo>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addPreserved<MachinePostDominatorTree>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -118,6 +134,9 @@
   MF = &MFParm;
   MRI = &MF->getRegInfo();
   MDT = &getAnalysis<MachineDominatorTree>();
+  MPDT = &getAnalysis<MachinePostDominatorTree>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  EntryFreq = MBFI->getEntryFreq();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
   LLVM_DEBUG(MF->dump());
@@ -198,6 +217,30 @@
 void PPCMIPeephole::UpdateTOCSaves(
   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+         "TOC-save removal only supported on ELFv2");
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+
+  MachineBasicBlock *Entry = &MF->front();
+  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+  // If the block in which the TOC save resides is in a block that
+  // post-dominates Entry, or a block that is hotter than entry (keep in mind
+  // that early MachineLICM has already run so the TOC save won't be hoisted)
+  // we can just do the save in the prologue.
+  if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+    FI->setMustSaveTOC(true);
+
+  // If we are saving the TOC in the prologue, all the TOC saves can be removed
+  // from the code.
+  if (FI->mustSaveTOC()) {
+    for (auto &TOCSave : TOCSaves)
+      TOCSave.second = false;
+    // Add new instruction to map.
+    TOCSaves[MI] = false;
+    return;
+  }
+
   bool Keep = true;
   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
     MachineInstr *CurrInst = It->first;
@@ -758,6 +801,11 @@
         NumOptADDLIs++;
         break;
       }
+      case PPC::RLDICR: {
+        Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+                      combineSEXTAndSHL(MI, ToErase);
+        break;
+      }
       }
     }
 
@@ -771,6 +819,10 @@
 
   // Eliminate all the TOC save instructions which are redundant.
   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  if (FI->mustSaveTOC())
+    NumTOCSavesInPrologue++;
+
   // We try to eliminate redundant compare instruction.
   Simplified |= eliminateRedundantCompare();
 
@@ -1275,10 +1327,136 @@
   return Simplified;
 }
 
+// We miss the opportunity to emit an RLDIC when lowering jump tables
+// since ISEL sees only a single basic block. When selecting, the clear
+// and shift left will be in different blocks.
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
+  if (MI.getOpcode() != PPC::RLDICR)
+    return false;
+
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+  if (SrcMI->getOpcode() != PPC::RLDICL)
+    return false;
+
+  MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+  MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+  MachineOperand MOpSHMI = MI.getOperand(2);
+  MachineOperand MOpMEMI = MI.getOperand(3);
+  if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
+        MOpMEMI.isImm()))
+    return false;
+
+  uint64_t SHSrc = MOpSHSrc.getImm();
+  uint64_t MBSrc = MOpMBSrc.getImm();
+  uint64_t SHMI = MOpSHMI.getImm();
+  uint64_t MEMI = MOpMEMI.getImm();
+  uint64_t NewSH = SHSrc + SHMI;
+  uint64_t NewMB = MBSrc - SHMI;
+  if (NewMB > 63 || NewSH > 63)
+    return false;
+
+  // The bits cleared with RLDICL are [0, MBSrc).
+  // The bits cleared with RLDICR are (MEMI, 63].
+  // After the sequence, the bits cleared are:
+  // [0, MBSrc-SHMI) and (MEMI, 63).
+  //
+  // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+  if ((63 - NewSH) != MEMI)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Converting pair: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  MI.setDesc(TII->get(PPC::RLDIC));
+  MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+  MI.getOperand(2).setImm(NewSH);
+  MI.getOperand(3).setImm(NewMB);
+
+  LLVM_DEBUG(dbgs() << "To: ");
+  LLVM_DEBUG(MI.dump());
+  NumRotatesCollapsed++;
+  return true;
+}
+
+// For case in LLVM IR
+// entry:
+//   %iconv = sext i32 %index to i64
+//   br i1 undef label %true, label %false
+// true:
+//   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
+// ...
+// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
+// different BBs when conducting instruction selection. We can do a peephole
+// optimization to combine these two instructions into extswsli after
+// instruction selection.
+bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
+                                      MachineInstr *&ToErase) {
+  if (MI.getOpcode() != PPC::RLDICR)
+    return false;
+
+  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
+    return false;
+
+  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
+
+  MachineOperand MOpSHMI = MI.getOperand(2);
+  MachineOperand MOpMEMI = MI.getOperand(3);
+  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
+    return false;
+
+  uint64_t SHMI = MOpSHMI.getImm();
+  uint64_t MEMI = MOpMEMI.getImm();
+  if (SHMI + MEMI != 63)
+    return false;
+
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+  if (SrcMI->getOpcode() != PPC::EXTSW &&
+      SrcMI->getOpcode() != PPC::EXTSW_32_64)
+    return false;
+
+  // If the register defined by extsw has more than one use, combination is not
+  // needed.
+  if (!MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Combining pair: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  MachineInstr *NewInstr =
+      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
+                                               : TII->get(PPC::EXTSWSLI_32_64),
+              MI.getOperand(0).getReg())
+          .add(SrcMI->getOperand(1))
+          .add(MOpSHMI);
+  (void)NewInstr;
+
+  LLVM_DEBUG(dbgs() << "TO: ");
+  LLVM_DEBUG(NewInstr->dump());
+  ++NumEXTSWAndSLDICombined;
+  ToErase = &MI;
+  // SrcMI, which is extsw, is of no use now, erase it.
+  SrcMI->eraseFromParent();
+  return true;
+}
+
 } // end default namespace
 
 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                       "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                     "PowerPC MI Peephole Optimization", false, false)
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 3923417..2f65d6a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 8a3f50a..dfae198 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,6 +44,12 @@
   /// PEI.
   bool MustSaveLR;
 
+  /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+  /// prologue of the function. This is typically the case when there are
+  /// indirect calls in the function and it is more profitable to save the
+  /// TOC pointer in the prologue than in the block(s) containing the call(s).
+  bool MustSaveTOC = false;
+
   /// Do we have to disable shrink-wrapping? This has to be set if we emit any
   /// instructions that clobber LR in the entry block because discovering this
   /// in PEI is too late (happens after shrink-wrapping);
@@ -152,6 +157,9 @@
   void setMustSaveLR(bool U) { MustSaveLR = U; }
   bool mustSaveLR() const    { return MustSaveLR; }
 
+  void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+  bool mustSaveTOC() const    { return MustSaveTOC; }
+
   /// We certainly don't want to shrink wrap functions if we've emitted a
   /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
   /// has to go into the entry block.
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
new file mode 100644
index 0000000..a38c8f4
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
+using namespace llvm;
+
+static cl::opt<bool> 
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+                         cl::desc("Disable scheduling addi instruction before" 
+                                  "load for ppc"), cl::Hidden);
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary &Zone) const {
+  if (DisableAddiLoadHeuristic)
+    return false;
+
+  auto isADDIInstr = [&] (const MachineInstr &Inst) {
+    return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+  };
+
+  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+  if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+      SecondCand.SU->getInstr()->mayLoad()) {
+    TryCand.Reason = Stall;
+    return true;
+  }
+  if (FirstCand.SU->getInstr()->mayLoad() &&
+      isADDIInstr(*SecondCand.SU->getInstr())) {
+    TryCand.Reason = NoCand;
+    return true;
+  }
+
+  return false;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                         SchedCandidate &TryCand,
+                                         SchedBoundary *Zone) const {
+  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  if (!Cand.isValid() || !Zone)
+    return;
+
+  // Add powerpc specific heuristic only when TryCand isn't selected or
+  // selected as node order.
+  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+    return;
+
+  // There are some benefits to schedule the ADDI before the load to hide the
+  // latency, as RA may create a true dependency between the load and addi.
+  if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+    return;
+}
+
+void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
+  // Custom PPC PostRA specific behavior here.
+  PostGenericScheduler::enterMBB(MBB);
+}
+
+void PPCPostRASchedStrategy::leaveMBB() {
+  // Custom PPC PostRA specific behavior here.
+  PostGenericScheduler::leaveMBB();
+}
+
+void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
+  // Custom PPC PostRA specific initialization here.
+  PostGenericScheduler::initialize(Dag);
+}
+
+SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
+  // Custom PPC PostRA specific scheduling here.
+  return PostGenericScheduler::pickNode(IsTopNode);
+}
+
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
new file mode 100644
index 0000000..93532d9
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -0,0 +1,49 @@
+//===- PPCMachineScheduler.h - Custom PowerPC MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom PowerPC MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for PowerPC pre RA scheduling.
+class PPCPreRASchedStrategy : public GenericScheduler {
+public:
+  PPCPreRASchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C) {}
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+private:
+  bool biasAddiLoadCandidate(SchedCandidate &Cand,
+                             SchedCandidate &TryCand,
+                             SchedBoundary &Zone) const;
+};
+
+/// A MachineSchedStrategy implementation for PowerPC post RA scheduling.
+class PPCPostRASchedStrategy : public PostGenericScheduler {
+public:
+  PPCPostRASchedStrategy(const MachineSchedContext *C) :
+    PostGenericScheduler(C) {}
+
+protected:
+  void initialize(ScheduleDAGMI *Dag) override;
+  SUnit *pickNode(bool &IsTopNode) override;
+  void enterMBB(MachineBasicBlock *MBB) override;
+  void leaveMBB() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
index 8a1d680..d0d84ef 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,9 +1,8 @@
 //===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPfmCounters.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPfmCounters.td
index d2a09f3..20b9efd 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPfmCounters.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,9 +1,8 @@
 //===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4458b92..d83c922 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -1,9 +1,8 @@
 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 25b2b54..3a83cc2 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -1,9 +1,8 @@
 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,10 +30,6 @@
 
 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
 
-namespace llvm {
-  void initializePPCQPXLoadSplatPass(PassRegistry&);
-}
-
 namespace {
   struct PPCQPXLoadSplat : public MachineFunctionPass {
     static char ID;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 173fc18..8eaa6df 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -1,9 +1,8 @@
 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -49,10 +48,6 @@
 STATISTIC(NumNotSplitWrongOpcode,
           "Number of blocks not split due to the wrong opcode.");
 
-namespace llvm {
-  void initializePPCReduceCRLogicalsPass(PassRegistry&);
-}
-
 /// Given a basic block \p Successor that potentially contains PHIs, this
 /// function will look for any incoming values in the PHIs that are supposed to
 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
@@ -171,9 +166,33 @@
                                            : *ThisMBB->succ_begin();
   MachineBasicBlock *NewBRTarget =
       BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
-  BranchProbability ProbToNewTarget =
-      !BSI.MBPI ? BranchProbability::getUnknown()
-                : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+  // It's impossible to know the precise branch probability after the split.
+  // But it still needs to be reasonable, the whole probability to original
+  // targets should not be changed.
+  // After split NewBRTarget will get two incoming edges. Assume P0 is the
+  // original branch probability to NewBRTarget, P1 and P2 are new branch
+  // probabilies to NewBRTarget after split. If the two edge frequencies are
+  // same, then
+  //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
+  //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
+  BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
+  BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
+  ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+  ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+  if (BSI.MBPI) {
+    if (BSI.BranchToFallThrough) {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbOrigFallThrough.getCompl();
+    } else {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbOrigTarget.getCompl();
+    }
+  }
 
   // Create a new basic block.
   MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -185,11 +204,16 @@
   // Move everything after SplitBefore into the new block.
   NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
   NewMBB->transferSuccessors(ThisMBB);
+  if (!ProbOrigTarget.isUnknown()) {
+    auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+    NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+    MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+    NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+  }
 
-  // Add the two successors to ThisMBB. The probabilities come from the
-  // existing blocks if available.
+  // Add the two successors to ThisMBB.
   ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
-  ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+  ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
 
   // Add the branches to ThisMBB.
   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3d067aa..12554ea 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCRegisterInfo.h"
-#include "PPC.h"
 #include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
@@ -71,6 +69,14 @@
                          "caller preserved registers can be LICM candidates"),
                 cl::init(true), cl::Hidden);
 
+static cl::opt<unsigned>
+MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
+                  cl::desc("Maximum search distance for definition of CR bit "
+                           "spill on ppc"),
+                  cl::Hidden, cl::init(100));
+
+static unsigned offsetMinAlignForOpcode(unsigned OpC);
+
 PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
                        TM.isPPC64() ? 0 : 1,
@@ -153,30 +159,39 @@
   if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
     return CSR_SRV464_TLS_PE_SaveList;
 
-  if (Subtarget.hasSPE())
-    return CSR_SVR432_SPE_SaveList;
-
   // On PPC64, we might need to save r2 (but only if it is not reserved).
   bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
 
+  // Cold calling convention CSRs.
   if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
-    return TM.isPPC64()
-               ? (Subtarget.hasAltivec()
-                      ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
-                                : CSR_SVR64_ColdCC_Altivec_SaveList)
-                      : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
-                                : CSR_SVR64_ColdCC_SaveList))
-               : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
-                                         : CSR_SVR32_ColdCC_SaveList);
+    if (TM.isPPC64()) {
+      if (Subtarget.hasAltivec())
+        return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
+                      : CSR_SVR64_ColdCC_Altivec_SaveList;
+      return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
+                    : CSR_SVR64_ColdCC_SaveList;
+    }
+    // 32-bit targets.
+    if (Subtarget.hasAltivec())
+      return CSR_SVR32_ColdCC_Altivec_SaveList;
+    else if (Subtarget.hasSPE())
+      return CSR_SVR32_ColdCC_SPE_SaveList;
+    return CSR_SVR32_ColdCC_SaveList;
   }
-
-  return TM.isPPC64()
-             ? (Subtarget.hasAltivec()
-                    ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
-                              : CSR_SVR464_Altivec_SaveList)
-                    : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
-             : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
-                                       : CSR_SVR432_SaveList);
+  // Standard calling convention CSRs.
+  if (TM.isPPC64()) {
+    if (Subtarget.hasAltivec())
+      return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+                    : CSR_SVR464_Altivec_SaveList;
+    return SaveR2 ? CSR_SVR464_R2_SaveList
+                  : CSR_SVR464_SaveList;
+  }
+  // 32-bit targets.
+  if (Subtarget.hasAltivec())
+    return CSR_SVR432_Altivec_SaveList;
+  else if (Subtarget.hasSPE())
+    return CSR_SVR432_SPE_SaveList;
+  return CSR_SVR432_SaveList;
 }
 
 const MCPhysReg *
@@ -221,18 +236,26 @@
                                                   : CSR_Darwin64_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
                                                   : CSR_Darwin32_RegMask);
+  if (Subtarget.isAIXABI()) {
+    assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
+    return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+  }
 
   if (CC == CallingConv::Cold) {
     return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
                                                   : CSR_SVR64_ColdCC_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
-                                                  : CSR_SVR32_ColdCC_RegMask);
+                                                  : (Subtarget.hasSPE()
+                                                  ? CSR_SVR32_ColdCC_SPE_RegMask
+                                                  : CSR_SVR32_ColdCC_RegMask));
   }
 
   return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
                                                 : CSR_SVR464_RegMask)
                       : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
-                                                : CSR_SVR432_RegMask);
+                                                : (Subtarget.hasSPE()
+                                                  ? CSR_SVR432_SPE_RegMask
+                                                  : CSR_SVR432_RegMask));
 }
 
 const uint32_t*
@@ -288,6 +311,11 @@
     markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
   }
 
+  // Always reserve r2 on AIX for now.
+  // TODO: Make r2 allocatable on AIX/XCOFF for some leaf functions.
+  if (Subtarget.isAIXABI())
+    markSuperRegs(Reserved, PPC::R2);  // System-reserved register
+
   // On PPC64, r13 is the thread pointer. Never allocate this register.
   if (TM.isPPC64())
     markSuperRegs(Reserved, PPC::R13);
@@ -316,6 +344,51 @@
   return Reserved;
 }
 
+bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const PPCInstrInfo *InstrInfo =  Subtarget.getInstrInfo();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+  // If the callee saved info is invalid we have to default to true for safety.
+  if (!MFI.isCalleeSavedInfoValid())
+    return true;
+
+  // We will require the use of X-Forms because the frame is larger than what
+  // can be represented in signed 16 bits that fit in the immediate of a D-Form.
+  // If we need an X-Form then we need a register to store the address offset.
+  unsigned FrameSize = MFI.getStackSize();
+  // Signed 16 bits means that the FrameSize cannot be more than 15 bits.
+  if (FrameSize & ~0x7FFF)
+    return true;
+
+  // The callee saved info is valid so it can be traversed.
+  // Checking for registers that need saving that do not have load or store
+  // forms where the address offset is an immediate.
+  for (unsigned i = 0; i < Info.size(); i++) {
+    int FrIdx = Info[i].getFrameIdx();
+    unsigned Reg = Info[i].getReg();
+
+    unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+    if (!MFI.isFixedObjectIndex(FrIdx)) {
+      // This is not a fixed object. If it requires alignment then we may still
+      // need to use the XForm.
+      if (offsetMinAlignForOpcode(Opcode) > 1)
+        return true;
+    }
+
+    // This is either:
+    // 1) A fixed frame index object which we know are aligned so
+    // as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't
+    // need to consider the alignment here.
+    // 2) A not fixed object but in that case we now know that the min required
+    // alignment is no more than 1 based on the previous check.
+    if (InstrInfo->isXFormMemOp(Opcode))
+      return true;
+  }
+  return false;
+}
+
 bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
                                                const MachineFunction &MF) const {
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
@@ -664,6 +737,7 @@
   MachineFunction &MF = *MBB.getParent();
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  const TargetRegisterInfo* TRI = Subtarget.getRegisterInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = TM.isPPC64();
@@ -673,27 +747,59 @@
   unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
   unsigned SrcReg = MI.getOperand(0).getReg();
 
-  // We need to move the CR field that contains the CR bit we are spilling.
-  // The super register may not be explicitly defined (i.e. it can be defined
-  // by a CR-logical that only defines the subreg) so we state that the CR
-  // field is undef. Also, in order to preserve the kill flag on the CR bit,
-  // we add it as an implicit use.
-  BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+  // Search up the BB to find the definition of the CR bit.
+  MachineBasicBlock::reverse_iterator Ins;
+  unsigned CRBitSpillDistance = 0;
+  for (Ins = MI; Ins != MBB.rend(); Ins++) {
+    // Definition found.
+    if (Ins->modifiesRegister(SrcReg, TRI))
+      break;
+    // Unable to find CR bit definition within maximum search distance.
+    if (CRBitSpillDistance == MaxCRBitSpillDist) {
+      Ins = MI;
+      break;
+    }
+    // Skip debug instructions when counting CR bit spill distance.
+    if (!Ins->isDebugInstr())
+      CRBitSpillDistance++;
+  }
+
+  // Unable to find the definition of the CR bit in the MBB.
+  if (Ins == MBB.rend())
+    Ins = MI;
+
+  // There is no need to extract the CR bit if its value is already known.
+  switch (Ins->getOpcode()) {
+  case PPC::CRUNSET:
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
+      .addImm(0);
+    break;
+  case PPC::CRSET:
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
+      .addImm(-32768);
+    break;
+  default:
+    // We need to move the CR field that contains the CR bit we are spilling.
+    // The super register may not be explicitly defined (i.e. it can be defined
+    // by a CR-logical that only defines the subreg) so we state that the CR
+    // field is undef. Also, in order to preserve the kill flag on the CR bit,
+    // we add it as an implicit use.
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
       .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
       .addReg(SrcReg,
               RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
 
-  // If the saved register wasn't CR0LT, shift the bits left so that the bit to
-  // store is the first one. Mask all but that bit.
-  unsigned Reg1 = Reg;
-  Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+    // If the saved register wasn't CR0LT, shift the bits left so that the bit
+    // to store is the first one. Mask all but that bit.
+    unsigned Reg1 = Reg;
+    Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
 
-  // rlwinm rA, rA, ShiftBits, 0, 0.
-  BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
-    .addReg(Reg1, RegState::Kill)
-    .addImm(getEncodingValue(SrcReg))
-    .addImm(0).addImm(0);
-
+    // rlwinm rA, rA, ShiftBits, 0, 0.
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+      .addReg(Reg1, RegState::Kill)
+      .addImm(getEncodingValue(SrcReg))
+      .addImm(0).addImm(0);
+  }
   addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
                     .addReg(Reg, RegState::Kill),
                     FrameIndex);
@@ -826,9 +932,7 @@
 }
 
 // If the offset must be a multiple of some value, return what that value is.
-static unsigned offsetMinAlign(const MachineInstr &MI) {
-  unsigned OpC = MI.getOpcode();
-
+static unsigned offsetMinAlignForOpcode(unsigned OpC) {
   switch (OpC) {
   default:
     return 1;
@@ -847,12 +951,21 @@
   case PPC::STXSD:
   case PPC::STXSSP:
     return 4;
+  case PPC::EVLDD:
+  case PPC::EVSTDD:
+    return 8;
   case PPC::LXV:
   case PPC::STXV:
     return 16;
   }
 }
 
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
+  unsigned OpC = MI.getOpcode();
+  return offsetMinAlignForOpcode(OpC);
+}
+
 // Return the OffsetOperandNo given the FIOperandNum (and the instruction).
 static unsigned getOffsetONFromFION(const MachineInstr &MI,
                                     unsigned FIOperandNum) {
@@ -963,7 +1076,10 @@
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) &&
+  bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+                            isUInt<8>(Offset) :
+                            isInt<16>(Offset);
+  if (!noImmForm && ((OffsetFitsMnemonic &&
                       ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
@@ -1001,7 +1117,8 @@
 
   if (noImmForm)
     OperandBase = 1;
-  else if (OpC != TargetOpcode::INLINEASM) {
+  else if (OpC != TargetOpcode::INLINEASM &&
+           OpC != TargetOpcode::INLINEASM_BR) {
     assert(ImmToIdxMap.count(OpC) &&
            "No indexed form of load or store available!");
     unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -1016,7 +1133,7 @@
   MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
 }
 
-unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const PPCFrameLowering *TFI = getFrameLowering(MF);
 
   if (!TM.isPPC64())
@@ -1025,7 +1142,7 @@
     return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
 }
 
-unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
   if (!hasBasePointer(MF))
     return getFrameRegister(MF);
@@ -1080,7 +1197,7 @@
   MachineBasicBlock &MBB = *MI->getParent();
   MachineFunction &MF = *MBB.getParent();
   const PPCFrameLowering *TFI = getFrameLowering(MF);
-  unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+  unsigned StackEst = TFI->determineFrameLayout(MF, true);
 
   // If we likely don't need a stack frame, then we probably don't need a
   // virtual base register either.
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index e93fe4c..a50e059 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,13 +14,14 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
 
 #define GET_REGINFO_HEADER
 #include "PPCGenRegisterInfo.inc"
 
 namespace llvm {
+class PPCTargetMachine;
 
 inline static unsigned getCRFromCRBit(unsigned SrcReg) {
   unsigned Reg = 0;
@@ -90,9 +90,7 @@
     return true;
   }
 
-  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
-    return true;
-  }
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
 
   bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
     return true;
@@ -134,10 +132,10 @@
                           int64_t Offset) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   // Base pointer (stack realignment) support.
-  unsigned getBaseRegister(const MachineFunction &MF) const;
+  Register getBaseRegister(const MachineFunction &MF) const;
   bool hasBasePointer(const MachineFunction &MF) const;
 
   /// stripRegisterPrefix - This method strips the character prefix from a
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index d0d29b6..af0dff6 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -375,8 +374,6 @@
 def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
                                                 CR7, CR2, CR3, CR4)>;
 
-def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
-
 // The CTR registers are not allocatable because they're used by the
 // decrement-and-branch instructions, and thus need to stay live across
 // multiple basic blocks.
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule.td
index c8fe7d7..4fa29d9 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -1,9 +1,8 @@
 //===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -106,6 +105,7 @@
 def IIC_VecVSR       : InstrItinClass;
 def IIC_SprMTMSRD    : InstrItinClass;
 def IIC_SprSLIE      : InstrItinClass;
+def IIC_SprSLBFEE    : InstrItinClass;
 def IIC_SprSLBIE     : InstrItinClass;
 def IIC_SprSLBIEG    : InstrItinClass;
 def IIC_SprSLBMTE    : InstrItinClass;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule440.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 646822e..708261f 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -1,9 +1,8 @@
 //===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index f34c1ac..c2b2985 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -1,9 +1,8 @@
 //===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index 479a970..74744dd 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index d8bda07..1a1c041 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 3e50803..4480d7f 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 0995b72..8f1907f 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 1b15c7b..0eabc49 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 0044c3c..9c84aec 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index c802b80..0870735 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 1d6e509..5a8c1eb2 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index ff39dfd..70a58f4 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index a1e625c..6a79cca 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,8 +50,21 @@
 
   // ***************** Processor Resources *****************
 
-  //Dispatcher:
-  def DISPATCHER : ProcResource<12>;
+  // Dispatcher slots:
+  // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each
+  // corresponds to one of the four execution slices.
+  def DISPx02 : ProcResource<2>;
+  def DISPx13 : ProcResource<2>;
+  // The xa and xb ports can be used to send an iop to either of the two slices
+  // of the superslice, but are restricted to iops with only two primary sources.
+  def DISPxab : ProcResource<2>;
+  // b0 and b1 are dedicated dispatch ports into the branch slice.
+  def DISPb01 : ProcResource<2>;
+
+  // Any non BR dispatch ports
+  def DISP_NBR
+      : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>;
+  def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>;
 
   // Issue Ports
   // An instruction can go down one of two issue queues.
@@ -117,8 +129,37 @@
 
   // ***************** SchedWriteRes Definitions *****************
 
-  //Dispatcher
-  def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+  // Dispatcher
+  // Dispatch Rules: '-' or 'V'
+  // Vector ('V') - vector iops (128-bit operand) take only one decode and
+  // dispatch slot but are dispatched to both the even and odd slices of a
+  // superslice.
+  def DISP_1C : SchedWriteRes<[DISP_NBR]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'E' 
+  // Even slice ('E') - certain operations must be sent only to an even slice.
+  // Also consumes odd dispatch slice slot of the same superslice at dispatch
+  def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'P'
+  // Paired ('P') - certain cracked and expanded iops are paired such that they
+  // must dispatch together to the same superslice.
+  def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Tuple Restricted ('R') - certain iops preclude dispatching more than one
+  // operation per slice for the super-slice to which they are dispatched
+  def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Each execution and branch slice can receive up to two iops per cycle
+  def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> {
     let NumMicroOps = 0;
     let Latency = 1;
   }
@@ -148,7 +189,7 @@
 
   // ALU Units
   // An ALU may take either 2 or 3 cycles to complete the operation.
-  // However, the ALU unit is only every busy for 1 cycle at a time and may
+  // However, the ALU unit is only ever busy for 1 cycle at a time and may
   // receive new instructions each cycle.
   def P9_ALU_2C : SchedWriteRes<[ALU]> {
     let Latency = 2;
@@ -203,10 +244,6 @@
   // DP Unit
   // A DP unit may take from 2 to 36 cycles to complete.
   // Some DP operations keep the unit busy for up to 10 cycles.
-  def P9_DP_2C : SchedWriteRes<[DP]> {
-    let Latency = 2;
-  }
-
   def P9_DP_5C : SchedWriteRes<[DP]> {
     let Latency = 5;
   }
@@ -228,11 +265,6 @@
     let Latency = 22;
   }
 
-  def P9_DP_24C_8 : SchedWriteRes<[DP]> {
-    let ResourceCycles = [8];
-    let Latency = 24;
-  }
-
   def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
     let ResourceCycles = [8];
     let Latency = 24;
@@ -248,11 +280,6 @@
     let Latency = 22;
   }
 
-  def P9_DP_27C_7 : SchedWriteRes<[DP]> {
-    let ResourceCycles = [7];
-    let Latency = 27;
-  }
-
   def P9_DPE_27C_10 : SchedWriteRes<[DP]> {
     let ResourceCycles = [10];
     let Latency = 27;
@@ -383,16 +410,12 @@
   def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
   def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
   def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>;
-  def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
   def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
   def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>;
   def P9_ALUOpAndALUOpAndALUOp_6C :
     WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>;
   def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>;
-  def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
   def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
-  def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
-  def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
   def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
   def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
   def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index c0cbfd7..6aa7528 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,6 +39,11 @@
   cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
   cl::Hidden);
 
+static cl::opt<bool>
+    EnableMachinePipeliner("ppc-enable-pipeliner",
+                           cl::desc("Enable Machine Pipeliner for PPC"),
+                           cl::init(false), cl::Hidden);
+
 PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                             StringRef FS) {
   initializeEnvironment();
@@ -68,6 +72,7 @@
   HasFPU = false;
   HasQPX = false;
   HasVSX = false;
+  NeedsTwoConstNR = false;
   HasP8Vector = false;
   HasP8Altivec = false;
   HasP8Crypto = false;
@@ -103,11 +108,13 @@
   HasDirectMove = false;
   IsQPXStackUnaligned = false;
   HasHTM = false;
-  HasFusion = false;
   HasFloat128 = false;
   IsISA3_0 = false;
   UseLongCalls = false;
   SecurePlt = false;
+  VectorsUseTwoUnits = false;
+  UsePPCPreRASchedStrategy = false;
+  UsePPCPostRASchedStrategy = false;
 
   HasPOPCNTD = POPCNTD_Unavailable;
 }
@@ -138,6 +145,10 @@
   if (isDarwin())
     HasLazyResolverStubs = true;
 
+  if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+      TargetTriple.isMusl())
+    SecurePlt = true;
+
   if (HasSPE && IsPPC64)
     report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
   if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
@@ -175,10 +186,14 @@
   return false;
 }
 
-bool PPCSubtarget::enableMachineScheduler() const {
-  return true;
+bool PPCSubtarget::enableMachineScheduler() const { return true; }
+
+bool PPCSubtarget::enableMachinePipeliner() const {
+  return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
 }
 
+bool PPCSubtarget::useDFAforSMS() const { return false; }
+
 // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
 bool PPCSubtarget::enablePostRAScheduler() const { return true; }
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
index c56f254..55fec1c 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,9 +1,8 @@
 //===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,6 +98,7 @@
   bool HasSPE;
   bool HasQPX;
   bool HasVSX;
+  bool NeedsTwoConstNR;
   bool HasP8Vector;
   bool HasP8Altivec;
   bool HasP8Crypto;
@@ -131,11 +131,13 @@
   bool HasPartwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
-  bool HasFusion;
   bool HasFloat128;
   bool IsISA3_0;
   bool UseLongCalls;
   bool SecurePlt;
+  bool VectorsUseTwoUnits;
+  bool UsePPCPreRASchedStrategy;
+  bool UsePPCPostRASchedStrategy;
 
   POPCNTDKind HasPOPCNTD;
 
@@ -244,6 +246,7 @@
   bool hasFPU() const { return HasFPU; }
   bool hasQPX() const { return HasQPX; }
   bool hasVSX() const { return HasVSX; }
+  bool needsTwoConstNR() const { return NeedsTwoConstNR; }
   bool hasP8Vector() const { return HasP8Vector; }
   bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasP8Crypto() const { return HasP8Crypto; }
@@ -260,6 +263,7 @@
   bool isPPC4xx() const { return IsPPC4xx; }
   bool isPPC6xx() const { return IsPPC6xx; }
   bool isSecurePlt() const {return SecurePlt; }
+  bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
   bool isE500() const { return IsE500; }
   bool isFeatureMFTB() const { return FeatureMFTB; }
   bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -267,6 +271,8 @@
   bool hasInvariantFunctionDescriptors() const {
     return HasInvariantFunctionDescriptors;
   }
+  bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
+  bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
   bool hasPartwordAtomics() const { return HasPartwordAtomics; }
   bool hasDirectMove() const { return HasDirectMove; }
 
@@ -285,7 +291,6 @@
   }
 
   bool hasHTM() const { return HasHTM; }
-  bool hasFusion() const { return HasFusion; }
   bool hasFloat128() const { return HasFloat128; }
   bool isISA3_0() const { return IsISA3_0; }
   bool useLongCalls() const { return UseLongCalls; }
@@ -307,16 +312,21 @@
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
 
   bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
-  bool isSVR4ABI() const { return !isDarwinABI(); }
+  bool isAIXABI() const { return TargetTriple.isOSAIX(); }
+  bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
   bool isELFv2ABI() const;
 
   /// Originally, this function return hasISEL(). Now we always enable it,
   /// but may expand the ISEL instruction later.
   bool enableEarlyIfConversion() const override { return true; }
 
-  // Scheduling customization.
+  /// Scheduling customization.
   bool enableMachineScheduler() const override;
-  // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  /// Pipeliner customization.
+  bool enableMachinePipeliner() const override;
+  /// Machine Pipeliner customization
+  bool useDFAforSMS() const override;
+  /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
   bool enablePostRAScheduler() const override;
   AntiDepBreakMode getAntiDepBreakMode() const override;
   void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ac36abb..fb826c4 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -1,9 +1,8 @@
 //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,10 +34,6 @@
 
 #define DEBUG_TYPE "ppc-tls-dynamic-call"
 
-namespace llvm {
-  void initializePPCTLSDynamicCallPass(PassRegistry&);
-}
-
 namespace {
   struct PPCTLSDynamicCall : public MachineFunctionPass {
     static char ID;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 17345b6..3eb0569 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,10 +82,6 @@
 
 #define DEBUG_TYPE "ppc-toc-reg-deps"
 
-namespace llvm {
-  void initializePPCTOCRegDepsPass(PassRegistry&);
-}
-
 namespace {
   // PPCTOCRegDeps pass - For simple functions without epilogue code, move
   // returns up, and create conditional returns, to avoid unnecessary
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 580d0576..ce00f84 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,9 +13,11 @@
 #include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPC.h"
+#include "PPCMachineScheduler.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetObjectFile.h"
 #include "PPCTargetTransformInfo.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -100,6 +101,19 @@
   RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
 
   PassRegistry &PR = *PassRegistry::getPassRegistry();
+#ifndef NDEBUG
+  initializePPCCTRLoopsVerifyPass(PR);
+#endif
+  initializePPCLoopPreIncPrepPass(PR);
+  initializePPCTOCRegDepsPass(PR);
+  initializePPCEarlyReturnPass(PR);
+  initializePPCVSXCopyPass(PR);
+  initializePPCVSXFMAMutatePass(PR);
+  initializePPCVSXSwapRemovalPass(PR);
+  initializePPCReduceCRLogicalsPass(PR);
+  initializePPCBSelPass(PR);
+  initializePPCBranchCoalescingPass(PR);
+  initializePPCQPXLoadSplatPass(PR);
   initializePPCBoolRetToIntPass(PR);
   initializePPCExpandISELPass(PR);
   initializePPCPreEmitPeepholePass(PR);
@@ -199,6 +213,8 @@
   case Triple::ppc64le:
     return PPCTargetMachine::PPC_ABI_ELFv2;
   case Triple::ppc64:
+    if (TT.getEnvironment() == llvm::Triple::ELFv2)
+      return PPCTargetMachine::PPC_ABI_ELFv2;
     return PPCTargetMachine::PPC_ABI_ELFv1;
   default:
     return PPCTargetMachine::PPC_ABI_UNKNOWN;
@@ -227,9 +243,9 @@
                                                  bool JIT) {
   if (CM) {
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     if (*CM == CodeModel::Kernel)
-      report_fatal_error("Target does not support the kernel CodeModel");
+      report_fatal_error("Target does not support the kernel CodeModel", false);
     return *CM;
   }
   if (!TT.isOSDarwin() && !JIT &&
@@ -238,6 +254,29 @@
   return CodeModel::Small;
 }
 
+
+static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
+  const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+  ScheduleDAGMILive *DAG =
+    new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
+                          llvm::make_unique<PPCPreRASchedStrategy>(C) :
+                          llvm::make_unique<GenericScheduler>(C));
+  // add DAG Mutations here.
+  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+  return DAG;
+}
+
+static ScheduleDAGInstrs *createPPCPostMachineScheduler(
+  MachineSchedContext *C) {
+  const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+  ScheduleDAGMI *DAG =
+    new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
+                      llvm::make_unique<PPCPostRASchedStrategy>(C) :
+                      llvm::make_unique<PostGenericScheduler>(C), true);
+  // add DAG Mutations here.
+  return DAG;
+}
+
 // The FeatureString here is a little subtle. We are modifying the feature
 // string with what are (currently) non-function specific overrides as it goes
 // into the LLVMTargetMachine constructor and then using the stored value in the
@@ -331,6 +370,14 @@
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    return createPPCMachineScheduler(C);
+  }
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    return createPPCPostMachineScheduler(C);
+  }
 };
 
 } // end anonymous namespace
@@ -374,7 +421,7 @@
     addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
 
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createPPCCTRLoops());
+    addPass(createHardwareLoopsPass());
 
   return false;
 }
@@ -441,6 +488,9 @@
   }
   if (EnableExtraTOCRegDeps)
     addPass(createPPCTOCRegDepsPass());
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&MachinePipelinerID);
 }
 
 void PPCPassConfig::addPreSched2() {
@@ -469,3 +519,13 @@
 PPCTargetMachine::getTargetTransformInfo(const Function &F) {
   return TargetTransformInfo(PPCTTIImpl(this, F));
 }
+
+static MachineSchedRegistry
+PPCPreRASchedRegistry("ppc-prera",
+                      "Run PowerPC PreRA specific scheduler",
+                      createPPCMachineScheduler);
+
+static MachineSchedRegistry
+PPCPostRASchedRegistry("ppc-postra",
+                       "Run PowerPC PostRA specific scheduler",
+                       createPPCPostMachineScheduler);
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 75b98a8..fd1d14a 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -59,10 +58,6 @@
     const Triple &TT = getTargetTriple();
     return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
   };
-
-  bool isMachineVerifierClean() const override {
-    return false;
-  }
 };
 } // end namespace llvm
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index a049dc3..e237fab 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
index 417b8ed..78a5840 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 310fea9..e17361d 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,9 +1,8 @@
 //===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index bc9bcab..ff3dfbf 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,17 +1,18 @@
 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
@@ -32,6 +33,13 @@
                 cl::desc("Enable using coldcc calling conv for cold "
                          "internal functions"));
 
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+                      cl::desc("Loops with a constant trip count smaller than "
+                               "this value will not use the count register."));
+
 //===----------------------------------------------------------------------===//
 //
 // PPC cost model.
@@ -205,6 +213,341 @@
   return BaseT::getUserCost(U, Operands);
 }
 
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
+                             TargetLibraryInfo *LibInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+
+  // Loop through the inline asm constraints and look for something that
+  // clobbers ctr.
+  auto asmClobbersCTR = [](InlineAsm *IA) {
+    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+      InlineAsm::ConstraintInfo &C = CIV[i];
+      if (C.Type != InlineAsm::isInput)
+        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+          if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+            return true;
+    }
+    return false;
+  };
+
+  // Determining the address of a TLS variable results in a function call in
+  // certain TLS models.
+  std::function<bool(const Value*)> memAddrUsesCTR =
+    [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+    if (!GV) {
+      // Recurse to check for constants that refer to TLS global variables.
+      if (const auto *CV = dyn_cast<Constant>(MemAddr))
+        for (const auto &CO : CV->operands())
+          if (memAddrUsesCTR(CO))
+            return true;
+
+      return false;
+    }
+
+    if (!GV->isThreadLocal())
+      return false;
+    TLSModel::Model Model = TM.getTLSModel(GV);
+    return Model == TLSModel::GeneralDynamic ||
+      Model == TLSModel::LocalDynamic;
+  };
+
+  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+    return false;
+  };
+
+  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+       J != JE; ++J) {
+    if (CallInst *CI = dyn_cast<CallInst>(J)) {
+      // Inline ASM is okay, unless it clobbers the ctr register.
+      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+        if (asmClobbersCTR(IA))
+          return true;
+        continue;
+      }
+
+      if (Function *F = CI->getCalledFunction()) {
+        // Most intrinsics don't become function calls, but some might.
+        // sin, cos, exp and log are always calls.
+        unsigned Opcode = 0;
+        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+          switch (F->getIntrinsicID()) {
+          default: continue;
+          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+          // we're definitely using CTR.
+          case Intrinsic::set_loop_iterations:
+          case Intrinsic::loop_decrement:
+            return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                       !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+#  pragma pop_macro("setjmp")
+#  undef setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::longjmp:
+
+          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+          // because, although it does clobber the counter register, the
+          // control can't then return to inside the loop unless there is also
+          // an eh_sjlj_setjmp.
+          case Intrinsic::eh_sjlj_setjmp:
+
+          case Intrinsic::memcpy:
+          case Intrinsic::memmove:
+          case Intrinsic::memset:
+          case Intrinsic::powi:
+          case Intrinsic::log:
+          case Intrinsic::log2:
+          case Intrinsic::log10:
+          case Intrinsic::exp:
+          case Intrinsic::exp2:
+          case Intrinsic::pow:
+          case Intrinsic::sin:
+          case Intrinsic::cos:
+            return true;
+          case Intrinsic::copysign:
+            if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+              return true;
+            else
+              continue; // ISD::FCOPYSIGN is never a library call.
+          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
+          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
+          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
+          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
+          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
+          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
+          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
+          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
+          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
+          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
+          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
+          }
+        }
+
+        // PowerPC does not use [US]DIVREM or other library calls for
+        // operations on regular types which are not otherwise library calls
+        // (i.e. soft float or atomics). If adapting for targets that do,
+        // additional care is required here.
+
+        LibFunc Func;
+        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+            LibInfo->getLibFunc(F->getName(), Func) &&
+            LibInfo->hasOptimizedCodeGen(Func)) {
+          // Non-read-only functions are never treated as intrinsics.
+          if (!CI->onlyReadsMemory())
+            return true;
+
+          // Conversion happens only for FP calls.
+          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+            return true;
+
+          switch (Func) {
+          default: return true;
+          case LibFunc_copysign:
+          case LibFunc_copysignf:
+            continue; // ISD::FCOPYSIGN is never a library call.
+          case LibFunc_copysignl:
+            return true;
+          case LibFunc_fabs:
+          case LibFunc_fabsf:
+          case LibFunc_fabsl:
+            continue; // ISD::FABS is never a library call.
+          case LibFunc_sqrt:
+          case LibFunc_sqrtf:
+          case LibFunc_sqrtl:
+            Opcode = ISD::FSQRT; break;
+          case LibFunc_floor:
+          case LibFunc_floorf:
+          case LibFunc_floorl:
+            Opcode = ISD::FFLOOR; break;
+          case LibFunc_nearbyint:
+          case LibFunc_nearbyintf:
+          case LibFunc_nearbyintl:
+            Opcode = ISD::FNEARBYINT; break;
+          case LibFunc_ceil:
+          case LibFunc_ceilf:
+          case LibFunc_ceill:
+            Opcode = ISD::FCEIL; break;
+          case LibFunc_rint:
+          case LibFunc_rintf:
+          case LibFunc_rintl:
+            Opcode = ISD::FRINT; break;
+          case LibFunc_round:
+          case LibFunc_roundf:
+          case LibFunc_roundl:
+            Opcode = ISD::FROUND; break;
+          case LibFunc_trunc:
+          case LibFunc_truncf:
+          case LibFunc_truncl:
+            Opcode = ISD::FTRUNC; break;
+          case LibFunc_fmin:
+          case LibFunc_fminf:
+          case LibFunc_fminl:
+            Opcode = ISD::FMINNUM; break;
+          case LibFunc_fmax:
+          case LibFunc_fmaxf:
+          case LibFunc_fmaxl:
+            Opcode = ISD::FMAXNUM; break;
+          }
+        }
+
+        if (Opcode) {
+          EVT EVTy =
+              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
+
+          if (EVTy == MVT::Other)
+            return true;
+
+          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
+            continue;
+          else if (EVTy.isVector() &&
+                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
+            continue;
+
+          return true;
+        }
+      }
+
+      return true;
+    } else if (isa<BinaryOperator>(J) &&
+               J->getType()->getScalarType()->isPPC_FP128Ty()) {
+      // Most operations on ppc_f128 values become calls.
+      return true;
+    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+      CastInst *CI = cast<CastInst>(J);
+      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+        return true;
+    } else if (isLargeIntegerTy(!TM.isPPC64(),
+                                J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::UDiv ||
+                J->getOpcode() == Instruction::SDiv ||
+                J->getOpcode() == Instruction::URem ||
+                J->getOpcode() == Instruction::SRem)) {
+      return true;
+    } else if (!TM.isPPC64() &&
+               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::Shl ||
+                J->getOpcode() == Instruction::AShr ||
+                J->getOpcode() == Instruction::LShr)) {
+      // Only on PPC32, for 128-bit integers (specifically not 64-bit
+      // integers), these might be runtime calls.
+      return true;
+    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+      // On PowerPC, indirect jumps use the counter register.
+      return true;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+        return true;
+    }
+
+    // FREM is always a call.
+    if (J->getOpcode() == Instruction::FRem)
+      return true;
+
+    if (ST->useSoftFloat()) {
+      switch(J->getOpcode()) {
+      case Instruction::FAdd:
+      case Instruction::FSub:
+      case Instruction::FMul:
+      case Instruction::FDiv:
+      case Instruction::FPTrunc:
+      case Instruction::FPExt:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::UIToFP:
+      case Instruction::SIToFP:
+      case Instruction::FCmp:
+        return true;
+      }
+    }
+
+    for (Value *Operand : J->operands())
+      if (memAddrUsesCTR(Operand))
+        return true;
+  }
+
+  return false;
+}
+
+bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                          AssumptionCache &AC,
+                                          TargetLibraryInfo *LibInfo,
+                                          HardwareLoopInfo &HWLoopInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+  TargetSchedModel SchedModel;
+  SchedModel.init(ST);
+
+  // Do not convert small short loops to CTR loop.
+  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+    CodeMetrics Metrics;
+    for (BasicBlock *BB : L->blocks())
+      Metrics.analyzeBasicBlock(BB, *this, EphValues);
+    // 6 is an approximate latency for the mtctr instruction.
+    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+      return false;
+  }
+
+  // We don't want to spill/restore the counter register, and so we don't
+  // want to use the counter register if the loop contains calls.
+  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+       I != IE; ++I)
+    if (mightUseCTR(*I, LibInfo))
+      return false;
+
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // If there is an exit edge known to be frequently taken,
+  // we should not transform this loop.
+  for (auto &BB : ExitingBlocks) {
+    Instruction *TI = BB->getTerminator();
+    if (!TI) continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      uint64_t TrueWeight = 0, FalseWeight = 0;
+      if (!BI->isConditional() ||
+          !BI->extractProfMetadata(TrueWeight, FalseWeight))
+        continue;
+
+      // If the exit path is more frequent than the loop path,
+      // we return here without further analysis for this loop.
+      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+      if (( TrueIsExit && FalseWeight < TrueWeight) ||
+          (!TrueIsExit && FalseWeight > TrueWeight))
+        return false;
+    }
+  }
+
+  LLVMContext &C = L->getHeader()->getContext();
+  HWLoopInfo.CountType = TM.isPPC64() ?
+    Type::getInt64Ty(C) : Type::getInt32Ty(C);
+  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+  return true;
+}
+
 void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   if (ST->getDarwinDirective() == PPC::DIR_A2) {
@@ -239,17 +582,12 @@
   return LoopHasReductions;
 }
 
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
-  static const auto Options = []() {
-    TTI::MemCmpExpansionOptions Options;
-    Options.LoadSizes.push_back(8);
-    Options.LoadSizes.push_back(4);
-    Options.LoadSizes.push_back(2);
-    Options.LoadSizes.push_back(1);
-    return Options;
-  }();
-  return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  Options.LoadSizes = {8, 4, 2, 1};
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  return Options;
 }
 
 bool PPCTTIImpl::enableInterleavedAccessVectorization() {
@@ -324,6 +662,33 @@
   return 2;
 }
 
+// Adjust the cost of vector instructions on targets which there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code wrt. scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+                                     Type *Ty2) {
+  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+    return Cost;
+
+  std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+  // If type legalization involves splitting the vector, we don't want to
+  // double the cost at every step - only the last step.
+  if (LT1.first != 1 || !LT1.second.isVector())
+    return Cost;
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  if (TLI->isOperationExpand(ISD, LT1.second))
+    return Cost;
+
+  if (Ty2) {
+    std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+    if (LT2.first != 1 || !LT2.second.isVector())
+      return Cost;
+  }
+
+  return Cost * 2;
+}
+
 int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -331,8 +696,9 @@
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
-                                       Opd1PropInfo, Opd2PropInfo);
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                           Opd1PropInfo, Opd2PropInfo);
+  return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
 }
 
 int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -345,19 +711,22 @@
   // instruction). We need one such shuffle instruction for each actual
   // register (this is not true for arbitrary shuffles, but is true for the
   // structured types of shuffles covered by TTI::ShuffleKind).
-  return LT.first;
+  return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+                              nullptr);
 }
 
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                  const Instruction *I) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
-  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return vectorCostAdjustment(Cost, Opcode, Dst, Src);
 }
 
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                    const Instruction *I) {
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
 }
 
 int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -366,18 +735,23 @@
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+  Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
-    // Double-precision scalars are already located in index #0.
-    if (Index == 0)
+    // Double-precision scalars are already located in index #0 (or #1 if LE).
+    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
+        Index == (ST->isLittleEndian() ? 1 : 0))
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
+
   } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
     // Floating point scalars are already located in index #0.
     if (Index == 0)
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
   }
 
   // Estimated cost of a load-hit-store delay.  This was obtained
@@ -394,9 +768,9 @@
   // these need to be estimated as very costly.
   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
       ISD == ISD::INSERT_VECTOR_ELT)
-    return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return LHSPenalty + Cost;
 
-  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  return Cost;
 }
 
 int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -407,6 +781,7 @@
          "Invalid Opcode");
 
   int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
 
   bool IsAltivecType = ST->hasAltivec() &&
                        (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -500,3 +875,25 @@
   return Cost;
 }
 
+bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                            LoopInfo *LI, DominatorTree *DT,
+                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
+  // Process nested loops first.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+      return false; // Stop search.
+
+  HardwareLoopInfo HWLoopInfo(L);
+
+  if (!HWLoopInfo.canAnalyze(*LI))
+    return false;
+
+  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
+    return false;
+
+  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
+    return false;
+
+  *BI = HWLoopInfo.ExitBranch;
+  return true;
+}
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 9221a91..5d76ee4 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -17,7 +16,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
 
-#include "PPC.h"
 #include "PPCTargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
@@ -35,6 +33,7 @@
 
   const PPCSubtarget *getST() const { return ST; }
   const PPCTargetLowering *getTLI() const { return TLI; }
+  bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
 
 public:
   explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -54,6 +53,13 @@
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo);
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+                  DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo);
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
@@ -63,14 +69,15 @@
   /// @{
   bool useColdCCForColdCall(Function &F);
   bool enableAggressiveInterleaving(bool LoopHasReductions);
-  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const;
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const;
   bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getCacheLineSize();
   unsigned getPrefetchDistance();
   unsigned getMaxInterleaveFactor(unsigned VF);
+  int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 93fe323..719ed7b 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -1,9 +1,8 @@
 //===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@
 
 #define DEBUG_TYPE "ppc-vsx-copy"
 
-namespace llvm {
-  void initializePPCVSXCopyPass(PassRegistry&);
-}
-
 namespace {
   // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
   // (Altivec and scalar floating-point registers), we need to transform the
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 6586f50..ce78239 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -1,9 +1,8 @@
 //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1be193e..44175af 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -1,9 +1,8 @@
 //===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -60,10 +59,6 @@
 
 #define DEBUG_TYPE "ppc-vsx-swaps"
 
-namespace llvm {
-  void initializePPCVSXSwapRemovalPass(PassRegistry&);
-}
-
 namespace {
 
 // A PPCVSXSwapEntry is created for each machine instruction that
@@ -427,6 +422,7 @@
       // of opcodes having a common attribute in TableGen.  Should this
       // change, this is a prime candidate to use such a mechanism.
       case PPC::INLINEASM:
+      case PPC::INLINEASM_BR:
       case PPC::EXTRACT_SUBREG:
       case PPC::INSERT_SUBREG:
       case PPC::COPY_TO_REGCLASS:
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/README_P9.txt b/src/llvm-project/llvm/lib/Target/PowerPC/README_P9.txt
index d56f7cc..c9984b7 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/README_P9.txt
@@ -512,8 +512,8 @@
                         "lxsdx $XT, $src", IIC_LdStLFD,
                         [(set f64:$XT, (load xoaddr:$src))]>;
 
-  . (set f64:$XT, (load ixaddr:$src))
-    (set f64:$XT, (store ixaddr:$dst))
+  . (set f64:$XT, (load iaddrX4:$src))
+    (set f64:$XT, (store iaddrX4:$dst))
 
 - Load/Store SP, with conversion from/to DP: lxssp stxssp
   . Similar to lxsspx/stxsspx:
@@ -521,8 +521,8 @@
                          "lxsspx $XT, $src", IIC_LdStLFD,
                          [(set f32:$XT, (load xoaddr:$src))]>;
 
-  . (set f32:$XT, (load ixaddr:$src))
-    (set f32:$XT, (store ixaddr:$dst))
+  . (set f32:$XT, (load iaddrX4:$src))
+    (set f32:$XT, (store iaddrX4:$dst))
 
 - Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
   . Similar to lxsiwzx:
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index 4102346..dc74399 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -1,9 +1,8 @@
 ;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
 ;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ;
 ;===------------------------------------------------------------------------===;
 ;
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 9795952..99b5dec 100644
--- a/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
new file mode 100644
index 0000000..2d0afbf
--- /dev/null
+++ b/src/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- PowerPCTargetInfo.h - PowerPC Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+#define LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H