Importing rustc-1.56.0

Change-Id: I98941481270706fa55f8fb2cb91686ae3bd30f38
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 854e3cc..abf0ea8 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -353,7 +353,11 @@
 
 impl<'tcx> FnAbiLlvmExt<'tcx> for FnAbi<'tcx, Ty<'tcx>> {
     fn llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
-        let args_capacity: usize = self.args.iter().map(|arg|
+        // Ignore "extra" args from the call site for C variadic functions.
+        // Only the "fixed" args are part of the LLVM function signature.
+        let args = if self.c_variadic { &self.args[..self.fixed_count] } else { &self.args };
+
+        let args_capacity: usize = args.iter().map(|arg|
             if arg.pad.is_some() { 1 } else { 0 } +
             if let PassMode::Pair(_, _) = arg.mode { 2 } else { 1 }
         ).sum();
@@ -371,7 +375,7 @@
             }
         };
 
-        for arg in &self.args {
+        for arg in args {
             // add padding
             if let Some(ty) = arg.pad {
                 llargument_tys.push(ty.llvm_type(cx));
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 068e5e9..2d79b73 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -78,8 +78,14 @@
             .enumerate()
             .map(|(i, _)| llvm::LLVMGetParam(llfn, i as c_uint))
             .collect::<Vec<_>>();
-        let ret =
-            llvm::LLVMRustBuildCall(llbuilder, callee, args.as_ptr(), args.len() as c_uint, None);
+        let ret = llvm::LLVMRustBuildCall(
+            llbuilder,
+            ty,
+            callee,
+            args.as_ptr(),
+            args.len() as c_uint,
+            None,
+        );
         llvm::LLVMSetTailCall(ret, True);
         if output.is_some() {
             llvm::LLVMBuildRet(llbuilder, ret);
@@ -121,7 +127,8 @@
         .enumerate()
         .map(|(i, _)| llvm::LLVMGetParam(llfn, i as c_uint))
         .collect::<Vec<_>>();
-    let ret = llvm::LLVMRustBuildCall(llbuilder, callee, args.as_ptr(), args.len() as c_uint, None);
+    let ret =
+        llvm::LLVMRustBuildCall(llbuilder, ty, callee, args.as_ptr(), args.len() as c_uint, None);
     llvm::LLVMSetTailCall(ret, True);
     llvm::LLVMBuildRetVoid(llbuilder);
     llvm::LLVMDisposeBuilder(llbuilder);
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 7bd9397..e0d3127 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -302,11 +302,19 @@
                         "~{flags}".to_string(),
                     ]);
                 }
-                InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {}
+                InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
+                    constraints.extend_from_slice(&[
+                        "~{vtype}".to_string(),
+                        "~{vl}".to_string(),
+                        "~{vxsat}".to_string(),
+                        "~{vxrm}".to_string(),
+                    ]);
+                }
                 InlineAsmArch::Nvptx64 => {}
                 InlineAsmArch::PowerPC | InlineAsmArch::PowerPC64 => {}
                 InlineAsmArch::Hexagon => {}
                 InlineAsmArch::Mips | InlineAsmArch::Mips64 => {}
+                InlineAsmArch::S390x => {}
                 InlineAsmArch::SpirV => {}
                 InlineAsmArch::Wasm32 => {}
                 InlineAsmArch::Bpf => {}
@@ -425,7 +433,7 @@
     }
 }
 
-fn inline_asm_call(
+pub(crate) fn inline_asm_call(
     bx: &mut Builder<'a, 'll, 'tcx>,
     asm: &str,
     cons: &str,
@@ -464,7 +472,7 @@
                 alignstack,
                 llvm::AsmDialect::from_generic(dia),
             );
-            let call = bx.call(v, inputs, None);
+            let call = bx.call(fty, v, inputs, None);
 
             // Store mark in a metadata node so we can map LLVM errors
             // back to source locations.  See #17552.
@@ -608,6 +616,10 @@
             InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg) => "r",
             InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
             InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
+            InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::cr)
+            | InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::xer) => {
+                unreachable!("clobber-only")
+            }
             InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => "r",
             InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => "f",
             InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => {
@@ -626,6 +638,8 @@
             InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => "r",
             InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => "r",
             InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => "w",
+            InlineAsmRegClass::S390x(S390xInlineAsmRegClass::reg) => "r",
+            InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => "f",
             InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
                 bug!("LLVM backend does not support SPIR-V")
             }
@@ -704,6 +718,7 @@
         }
         InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => None,
         InlineAsmRegClass::Bpf(_) => None,
+        InlineAsmRegClass::S390x(_) => None,
         InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
             bug!("LLVM backend does not support SPIR-V")
         }
@@ -744,6 +759,10 @@
         InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
         InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
+        InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::cr)
+        | InlineAsmRegClass::PowerPC(PowerPCInlineAsmRegClass::xer) => {
+            unreachable!("clobber-only")
+        }
         InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
         InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => {
@@ -762,6 +781,8 @@
         InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
         InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => cx.type_i64(),
         InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => cx.type_i32(),
+        InlineAsmRegClass::S390x(S390xInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
         InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
             bug!("LLVM backend does not support SPIR-V")
         }
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 5b4a187..791604a 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -296,39 +296,8 @@
     }
     let (cgcx, _) = *(user as *const (&CodegenContext<LlvmCodegenBackend>, &Handler));
 
-    // Recover the post-substitution assembly code from LLVM for better
-    // diagnostics.
-    let mut have_source = false;
-    let mut buffer = String::new();
-    let mut level = llvm::DiagnosticLevel::Error;
-    let mut loc = 0;
-    let mut ranges = [0; 8];
-    let mut num_ranges = ranges.len() / 2;
-    let msg = llvm::build_string(|msg| {
-        buffer = llvm::build_string(|buffer| {
-            have_source = llvm::LLVMRustUnpackSMDiagnostic(
-                diag,
-                msg,
-                buffer,
-                &mut level,
-                &mut loc,
-                ranges.as_mut_ptr(),
-                &mut num_ranges,
-            );
-        })
-        .expect("non-UTF8 inline asm");
-    })
-    .expect("non-UTF8 SMDiagnostic");
-
-    let source = have_source.then(|| {
-        let mut spans = vec![InnerSpan::new(loc as usize, loc as usize)];
-        for i in 0..num_ranges {
-            spans.push(InnerSpan::new(ranges[i * 2] as usize, ranges[i * 2 + 1] as usize));
-        }
-        (buffer, spans)
-    });
-
-    report_inline_asm(cgcx, msg, level, cookie, source);
+    let smdiag = llvm::diagnostic::SrcMgrDiagnostic::unpack(diag);
+    report_inline_asm(cgcx, smdiag.message, smdiag.level, cookie, smdiag.source);
 }
 
 unsafe extern "C" fn diagnostic_handler(info: &DiagnosticInfo, user: *mut c_void) {
@@ -339,13 +308,7 @@
 
     match llvm::diagnostic::Diagnostic::unpack(info) {
         llvm::diagnostic::InlineAsm(inline) => {
-            report_inline_asm(
-                cgcx,
-                llvm::twine_to_string(inline.message),
-                inline.level,
-                inline.cookie,
-                None,
-            );
+            report_inline_asm(cgcx, inline.message, inline.level, inline.cookie, inline.source);
         }
 
         llvm::diagnostic::Optimization(opt) => {
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index cc3cbea..a6bdbd1 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -157,16 +157,18 @@
             }
 
             // Finalize code coverage by injecting the coverage map. Note, the coverage map will
-            // also be added to the `llvm.used` variable, created next.
+            // also be added to the `llvm.compiler.used` variable, created next.
             if cx.sess().instrument_coverage() {
                 cx.coverageinfo_finalize();
             }
 
-            // Create the llvm.used variable
-            // This variable has type [N x i8*] and is stored in the llvm.metadata section
+            // Create the llvm.used and llvm.compiler.used variables.
             if !cx.used_statics().borrow().is_empty() {
                 cx.create_used_variable()
             }
+            if !cx.compiler_used_statics().borrow().is_empty() {
+                cx.create_compiler_used_variable()
+            }
 
             // Finalize debuginfo
             if cx.sess().opts.debuginfo != DebugInfo::None {
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 5675a5d..47529f7 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -88,7 +88,7 @@
     }
 }
 
-impl abi::LayoutOf for Builder<'_, '_, 'tcx> {
+impl abi::LayoutOf<'tcx> for Builder<'_, '_, 'tcx> {
     type Ty = Ty<'tcx>;
     type TyAndLayout = TyAndLayout<'tcx>;
 
@@ -200,6 +200,7 @@
 
     fn invoke(
         &mut self,
+        llty: &'ll Type,
         llfn: &'ll Value,
         args: &[&'ll Value],
         then: &'ll BasicBlock,
@@ -208,13 +209,14 @@
     ) -> &'ll Value {
         debug!("invoke {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("invoke", llfn, args);
+        let args = self.check_call("invoke", llty, llfn, args);
         let bundle = funclet.map(|funclet| funclet.bundle());
         let bundle = bundle.as_ref().map(|b| &*b.raw);
 
         unsafe {
             llvm::LLVMRustBuildInvoke(
                 self.llbuilder,
+                llty,
                 llfn,
                 args.as_ptr(),
                 args.len() as c_uint,
@@ -369,8 +371,7 @@
             },
         };
 
-        let intrinsic = self.get_intrinsic(&name);
-        let res = self.call(intrinsic, &[lhs, rhs], None);
+        let res = self.call_intrinsic(name, &[lhs, rhs]);
         (self.extract_value(res, 0), self.extract_value(res, 1))
     }
 
@@ -461,7 +462,6 @@
             load: &'ll Value,
             scalar: &abi::Scalar,
         ) {
-            let vr = scalar.valid_range.clone();
             match scalar.value {
                 abi::Int(..) => {
                     let range = scalar.valid_range_exclusive(bx);
@@ -469,7 +469,7 @@
                         bx.range_metadata(load, range);
                     }
                 }
-                abi::Pointer if vr.start() < vr.end() && !vr.contains(&0) => {
+                abi::Pointer if !scalar.valid_range.contains_zero() => {
                     bx.nonnull_metadata(load);
                 }
                 _ => {}
@@ -497,9 +497,10 @@
             OperandValue::Immediate(self.to_immediate(llval, place.layout))
         } else if let abi::Abi::ScalarPair(ref a, ref b) = place.layout.abi {
             let b_offset = a.value.size(self).align_to(b.value.align(self).abi);
+            let pair_ty = place.layout.llvm_type(self);
 
             let mut load = |i, scalar: &abi::Scalar, align| {
-                let llptr = self.struct_gep(place.llval, i as u64);
+                let llptr = self.struct_gep(pair_ty, place.llval, i as u64);
                 let llty = place.layout.scalar_pair_element_llvm_type(self, i, false);
                 let load = self.load(llty, llptr, align);
                 scalar_load_metadata(self, load, scalar);
@@ -543,7 +544,11 @@
             .val
             .store(&mut body_bx, PlaceRef::new_sized_aligned(current, cg_elem.layout, align));
 
-        let next = body_bx.inbounds_gep(current, &[self.const_usize(1)]);
+        let next = body_bx.inbounds_gep(
+            self.backend_type(cg_elem.layout),
+            current,
+            &[self.const_usize(1)],
+        );
         body_bx.br(header_bx.llbb());
         header_bx.add_incoming_to_phi(current, next, body_bx.llbb());
 
@@ -552,7 +557,7 @@
 
     fn range_metadata(&mut self, load: &'ll Value, range: Range<u128>) {
         if self.sess().target.arch == "amdgpu" {
-            // amdgpu/LLVM does something weird and thinks a i64 value is
+            // amdgpu/LLVM does something weird and thinks an i64 value is
             // split into a v2i32, halving the bitwidth LLVM expects,
             // tripping an assertion. So, for now, just disable this
             // optimization.
@@ -639,10 +644,11 @@
         }
     }
 
-    fn gep(&mut self, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
+    fn gep(&mut self, ty: &'ll Type, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildGEP(
+            llvm::LLVMBuildGEP2(
                 self.llbuilder,
+                ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
@@ -651,10 +657,16 @@
         }
     }
 
-    fn inbounds_gep(&mut self, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
+    fn inbounds_gep(
+        &mut self,
+        ty: &'ll Type,
+        ptr: &'ll Value,
+        indices: &[&'ll Value],
+    ) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildInBoundsGEP(
+            llvm::LLVMBuildInBoundsGEP2(
                 self.llbuilder,
+                ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
@@ -663,9 +675,9 @@
         }
     }
 
-    fn struct_gep(&mut self, ptr: &'ll Value, idx: u64) -> &'ll Value {
+    fn struct_gep(&mut self, ty: &'ll Type, ptr: &'ll Value, idx: u64) -> &'ll Value {
         assert_eq!(idx as c_uint as u64, idx);
-        unsafe { llvm::LLVMBuildStructGEP(self.llbuilder, ptr, idx as c_uint, UNNAMED) }
+        unsafe { llvm::LLVMBuildStructGEP2(self.llbuilder, ty, ptr, idx as c_uint, UNNAMED) }
     }
 
     /* Casts */
@@ -683,8 +695,7 @@
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
             let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
-            let intrinsic = self.get_intrinsic(&name);
-            return Some(self.call(intrinsic, &[val], None));
+            return Some(self.call_intrinsic(&name, &[val]));
         }
 
         None
@@ -696,8 +707,7 @@
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
             let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
-            let intrinsic = self.get_intrinsic(&name);
-            return Some(self.call(intrinsic, &[val], None));
+            return Some(self.call_intrinsic(&name, &[val]));
         }
 
         None
@@ -731,8 +741,7 @@
                     _ => None,
                 };
                 if let Some(name) = name {
-                    let intrinsic = self.get_intrinsic(name);
-                    return self.call(intrinsic, &[val], None);
+                    return self.call_intrinsic(name, &[val]);
                 }
             }
         }
@@ -754,8 +763,7 @@
                     _ => None,
                 };
                 if let Some(name) = name {
-                    let intrinsic = self.get_intrinsic(name);
-                    return self.call(intrinsic, &[val], None);
+                    return self.call_intrinsic(name, &[val]);
                 }
             }
         }
@@ -1103,12 +1111,17 @@
         );
 
         let llfn = unsafe { llvm::LLVMRustGetInstrProfIncrementIntrinsic(self.cx().llmod) };
+        let llty = self.cx.type_func(
+            &[self.cx.type_i8p(), self.cx.type_i64(), self.cx.type_i32(), self.cx.type_i32()],
+            self.cx.type_void(),
+        );
         let args = &[fn_name, hash, num_counters, index];
-        let args = self.check_call("call", llfn, args);
+        let args = self.check_call("call", llty, llfn, args);
 
         unsafe {
             let _ = llvm::LLVMRustBuildCall(
                 self.llbuilder,
+                llty,
                 llfn,
                 args.as_ptr() as *const &llvm::Value,
                 args.len() as c_uint,
@@ -1119,19 +1132,21 @@
 
     fn call(
         &mut self,
+        llty: &'ll Type,
         llfn: &'ll Value,
         args: &[&'ll Value],
         funclet: Option<&Funclet<'ll>>,
     ) -> &'ll Value {
         debug!("call {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("call", llfn, args);
+        let args = self.check_call("call", llty, llfn, args);
         let bundle = funclet.map(|funclet| funclet.bundle());
         let bundle = bundle.as_ref().map(|b| &*b.raw);
 
         unsafe {
             llvm::LLVMRustBuildCall(
                 self.llbuilder,
+                llty,
                 llfn,
                 args.as_ptr() as *const &llvm::Value,
                 args.len() as c_uint,
@@ -1301,15 +1316,10 @@
     fn check_call<'b>(
         &mut self,
         typ: &str,
+        fn_ty: &'ll Type,
         llfn: &'ll Value,
         args: &'b [&'ll Value],
     ) -> Cow<'b, [&'ll Value]> {
-        let mut fn_ty = self.cx.val_ty(llfn);
-        // Strip off pointers
-        while self.cx.type_kind(fn_ty) == TypeKind::Pointer {
-            fn_ty = self.cx.element_type(fn_ty);
-        }
-
         assert!(
             self.cx.type_kind(fn_ty) == TypeKind::Function,
             "builder::{} not passed a function, but {:?}",
@@ -1350,6 +1360,11 @@
         unsafe { llvm::LLVMBuildVAArg(self.llbuilder, list, ty, UNNAMED) }
     }
 
+    crate fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
+        let (ty, f) = self.cx.get_intrinsic(intrinsic);
+        self.call(ty, f, args, None)
+    }
+
     fn call_lifetime_intrinsic(&mut self, intrinsic: &str, ptr: &'ll Value, size: Size) {
         let size = size.bytes();
         if size == 0 {
@@ -1360,10 +1375,8 @@
             return;
         }
 
-        let lifetime_intrinsic = self.cx.get_intrinsic(intrinsic);
-
         let ptr = self.pointercast(ptr, self.cx.type_i8p());
-        self.call(lifetime_intrinsic, &[self.cx.const_u64(size), ptr], None);
+        self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
     }
 
     pub(crate) fn phi(
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 35e7262..5532f53 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -268,7 +268,8 @@
                     }
                 };
                 let llval = unsafe {
-                    llvm::LLVMConstInBoundsGEP(
+                    llvm::LLVMRustConstInBoundsGEP2(
+                        self.type_i8(),
                         self.const_bitcast(base_addr, self.type_i8p_ext(base_addr_space)),
                         &self.const_usize(offset.bytes()),
                         1,
@@ -303,7 +304,8 @@
             let base_addr = self.static_addr_of(init, alloc.align, None);
 
             let llval = unsafe {
-                llvm::LLVMConstInBoundsGEP(
+                llvm::LLVMRustConstInBoundsGEP2(
+                    self.type_i8(),
                     self.const_bitcast(base_addr, self.type_i8p()),
                     &self.const_usize(offset.bytes()),
                     1,
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index 3ca295f..a4e4fc4 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -11,12 +11,16 @@
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
 use rustc_middle::mir::interpret::{
-    read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer, Scalar as InterpScalar,
+    read_target_uint, Allocation, ErrorHandled, GlobalAlloc, InitChunk, Pointer,
+    Scalar as InterpScalar,
 };
 use rustc_middle::mir::mono::MonoItem;
 use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::{bug, span_bug};
-use rustc_target::abi::{AddressSpace, Align, HasDataLayout, LayoutOf, Primitive, Scalar, Size};
+use rustc_target::abi::{
+    AddressSpace, Align, HasDataLayout, LayoutOf, Primitive, Scalar, Size, WrappingRange,
+};
+use std::ops::Range;
 use tracing::debug;
 
 pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll Value {
@@ -24,6 +28,57 @@
     let dl = cx.data_layout();
     let pointer_size = dl.pointer_size.bytes() as usize;
 
+    // Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`,
+    // so `range` must be within the bounds of `alloc` and not contain or overlap a relocation.
+    fn append_chunks_of_init_and_uninit_bytes<'ll, 'a, 'b>(
+        llvals: &mut Vec<&'ll Value>,
+        cx: &'a CodegenCx<'ll, 'b>,
+        alloc: &'a Allocation,
+        range: Range<usize>,
+    ) {
+        let mut chunks = alloc
+            .init_mask()
+            .range_as_init_chunks(Size::from_bytes(range.start), Size::from_bytes(range.end));
+
+        let chunk_to_llval = move |chunk| match chunk {
+            InitChunk::Init(range) => {
+                let range = (range.start.bytes() as usize)..(range.end.bytes() as usize);
+                let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
+                cx.const_bytes(bytes)
+            }
+            InitChunk::Uninit(range) => {
+                let len = range.end.bytes() - range.start.bytes();
+                cx.const_undef(cx.type_array(cx.type_i8(), len))
+            }
+        };
+
+        // Generating partially-uninit consts inhibits optimizations, so it is disabled by default.
+        // See https://github.com/rust-lang/rust/issues/84565.
+        let allow_partially_uninit =
+            match cx.sess().opts.debugging_opts.partially_uninit_const_threshold {
+                Some(max) => range.len() <= max,
+                None => false,
+            };
+
+        if allow_partially_uninit {
+            llvals.extend(chunks.map(chunk_to_llval));
+        } else {
+            let llval = match (chunks.next(), chunks.next()) {
+                (Some(chunk), None) => {
+                    // exactly one chunk, either fully init or fully uninit
+                    chunk_to_llval(chunk)
+                }
+                _ => {
+                    // partially uninit, codegen as if it was initialized
+                    // (using some arbitrary value for uninit bytes)
+                    let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
+                    cx.const_bytes(bytes)
+                }
+            };
+            llvals.push(llval);
+        }
+    }
+
     let mut next_offset = 0;
     for &(offset, alloc_id) in alloc.relocations().iter() {
         let offset = offset.bytes();
@@ -32,12 +87,8 @@
         if offset > next_offset {
             // This `inspect` is okay since we have checked that it is not within a relocation, it
             // is within the bounds of the allocation, and it doesn't affect interpreter execution
-            // (we inspect the result after interpreter execution). Any undef byte is replaced with
-            // some arbitrary byte value.
-            //
-            // FIXME: relay undef bytes to codegen as undef const bytes
-            let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(next_offset..offset);
-            llvals.push(cx.const_bytes(bytes));
+            // (we inspect the result after interpreter execution).
+            append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, next_offset..offset);
         }
         let ptr_offset = read_target_uint(
             dl.endian,
@@ -59,7 +110,7 @@
                 Pointer::new(alloc_id, Size::from_bytes(ptr_offset)),
                 &cx.tcx,
             ),
-            &Scalar { value: Primitive::Pointer, valid_range: 0..=!0 },
+            &Scalar { value: Primitive::Pointer, valid_range: WrappingRange { start: 0, end: !0 } },
             cx.type_i8p_ext(address_space),
         ));
         next_offset = offset + pointer_size;
@@ -68,12 +119,8 @@
         let range = next_offset..alloc.len();
         // This `inspect` is okay since we have check that it is after all relocations, it is
         // within the bounds of the allocation, and it doesn't affect interpreter execution (we
-        // inspect the result after interpreter execution). Any undef byte is replaced with some
-        // arbitrary byte value.
-        //
-        // FIXME: relay undef bytes to codegen as undef const bytes
-        let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
-        llvals.push(cx.const_bytes(bytes));
+        // inspect the result after interpreter execution).
+        append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
     }
 
     cx.const_struct(&llvals, true)
@@ -474,7 +521,13 @@
             }
 
             if attrs.flags.contains(CodegenFnAttrFlags::USED) {
-                self.add_used_global(g);
+                // The semantics of #[used] in Rust only require the symbol to make it into the
+                // object file. It is explicitly allowed for the linker to strip the symbol if it
+                // is dead. As such, use llvm.compiler.used instead of llvm.used.
+                // Additionally, https://reviews.llvm.org/D97448 in LLVM 13 started emitting unique
+                // sections with the SHF_GNU_RETAIN flag for llvm.used symbols, which may trigger bugs
+                // in some versions of the gold linker.
+                self.add_compiler_used_global(g);
             }
         }
     }
@@ -484,4 +537,11 @@
         let cast = unsafe { llvm::LLVMConstPointerCast(global, self.type_i8p()) };
         self.used_statics.borrow_mut().push(cast);
     }
+
+    /// Add a global value to a list to be stored in the `llvm.compiler.used` variable,
+    /// an array of i8*.
+    fn add_compiler_used_global(&self, global: &'ll Value) {
+        let cast = unsafe { llvm::LLVMConstPointerCast(global, self.type_i8p()) };
+        self.compiler_used_statics.borrow_mut().push(cast);
+    }
 }
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 5925985..45da18d 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -24,6 +24,7 @@
 use rustc_span::symbol::Symbol;
 use rustc_target::abi::{HasDataLayout, LayoutOf, PointeeInfo, Size, TargetDataLayout, VariantIdx};
 use rustc_target::spec::{HasTargetSpec, RelocModel, Target, TlsModel};
+use smallvec::SmallVec;
 
 use std::cell::{Cell, RefCell};
 use std::ffi::CStr;
@@ -74,8 +75,16 @@
     /// See <https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable> for details
     pub used_statics: RefCell<Vec<&'ll Value>>,
 
-    pub lltypes: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), &'ll Type>>,
+    /// Statics that will be placed in the llvm.compiler.used variable
+    /// See <https://llvm.org/docs/LangRef.html#the-llvm-compiler-used-global-variable> for details
+    pub compiler_used_statics: RefCell<Vec<&'ll Value>>,
+
+    /// Mapping of non-scalar types to llvm types and field remapping if needed.
+    pub type_lowering: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), TypeLowering<'ll>>>,
+
+    /// Mapping of scalar types to llvm types.
     pub scalar_lltypes: RefCell<FxHashMap<Ty<'tcx>, &'ll Type>>,
+
     pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
     pub isize_ty: &'ll Type,
 
@@ -84,14 +93,23 @@
 
     eh_personality: Cell<Option<&'ll Value>>,
     eh_catch_typeinfo: Cell<Option<&'ll Value>>,
-    pub rust_try_fn: Cell<Option<&'ll Value>>,
+    pub rust_try_fn: Cell<Option<(&'ll Type, &'ll Value)>>,
 
-    intrinsics: RefCell<FxHashMap<&'static str, &'ll Value>>,
+    intrinsics: RefCell<FxHashMap<&'static str, (&'ll Type, &'ll Value)>>,
 
     /// A counter that is used for generating local symbol names
     local_gen_sym_counter: Cell<usize>,
 }
 
+pub struct TypeLowering<'ll> {
+    /// Associated LLVM type
+    pub lltype: &'ll Type,
+
+    /// If padding is used, the slice maps fields from source order
+    /// to LLVM order.
+    pub field_remapping: Option<SmallVec<[u32; 4]>>,
+}
+
 fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
     match tls_model {
         TlsModel::GeneralDynamic => llvm::ThreadLocalMode::GeneralDynamic,
@@ -101,10 +119,6 @@
     }
 }
 
-fn strip_powerpc64_vectors(data_layout: String) -> String {
-    data_layout.replace("-v256:256:256-v512:512:512", "")
-}
-
 pub unsafe fn create_module(
     tcx: TyCtxt<'_>,
     llcx: &'ll llvm::Context,
@@ -116,7 +130,18 @@
 
     let mut target_data_layout = sess.target.data_layout.clone();
     if llvm_util::get_version() < (12, 0, 0) && sess.target.arch == "powerpc64" {
-        target_data_layout = strip_powerpc64_vectors(target_data_layout);
+        target_data_layout = target_data_layout.replace("-v256:256:256-v512:512:512", "");
+    }
+    if llvm_util::get_version() < (13, 0, 0) {
+        if sess.target.arch == "powerpc64" {
+            target_data_layout = target_data_layout.replace("-S128", "");
+        }
+        if sess.target.arch == "wasm32" {
+            target_data_layout = "e-m:e-p:32:32-i64:64-n32:64-S128".to_string();
+        }
+        if sess.target.arch == "wasm64" {
+            target_data_layout = "e-m:e-p:64:64-i64:64-n32:64-S128".to_string();
+        }
     }
 
     // Ensure the data-layout values hardcoded remain the defaults.
@@ -304,7 +329,8 @@
             const_globals: Default::default(),
             statics_to_rauw: RefCell::new(Vec::new()),
             used_statics: RefCell::new(Vec::new()),
-            lltypes: Default::default(),
+            compiler_used_statics: RefCell::new(Vec::new()),
+            type_lowering: Default::default(),
             scalar_lltypes: Default::default(),
             pointee_infos: Default::default(),
             isize_ty,
@@ -326,6 +352,18 @@
     pub fn coverage_context(&'a self) -> Option<&'a coverageinfo::CrateCoverageContext<'ll, 'tcx>> {
         self.coverage_cx.as_ref()
     }
+
+    fn create_used_variable_impl(&self, name: &'static CStr, values: &[&'ll Value]) {
+        let section = cstr!("llvm.metadata");
+        let array = self.const_array(&self.type_ptr_to(self.type_i8()), values);
+
+        unsafe {
+            let g = llvm::LLVMAddGlobal(self.llmod, self.val_ty(array), name.as_ptr());
+            llvm::LLVMSetInitializer(g, array);
+            llvm::LLVMRustSetLinkage(g, llvm::Linkage::AppendingLinkage);
+            llvm::LLVMSetSection(g, section.as_ptr());
+        }
+    }
 }
 
 impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
@@ -416,6 +454,10 @@
         &self.used_statics
     }
 
+    fn compiler_used_statics(&self) -> &RefCell<Vec<&'ll Value>> {
+        &self.compiler_used_statics
+    }
+
     fn set_frame_pointer_type(&self, llfn: &'ll Value) {
         attributes::set_frame_pointer_type(self, llfn)
     }
@@ -426,17 +468,14 @@
     }
 
     fn create_used_variable(&self) {
-        let name = cstr!("llvm.used");
-        let section = cstr!("llvm.metadata");
-        let array =
-            self.const_array(&self.type_ptr_to(self.type_i8()), &*self.used_statics.borrow());
+        self.create_used_variable_impl(cstr!("llvm.used"), &*self.used_statics.borrow());
+    }
 
-        unsafe {
-            let g = llvm::LLVMAddGlobal(self.llmod, self.val_ty(array), name.as_ptr());
-            llvm::LLVMSetInitializer(g, array);
-            llvm::LLVMRustSetLinkage(g, llvm::Linkage::AppendingLinkage);
-            llvm::LLVMSetSection(g, section.as_ptr());
-        }
+    fn create_compiler_used_variable(&self) {
+        self.create_used_variable_impl(
+            cstr!("llvm.compiler.used"),
+            &*self.compiler_used_statics.borrow(),
+        );
     }
 
     fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> {
@@ -452,7 +491,7 @@
 }
 
 impl CodegenCx<'b, 'tcx> {
-    crate fn get_intrinsic(&self, key: &str) -> &'b Value {
+    crate fn get_intrinsic(&self, key: &str) -> (&'b Type, &'b Value) {
         if let Some(v) = self.intrinsics.borrow().get(key).cloned() {
             return v;
         }
@@ -465,18 +504,18 @@
         name: &'static str,
         args: Option<&[&'b llvm::Type]>,
         ret: &'b llvm::Type,
-    ) -> &'b llvm::Value {
+    ) -> (&'b llvm::Type, &'b llvm::Value) {
         let fn_ty = if let Some(args) = args {
             self.type_func(args, ret)
         } else {
             self.type_variadic_func(&[], ret)
         };
         let f = self.declare_cfn(name, llvm::UnnamedAddr::No, fn_ty);
-        self.intrinsics.borrow_mut().insert(name, f);
-        f
+        self.intrinsics.borrow_mut().insert(name, (fn_ty, f));
+        (fn_ty, f)
     }
 
-    fn declare_intrinsic(&self, key: &str) -> Option<&'b Value> {
+    fn declare_intrinsic(&self, key: &str) -> Option<(&'b Type, &'b Value)> {
         macro_rules! ifn {
             ($name:expr, fn() -> $ret:expr) => (
                 if key == $name {
@@ -796,7 +835,7 @@
     }
 }
 
-impl LayoutOf for CodegenCx<'ll, 'tcx> {
+impl LayoutOf<'tcx> for CodegenCx<'ll, 'tcx> {
     type Ty = Ty<'tcx>;
     type TyAndLayout = TyAndLayout<'tcx>;
 
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index de3f719..c33d35c 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -15,12 +15,11 @@
 /// .debug_gdb_scripts global is referenced, so it isn't removed by the linker.
 pub fn insert_reference_to_gdb_debug_scripts_section_global(bx: &mut Builder<'_, '_, '_>) {
     if needs_gdb_debug_scripts_section(bx) {
-        let gdb_debug_scripts_section = get_or_insert_gdb_debug_scripts_section_global(bx);
+        let gdb_debug_scripts_section =
+            bx.const_bitcast(get_or_insert_gdb_debug_scripts_section_global(bx), bx.type_i8p());
         // Load just the first byte as that's all that's necessary to force
         // LLVM to keep around the reference to the global.
-        let indices = [bx.const_i32(0), bx.const_i32(0)];
-        let element = bx.inbounds_gep(gdb_debug_scripts_section, &indices);
-        let volative_load_instruction = bx.volatile_load(bx.type_i8(), element);
+        let volative_load_instruction = bx.volatile_load(bx.type_i8(), gdb_debug_scripts_section);
         unsafe {
             llvm::LLVMSetAlignment(volative_load_instruction, 1);
         }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 7e136c1..346c51c 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -35,6 +35,7 @@
 use rustc_middle::{bug, span_bug};
 use rustc_session::config::{self, DebugInfo};
 use rustc_span::symbol::{Interner, Symbol};
+use rustc_span::FileNameDisplayPreference;
 use rustc_span::{self, SourceFile, SourceFileHash, Span};
 use rustc_target::abi::{Abi, Align, HasDataLayout, Integer, LayoutOf, TagEncoding};
 use rustc_target::abi::{Int, Pointer, F32, F64};
@@ -771,7 +772,13 @@
     let hash = Some(&source_file.src_hash);
     let file_name = Some(source_file.name.prefer_remapped().to_string());
     let directory = if source_file.is_real_file() && !source_file.is_imported() {
-        Some(cx.sess().working_dir.to_string_lossy(false).to_string())
+        Some(
+            cx.sess()
+                .opts
+                .working_dir
+                .to_string_lossy(FileNameDisplayPreference::Remapped)
+                .to_string(),
+        )
     } else {
         // If the path comes from an upstream crate we assume it has been made
         // independent of the compiler's working directory one way or another.
@@ -999,7 +1006,7 @@
     let producer = format!("clang LLVM ({})", rustc_producer);
 
     let name_in_debuginfo = name_in_debuginfo.to_string_lossy();
-    let work_dir = tcx.sess.working_dir.to_string_lossy(false);
+    let work_dir = tcx.sess.opts.working_dir.to_string_lossy(FileNameDisplayPreference::Remapped);
     let flags = "\0";
     let output_filenames = tcx.output_filenames(());
     let out_dir = &output_filenames.out_directory;
@@ -1280,6 +1287,31 @@
 // Tuples
 //=-----------------------------------------------------------------------------
 
+/// Returns names of captured upvars for closures and generators.
+///
+/// Here are some examples:
+///  - `name__field1__field2` when the upvar is captured by value.
+///  - `_ref__name__field` when the upvar is captured by reference.
+fn closure_saved_names_of_captured_variables(tcx: TyCtxt<'tcx>, def_id: DefId) -> Vec<String> {
+    let body = tcx.optimized_mir(def_id);
+
+    body.var_debug_info
+        .iter()
+        .filter_map(|var| {
+            let is_ref = match var.value {
+                mir::VarDebugInfoContents::Place(place) if place.local == mir::Local::new(1) => {
+                    // The projection is either `[.., Field, Deref]` or `[.., Field]`. It
+                    // indicates whether the variable is captured by value or by reference.
+                    matches!(place.projection.last().unwrap(), mir::ProjectionElem::Deref)
+                }
+                _ => return None,
+            };
+            let prefix = if is_ref { "_ref__" } else { "" };
+            Some(prefix.to_owned() + &var.name.as_str())
+        })
+        .collect::<Vec<_>>()
+}
+
 /// Creates `MemberDescription`s for the fields of a tuple.
 struct TupleMemberDescriptionFactory<'tcx> {
     ty: Ty<'tcx>,
@@ -1289,14 +1321,25 @@
 
 impl<'tcx> TupleMemberDescriptionFactory<'tcx> {
     fn create_member_descriptions(&self, cx: &CodegenCx<'ll, 'tcx>) -> Vec<MemberDescription<'ll>> {
+        let mut capture_names = match *self.ty.kind() {
+            ty::Generator(def_id, ..) | ty::Closure(def_id, ..) => {
+                Some(closure_saved_names_of_captured_variables(cx.tcx, def_id).into_iter())
+            }
+            _ => None,
+        };
         let layout = cx.layout_of(self.ty);
         self.component_types
             .iter()
             .enumerate()
             .map(|(i, &component_type)| {
                 let (size, align) = cx.size_and_align_of(component_type);
+                let name = if let Some(names) = capture_names.as_mut() {
+                    names.next().unwrap()
+                } else {
+                    format!("__{}", i)
+                };
                 MemberDescription {
-                    name: format!("__{}", i),
+                    name,
                     type_metadata: type_metadata(cx, component_type, self.span),
                     offset: layout.fields.offset(i),
                     size,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 8375d4c..914376d 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -499,7 +499,7 @@
                         ty::Adt(def, ..) if !def.is_box() => {
                             // Again, only create type information if full debuginfo is enabled
                             if cx.sess().opts.debuginfo == DebugInfo::Full
-                                && !impl_self_ty.needs_subst()
+                                && !impl_self_ty.definitely_needs_subst(cx.tcx)
                             {
                                 Some(type_metadata(cx, impl_self_ty, rustc_span::DUMMY_SP))
                             } else {
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index a48a694..e30c492 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1,4 +1,4 @@
-use crate::abi::{Abi, FnAbi, LlvmType, PassMode};
+use crate::abi::{Abi, FnAbi, FnAbiLlvmExt, LlvmType, PassMode};
 use crate::builder::Builder;
 use crate::context::CodegenCx;
 use crate::llvm;
@@ -7,6 +7,7 @@
 use crate::va_arg::emit_va_arg;
 use crate::value::Value;
 
+use rustc_ast as ast;
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh};
 use rustc_codegen_ssa::common::span_invalid_monomorphization_error;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -24,7 +25,7 @@
 use std::cmp::Ordering;
 use std::iter;
 
-fn get_simple_intrinsic(cx: &CodegenCx<'ll, '_>, name: Symbol) -> Option<&'ll Value> {
+fn get_simple_intrinsic(cx: &CodegenCx<'ll, '_>, name: Symbol) -> Option<(&'ll Type, &'ll Value)> {
     let llvm_name = match name {
         sym::sqrtf32 => "llvm.sqrt.f32",
         sym::sqrtf64 => "llvm.sqrt.f64",
@@ -102,19 +103,20 @@
 
         let simple = get_simple_intrinsic(self, name);
         let llval = match name {
-            _ if simple.is_some() => self.call(
-                simple.unwrap(),
-                &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
-                None,
-            ),
+            _ if simple.is_some() => {
+                let (simple_ty, simple_fn) = simple.unwrap();
+                self.call(
+                    simple_ty,
+                    simple_fn,
+                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
+                    None,
+                )
+            }
             sym::likely => {
-                let expect = self.get_intrinsic(&("llvm.expect.i1"));
-                self.call(expect, &[args[0].immediate(), self.const_bool(true)], None)
+                self.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(true)])
             }
-            sym::unlikely => {
-                let expect = self.get_intrinsic(&("llvm.expect.i1"));
-                self.call(expect, &[args[0].immediate(), self.const_bool(false)], None)
-            }
+            sym::unlikely => self
+                .call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(false)]),
             kw::Try => {
                 try_intrinsic(
                     self,
@@ -125,13 +127,9 @@
                 );
                 return;
             }
-            sym::breakpoint => {
-                let llfn = self.get_intrinsic(&("llvm.debugtrap"));
-                self.call(llfn, &[], None)
-            }
+            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[]),
             sym::va_copy => {
-                let intrinsic = self.cx().get_intrinsic(&("llvm.va_copy"));
-                self.call(intrinsic, &[args[0].immediate(), args[1].immediate()], None)
+                self.call_intrinsic("llvm.va_copy", &[args[0].immediate(), args[1].immediate()])
             }
             sym::va_arg => {
                 match fn_abi.ret.layout.abi {
@@ -139,9 +137,9 @@
                         match scalar.value {
                             Primitive::Int(..) => {
                                 if self.cx().size_of(ret_ty).bytes() < 4 {
-                                    // `va_arg` should not be called on a integer type
+                                    // `va_arg` should not be called on an integer type
                                     // less than 4 bytes in length. If it is, promote
-                                    // the integer to a `i32` and truncate the result
+                                    // the integer to an `i32` and truncate the result
                                     // back to the smaller type.
                                     let promoted_result = emit_va_arg(self, args[0], tcx.types.i32);
                                     self.trunc(promoted_result, llret_ty)
@@ -194,7 +192,6 @@
             | sym::prefetch_write_data
             | sym::prefetch_read_instruction
             | sym::prefetch_write_instruction => {
-                let expect = self.get_intrinsic(&("llvm.prefetch"));
                 let (rw, cache_type) = match name {
                     sym::prefetch_read_data => (0, 1),
                     sym::prefetch_write_data => (1, 1),
@@ -202,15 +199,14 @@
                     sym::prefetch_write_instruction => (1, 0),
                     _ => bug!(),
                 };
-                self.call(
-                    expect,
+                self.call_intrinsic(
+                    "llvm.prefetch",
                     &[
                         args[0].immediate(),
                         self.const_i32(rw),
                         args[1].immediate(),
                         self.const_i32(cache_type),
                     ],
-                    None,
                 )
             }
             sym::ctlz
@@ -229,35 +225,33 @@
                     Some((width, signed)) => match name {
                         sym::ctlz | sym::cttz => {
                             let y = self.const_bool(false);
-                            let llfn = self.get_intrinsic(&format!("llvm.{}.i{}", name, width));
-                            self.call(llfn, &[args[0].immediate(), y], None)
+                            self.call_intrinsic(
+                                &format!("llvm.{}.i{}", name, width),
+                                &[args[0].immediate(), y],
+                            )
                         }
                         sym::ctlz_nonzero | sym::cttz_nonzero => {
                             let y = self.const_bool(true);
                             let llvm_name = &format!("llvm.{}.i{}", &name_str[..4], width);
-                            let llfn = self.get_intrinsic(llvm_name);
-                            self.call(llfn, &[args[0].immediate(), y], None)
+                            self.call_intrinsic(llvm_name, &[args[0].immediate(), y])
                         }
-                        sym::ctpop => self.call(
-                            self.get_intrinsic(&format!("llvm.ctpop.i{}", width)),
+                        sym::ctpop => self.call_intrinsic(
+                            &format!("llvm.ctpop.i{}", width),
                             &[args[0].immediate()],
-                            None,
                         ),
                         sym::bswap => {
                             if width == 8 {
                                 args[0].immediate() // byte swap a u8/i8 is just a no-op
                             } else {
-                                self.call(
-                                    self.get_intrinsic(&format!("llvm.bswap.i{}", width)),
+                                self.call_intrinsic(
+                                    &format!("llvm.bswap.i{}", width),
                                     &[args[0].immediate()],
-                                    None,
                                 )
                             }
                         }
-                        sym::bitreverse => self.call(
-                            self.get_intrinsic(&format!("llvm.bitreverse.i{}", width)),
+                        sym::bitreverse => self.call_intrinsic(
+                            &format!("llvm.bitreverse.i{}", width),
                             &[args[0].immediate()],
-                            None,
                         ),
                         sym::rotate_left | sym::rotate_right => {
                             let is_left = name == sym::rotate_left;
@@ -266,8 +260,7 @@
                             // rotate = funnel shift with first two args the same
                             let llvm_name =
                                 &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
-                            let llfn = self.get_intrinsic(llvm_name);
-                            self.call(llfn, &[val, val, raw_shift], None)
+                            self.call_intrinsic(llvm_name, &[val, val, raw_shift])
                         }
                         sym::saturating_add | sym::saturating_sub => {
                             let is_add = name == sym::saturating_add;
@@ -279,8 +272,7 @@
                                 if is_add { "add" } else { "sub" },
                                 width
                             );
-                            let llfn = self.get_intrinsic(llvm_name);
-                            self.call(llfn, &[lhs, rhs], None)
+                            self.call_intrinsic(llvm_name, &[lhs, rhs])
                         }
                         _ => bug!(),
                     },
@@ -331,12 +323,36 @@
                     let a_ptr = self.bitcast(a, i8p_ty);
                     let b_ptr = self.bitcast(b, i8p_ty);
                     let n = self.const_usize(layout.size.bytes());
-                    let llfn = self.get_intrinsic("memcmp");
-                    let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None);
+                    let cmp = self.call_intrinsic("memcmp", &[a_ptr, b_ptr, n]);
                     self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
                 }
             }
 
+            sym::black_box => {
+                args[0].val.store(self, result);
+
+                // We need to "use" the argument in some way LLVM can't introspect, and on
+                // targets that support it we can typically leverage inline assembly to do
+                // this. LLVM's interpretation of inline assembly is that it's, well, a black
+                // box. This isn't the greatest implementation since it probably deoptimizes
+                // more than we want, but it's so far good enough.
+                crate::asm::inline_asm_call(
+                    self,
+                    "",
+                    "r,~{memory}",
+                    &[result.llval],
+                    self.type_void(),
+                    true,
+                    false,
+                    ast::LlvmAsmDialect::Att,
+                    &[span],
+                )
+                .unwrap_or_else(|| bug!("failed to generate inline asm call for `black_box`"));
+
+                // We have copied the value to `result` already.
+                return;
+            }
+
             _ if name_str.starts_with("simd_") => {
                 match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                     Ok(llval) => llval,
@@ -361,18 +377,15 @@
     }
 
     fn abort(&mut self) {
-        let fnname = self.get_intrinsic(&("llvm.trap"));
-        self.call(fnname, &[], None);
+        self.call_intrinsic("llvm.trap", &[]);
     }
 
     fn assume(&mut self, val: Self::Value) {
-        let assume_intrinsic = self.get_intrinsic("llvm.assume");
-        self.call(assume_intrinsic, &[val], None);
+        self.call_intrinsic("llvm.assume", &[val]);
     }
 
     fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value {
-        let expect = self.get_intrinsic(&"llvm.expect.i1");
-        self.call(expect, &[cond, self.const_bool(expected)], None)
+        self.call_intrinsic("llvm.expect.i1", &[cond, self.const_bool(expected)])
     }
 
     fn sideeffect(&mut self) {
@@ -380,19 +393,16 @@
         // caller of this function is in `rustc_codegen_ssa`, which is agnostic to whether LLVM
         // codegen backend being used, and so is unable to check the LLVM version.
         if unsafe { llvm::LLVMRustVersionMajor() } < 12 {
-            let fnname = self.get_intrinsic(&("llvm.sideeffect"));
-            self.call(fnname, &[], None);
+            self.call_intrinsic("llvm.sideeffect", &[]);
         }
     }
 
     fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
-        let intrinsic = self.cx().get_intrinsic("llvm.va_start");
-        self.call(intrinsic, &[va_list], None)
+        self.call_intrinsic("llvm.va_start", &[va_list])
     }
 
     fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value {
-        let intrinsic = self.cx().get_intrinsic("llvm.va_end");
-        self.call(intrinsic, &[va_list], None)
+        self.call_intrinsic("llvm.va_end", &[va_list])
     }
 }
 
@@ -404,7 +414,8 @@
     dest: &'ll Value,
 ) {
     if bx.sess().panic_strategy() == PanicStrategy::Abort {
-        bx.call(try_func, &[data], None);
+        let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void());
+        bx.call(try_func_ty, try_func, &[data], None);
         // Return 0 unconditionally from the intrinsic call;
         // we can never unwind.
         let ret_align = bx.tcx().data_layout.i32_align.abi;
@@ -432,7 +443,7 @@
     catch_func: &'ll Value,
     dest: &'ll Value,
 ) {
-    let llfn = get_rust_try_fn(bx, &mut |mut bx| {
+    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
         bx.set_personality_fn(bx.eh_personality());
 
         let mut normal = bx.build_sibling_block("normal");
@@ -502,7 +513,8 @@
         // More information can be found in libstd's seh.rs implementation.
         let ptr_align = bx.tcx().data_layout.pointer_align.abi;
         let slot = bx.alloca(bx.type_i8p(), ptr_align);
-        bx.invoke(try_func, &[data], normal.llbb(), catchswitch.llbb(), None);
+        let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void());
+        bx.invoke(try_func_ty, try_func, &[data], normal.llbb(), catchswitch.llbb(), None);
 
         normal.ret(bx.const_i32(0));
 
@@ -544,14 +556,15 @@
         let flags = bx.const_i32(8);
         let funclet = catchpad_rust.catch_pad(cs, &[tydesc, flags, slot]);
         let ptr = catchpad_rust.load(bx.type_i8p(), slot, ptr_align);
-        catchpad_rust.call(catch_func, &[data, ptr], Some(&funclet));
+        let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void());
+        catchpad_rust.call(catch_ty, catch_func, &[data, ptr], Some(&funclet));
         catchpad_rust.catch_ret(&funclet, caught.llbb());
 
         // The flag value of 64 indicates a "catch-all".
         let flags = bx.const_i32(64);
         let null = bx.const_null(bx.type_i8p());
         let funclet = catchpad_foreign.catch_pad(cs, &[null, flags, null]);
-        catchpad_foreign.call(catch_func, &[data, null], Some(&funclet));
+        catchpad_foreign.call(catch_ty, catch_func, &[data, null], Some(&funclet));
         catchpad_foreign.catch_ret(&funclet, caught.llbb());
 
         caught.ret(bx.const_i32(1));
@@ -559,7 +572,7 @@
 
     // Note that no invoke is used here because by definition this function
     // can't panic (that's what it's catching).
-    let ret = bx.call(llfn, &[try_func, data, catch_func], None);
+    let ret = bx.call(llty, llfn, &[try_func, data, catch_func], None);
     let i32_align = bx.tcx().data_layout.i32_align.abi;
     bx.store(ret, dest, i32_align);
 }
@@ -582,7 +595,7 @@
     catch_func: &'ll Value,
     dest: &'ll Value,
 ) {
-    let llfn = get_rust_try_fn(bx, &mut |mut bx| {
+    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
         // Codegens the shims described above:
         //
         //   bx:
@@ -601,7 +614,8 @@
         let try_func = llvm::get_param(bx.llfn(), 0);
         let data = llvm::get_param(bx.llfn(), 1);
         let catch_func = llvm::get_param(bx.llfn(), 2);
-        bx.invoke(try_func, &[data], then.llbb(), catch.llbb(), None);
+        let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void());
+        bx.invoke(try_func_ty, try_func, &[data], then.llbb(), catch.llbb(), None);
         then.ret(bx.const_i32(0));
 
         // Type indicator for the exception being thrown.
@@ -615,13 +629,14 @@
         let tydesc = bx.const_null(bx.type_i8p());
         catch.add_clause(vals, tydesc);
         let ptr = catch.extract_value(vals, 0);
-        catch.call(catch_func, &[data, ptr], None);
+        let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void());
+        catch.call(catch_ty, catch_func, &[data, ptr], None);
         catch.ret(bx.const_i32(1));
     });
 
     // Note that no invoke is used here because by definition this function
     // can't panic (that's what it's catching).
-    let ret = bx.call(llfn, &[try_func, data, catch_func], None);
+    let ret = bx.call(llty, llfn, &[try_func, data, catch_func], None);
     let i32_align = bx.tcx().data_layout.i32_align.abi;
     bx.store(ret, dest, i32_align);
 }
@@ -636,7 +651,7 @@
     catch_func: &'ll Value,
     dest: &'ll Value,
 ) {
-    let llfn = get_rust_try_fn(bx, &mut |mut bx| {
+    let (llty, llfn) = get_rust_try_fn(bx, &mut |mut bx| {
         // Codegens the shims described above:
         //
         //   bx:
@@ -660,7 +675,8 @@
         let try_func = llvm::get_param(bx.llfn(), 0);
         let data = llvm::get_param(bx.llfn(), 1);
         let catch_func = llvm::get_param(bx.llfn(), 2);
-        bx.invoke(try_func, &[data], then.llbb(), catch.llbb(), None);
+        let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void());
+        bx.invoke(try_func_ty, try_func, &[data], then.llbb(), catch.llbb(), None);
         then.ret(bx.const_i32(0));
 
         // Type indicator for the exception being thrown.
@@ -677,8 +693,7 @@
         let selector = catch.extract_value(vals, 1);
 
         // Check if the typeid we got is the one for a Rust panic.
-        let llvm_eh_typeid_for = bx.get_intrinsic("llvm.eh.typeid.for");
-        let rust_typeid = catch.call(llvm_eh_typeid_for, &[tydesc], None);
+        let rust_typeid = catch.call_intrinsic("llvm.eh.typeid.for", &[tydesc]);
         let is_rust_panic = catch.icmp(IntPredicate::IntEQ, selector, rust_typeid);
         let is_rust_panic = catch.zext(is_rust_panic, bx.type_bool());
 
@@ -686,21 +701,30 @@
         // create an alloca and pass a pointer to that.
         let ptr_align = bx.tcx().data_layout.pointer_align.abi;
         let i8_align = bx.tcx().data_layout.i8_align.abi;
-        let catch_data =
-            catch.alloca(bx.type_struct(&[bx.type_i8p(), bx.type_bool()], false), ptr_align);
-        let catch_data_0 = catch.inbounds_gep(catch_data, &[bx.const_usize(0), bx.const_usize(0)]);
+        let catch_data_type = bx.type_struct(&[bx.type_i8p(), bx.type_bool()], false);
+        let catch_data = catch.alloca(catch_data_type, ptr_align);
+        let catch_data_0 = catch.inbounds_gep(
+            catch_data_type,
+            catch_data,
+            &[bx.const_usize(0), bx.const_usize(0)],
+        );
         catch.store(ptr, catch_data_0, ptr_align);
-        let catch_data_1 = catch.inbounds_gep(catch_data, &[bx.const_usize(0), bx.const_usize(1)]);
+        let catch_data_1 = catch.inbounds_gep(
+            catch_data_type,
+            catch_data,
+            &[bx.const_usize(0), bx.const_usize(1)],
+        );
         catch.store(is_rust_panic, catch_data_1, i8_align);
         let catch_data = catch.bitcast(catch_data, bx.type_i8p());
 
-        catch.call(catch_func, &[data, catch_data], None);
+        let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void());
+        catch.call(catch_ty, catch_func, &[data, catch_data], None);
         catch.ret(bx.const_i32(1));
     });
 
     // Note that no invoke is used here because by definition this function
     // can't panic (that's what it's catching).
-    let ret = bx.call(llfn, &[try_func, data, catch_func], None);
+    let ret = bx.call(llty, llfn, &[try_func, data, catch_func], None);
     let i32_align = bx.tcx().data_layout.i32_align.abi;
     bx.store(ret, dest, i32_align);
 }
@@ -712,8 +736,9 @@
     name: &str,
     rust_fn_sig: ty::PolyFnSig<'tcx>,
     codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
-) -> &'ll Value {
+) -> (&'ll Type, &'ll Value) {
     let fn_abi = FnAbi::of_fn_ptr(cx, rust_fn_sig, &[]);
+    let llty = fn_abi.llvm_type(cx);
     let llfn = cx.declare_fn(name, &fn_abi);
     cx.set_frame_pointer_type(llfn);
     cx.apply_target_cpu_attr(llfn);
@@ -722,7 +747,7 @@
     let llbb = Builder::append_block(cx, llfn, "entry-block");
     let bx = Builder::build(cx, llbb);
     codegen(bx);
-    llfn
+    (llty, llfn)
 }
 
 // Helper function used to get a handle to the `__rust_try` function used to
@@ -732,7 +757,7 @@
 fn get_rust_try_fn<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
-) -> &'ll Value {
+) -> (&'ll Type, &'ll Value) {
     if let Some(llfn) = cx.rust_try_fn.get() {
         return llfn;
     }
@@ -1006,7 +1031,7 @@
         // vector mask and returns an unsigned integer containing the most
         // significant bit (MSB) of each lane.
 
-        // If the vector has less than 8 lanes, an u8 is returned with zeroed
+        // If the vector has fewer than 8 lanes, a u8 is returned with zeroed
         // trailing bits.
         let expected_int_bits = in_len.max(8);
         match ret_ty.kind() {
@@ -1115,7 +1140,8 @@
         };
         let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
         let f = bx.declare_cfn(&llvm_name, llvm::UnnamedAddr::No, fn_ty);
-        let c = bx.call(f, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
+        let c =
+            bx.call(fn_ty, f, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
         Ok(c)
     }
 
@@ -1292,15 +1318,13 @@
 
         let llvm_intrinsic =
             format!("llvm.masked.gather.{}.{}", llvm_elem_vec_str, llvm_pointer_vec_str);
-        let f = bx.declare_cfn(
-            &llvm_intrinsic,
-            llvm::UnnamedAddr::No,
-            bx.type_func(
-                &[llvm_pointer_vec_ty, alignment_ty, mask_ty, llvm_elem_vec_ty],
-                llvm_elem_vec_ty,
-            ),
+        let fn_ty = bx.type_func(
+            &[llvm_pointer_vec_ty, alignment_ty, mask_ty, llvm_elem_vec_ty],
+            llvm_elem_vec_ty,
         );
-        let v = bx.call(f, &[args[1].immediate(), alignment, mask, args[0].immediate()], None);
+        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
+        let v =
+            bx.call(fn_ty, f, &[args[1].immediate(), alignment, mask, args[0].immediate()], None);
         return Ok(v);
     }
 
@@ -1422,12 +1446,11 @@
 
         let llvm_intrinsic =
             format!("llvm.masked.scatter.{}.{}", llvm_elem_vec_str, llvm_pointer_vec_str);
-        let f = bx.declare_cfn(
-            &llvm_intrinsic,
-            llvm::UnnamedAddr::No,
-            bx.type_func(&[llvm_elem_vec_ty, llvm_pointer_vec_ty, alignment_ty, mask_ty], ret_t),
-        );
-        let v = bx.call(f, &[args[0].immediate(), args[1].immediate(), alignment, mask], None);
+        let fn_ty =
+            bx.type_func(&[llvm_elem_vec_ty, llvm_pointer_vec_ty, alignment_ty, mask_ty], ret_t);
+        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
+        let v =
+            bx.call(fn_ty, f, &[args[0].immediate(), args[1].immediate(), alignment, mask], None);
         return Ok(v);
     }
 
@@ -1749,12 +1772,9 @@
         );
         let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
 
-        let f = bx.declare_cfn(
-            &llvm_intrinsic,
-            llvm::UnnamedAddr::No,
-            bx.type_func(&[vec_ty, vec_ty], vec_ty),
-        );
-        let v = bx.call(f, &[lhs, rhs], None);
+        let fn_ty = bx.type_func(&[vec_ty, vec_ty], vec_ty);
+        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
+        let v = bx.call(fn_ty, f, &[lhs, rhs], None);
         return Ok(v);
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index aa4db16..1e6e525 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -352,8 +352,8 @@
 impl Drop for ModuleLlvm {
     fn drop(&mut self) {
         unsafe {
-            llvm::LLVMContextDispose(&mut *(self.llcx as *mut _));
             llvm::LLVMRustDisposeTargetMachine(&mut *(self.tm as *mut _));
+            llvm::LLVMContextDispose(&mut *(self.llcx as *mut _));
         }
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
index ccd3e42..36aa022 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
@@ -6,7 +6,8 @@
 use crate::value::Value;
 use libc::c_uint;
 
-use super::{DiagnosticInfo, Twine};
+use super::{DiagnosticInfo, SMDiagnostic};
+use rustc_span::InnerSpan;
 
 #[derive(Copy, Clone)]
 pub enum OptimizationDiagnosticKind {
@@ -86,36 +87,91 @@
     }
 }
 
-#[derive(Copy, Clone)]
-pub struct InlineAsmDiagnostic<'ll> {
+pub struct SrcMgrDiagnostic {
     pub level: super::DiagnosticLevel,
-    pub cookie: c_uint,
-    pub message: &'ll Twine,
-    pub instruction: Option<&'ll Value>,
+    pub message: String,
+    pub source: Option<(String, Vec<InnerSpan>)>,
 }
 
-impl InlineAsmDiagnostic<'ll> {
-    unsafe fn unpack(di: &'ll DiagnosticInfo) -> Self {
+impl SrcMgrDiagnostic {
+    pub unsafe fn unpack(diag: &SMDiagnostic) -> SrcMgrDiagnostic {
+        // Recover the post-substitution assembly code from LLVM for better
+        // diagnostics.
+        let mut have_source = false;
+        let mut buffer = String::new();
+        let mut level = super::DiagnosticLevel::Error;
+        let mut loc = 0;
+        let mut ranges = [0; 8];
+        let mut num_ranges = ranges.len() / 2;
+        let message = super::build_string(|message| {
+            buffer = super::build_string(|buffer| {
+                have_source = super::LLVMRustUnpackSMDiagnostic(
+                    diag,
+                    message,
+                    buffer,
+                    &mut level,
+                    &mut loc,
+                    ranges.as_mut_ptr(),
+                    &mut num_ranges,
+                );
+            })
+            .expect("non-UTF8 inline asm");
+        })
+        .expect("non-UTF8 SMDiagnostic");
+
+        SrcMgrDiagnostic {
+            message,
+            level,
+            source: have_source.then(|| {
+                let mut spans = vec![InnerSpan::new(loc as usize, loc as usize)];
+                for i in 0..num_ranges {
+                    spans.push(InnerSpan::new(ranges[i * 2] as usize, ranges[i * 2 + 1] as usize));
+                }
+                (buffer, spans)
+            }),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct InlineAsmDiagnostic {
+    pub level: super::DiagnosticLevel,
+    pub cookie: c_uint,
+    pub message: String,
+    pub source: Option<(String, Vec<InnerSpan>)>,
+}
+
+impl InlineAsmDiagnostic {
+    unsafe fn unpackInlineAsm(di: &'ll DiagnosticInfo) -> Self {
         let mut cookie = 0;
         let mut message = None;
-        let mut instruction = None;
         let mut level = super::DiagnosticLevel::Error;
 
-        super::LLVMRustUnpackInlineAsmDiagnostic(
-            di,
-            &mut level,
-            &mut cookie,
-            &mut message,
-            &mut instruction,
-        );
+        super::LLVMRustUnpackInlineAsmDiagnostic(di, &mut level, &mut cookie, &mut message);
 
-        InlineAsmDiagnostic { level, cookie, message: message.unwrap(), instruction }
+        InlineAsmDiagnostic {
+            level,
+            cookie,
+            message: super::twine_to_string(message.unwrap()),
+            source: None,
+        }
+    }
+
+    unsafe fn unpackSrcMgr(di: &'ll DiagnosticInfo) -> Self {
+        let mut cookie = 0;
+        let smdiag = SrcMgrDiagnostic::unpack(super::LLVMRustGetSMDiagnostic(di, &mut cookie));
+        InlineAsmDiagnostic {
+            level: smdiag.level,
+            cookie,
+            message: smdiag.message,
+            source: smdiag.source,
+        }
     }
 }
 
 pub enum Diagnostic<'ll> {
     Optimization(OptimizationDiagnostic<'ll>),
-    InlineAsm(InlineAsmDiagnostic<'ll>),
+    InlineAsm(InlineAsmDiagnostic),
     PGO(&'ll DiagnosticInfo),
     Linker(&'ll DiagnosticInfo),
     Unsupported(&'ll DiagnosticInfo),
@@ -130,7 +186,7 @@
         let kind = super::LLVMRustGetDiagInfoKind(di);
 
         match kind {
-            Dk::InlineAsm => InlineAsm(InlineAsmDiagnostic::unpack(di)),
+            Dk::InlineAsm => InlineAsm(InlineAsmDiagnostic::unpackInlineAsm(di)),
 
             Dk::OptimizationRemark => {
                 Optimization(OptimizationDiagnostic::unpack(OptimizationRemark, di))
@@ -162,6 +218,8 @@
             Dk::Linker => Linker(di),
             Dk::Unsupported => Unsupported(di),
 
+            Dk::SrcMgr => InlineAsm(InlineAsmDiagnostic::unpackSrcMgr(di)),
+
             _ => UnknownDiagnostic(di),
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 68d566c..3f2ed02 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -490,6 +490,7 @@
     PGOProfile,
     Linker,
     Unsupported,
+    SrcMgr,
 }
 
 /// LLVMRustDiagnosticLevel
@@ -1011,7 +1012,8 @@
     pub fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value;
 
     // Constant expressions
-    pub fn LLVMConstInBoundsGEP(
+    pub fn LLVMRustConstInBoundsGEP2(
+        ty: &'a Type,
         ConstantVal: &'a Value,
         ConstantIndices: *const &'a Value,
         NumIndices: c_uint,
@@ -1154,6 +1156,7 @@
     ) -> &'a Value;
     pub fn LLVMRustBuildInvoke(
         B: &Builder<'a>,
+        Ty: &'a Type,
         Fn: &'a Value,
         Args: *const &'a Value,
         NumArgs: c_uint,
@@ -1394,22 +1397,25 @@
 
     pub fn LLVMBuildStore(B: &Builder<'a>, Val: &'a Value, Ptr: &'a Value) -> &'a Value;
 
-    pub fn LLVMBuildGEP(
+    pub fn LLVMBuildGEP2(
         B: &Builder<'a>,
+        Ty: &'a Type,
         Pointer: &'a Value,
         Indices: *const &'a Value,
         NumIndices: c_uint,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildInBoundsGEP(
+    pub fn LLVMBuildInBoundsGEP2(
         B: &Builder<'a>,
+        Ty: &'a Type,
         Pointer: &'a Value,
         Indices: *const &'a Value,
         NumIndices: c_uint,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildStructGEP(
+    pub fn LLVMBuildStructGEP2(
         B: &Builder<'a>,
+        Ty: &'a Type,
         Pointer: &'a Value,
         Idx: c_uint,
         Name: *const c_char,
@@ -1522,6 +1528,7 @@
     pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &'a Value;
     pub fn LLVMRustBuildCall(
         B: &Builder<'a>,
+        Ty: &'a Type,
         Fn: &'a Value,
         Args: *const &'a Value,
         NumArgs: c_uint,
@@ -2258,13 +2265,17 @@
         level_out: &mut DiagnosticLevel,
         cookie_out: &mut c_uint,
         message_out: &mut Option<&'a Twine>,
-        instruction_out: &mut Option<&'a Value>,
     );
 
     #[allow(improper_ctypes)]
     pub fn LLVMRustWriteDiagnosticInfoToString(DI: &DiagnosticInfo, s: &RustString);
     pub fn LLVMRustGetDiagInfoKind(DI: &DiagnosticInfo) -> DiagnosticKind;
 
+    pub fn LLVMRustGetSMDiagnostic(
+        DI: &'a DiagnosticInfo,
+        cookie_out: &mut c_uint,
+    ) -> &'a SMDiagnostic;
+
     pub fn LLVMRustSetInlineAsmDiagnosticHandler(
         C: &Context,
         H: InlineAsmDiagHandler,
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index cb9c626..3b64ec1 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -89,13 +89,14 @@
             add("-generate-arange-section", false);
         }
 
-        // FIXME(nagisa): disable the machine outliner by default in LLVM versions 11, where it was
-        // introduced and up.
+        // Disable the machine outliner by default in LLVM versions 11 and LLVM
+        // version 12, where it leads to miscompilation.
         //
-        // This should remain in place until https://reviews.llvm.org/D103167 is fixed. If LLVM
-        // has been upgraded since, consider adjusting the version check below to contain an upper
-        // bound.
-        if llvm_util::get_version() >= (11, 0, 0) {
+        // Ref:
+        // - https://github.com/rust-lang/rust/issues/85351
+        // - https://reviews.llvm.org/D103167
+        let llvm_version = llvm_util::get_version();
+        if llvm_version >= (11, 0, 0) && llvm_version < (13, 0, 0) {
             add("-enable-machine-outliner=never", false);
         }
 
@@ -365,7 +366,7 @@
 
                 features_string
             };
-            features.extend(features_string.split(",").map(String::from));
+            features.extend(features_string.split(',').map(String::from));
         }
         Some(_) | None => {}
     };
@@ -374,7 +375,7 @@
         if s.is_empty() {
             return None;
         }
-        let feature = if s.starts_with("+") || s.starts_with("-") {
+        let feature = if s.starts_with('+') || s.starts_with('-') {
             &s[1..]
         } else {
             return Some(s.to_string());
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index 9345644..8a8ece6 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -135,6 +135,11 @@
             return false;
         }
 
+        // Match clang by only supporting COFF and ELF for now.
+        if self.tcx.sess.target.is_like_osx {
+            return false;
+        }
+
         // Static relocation model should force copy relocations everywhere.
         if self.tcx.sess.relocation_model() == RelocModel::Static {
             return true;
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 8fd0caae..c7f4287 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -203,7 +203,11 @@
     }
 
     fn element_type(&self, ty: &'ll Type) -> &'ll Type {
-        unsafe { llvm::LLVMGetElementType(ty) }
+        match self.type_kind(ty) {
+            TypeKind::Array | TypeKind::Vector => unsafe { llvm::LLVMGetElementType(ty) },
+            TypeKind::Pointer => bug!("element_type is not supported for opaque pointers"),
+            other => bug!("element_type called on unsupported type {:?}", other),
+        }
     }
 
     fn vector_length(&self, ty: &'ll Type) -> usize {
@@ -262,7 +266,7 @@
         layout.is_llvm_scalar_pair()
     }
     fn backend_field_index(&self, layout: TyAndLayout<'tcx>, index: usize) -> u64 {
-        layout.llvm_field_index(index)
+        layout.llvm_field_index(self, index)
     }
     fn scalar_pair_element_backend_type(
         &self,
@@ -275,6 +279,9 @@
     fn cast_backend_type(&self, ty: &CastTarget) -> &'ll Type {
         ty.llvm_type(self)
     }
+    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> &'ll Type {
+        fn_abi.llvm_type(self)
+    }
     fn fn_ptr_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> &'ll Type {
         fn_abi.ptr_to_llvm_type(self)
     }
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index 0876907..9818905 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -1,5 +1,6 @@
 use crate::abi::FnAbi;
 use crate::common::*;
+use crate::context::TypeLowering;
 use crate::type_::Type;
 use rustc_codegen_ssa::traits::*;
 use rustc_middle::bug;
@@ -8,7 +9,8 @@
 use rustc_middle::ty::{self, Ty, TypeFoldable};
 use rustc_target::abi::{Abi, AddressSpace, Align, FieldsShape};
 use rustc_target::abi::{Int, Pointer, F32, F64};
-use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAndLayoutMethods, Variants};
+use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAbiInterface, Variants};
+use smallvec::{smallvec, SmallVec};
 use tracing::debug;
 
 use std::fmt::Write;
@@ -17,6 +19,7 @@
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
     defer: &mut Option<(&'a Type, TyAndLayout<'tcx>)>,
+    field_remapping: &mut Option<SmallVec<[u32; 4]>>,
 ) -> &'a Type {
     match layout.abi {
         Abi::Scalar(_) => bug!("handled elsewhere"),
@@ -75,7 +78,8 @@
         FieldsShape::Array { count, .. } => cx.type_array(layout.field(cx, 0).llvm_type(cx), count),
         FieldsShape::Arbitrary { .. } => match name {
             None => {
-                let (llfields, packed) = struct_llfields(cx, layout);
+                let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+                *field_remapping = new_field_remapping;
                 cx.type_struct(&llfields, packed)
             }
             Some(ref name) => {
@@ -90,7 +94,7 @@
 fn struct_llfields<'a, 'tcx>(
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
-) -> (Vec<&'a Type>, bool) {
+) -> (Vec<&'a Type>, bool, Option<SmallVec<[u32; 4]>>) {
     debug!("struct_llfields: {:#?}", layout);
     let field_count = layout.fields.count();
 
@@ -98,6 +102,7 @@
     let mut offset = Size::ZERO;
     let mut prev_effective_align = layout.align.abi;
     let mut result: Vec<_> = Vec::with_capacity(1 + field_count * 2);
+    let mut field_remapping = smallvec![0; field_count];
     for i in layout.fields.index_by_increasing_offset() {
         let target_offset = layout.fields.offset(i as usize);
         let field = layout.field(cx, i);
@@ -116,33 +121,37 @@
         );
         assert!(target_offset >= offset);
         let padding = target_offset - offset;
-        let padding_align = prev_effective_align.min(effective_field_align);
-        assert_eq!(offset.align_to(padding_align) + padding, target_offset);
-        result.push(cx.type_padding_filler(padding, padding_align));
-        debug!("    padding before: {:?}", padding);
-
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align.min(effective_field_align);
+            assert_eq!(offset.align_to(padding_align) + padding, target_offset);
+            result.push(cx.type_padding_filler(padding, padding_align));
+            debug!("    padding before: {:?}", padding);
+        }
+        field_remapping[i] = result.len() as u32;
         result.push(field.llvm_type(cx));
         offset = target_offset + field.size;
         prev_effective_align = effective_field_align;
     }
+    let padding_used = result.len() > field_count;
     if !layout.is_unsized() && field_count > 0 {
         if offset > layout.size {
             bug!("layout: {:#?} stride: {:?} offset: {:?}", layout, layout.size, offset);
         }
         let padding = layout.size - offset;
-        let padding_align = prev_effective_align;
-        assert_eq!(offset.align_to(padding_align) + padding, layout.size);
-        debug!(
-            "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
-            padding, offset, layout.size
-        );
-        result.push(cx.type_padding_filler(padding, padding_align));
-        assert_eq!(result.len(), 1 + field_count * 2);
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align;
+            assert_eq!(offset.align_to(padding_align) + padding, layout.size);
+            debug!(
+                "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
+                padding, offset, layout.size
+            );
+            result.push(cx.type_padding_filler(padding, padding_align));
+        }
     } else {
         debug!("struct_llfields: offset: {:?} stride: {:?}", offset, layout.size);
     }
-
-    (result, packed)
+    let field_remapping = if padding_used { Some(field_remapping) } else { None };
+    (result, packed, field_remapping)
 }
 
 impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
@@ -177,7 +186,7 @@
         index: usize,
         immediate: bool,
     ) -> &'a Type;
-    fn llvm_field_index(&self, index: usize) -> u64;
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64;
     fn pointee_info_at<'a>(&self, cx: &CodegenCx<'a, 'tcx>, offset: Size) -> Option<PointeeInfo>;
 }
 
@@ -234,8 +243,8 @@
             Variants::Single { index } => Some(index),
             _ => None,
         };
-        if let Some(&llty) = cx.lltypes.borrow().get(&(self.ty, variant_index)) {
-            return llty;
+        if let Some(ref llty) = cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+            return llty.lltype;
         }
 
         debug!("llvm_type({:#?})", self);
@@ -247,6 +256,7 @@
         let normal_ty = cx.tcx.erase_regions(self.ty);
 
         let mut defer = None;
+        let mut field_remapping = None;
         let llty = if self.ty != normal_ty {
             let mut layout = cx.layout_of(normal_ty);
             if let Some(v) = variant_index {
@@ -254,17 +264,24 @@
             }
             layout.llvm_type(cx)
         } else {
-            uncached_llvm_type(cx, *self, &mut defer)
+            uncached_llvm_type(cx, *self, &mut defer, &mut field_remapping)
         };
         debug!("--> mapped {:#?} to llty={:?}", self, llty);
 
-        cx.lltypes.borrow_mut().insert((self.ty, variant_index), llty);
+        cx.type_lowering.borrow_mut().insert(
+            (self.ty, variant_index),
+            TypeLowering { lltype: llty, field_remapping: field_remapping },
+        );
 
         if let Some((llty, layout)) = defer {
-            let (llfields, packed) = struct_llfields(cx, layout);
-            cx.set_struct_body(llty, &llfields, packed)
+            let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+            cx.set_struct_body(llty, &llfields, packed);
+            cx.type_lowering
+                .borrow_mut()
+                .get_mut(&(self.ty, variant_index))
+                .unwrap()
+                .field_remapping = new_field_remapping;
         }
-
         llty
     }
 
@@ -340,7 +357,7 @@
         self.scalar_llvm_type_at(cx, scalar, offset)
     }
 
-    fn llvm_field_index(&self, index: usize) -> u64 {
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64 {
         match self.abi {
             Abi::Scalar(_) | Abi::ScalarPair(..) => {
                 bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
@@ -354,16 +371,37 @@
 
             FieldsShape::Array { .. } => index as u64,
 
-            FieldsShape::Arbitrary { .. } => 1 + (self.fields.memory_index(index) as u64) * 2,
+            FieldsShape::Arbitrary { .. } => {
+                let variant_index = match self.variants {
+                    Variants::Single { index } => Some(index),
+                    _ => None,
+                };
+
+                // Look up llvm field if indexes do not match memory order due to padding. If
+                // `field_remapping` is `None` no padding was used and the llvm field index
+                // matches the memory index.
+                match cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+                    Some(TypeLowering { field_remapping: Some(ref remap), .. }) => {
+                        remap[index] as u64
+                    }
+                    Some(_) => self.fields.memory_index(index) as u64,
+                    None => {
+                        bug!("TyAndLayout::llvm_field_index({:?}): type info not found", self)
+                    }
+                }
+            }
         }
     }
 
+    // FIXME(eddyb) this having the same name as `TyAndLayout::pointee_info_at`
+    // (the inherent method, which is lacking this caching logic) can result in
+    // the uncached version being called - not wrong, but potentially inefficient.
     fn pointee_info_at<'a>(&self, cx: &CodegenCx<'a, 'tcx>, offset: Size) -> Option<PointeeInfo> {
         if let Some(&pointee) = cx.pointee_infos.borrow().get(&(self.ty, offset)) {
             return pointee;
         }
 
-        let result = Ty::pointee_info_at(*self, cx, offset);
+        let result = Ty::ty_and_layout_pointee_info_at(*self, cx, offset);
 
         cx.pointee_infos.borrow_mut().insert((self.ty, offset), result);
         result
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index 9df1bd7..2208ec3 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -50,12 +50,12 @@
 
     let aligned_size = size.align_to(slot_size).bytes() as i32;
     let full_direct_size = bx.cx().const_i32(aligned_size);
-    let next = bx.inbounds_gep(addr, &[full_direct_size]);
+    let next = bx.inbounds_gep(bx.type_i8(), addr, &[full_direct_size]);
     bx.store(next, va_list_addr, bx.tcx().data_layout.pointer_align.abi);
 
     if size.bytes() < slot_size.bytes() && bx.tcx().sess.target.endian == Endian::Big {
         let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
-        let adjusted = bx.inbounds_gep(addr, &[adjusted_size]);
+        let adjusted = bx.inbounds_gep(bx.type_i8(), addr, &[adjusted_size]);
         (bx.bitcast(adjusted, bx.cx().type_ptr_to(llty)), addr_align)
     } else {
         (bx.bitcast(addr, bx.cx().type_ptr_to(llty)), addr_align)
@@ -98,6 +98,8 @@
     // Implementation of the AAPCS64 calling convention for va_args see
     // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
     let va_list_addr = list.immediate();
+    let va_list_layout = list.deref(bx.cx).layout;
+    let va_list_ty = va_list_layout.llvm_type(bx);
     let layout = bx.cx.layout_of(target_ty);
 
     let mut maybe_reg = bx.build_sibling_block("va_arg.maybe_reg");
@@ -109,13 +111,15 @@
 
     let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
     let (reg_off, reg_top_index, slot_size) = if gr_type {
-        let gr_offs = bx.struct_gep(va_list_addr, 7);
+        let gr_offs =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
         let nreg = (layout.size.bytes() + 7) / 8;
-        (gr_offs, 3, nreg * 8)
+        (gr_offs, va_list_layout.llvm_field_index(bx.cx, 1), nreg * 8)
     } else {
-        let vr_off = bx.struct_gep(va_list_addr, 9);
+        let vr_off =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 4));
         let nreg = (layout.size.bytes() + 15) / 16;
-        (vr_off, 5, nreg * 16)
+        (vr_off, va_list_layout.llvm_field_index(bx.cx, 2), nreg * 16)
     };
 
     // if the offset >= 0 then the value will be on the stack
@@ -141,15 +145,15 @@
     maybe_reg.cond_br(use_stack, &on_stack.llbb(), &in_reg.llbb());
 
     let top_type = bx.type_i8p();
-    let top = in_reg.struct_gep(va_list_addr, reg_top_index);
+    let top = in_reg.struct_gep(va_list_ty, va_list_addr, reg_top_index);
     let top = in_reg.load(top_type, top, bx.tcx().data_layout.pointer_align.abi);
 
     // reg_value = *(@top + reg_off_v);
-    let mut reg_addr = in_reg.gep(top, &[reg_off_v]);
+    let mut reg_addr = in_reg.gep(bx.type_i8(), top, &[reg_off_v]);
     if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
         // On big-endian systems the value is right-aligned in its slot.
         let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
-        reg_addr = in_reg.gep(reg_addr, &[offset]);
+        reg_addr = in_reg.gep(bx.type_i8(), reg_addr, &[offset]);
     }
     let reg_type = layout.llvm_type(bx);
     let reg_addr = in_reg.bitcast(reg_addr, bx.cx.type_ptr_to(reg_type));