Importing rustc-1.38.0
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 6f923b7..cdbf28b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -43,7 +43,7 @@
 struct LargeStructOneMember g_s;
 #endif
 
-// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
+// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
 // AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
@@ -63,8 +63,8 @@
   out[0] = foo(in[1]);
 }
 
-// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
   Mat64X64 out;
   return out;
@@ -86,7 +86,7 @@
   u.x = (int2)(0, 0);
 }
 
-// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
 // AMDGCN-NOT: addrspacecast
 // AMDGCN:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
 void FuncOneLargeMember(struct LargeStructOneMember u) {
@@ -97,7 +97,7 @@
 // AMDGCN20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
 // AMDGCN20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
-// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
 #if __OPENCL_C_VERSION__ >= 200
 void test_indirect_arg_globl(void) {
   FuncOneLargeMember(g_s);
@@ -108,7 +108,7 @@
 // AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
 // AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
-// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
 kernel void test_indirect_arg_local(void) {
   local struct LargeStructOneMember l_s;
   FuncOneLargeMember(l_s);
@@ -117,7 +117,7 @@
 // AMDGCN-LABEL: define void @test_indirect_arg_private()
 // AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN-NOT: @llvm.memcpy
-// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
 void test_indirect_arg_private(void) {
   struct LargeStructOneMember p_s;
   FuncOneLargeMember(p_s);
@@ -142,7 +142,7 @@
 // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
   FuncOneLargeMember(u);
 }
@@ -152,7 +152,7 @@
   u.y = (int2)(0, 0);
 }
 
-// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
   u.y[0] = (int2)(0, 0);
 }
@@ -171,7 +171,7 @@
 // AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
 // AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
   FuncLargeTwoMember(u);
 }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
index c947db4..52feccc 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
@@ -13,7 +13,7 @@
   // CHECK-NOFAKE-NOT: addrspacecast
 
   arg_gen = &var_priv; // implicit cast with obtaining adr, private -> generic
-  // CHECK: %{{[0-9]+}} = addrspacecast i32* %var_priv to i32 addrspace(4)*
+  // CHECK: %{{[._a-z0-9]+}} = addrspacecast i32* %{{[._a-z0-9]+}} to i32 addrspace(4)*
   // CHECK-NOFAKE-NOT: addrspacecast
 
   arg_glob = (global int *)arg_gen; // explicit cast
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
index b6e6b87..50622f0 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN20 %s
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN20 %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
 
 // We check that the address spaces are mangled the same in both version of OpenCL
 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
@@ -14,7 +14,7 @@
 void ff(int *arg) { }
 // ASMANG10: @_Z2ffPi
 // ASMANG20: @_Z2ffPU3AS4i
-// NOASMANG10: @_Z2ffPi
+// NOASMANG10: @_Z2ffPU9CLprivatei
 // NOASMANG20: @_Z2ffPU9CLgenerici
 // OCL-20-DAG: @_Z2ffPU3AS4i
 // OCL-12-DAG: @_Z2ffPi
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index 59f38f8..7216cb5 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -42,7 +42,7 @@
   // CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 8
   int *lp1 = &lv1;
 
-  // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0
+  // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0
   // CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4
   // CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32*
   // CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 8
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index aec00e7..0a7f289 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval(%struct.flexible_array) align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval(%struct.struct_arg) align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval(%struct.num_regs_nested_struct) align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval(%struct.large_struct_padding) align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs)  align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
index b5dc47a..3241da6 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -92,48 +92,48 @@
 
 
 // CHECK-LABEL: @local_memory_alignment_global(
-// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
-// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
+// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2
 // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
-// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
-// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
-// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
-// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16
+// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4
 // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
-// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
-// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32
+// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8
 // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
-// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
-// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64
+// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16
 // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
-// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
-// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128
+// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4
 // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
-// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
-// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8
 // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
-// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
-// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16
 // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
-// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128
 kernel void local_memory_alignment_global()
 {
   volatile local char lds_i8[4];
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
index ad13a2c..ba4322f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -158,30 +158,30 @@
 // CHECK-NOT: "amdgpu-num-sgpr"="0"
 // CHECK-NOT: "amdgpu-num-vgpr"="0"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48" 
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" 
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" 
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" 
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" 
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
 
 // CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
index bcb00be..4a91652 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
 
-// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
 void foo(void) {}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 7aac4d3..0bb3d6f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -5,14 +5,22 @@
 
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
 
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
 // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
 // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
 // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
new file mode 100644
index 0000000..d1ab6ac
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
+void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) {
+    uint ur = arm_dot(ua, ub);
+    // CHECK: call spir_func i32 @_Z7arm_dotDv4_hS_
+    int sr = arm_dot(sa, sb);
+    // CHECK: call spir_func i32 @_Z7arm_dotDv4_cS_
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
+void test_accumulate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+    uint ur = arm_dot_acc(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_hS_j
+    int sr = arm_dot_acc(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : enable
+void test_accumulate_int16(ushort2 ua, ushort2 ub, uint uc, short2 sa, short2 sb, int c) {
+    uint ur = arm_dot_acc(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_tS_j
+    int sr = arm_dot_acc(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_sS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : enable
+void test_accumulate_saturate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+    uint ur = arm_dot_acc_sat(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_hS_j
+    int sr = arm_dot_acc_sat(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : disable
+
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl b/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
index 160f7fb..88f2e0d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -41,7 +41,7 @@
   // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
 
-  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
 }
 
@@ -83,7 +83,7 @@
 
 bool fi4(atomic_int *i) {
   // CHECK-LABEL: @fi4(
-  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
   // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
   // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
   // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -109,7 +109,7 @@
   // CHECK: load atomic i32, i32* %{{.*}} seq_cst
   // CHECK: br label %[[continue]]
   // CHECK: [[opencl_subgroup]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
   // CHECK: br label %[[continue]]
   // CHECK: [[continue]]:
   int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
@@ -141,21 +141,21 @@
   // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
   // CHECK-NEXT: ]
   // CHECK: [[MON_WG]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
   // CHECK: [[MON_DEV]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
   // CHECK: [[MON_ALL]]:
   // CHECK: load atomic i32, i32* %{{.*}} monotonic
   // CHECK: [[MON_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
   // CHECK: [[ACQ_WG]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
   // CHECK: [[ACQ_DEV]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
   // CHECK: [[ACQ_ALL]]:
   // CHECK: load atomic i32, i32* %{{.*}} acquire
   // CHECK: [[ACQ_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
   // CHECK: [[SEQ_WG]]:
   // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
   // CHECK: [[SEQ_DEV]]:
@@ -163,19 +163,19 @@
   // CHECK: [[SEQ_ALL]]:
   // CHECK: load atomic i32, i32* %{{.*}} seq_cst
   // CHECK: [[SEQ_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
   int x = __opencl_atomic_load(i, order, scope);
 }
 
 float ff1(global atomic_float *d) {
   // CHECK-LABEL: @ff1
-  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
   return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
 }
 
 void ff2(atomic_float *d) {
   // CHECK-LABEL: @ff2
-  // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
   __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
 }
 
@@ -198,7 +198,7 @@
 
 // CHECK-LABEL: @failureOrder
 void failureOrder(atomic_int *ptr, int *ptr2) {
-  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
   __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
 
   // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl b/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
index 675240c..c3e2685 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
@@ -35,31 +35,23 @@
   // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
   // SPIR: %[[i_value:.*]] = load i32, i32* %i
   // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
-  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
-  // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
-  // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
-  // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
-  // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
-  // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
+  // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
+  // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+  // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
+  // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
   // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
-  // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
-  // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
-  // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+  // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
   // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
   // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
   // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
   // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
   // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
-  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
-  // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
-  // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
-  // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
-  // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
-  // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
+  // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
+  // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
+  // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
+  // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
   // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
-  // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
-  // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
-  // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+  // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
 
   int (^ block_B)(void) = ^{
     return i;
@@ -98,6 +90,12 @@
   return blockArgFunc(^{return 42;});
 }
 
+// COMMON-LABEL: define {{.*}}@call_block
+// call {{.*}}@__call_block_block_invoke
+int call_block() {
+  return ^int(int num) { return num; } (11);
+}
+
 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
 
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
index 4127526..f6fcfa5 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
@@ -2,6 +2,9 @@
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
 
 // CHECK-LABEL: @test_s_dcache_inv_vol
 // CHECK: call void @llvm.amdgcn.s.dcache.inv.vol(
@@ -17,3 +20,9 @@
   __builtin_amdgcn_buffer_wbinvl1_vol();
 }
 
+// CHECK-LABEL: @test_gws_sema_release_all(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %id)
+void test_gws_sema_release_all(uint id)
+{
+  __builtin_amdgcn_ds_gws_sema_release_all(id);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
index f8356fe..d056f02 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
@@ -1,6 +1,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -verify -S -emit-llvm -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -verify -S -emit-llvm -o - %s
 
 typedef unsigned int uint;
 typedef half __attribute__((ext_vector_type(2))) half2;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
index e2c03a4..cb83c0b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
@@ -1,6 +1,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -verify -S -emit-llvm -o - %s
 
 typedef unsigned int uint;
 typedef half __attribute__((ext_vector_type(2))) half2;
@@ -12,24 +13,24 @@
     half2 v2hA, half2 v2hB, float fC,
     short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
     ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
-  fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);     // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
-  fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true);      // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
+  fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);     // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
+  fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true);      // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
 
-  siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
-  siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true);  // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
+  siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
+  siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true);  // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
 
-  uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
-  uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true);  // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
+  uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
+  uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true);  // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
 
-  siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
-  siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
+  siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
+  siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
 
-  uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
-  uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
+  uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
+  uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
 
-  siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
-  siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
+  siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false);     // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
+  siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true);      // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
 
-  uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
-  uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
+  uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false);     // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
+  uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true);      // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
 }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
index e5633fb..a7d635a 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
@@ -1,6 +1,8 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
 
 typedef unsigned int uint;
 typedef half __attribute__((ext_vector_type(2))) half2;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
new file mode 100644
index 0000000..3921cb9
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -0,0 +1,24 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_permlane16(
+// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_permlanex16(
+// CHECK: call i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8(
+// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
+void test_mov_dpp8(global uint* out, uint a) {
+  *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
index 333b610..344af2b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
@@ -1,5 +1,6 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
new file mode 100644
index 0000000..39d913e
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
@@ -0,0 +1,34 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: test_interp_f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+void test_interp_f16(global half* out, float i, float j, int m0)
+{
+  float p1_0 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, false, m0);
+  half p2_0 = __builtin_amdgcn_interp_p2_f16(p1_0, j, 2, 3, false, m0);
+  float p1_1 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, true, m0);
+  half p2_1 = __builtin_amdgcn_interp_p2_f16(p1_1, j, 2, 3, true, m0);
+  *out = p2_0 + p2_1;
+}
+
+// CHECK-LABEL: test_interp_f32
+// CHECK: call float @llvm.amdgcn.interp.p1
+// CHECK: call float @llvm.amdgcn.interp.p2
+void test_interp_f32(global float* out, float i, float j, int m0)
+{
+  float p1 = __builtin_amdgcn_interp_p1(i, 1, 4, m0);
+  *out = __builtin_amdgcn_interp_p2(p1, j, 1, 4, m0);
+}
+
+// CHECK-LABEL: test_interp_mov
+// CHECK: call float @llvm.amdgcn.interp.mov
+void test_interp_mov(global float* out, float i, float j, int m0)
+{
+  *out = __builtin_amdgcn_interp_mov(2, 3, 4, m0);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 2201421..e3e6b81 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,5 +1,8 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index dc7f480..bbae5ea 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -5,6 +5,10 @@
 
 typedef unsigned long ulong;
 typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
 
 // CHECK-LABEL: @test_div_scale_f64
 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
@@ -224,28 +228,28 @@
 }
 
 // CHECK-LABEL: @test_sicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
 void test_sicmp_i32(global ulong* out, int a, int b)
 {
   *out = __builtin_amdgcn_sicmp(a, b, 32);
 }
 
 // CHECK-LABEL: @test_uicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
 void test_uicmp_i32(global ulong* out, uint a, uint b)
 {
   *out = __builtin_amdgcn_uicmp(a, b, 32);
 }
 
 // CHECK-LABEL: @test_sicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38)
 void test_sicmp_i64(global ulong* out, long a, long b)
 {
   *out = __builtin_amdgcn_sicmpl(a, b, 39-1);
 }
 
 // CHECK-LABEL: @test_uicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35)
 void test_uicmp_i64(global ulong* out, ulong a, ulong b)
 {
   *out = __builtin_amdgcn_uicmpl(a, b, 30+5);
@@ -287,14 +291,14 @@
 }
 
 // CHECK-LABEL: @test_fcmp_f32
-// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5)
 void test_fcmp_f32(global ulong* out, float a, float b)
 {
   *out = __builtin_amdgcn_fcmpf(a, b, 5);
 }
 
 // CHECK-LABEL: @test_fcmp_f64
-// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6)
 void test_fcmp_f64(global ulong* out, double a, double b)
 {
   *out = __builtin_amdgcn_fcmp(a, b, 3+3);
@@ -536,6 +540,120 @@
   *out = __builtin_amdgcn_s_getpc();
 }
 
+// CHECK-LABEL: @test_ds_append_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_append_lds(global int* out, local int* ptr) {
+  *out = __builtin_amdgcn_ds_append(ptr);
+}
+
+// CHECK-LABEL: @test_ds_consume_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_consume_lds(global int* out, local int* ptr) {
+  *out = __builtin_amdgcn_ds_consume(ptr);
+}
+
+// CHECK-LABEL: @test_gws_init(
+// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
+kernel void test_gws_init(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_init(value, id);
+}
+
+// CHECK-LABEL: @test_gws_barrier(
+// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
+kernel void test_gws_barrier(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_barrier(value, id);
+}
+
+// CHECK-LABEL: @test_gws_sema_v(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
+kernel void test_gws_sema_v(uint id) {
+  __builtin_amdgcn_ds_gws_sema_v(id);
+}
+
+// CHECK-LABEL: @test_gws_sema_br(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
+kernel void test_gws_sema_br(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_sema_br(value, id);
+}
+
+// CHECK-LABEL: @test_gws_sema_p(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
+kernel void test_gws_sema_p(uint id) {
+  __builtin_amdgcn_ds_gws_sema_p(id);
+}
+
+// CHECK-LABEL: @test_mbcnt_lo(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.lo(i32 %src0, i32 %src1)
+kernel void test_mbcnt_lo(global uint* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_mbcnt_lo(src0, src1);
+}
+
+// CHECK-LABEL: @test_mbcnt_hi(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.hi(i32 %src0, i32 %src1)
+kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_mbcnt_hi(src0, src1);
+}
+
+// CHECK-LABEL: @test_alignbit(
+// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_alignbit(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_alignbyte(
+// CHECK: tail call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_alignbyte(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_ubfe(
+// CHECK: tail call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_ubfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sbfe(
+// CHECK: tail call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sbfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_cvt_pkrtz(
+// CHECK: tail call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1)
+kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
+kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
+kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
+  *out = __builtin_amdgcn_cvt_pk_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_cvt_pk_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u8_f32
+// CHECK: tail call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2)
+kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
index 3fba83d..36a6fb6 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -finclude-default-header -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
+// RUN: %clang_cc1 %s -finclude-default-header -cl-std=clc++ -fblocks -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
 
 void testBranchingOnEnqueueKernel(queue_t default_queue, unsigned flags, ndrange_t ndrange) {
     // Ensure `enqueue_kernel` can be branched upon.
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 4732194..4abeb92 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -11,7 +11,7 @@
 
 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G =  addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
 
 // For anonymous blocks without captures, emit block literals as global variable.
 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
@@ -77,11 +77,11 @@
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
-  // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
-  // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
-  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+  // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
+  // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -95,8 +95,8 @@
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
-  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
-  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+  // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -107,8 +107,8 @@
                  });
 
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
-  // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* {{%[0-9]+}}, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
-  enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, 0,
+  // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
+  enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
                  ^(void) {
                    return;
                  });
@@ -165,7 +165,7 @@
   // Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8*
@@ -192,7 +192,7 @@
   // Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+  // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8*
@@ -300,28 +300,26 @@
   // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
   // The full type of these expressions are long (and repeated elsewhere), so we
   // capture it as part of the regex for convenience and clarity.
-  // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
   void (^const block_A)(void) = ^{
     return;
   };
 
   // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
-  // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
   void (^const block_B)(local void *) = ^(local void *a) {
     return;
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   enqueue_kernel(default_queue, flags, ndrange, block_A);
@@ -333,22 +331,42 @@
   unsigned size = get_kernel_work_group_size(block_A);
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
+  // Make sure that block invoke function is resolved correctly after sequence of assignements.
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+  // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+  // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+  // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+  bl_t b1 = block_G;
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+  // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+  // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+  // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+  bl_t b2 = b1;
+  // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+  // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+  b2(0);
+  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_preferred_work_group_size_multiple(b2);
+
   void (^block_C)(void) = ^{
     callee(i, a);
   };
-
   // Emits block literal on stack and block kernel [[INVLK3]].
   // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+  // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, block_C);
@@ -404,8 +422,8 @@
 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
 // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl b/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
index 5fcf117..47e180c 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -26,6 +26,6 @@
 
   constant int var1 = 1;
   
-  // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0)
+  // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i64 0, i64 0)
   foo(&var1, arr1, arr2, arr3);
 }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl b/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
index a011920..193d391 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM
 
 // This is initially assumed convergent, but can be deduced to not require it.
 
@@ -117,7 +118,12 @@
 // CHECK: [[for_body]]:
 // CHECK:  tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
 // CHECK-NOT: call spir_func void @nodupfun()
-// CHECK:  br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+
+// The new PM produces a slightly different IR for the loop from the legacy PM,
+// but the test still checks that the loop is not unrolled.
+// CHECK-LEGACY:  br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+// CHECK-NEW:     br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]]
+// CHECK-NEW:     [[for_body_crit_edge]]:
 
 void test_not_unroll() {
   for (int i = 0; i < 10; i++)
@@ -133,7 +139,7 @@
   __asm__ volatile("s_barrier");
 }
 
-// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #0 = { nofree noinline norecurse nounwind "
 // CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/images.cl b/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
index eb054ec..326322d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - -cl-std=clc++ | FileCheck %s
 
 __attribute__((overloadable)) void read_image(read_only image1d_t img_ro);
 __attribute__((overloadable)) void read_image(write_only image1d_t img_wo);
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5bb52e9..e892376 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* byval nocapture
+// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* byval nocapture
+// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* byval nocapture
+// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl b/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
index ac1c1b5..f083a85 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl b/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
index 14f6411..5cbff4d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL

-// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NO-SIGNED-ZEROS

-

-float signedzeros(float a) {

-  return a;

-}

-

-// CHECK: attributes

-// NORMAL: "no-signed-zeros-fp-math"="false"

-// NO-SIGNED-ZEROS: "no-signed-zeros-fp-math"="true"

+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
+// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NO-SIGNED-ZEROS
+
+float signedzeros(float a) {
+  return a;
+}
+
+// CHECK: attributes
+// NORMAL: "no-signed-zeros-fp-math"="false"
+// NO-SIGNED-ZEROS: "no-signed-zeros-fp-math"="true"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl b/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
index dbaba87..296923e 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
@@ -11,12 +11,6 @@
   *generic_ptr = 4;
 }
 
-// CHECK-LABEL: @test_numbered_as_to_builtin(
-// CHECK: addrspacecast i32 addrspace(42)* %0 to float addrspace(3)*
-void test_numbered_as_to_builtin(__attribute__((address_space(42))) int *arbitary_numbered_ptr, float src) {
-  volatile float result = __builtin_amdgcn_ds_fmaxf(arbitary_numbered_ptr, src, 0, 0, false);
-}
-
 // CHECK-LABEL: @test_generic_as_to_builtin_parameter_explicit_cast(
 // CHECK: addrspacecast i32 addrspace(3)* %0 to i32*
 void test_generic_as_to_builtin_parameter_explicit_cast(__local int *local_ptr, float src) {
@@ -25,10 +19,7 @@
 }
 
 // CHECK-LABEL: @test_generic_as_to_builtin_parameter_implicit_cast(
-// CHECK: addrspacecast i32* %2 to float addrspace(3)*
+// CHECK: bitcast i32 addrspace(3)* %0 to float addrspace(3)*
 void test_generic_as_to_builtin_parameter_implicit_cast(__local int *local_ptr, float src) {
-  generic int* generic_ptr = local_ptr;
-
-  volatile float result = __builtin_amdgcn_ds_fmaxf(generic_ptr, src, 0, 0, false);
+  volatile float result = __builtin_amdgcn_ds_fmaxf(local_ptr, src, 0, 0, false);
 }
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl b/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
index 2a533c5..02b9669 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s
+// FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple
+// above to support OpenCL in the MS C++ ABI.
 
 // CHECK-DAG: %opencl.pipe_ro_t = type opaque
 // CHECK-DAG: %opencl.pipe_wo_t = type opaque
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl b/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
index 22976c5..ed3f32d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=clc++ -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
 //
 // This test covers 5 cases of sampler initialzation:
 //   1. function argument passing
@@ -6,8 +8,9 @@
 //      1b. argument is a function-scope variable
 //      1c. argument is one of caller function's parameters
 //   2. variable initialization
-//      2a. initializing a file-scope variable
+//      2a. initializing a file-scope variable with constant addr space qualifier
 //      2b. initializing a function-scope variable
+//      2c. initializing a file-scope variable with const qualifier
 
 #define CLK_ADDRESS_CLAMP_TO_EDGE       2
 #define CLK_NORMALIZED_COORDS_TRUE      1
@@ -20,10 +23,14 @@
 constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
 // CHECK-NOT: glb_smp
 
+// Case 2c
+const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+// CHECK-NOT: glb_smp_const
+
 int get_sampler_initializer(void);
 
 void fnc4smp(sampler_t s) {}
-// CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* %
+// CHECK: define spir_func void [[FUNCNAME:@.*fnc4smp.*]](%opencl.sampler_t addrspace(2)* %
 
 kernel void foo(sampler_t smp_par) {
   // CHECK-LABEL: define spir_kernel void @foo(%opencl.sampler_t addrspace(2)* %smp_par)
@@ -39,27 +46,32 @@
   fnc4smp(smp);
   // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1b
   fnc4smp(smp);
   // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
-  // Case 1a
+  // Case 1a/2a
   fnc4smp(glb_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+  // Case 1a/2c
+  fnc4smp(glb_smp_const);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1c
   fnc4smp(smp_par);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   fnc4smp(5);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
   fnc4smp(const_smp);
@@ -67,12 +79,12 @@
   // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]]
   fnc4smp(const_smp);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
   fnc4smp(constant_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // TODO: enable sampler initialization with non-constant integer.
   //const sampler_t const_smp_func_init = get_sampler_initializer();
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl b/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
index ac5b8e8..39893de 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
@@ -5,6 +5,9 @@
 // RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-SPIR-CL12
 // RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-SPIR-CL20
 
+
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=clc++ | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AMDGCN-CL10
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL12
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL20
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl b/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
index 72c09da..52dd72f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=clc++ -o - %s | FileCheck %s
 
 // CHECK: %[[A:.*]] = type { float, float, float }
 typedef struct {
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl b/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
index 6a9ba87..0f84450 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
@@ -18,12 +18,12 @@
 // CHECK: br label %{{.*}}, !llvm.loop ![[FOR_DISABLE:.*]]
 }
 
-void for_full()
+void for_enable()
 {
-// CHECK-LABEL: for_full
+// CHECK-LABEL: for_enable
     __attribute__((opencl_unroll_hint))
     for( int i = 0; i < 1000; ++i);
-// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_ENABLE:.*]]
 }
 
 /*** while ***/
@@ -45,13 +45,13 @@
 // CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_DISABLE:.*]]
 }
 
-void while_full()
+void while_enable()
 {
-// CHECK-LABEL: while_full
+// CHECK-LABEL: while_enable
     int i = 1000;
     __attribute__((opencl_unroll_hint))
     while(i-->0);
-// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_ENABLE:.*]]
 }
 
 /*** do ***/
@@ -73,13 +73,13 @@
 // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_DISABLE:.*]]
 }
 
-void do_full()
+void do_enable()
 {
-// CHECK-LABEL: do_full
+// CHECK-LABEL: do_enable
     int i = 1000;
     __attribute__((opencl_unroll_hint))
     do {} while(i--> 0);
-// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_FULL:.*]]
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_ENABLE:.*]]
 }
 
 
@@ -87,11 +87,11 @@
 // CHECK: ![[COUNT]]         =  !{!"llvm.loop.unroll.count", i32 8}
 // CHECK: ![[FOR_DISABLE]]   =  distinct !{![[FOR_DISABLE]],  ![[DISABLE:.*]]}
 // CHECK: ![[DISABLE]]       =  !{!"llvm.loop.unroll.disable"}
-// CHECK: ![[FOR_FULL]]      =  distinct !{![[FOR_FULL]],  ![[FULL:.*]]}
-// CHECK: ![[FULL]]          =  !{!"llvm.loop.unroll.full"}
+// CHECK: ![[FOR_ENABLE]]      =  distinct !{![[FOR_ENABLE]],  ![[ENABLE:.*]]}
+// CHECK: ![[ENABLE]]          =  !{!"llvm.loop.unroll.enable"}
 // CHECK: ![[WHILE_COUNT]]   =  distinct !{![[WHILE_COUNT]],    ![[COUNT]]}
 // CHECK: ![[WHILE_DISABLE]] =  distinct !{![[WHILE_DISABLE]],  ![[DISABLE]]}
-// CHECK: ![[WHILE_FULL]]    =  distinct !{![[WHILE_FULL]],     ![[FULL]]}
+// CHECK: ![[WHILE_ENABLE]]    =  distinct !{![[WHILE_ENABLE]],     ![[ENABLE]]}
 // CHECK: ![[DO_COUNT]]      =  distinct !{![[DO_COUNT]],       ![[COUNT]]}
 // CHECK: ![[DO_DISABLE]]    =  distinct !{![[DO_DISABLE]],     ![[DISABLE]]}
-// CHECK: ![[DO_FULL]]       =  distinct !{![[DO_FULL]],        ![[FULL]]}
+// CHECK: ![[DO_ENABLE]]       =  distinct !{![[DO_ENABLE]],        ![[ENABLE]]}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
deleted file mode 100644
index b9013d0..0000000
--- a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -O3 -o - | FileCheck %s
-
-typedef int int2 __attribute((ext_vector_type(2)));
-typedef int int4 __attribute((ext_vector_type(4)));
-
-__constant const int4 itest1 = (int4)(1, 2, ((int2)(3, 4)));
-// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-__constant const int4 itest2 = (int4)(1, 2, ((int2)(3)));
-// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 3>
-
-typedef float float2 __attribute((ext_vector_type(2)));
-typedef float float4 __attribute((ext_vector_type(4)));
-
-void ftest1(float4 *p) {
-  *p = (float4)(1.1f, 1.2f, ((float2)(1.3f, 1.4f)));
-// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>
-}
-
-float4 ftest2(float4 *p) {
-   *p =  (float4)(1.1f, 1.2f, ((float2)(1.3f)));
-// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000>
-}
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
index bba5b23..249c95c 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
@@ -1,22 +1,65 @@
-// RUN: %clang_cc1 -emit-llvm %s -o %t
+// RUN: %clang_cc1 -emit-llvm %s -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -cl-std=clc++ -O0 | FileCheck %s
 
-typedef __attribute__(( ext_vector_type(2) ))  int int2;
-typedef __attribute__(( ext_vector_type(3) ))  int int3;
-typedef __attribute__(( ext_vector_type(4) ))  int int4;
-typedef __attribute__(( ext_vector_type(8) ))  int int8;
-typedef __attribute__(( ext_vector_type(4) ))  float float4;
+typedef __attribute__((ext_vector_type(2))) int int2;
+typedef __attribute__((ext_vector_type(3))) int int3;
+typedef __attribute__((ext_vector_type(4)))  int int4;
+typedef __attribute__((ext_vector_type(8)))  int int8;
+typedef __attribute__((ext_vector_type(4))) float float4;
+
+__constant const int4 c1 = (int4)(1, 2, ((int2)(3)));
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 3>
+
+__constant const int4 c2 = (int4)(1, 2, ((int2)(3, 4)));
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>
 
 void vector_literals_valid() {
-  int4 a_1_1_1_1 = (int4)(1,2,3,4);
-  int4 a_2_1_1 = (int4)((int2)(1,2),3,4);
-  int4 a_1_2_1 = (int4)(1,(int2)(2,3),4);
-  int4 a_1_1_2 = (int4)(1,2,(int2)(3,4));
-  int4 a_2_2 = (int4)((int2)(1,2),(int2)(3,4));
-  int4 a_3_1 = (int4)((int3)(1,2,3),4);
-  int4 a_1_3 = (int4)(1,(int3)(2,3,4));
+  //CHECK: insertelement <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>, i32 %{{.+}}, i32 2
+  //CHECK: insertelement <4 x i32> %{{.+}}, i32 %{{.+}}, i32 3
+  int4 a_1_1_1_1 = (int4)(1, 2, c1.s2, c2.s3);
+
+  //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: insertelement <4 x i32> %{{.+}}, i32 3, i32 2
+  //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
+  int4 a_2_1_1 = (int4)((int2)(1, 2), 3, 4);
+
+  //CHECK: store <2 x i32> <i32 2, i32 3>, <2 x i32>*
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
+  //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
+  int4 a_1_2_1 = (int4)(1, (int2)(2, 3), 4);
+
+  //CHECK: store <2 x i32> <i32 3, i32 4>, <2 x i32>*
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  int4 a_1_1_2 = (int4)(1, 2, (int2)(3, 4));
+
+  //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> <i32 3, i32 3, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  int4 a_2_2 = (int4)((int2)(1, 2), (int2)(3));
+
+  //CHECK: store <4 x i32> <i32 2, i32 3, i32 4, i32 undef>, <4 x i32>*
+  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+  int4 a_1_3 = (int4)(1, (int3)(2, 3, 4));
+
+  //CHECK: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %a
   int4 a = (int4)(1);
-  int8 b = (int8)(1,2,a.xy,a);
-  float4 V2 = (float4) (1);
+
+  //CHECK: load <4 x i32>, <4 x i32>* %a
+  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  //CHECK: shufflevector <8 x i32> <i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+  //CHECK: load <4 x i32>, <4 x i32>* %a
+  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  int8 b = (int8)(1, 2, a.xy, a);
+
+  //CHECK: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %V2
+  float4 V2 = (float4)(1);
 }
-
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl b/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl
new file mode 100644
index 0000000..8ce8017
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility default -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-DEFAULT %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility protected -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-PROTECTED %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility hidden -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-HIDDEN %s
+
+// REQUIRES: amdgpu-registered-target
+
+// FVIS-DEFAULT:  @glob = local_unnamed_addr
+// FVIS-PROTECTED: @glob = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob = hidden local_unnamed_addr
+int glob = 0;
+// FVIS-DEFAULT:  @glob_hidden = hidden local_unnamed_addr
+// FVIS-PROTECTED: @glob_hidden = hidden local_unnamed_addr
+// FVIS-HIDDEN: @glob_hidden = hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) int glob_hidden = 0;
+// FVIS-DEFAULT:  @glob_protected = protected local_unnamed_addr
+// FVIS-PROTECTED: @glob_protected = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob_protected = protected local_unnamed_addr
+__attribute__((visibility("protected"))) int glob_protected = 0;
+// FVIS-DEFAULT:  @glob_default = local_unnamed_addr
+// FVIS-PROTECTED: @glob_default = local_unnamed_addr
+// FVIS-HIDDEN: @glob_default = local_unnamed_addr
+__attribute__((visibility("default"))) int glob_default = 0;
+
+// FVIS-DEFAULT:  @ext = external local_unnamed_addr
+// FVIS-PROTECTED: @ext = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext = external hidden local_unnamed_addr
+extern int ext;
+// FVIS-DEFAULT:  @ext_hidden = external hidden local_unnamed_addr
+// FVIS-PROTECTED: @ext_hidden = external hidden local_unnamed_addr
+// FVIS-HIDDEN: @ext_hidden = external hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) extern int ext_hidden;
+// FVIS-DEFAULT:  @ext_protected = external protected local_unnamed_addr
+// FVIS-PROTECTED: @ext_protected = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext_protected = external protected local_unnamed_addr
+__attribute__((visibility("protected"))) extern int ext_protected;
+// FVIS-DEFAULT:  @ext_default = external local_unnamed_addr
+// FVIS-PROTECTED: @ext_default = external local_unnamed_addr
+// FVIS-HIDDEN: @ext_default = external local_unnamed_addr
+__attribute__((visibility("default"))) extern int ext_default;
+
+// FVIS-DEFAULT: define amdgpu_kernel void @kern()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern()
+kernel void kern() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden()
+__attribute__((visibility("hidden"))) kernel void kern_hidden() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected()
+__attribute__((visibility("protected"))) kernel void kern_protected() {}
+// FVIS-DEFAULT: define amdgpu_kernel void @kern_default()
+// FVIS-PROTECTED: define amdgpu_kernel void @kern_default()
+// FVIS-HIDDEN: define amdgpu_kernel void @kern_default()
+__attribute__((visibility("default"))) kernel void kern_default() {}
+
+// FVIS-DEFAULT: define void @func()
+// FVIS-PROTECTED: define protected void @func()
+// FVIS-HIDDEN: define hidden void @func()
+void func() {}
+// FVIS-DEFAULT: define hidden void @func_hidden()
+// FVIS-PROTECTED: define hidden void @func_hidden()
+// FVIS-HIDDEN: define hidden void @func_hidden()
+__attribute__((visibility("hidden"))) void func_hidden() {}
+// FVIS-DEFAULT: define protected void @func_protected()
+// FVIS-PROTECTED: define protected void @func_protected()
+// FVIS-HIDDEN: define protected void @func_protected()
+__attribute__((visibility("protected"))) void func_protected() {}
+// FVIS-DEFAULT: define void @func_default()
+// FVIS-PROTECTED: define void @func_default()
+// FVIS-HIDDEN: define void @func_default()
+__attribute__((visibility("default"))) void func_default() {}
+
+extern kernel void ext_kern();
+__attribute__((visibility("hidden"))) extern kernel void ext_kern_hidden();
+__attribute__((visibility("protected"))) extern kernel void ext_kern_protected();
+__attribute__((visibility("default"))) extern kernel void ext_kern_default();
+
+extern void ext_func();
+__attribute__((visibility("hidden"))) extern void ext_func_hidden();
+__attribute__((visibility("protected"))) extern void ext_func_protected();
+__attribute__((visibility("default"))) extern void ext_func_default();
+
+void use() {
+    glob = ext + ext_hidden + ext_protected + ext_default;
+    ext_kern();
+    ext_kern_hidden();
+    ext_kern_protected();
+    ext_kern_default();
+    ext_func();
+    ext_func_hidden();
+    ext_func_protected();
+    ext_func_default();
+}
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+
+
+// FVIS-DEFAULT: declare void @ext_func()
+// FVIS-PROTECTED: declare protected void @ext_func()
+// FVIS-HIDDEN: declare hidden void @ext_func()
+
+// FVIS-DEFAULT: declare hidden void @ext_func_hidden()
+// FVIS-PROTECTED: declare hidden void @ext_func_hidden()
+// FVIS-HIDDEN: declare hidden void @ext_func_hidden()
+
+// FVIS-DEFAULT: declare protected void @ext_func_protected()
+// FVIS-PROTECTED: declare protected void @ext_func_protected()
+// FVIS-HIDDEN: declare protected void @ext_func_protected()
+
+// FVIS-DEFAULT: declare void @ext_func_default()
+// FVIS-PROTECTED: declare void @ext_func_default()
+// FVIS-HIDDEN: declare void @ext_func_default()