Importing rustc-1.38.0
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 6f923b7..cdbf28b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -43,7 +43,7 @@
struct LargeStructOneMember g_s;
#endif
-// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
+// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
Mat4X4 out;
@@ -63,8 +63,8 @@
out[0] = foo(in[1]);
}
-// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
Mat64X64 out;
return out;
@@ -86,7 +86,7 @@
u.x = (int2)(0, 0);
}
-// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
// AMDGCN-NOT: addrspacecast
// AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
@@ -97,7 +97,7 @@
// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
-// AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
#if __OPENCL_C_VERSION__ >= 200
void test_indirect_arg_globl(void) {
FuncOneLargeMember(g_s);
@@ -108,7 +108,7 @@
// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
-// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
kernel void test_indirect_arg_local(void) {
local struct LargeStructOneMember l_s;
FuncOneLargeMember(l_s);
@@ -117,7 +117,7 @@
// AMDGCN-LABEL: define void @test_indirect_arg_private()
// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN-NOT: @llvm.memcpy
-// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
void test_indirect_arg_private(void) {
struct LargeStructOneMember p_s;
FuncOneLargeMember(p_s);
@@ -142,7 +142,7 @@
// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
FuncOneLargeMember(u);
}
@@ -152,7 +152,7 @@
u.y = (int2)(0, 0);
}
-// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
u.y[0] = (int2)(0, 0);
}
@@ -171,7 +171,7 @@
// AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
// AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
// AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
FuncLargeTwoMember(u);
}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
index c947db4..52feccc 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
@@ -13,7 +13,7 @@
// CHECK-NOFAKE-NOT: addrspacecast
arg_gen = &var_priv; // implicit cast with obtaining adr, private -> generic
- // CHECK: %{{[0-9]+}} = addrspacecast i32* %var_priv to i32 addrspace(4)*
+ // CHECK: %{{[._a-z0-9]+}} = addrspacecast i32* %{{[._a-z0-9]+}} to i32 addrspace(4)*
// CHECK-NOFAKE-NOT: addrspacecast
arg_glob = (global int *)arg_gen; // explicit cast
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
index b6e6b87..50622f0 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=ASMANG,ASMAN20 %s
-// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN10 %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes=NOASMANG,NOASMAN20 %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG10" %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
// We check that the address spaces are mangled the same in both version of OpenCL
// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
@@ -14,7 +14,7 @@
void ff(int *arg) { }
// ASMANG10: @_Z2ffPi
// ASMANG20: @_Z2ffPU3AS4i
-// NOASMANG10: @_Z2ffPi
+// NOASMANG10: @_Z2ffPU9CLprivatei
// NOASMANG20: @_Z2ffPU9CLgenerici
// OCL-20-DAG: @_Z2ffPU3AS4i
// OCL-12-DAG: @_Z2ffPi
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index 59f38f8..7216cb5 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -42,7 +42,7 @@
// CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 8
int *lp1 = &lv1;
- // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0
+ // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0
// CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4
// CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32*
// CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 8
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index aec00e7..0a7f289 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@
// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval(%struct.flexible_array) align 4 %arg)
void func_flexible_array_arg(flexible_array arg) { }
// CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@
// CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval(%struct.struct_arg) align 4 %s)
void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
// XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval(%struct.num_regs_nested_struct) align 8 %arg4)
void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
// CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@
// CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval(%struct.large_struct_padding) align 8 %arg)
void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
*out = arg;
}
@@ -487,7 +487,7 @@
void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
short4 arg4, short4 arg5, struct_4regs arg6) { }
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
@@ -495,7 +495,7 @@
void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
short3 arg4, short3 arg5, struct_4regs arg6) { }
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
@@ -505,7 +505,7 @@
short2 arg8, short2 arg9, short2 arg10, short2 arg11,
struct_4regs arg13) { }
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg13)
void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
short2 arg4, short2 arg5, short2 arg6, short2 arg7,
short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@
void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, struct_4regs arg6) { }
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
index b5dc47a..3241da6 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -92,48 +92,48 @@
// CHECK-LABEL: @local_memory_alignment_global(
-// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
-// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
+// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
-// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
-// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
-// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
-// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16
+// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
-// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
-// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32
+// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
-// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
-// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64
+// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
-// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
-// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
-// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128
+// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
-// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
-// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
-// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
-// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
-// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
-// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
-// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
-// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
-// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
-// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
-// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
-// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
-// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128
kernel void local_memory_alignment_global()
{
volatile local char lds_i8[4];
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
index ad13a2c..ba4322f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -158,30 +158,30 @@
// CHECK-NOT: "amdgpu-num-sgpr"="0"
// CHECK-NOT: "amdgpu-num-vgpr"="0"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
index bcb00be..4a91652 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
void foo(void) {}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 7aac4d3..0bb3d6f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -5,14 +5,22 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
new file mode 100644
index 0000000..d1ab6ac
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
+void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) {
+ uint ur = arm_dot(ua, ub);
+ // CHECK: call spir_func i32 @_Z7arm_dotDv4_hS_
+ int sr = arm_dot(sa, sb);
+ // CHECK: call spir_func i32 @_Z7arm_dotDv4_cS_
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
+void test_accumulate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+ uint ur = arm_dot_acc(ua, ub, uc);
+ // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_hS_j
+ int sr = arm_dot_acc(sa, sb, c);
+ // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : enable
+void test_accumulate_int16(ushort2 ua, ushort2 ub, uint uc, short2 sa, short2 sb, int c) {
+ uint ur = arm_dot_acc(ua, ub, uc);
+ // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_tS_j
+ int sr = arm_dot_acc(sa, sb, c);
+ // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_sS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : enable
+void test_accumulate_saturate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+ uint ur = arm_dot_acc_sat(ua, ub, uc);
+ // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_hS_j
+ int sr = arm_dot_acc_sat(sa, sb, c);
+ // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : disable
+
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl b/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
index 160f7fb..88f2e0d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -41,7 +41,7 @@
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
- // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
@@ -83,7 +83,7 @@
bool fi4(atomic_int *i) {
// CHECK-LABEL: @fi4(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -109,7 +109,7 @@
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[opencl_subgroup]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
// CHECK: br label %[[continue]]
// CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
@@ -141,21 +141,21 @@
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[MON_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
// CHECK: [[MON_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
// CHECK: [[MON_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} monotonic
// CHECK: [[MON_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
// CHECK: [[ACQ_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
// CHECK: [[ACQ_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
// CHECK: [[ACQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} acquire
// CHECK: [[ACQ_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
// CHECK: [[SEQ_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: [[SEQ_DEV]]:
@@ -163,19 +163,19 @@
// CHECK: [[SEQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
// CHECK: [[SEQ_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
int x = __opencl_atomic_load(i, order, scope);
}
float ff1(global atomic_float *d) {
// CHECK-LABEL: @ff1
- // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
void ff2(atomic_float *d) {
// CHECK-LABEL: @ff2
- // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+ // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
@@ -198,7 +198,7 @@
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
- // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+ // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl b/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
index 675240c..c3e2685 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/blocks.cl
@@ -35,31 +35,23 @@
// SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
// SPIR: %[[i_value:.*]] = load i32, i32* %i
// SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
// AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
// AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
// AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
int (^ block_B)(void) = ^{
return i;
@@ -98,6 +90,12 @@
return blockArgFunc(^{return 42;});
}
+// COMMON-LABEL: define {{.*}}@call_block
+// call {{.*}}@__call_block_block_invoke
+int call_block() {
+ return ^int(int num) { return num; } (11);
+}
+
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
index 4127526..f6fcfa5 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
@@ -2,6 +2,9 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
// CHECK-LABEL: @test_s_dcache_inv_vol
// CHECK: call void @llvm.amdgcn.s.dcache.inv.vol(
@@ -17,3 +20,9 @@
__builtin_amdgcn_buffer_wbinvl1_vol();
}
+// CHECK-LABEL: @test_gws_sema_release_all(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %id)
+void test_gws_sema_release_all(uint id)
+{
+ __builtin_amdgcn_ds_gws_sema_release_all(id);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
index f8356fe..d056f02 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
@@ -1,6 +1,7 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -verify -S -emit-llvm -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -verify -S -emit-llvm -o - %s
typedef unsigned int uint;
typedef half __attribute__((ext_vector_type(2))) half2;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
index e2c03a4..cb83c0b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
@@ -1,6 +1,7 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -verify -S -emit-llvm -o - %s
typedef unsigned int uint;
typedef half __attribute__((ext_vector_type(2))) half2;
@@ -12,24 +13,24 @@
half2 v2hA, half2 v2hB, float fC,
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
- fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
- fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}}
+ fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
+ fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
- siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
- siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}}
+ siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
+ siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
- uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
- uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}}
+ uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
+ uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}}
- siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
- siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}}
+ siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
+ siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
- uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
- uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}}
+ uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
+ uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
- siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
- siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}}
+ siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
+ siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
- uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
- uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}}
+ uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
+ uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
index e5633fb..a7d635a 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
@@ -1,6 +1,8 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
typedef unsigned int uint;
typedef half __attribute__((ext_vector_type(2))) half2;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
new file mode 100644
index 0000000..3921cb9
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -0,0 +1,24 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_permlane16(
+// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
+ *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_permlanex16(
+// CHECK: call i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
+ *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8(
+// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
+void test_mov_dpp8(global uint* out, uint a) {
+ *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
index 333b610..344af2b 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
@@ -1,5 +1,6 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
new file mode 100644
index 0000000..39d913e
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-interp.cl
@@ -0,0 +1,34 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: test_interp_f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+// CHECK: call float @llvm.amdgcn.interp.p1.f16
+// CHECK: call half @llvm.amdgcn.interp.p2.f16
+void test_interp_f16(global half* out, float i, float j, int m0)
+{
+ float p1_0 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, false, m0);
+ half p2_0 = __builtin_amdgcn_interp_p2_f16(p1_0, j, 2, 3, false, m0);
+ float p1_1 = __builtin_amdgcn_interp_p1_f16(i, 2, 3, true, m0);
+ half p2_1 = __builtin_amdgcn_interp_p2_f16(p1_1, j, 2, 3, true, m0);
+ *out = p2_0 + p2_1;
+}
+
+// CHECK-LABEL: test_interp_f32
+// CHECK: call float @llvm.amdgcn.interp.p1
+// CHECK: call float @llvm.amdgcn.interp.p2
+void test_interp_f32(global float* out, float i, float j, int m0)
+{
+ float p1 = __builtin_amdgcn_interp_p1(i, 1, 4, m0);
+ *out = __builtin_amdgcn_interp_p2(p1, j, 1, 4, m0);
+}
+
+// CHECK-LABEL: test_interp_mov
+// CHECK: call float @llvm.amdgcn.interp.mov
+void test_interp_mov(global float* out, float i, float j, int m0)
+{
+ *out = __builtin_amdgcn_interp_mov(2, 3, 4, m0);
+}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 2201421..e3e6b81 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,5 +1,8 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index dc7f480..bbae5ea 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -5,6 +5,10 @@
typedef unsigned long ulong;
typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
// CHECK-LABEL: @test_div_scale_f64
// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
@@ -224,28 +228,28 @@
}
// CHECK-LABEL: @test_sicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_sicmp_i32(global ulong* out, int a, int b)
{
*out = __builtin_amdgcn_sicmp(a, b, 32);
}
// CHECK-LABEL: @test_uicmp_i32
-// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_uicmp_i32(global ulong* out, uint a, uint b)
{
*out = __builtin_amdgcn_uicmp(a, b, 32);
}
// CHECK-LABEL: @test_sicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38)
void test_sicmp_i64(global ulong* out, long a, long b)
{
*out = __builtin_amdgcn_sicmpl(a, b, 39-1);
}
// CHECK-LABEL: @test_uicmp_i64
-// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35)
+// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35)
void test_uicmp_i64(global ulong* out, ulong a, ulong b)
{
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
@@ -287,14 +291,14 @@
}
// CHECK-LABEL: @test_fcmp_f32
-// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5)
void test_fcmp_f32(global ulong* out, float a, float b)
{
*out = __builtin_amdgcn_fcmpf(a, b, 5);
}
// CHECK-LABEL: @test_fcmp_f64
-// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6)
+// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6)
void test_fcmp_f64(global ulong* out, double a, double b)
{
*out = __builtin_amdgcn_fcmp(a, b, 3+3);
@@ -536,6 +540,120 @@
*out = __builtin_amdgcn_s_getpc();
}
+// CHECK-LABEL: @test_ds_append_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_append_lds(global int* out, local int* ptr) {
+ *out = __builtin_amdgcn_ds_append(ptr);
+}
+
+// CHECK-LABEL: @test_ds_consume_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false)
+kernel void test_ds_consume_lds(global int* out, local int* ptr) {
+ *out = __builtin_amdgcn_ds_consume(ptr);
+}
+
+// CHECK-LABEL: @test_gws_init(
+// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
+kernel void test_gws_init(uint value, uint id) {
+ __builtin_amdgcn_ds_gws_init(value, id);
+}
+
+// CHECK-LABEL: @test_gws_barrier(
+// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
+kernel void test_gws_barrier(uint value, uint id) {
+ __builtin_amdgcn_ds_gws_barrier(value, id);
+}
+
+// CHECK-LABEL: @test_gws_sema_v(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
+kernel void test_gws_sema_v(uint id) {
+ __builtin_amdgcn_ds_gws_sema_v(id);
+}
+
+// CHECK-LABEL: @test_gws_sema_br(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
+kernel void test_gws_sema_br(uint value, uint id) {
+ __builtin_amdgcn_ds_gws_sema_br(value, id);
+}
+
+// CHECK-LABEL: @test_gws_sema_p(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
+kernel void test_gws_sema_p(uint id) {
+ __builtin_amdgcn_ds_gws_sema_p(id);
+}
+
+// CHECK-LABEL: @test_mbcnt_lo(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.lo(i32 %src0, i32 %src1)
+kernel void test_mbcnt_lo(global uint* out, uint src0, uint src1) {
+ *out = __builtin_amdgcn_mbcnt_lo(src0, src1);
+}
+
+// CHECK-LABEL: @test_mbcnt_hi(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.hi(i32 %src0, i32 %src1)
+kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
+ *out = __builtin_amdgcn_mbcnt_hi(src0, src1);
+}
+
+// CHECK-LABEL: @test_alignbit(
+// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_alignbit(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_alignbyte(
+// CHECK: tail call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_alignbyte(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_ubfe(
+// CHECK: tail call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_ubfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sbfe(
+// CHECK: tail call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_sbfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_cvt_pkrtz(
+// CHECK: tail call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1)
+kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
+ *out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
+kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
+ *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
+kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
+ *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
+ *out = __builtin_amdgcn_cvt_pk_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) {
+ *out = __builtin_amdgcn_cvt_pk_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u8_f32
+// CHECK: tail call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2)
+kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
+}
+
// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
// CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl b/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
index 3fba83d..36a6fb6 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/builtins.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -finclude-default-header -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
+// RUN: %clang_cc1 %s -finclude-default-header -cl-std=clc++ -fblocks -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
void testBranchingOnEnqueueKernel(queue_t default_queue, unsigned flags, ndrange_t ndrange) {
// Ensure `enqueue_kernel` can be branched upon.
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 4732194..4abeb92 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -11,7 +11,7 @@
// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
// For anonymous blocks without captures, emit block literals as global variable.
// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
@@ -77,11 +77,11 @@
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
- // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+ // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -95,8 +95,8 @@
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
@@ -107,8 +107,8 @@
});
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
- // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* {{%[0-9]+}}, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
- enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, 0,
+ // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* null, %opencl.clk_event_t{{.*}}* addrspace(4)* null,
+ enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
^(void) {
return;
});
@@ -165,7 +165,7 @@
// Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+ // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8*
@@ -192,7 +192,7 @@
// Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
+ // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
// CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8*
@@ -300,28 +300,26 @@
// Emits global block literal [[BLG8]] and invoke function [[INVG8]].
// The full type of these expressions are long (and repeated elsewhere), so we
// capture it as part of the regex for convenience and clarity.
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
void (^const block_A)(void) = ^{
return;
};
// Emits global block literal [[BLG9]] and invoke function [[INVG9]].
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
void (^const block_B)(local void *) = ^(local void *a) {
return;
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
- // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+ // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
enqueue_kernel(default_queue, flags, ndrange, block_A);
@@ -333,22 +331,42 @@
unsigned size = get_kernel_work_group_size(block_A);
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+ bl_t b1 = block_G;
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+ bl_t b2 = b1;
+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+ b2(0);
+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ size = get_kernel_preferred_work_group_size_multiple(b2);
+
void (^block_C)(void) = ^{
callee(i, a);
};
-
// Emits block literal on stack and block kernel [[INVLK3]].
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
- // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+ // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
enqueue_kernel(default_queue, flags, ndrange, block_C);
@@ -404,8 +422,8 @@
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl b/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
index 5fcf117..47e180c 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -26,6 +26,6 @@
constant int var1 = 1;
- // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0)
+ // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i64 0, i64 0)
foo(&var1, arr1, arr2, arr3);
}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl b/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
index a011920..193d391 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/convergent.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM
// This is initially assumed convergent, but can be deduced to not require it.
@@ -117,7 +118,12 @@
// CHECK: [[for_body]]:
// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
// CHECK-NOT: call spir_func void @nodupfun()
-// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+
+// The new PM produces a slightly different IR for the loop from the legacy PM,
+// but the test still checks that the loop is not unrolled.
+// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+// CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]]
+// CHECK-NEW: [[for_body_crit_edge]]:
void test_not_unroll() {
for (int i = 0; i < 10; i++)
@@ -133,7 +139,7 @@
__asm__ volatile("s_barrier");
}
-// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #0 = { nofree noinline norecurse nounwind "
// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/images.cl b/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
index eb054ec..326322d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/images.cl
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -o - -cl-std=clc++ | FileCheck %s
__attribute__((overloadable)) void read_image(read_only image1d_t img_ro);
__attribute__((overloadable)) void read_image(write_only image1d_t img_wo);
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5bb52e9..e892376 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@
kernel void test_single(int_single input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* byval nocapture
+// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
// CHECK: i32* nocapture %output
output[0] = input.a;
}
@@ -35,7 +35,7 @@
kernel void test_pair(int_pair input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* byval nocapture
+// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
// CHECK: i32* nocapture %output
output[0] = (int)input.a;
output[1] = (int)input.b;
@@ -44,7 +44,7 @@
kernel void test_kernel(test_struct input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* byval nocapture
+// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
// CHECK: i32* nocapture %output
output[0] = input.elementA;
output[1] = input.elementB;
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl b/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
index ac1c1b5..f083a85 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/logical-ops.cl
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl b/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
index 14f6411..5cbff4d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/no-signed-zeros.cl
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
-// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NO-SIGNED-ZEROS
-
-float signedzeros(float a) {
- return a;
-}
-
-// CHECK: attributes
-// NORMAL: "no-signed-zeros-fp-math"="false"
-// NO-SIGNED-ZEROS: "no-signed-zeros-fp-math"="true"
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
+// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NO-SIGNED-ZEROS
+
+float signedzeros(float a) {
+ return a;
+}
+
+// CHECK: attributes
+// NORMAL: "no-signed-zeros-fp-math"="false"
+// NO-SIGNED-ZEROS: "no-signed-zeros-fp-math"="true"
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl b/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
index dbaba87..296923e 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/numbered-address-space.cl
@@ -11,12 +11,6 @@
*generic_ptr = 4;
}
-// CHECK-LABEL: @test_numbered_as_to_builtin(
-// CHECK: addrspacecast i32 addrspace(42)* %0 to float addrspace(3)*
-void test_numbered_as_to_builtin(__attribute__((address_space(42))) int *arbitary_numbered_ptr, float src) {
- volatile float result = __builtin_amdgcn_ds_fmaxf(arbitary_numbered_ptr, src, 0, 0, false);
-}
-
// CHECK-LABEL: @test_generic_as_to_builtin_parameter_explicit_cast(
// CHECK: addrspacecast i32 addrspace(3)* %0 to i32*
void test_generic_as_to_builtin_parameter_explicit_cast(__local int *local_ptr, float src) {
@@ -25,10 +19,7 @@
}
// CHECK-LABEL: @test_generic_as_to_builtin_parameter_implicit_cast(
-// CHECK: addrspacecast i32* %2 to float addrspace(3)*
+// CHECK: bitcast i32 addrspace(3)* %0 to float addrspace(3)*
void test_generic_as_to_builtin_parameter_implicit_cast(__local int *local_ptr, float src) {
- generic int* generic_ptr = local_ptr;
-
- volatile float result = __builtin_amdgcn_ds_fmaxf(generic_ptr, src, 0, 0, false);
+ volatile float result = __builtin_amdgcn_ds_fmaxf(local_ptr, src, 0, 0, false);
}
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl b/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
index 2a533c5..02b9669 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/pipe_builtin.cl
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s
+// FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple
+// above to support OpenCL in the MS C++ ABI.
// CHECK-DAG: %opencl.pipe_ro_t = type opaque
// CHECK-DAG: %opencl.pipe_wo_t = type opaque
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl b/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
index 22976c5..ed3f32d 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/sampler.cl
@@ -1,4 +1,6 @@
// RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=clc++ -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
//
// This test covers 5 cases of sampler initialzation:
// 1. function argument passing
@@ -6,8 +8,9 @@
// 1b. argument is a function-scope variable
// 1c. argument is one of caller function's parameters
// 2. variable initialization
-// 2a. initializing a file-scope variable
+// 2a. initializing a file-scope variable with constant addr space qualifier
// 2b. initializing a function-scope variable
+// 2c. initializing a file-scope variable with const qualifier
#define CLK_ADDRESS_CLAMP_TO_EDGE 2
#define CLK_NORMALIZED_COORDS_TRUE 1
@@ -20,10 +23,14 @@
constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
// CHECK-NOT: glb_smp
+// Case 2c
+const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+// CHECK-NOT: glb_smp_const
+
int get_sampler_initializer(void);
void fnc4smp(sampler_t s) {}
-// CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* %
+// CHECK: define spir_func void [[FUNCNAME:@.*fnc4smp.*]](%opencl.sampler_t addrspace(2)* %
kernel void foo(sampler_t smp_par) {
// CHECK-LABEL: define spir_kernel void @foo(%opencl.sampler_t addrspace(2)* %smp_par)
@@ -39,27 +46,32 @@
fnc4smp(smp);
// CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
// CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
// Case 1b
fnc4smp(smp);
// CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
// CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
- // Case 1a
+ // Case 1a/2a
fnc4smp(glb_smp);
// CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
+
+ // Case 1a/2c
+ fnc4smp(glb_smp_const);
+ // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
// Case 1c
fnc4smp(smp_par);
// CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]]
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
fnc4smp(5);
// CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5)
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
fnc4smp(const_smp);
@@ -67,12 +79,12 @@
// CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]]
fnc4smp(const_smp);
// CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]]
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
fnc4smp(constant_smp);
// CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
- // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
// TODO: enable sampler initialization with non-constant integer.
//const sampler_t const_smp_func_init = get_sampler_initializer();
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl b/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
index ac5b8e8..39893de 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/spir_version.cl
@@ -5,6 +5,9 @@
// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-SPIR-CL12
// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=clc++ | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+
// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AMDGCN-CL10
// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL12
// RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL20
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl b/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
index 72c09da..52dd72f 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/to_addr_builtin.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm -O0 -cl-std=clc++ -o - %s | FileCheck %s
// CHECK: %[[A:.*]] = type { float, float, float }
typedef struct {
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl b/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
index 6a9ba87..0f84450 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/unroll-hint.cl
@@ -18,12 +18,12 @@
// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_DISABLE:.*]]
}
-void for_full()
+void for_enable()
{
-// CHECK-LABEL: for_full
+// CHECK-LABEL: for_enable
__attribute__((opencl_unroll_hint))
for( int i = 0; i < 1000; ++i);
-// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[FOR_ENABLE:.*]]
}
/*** while ***/
@@ -45,13 +45,13 @@
// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_DISABLE:.*]]
}
-void while_full()
+void while_enable()
{
-// CHECK-LABEL: while_full
+// CHECK-LABEL: while_enable
int i = 1000;
__attribute__((opencl_unroll_hint))
while(i-->0);
-// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_FULL:.*]]
+// CHECK: br label %{{.*}}, !llvm.loop ![[WHILE_ENABLE:.*]]
}
/*** do ***/
@@ -73,13 +73,13 @@
// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_DISABLE:.*]]
}
-void do_full()
+void do_enable()
{
-// CHECK-LABEL: do_full
+// CHECK-LABEL: do_enable
int i = 1000;
__attribute__((opencl_unroll_hint))
do {} while(i--> 0);
-// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_FULL:.*]]
+// CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !llvm.loop ![[DO_ENABLE:.*]]
}
@@ -87,11 +87,11 @@
// CHECK: ![[COUNT]] = !{!"llvm.loop.unroll.count", i32 8}
// CHECK: ![[FOR_DISABLE]] = distinct !{![[FOR_DISABLE]], ![[DISABLE:.*]]}
// CHECK: ![[DISABLE]] = !{!"llvm.loop.unroll.disable"}
-// CHECK: ![[FOR_FULL]] = distinct !{![[FOR_FULL]], ![[FULL:.*]]}
-// CHECK: ![[FULL]] = !{!"llvm.loop.unroll.full"}
+// CHECK: ![[FOR_ENABLE]] = distinct !{![[FOR_ENABLE]], ![[ENABLE:.*]]}
+// CHECK: ![[ENABLE]] = !{!"llvm.loop.unroll.enable"}
// CHECK: ![[WHILE_COUNT]] = distinct !{![[WHILE_COUNT]], ![[COUNT]]}
// CHECK: ![[WHILE_DISABLE]] = distinct !{![[WHILE_DISABLE]], ![[DISABLE]]}
-// CHECK: ![[WHILE_FULL]] = distinct !{![[WHILE_FULL]], ![[FULL]]}
+// CHECK: ![[WHILE_ENABLE]] = distinct !{![[WHILE_ENABLE]], ![[ENABLE]]}
// CHECK: ![[DO_COUNT]] = distinct !{![[DO_COUNT]], ![[COUNT]]}
// CHECK: ![[DO_DISABLE]] = distinct !{![[DO_DISABLE]], ![[DISABLE]]}
-// CHECK: ![[DO_FULL]] = distinct !{![[DO_FULL]], ![[FULL]]}
+// CHECK: ![[DO_ENABLE]] = distinct !{![[DO_ENABLE]], ![[ENABLE]]}
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
deleted file mode 100644
index b9013d0..0000000
--- a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_nested.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -O3 -o - | FileCheck %s
-
-typedef int int2 __attribute((ext_vector_type(2)));
-typedef int int4 __attribute((ext_vector_type(4)));
-
-__constant const int4 itest1 = (int4)(1, 2, ((int2)(3, 4)));
-// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-__constant const int4 itest2 = (int4)(1, 2, ((int2)(3)));
-// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 3>
-
-typedef float float2 __attribute((ext_vector_type(2)));
-typedef float float4 __attribute((ext_vector_type(4)));
-
-void ftest1(float4 *p) {
- *p = (float4)(1.1f, 1.2f, ((float2)(1.3f, 1.4f)));
-// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>
-}
-
-float4 ftest2(float4 *p) {
- *p = (float4)(1.1f, 1.2f, ((float2)(1.3f)));
-// CHECK: store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000>
-}
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
index bba5b23..249c95c 100644
--- a/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/vector_literals_valid.cl
@@ -1,22 +1,65 @@
-// RUN: %clang_cc1 -emit-llvm %s -o %t
+// RUN: %clang_cc1 -emit-llvm %s -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -cl-std=clc++ -O0 | FileCheck %s
-typedef __attribute__(( ext_vector_type(2) )) int int2;
-typedef __attribute__(( ext_vector_type(3) )) int int3;
-typedef __attribute__(( ext_vector_type(4) )) int int4;
-typedef __attribute__(( ext_vector_type(8) )) int int8;
-typedef __attribute__(( ext_vector_type(4) )) float float4;
+typedef __attribute__((ext_vector_type(2))) int int2;
+typedef __attribute__((ext_vector_type(3))) int int3;
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(8))) int int8;
+typedef __attribute__((ext_vector_type(4))) float float4;
+
+__constant const int4 c1 = (int4)(1, 2, ((int2)(3)));
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 3>
+
+__constant const int4 c2 = (int4)(1, 2, ((int2)(3, 4)));
+// CHECK: constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>
void vector_literals_valid() {
- int4 a_1_1_1_1 = (int4)(1,2,3,4);
- int4 a_2_1_1 = (int4)((int2)(1,2),3,4);
- int4 a_1_2_1 = (int4)(1,(int2)(2,3),4);
- int4 a_1_1_2 = (int4)(1,2,(int2)(3,4));
- int4 a_2_2 = (int4)((int2)(1,2),(int2)(3,4));
- int4 a_3_1 = (int4)((int3)(1,2,3),4);
- int4 a_1_3 = (int4)(1,(int3)(2,3,4));
+ //CHECK: insertelement <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>, i32 %{{.+}}, i32 2
+ //CHECK: insertelement <4 x i32> %{{.+}}, i32 %{{.+}}, i32 3
+ int4 a_1_1_1_1 = (int4)(1, 2, c1.s2, c2.s3);
+
+ //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
+ //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: insertelement <4 x i32> %{{.+}}, i32 3, i32 2
+ //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
+ int4 a_2_1_1 = (int4)((int2)(1, 2), 3, 4);
+
+ //CHECK: store <2 x i32> <i32 2, i32 3>, <2 x i32>*
+ //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
+ //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
+ int4 a_1_2_1 = (int4)(1, (int2)(2, 3), 4);
+
+ //CHECK: store <2 x i32> <i32 3, i32 4>, <2 x i32>*
+ //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: shufflevector <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ int4 a_1_1_2 = (int4)(1, 2, (int2)(3, 4));
+
+ //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
+ //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> <i32 3, i32 3, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ int4 a_2_2 = (int4)((int2)(1, 2), (int2)(3));
+
+ //CHECK: store <4 x i32> <i32 2, i32 3, i32 4, i32 undef>, <4 x i32>*
+ //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+ int4 a_1_3 = (int4)(1, (int3)(2, 3, 4));
+
+ //CHECK: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %a
int4 a = (int4)(1);
- int8 b = (int8)(1,2,a.xy,a);
- float4 V2 = (float4) (1);
+
+ //CHECK: load <4 x i32>, <4 x i32>* %a
+ //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ //CHECK: shufflevector <8 x i32> <i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+ //CHECK: load <4 x i32>, <4 x i32>* %a
+ //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ int8 b = (int8)(1, 2, a.xy, a);
+
+ //CHECK: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %V2
+ float4 V2 = (float4)(1);
}
-
-
diff --git a/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl b/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl
new file mode 100644
index 0000000..8ce8017
--- /dev/null
+++ b/src/llvm-project/clang/test/CodeGenOpenCL/visibility.cl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility default -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-DEFAULT %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility protected -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-PROTECTED %s
+// RUN: %clang_cc1 -std=cl2.0 -fapply-global-visibility-to-externs -fvisibility hidden -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck --check-prefix=FVIS-HIDDEN %s
+
+// REQUIRES: amdgpu-registered-target
+
+// FVIS-DEFAULT: @glob = local_unnamed_addr
+// FVIS-PROTECTED: @glob = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob = hidden local_unnamed_addr
+int glob = 0;
+// FVIS-DEFAULT: @glob_hidden = hidden local_unnamed_addr
+// FVIS-PROTECTED: @glob_hidden = hidden local_unnamed_addr
+// FVIS-HIDDEN: @glob_hidden = hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) int glob_hidden = 0;
+// FVIS-DEFAULT: @glob_protected = protected local_unnamed_addr
+// FVIS-PROTECTED: @glob_protected = protected local_unnamed_addr
+// FVIS-HIDDEN: @glob_protected = protected local_unnamed_addr
+__attribute__((visibility("protected"))) int glob_protected = 0;
+// FVIS-DEFAULT: @glob_default = local_unnamed_addr
+// FVIS-PROTECTED: @glob_default = local_unnamed_addr
+// FVIS-HIDDEN: @glob_default = local_unnamed_addr
+__attribute__((visibility("default"))) int glob_default = 0;
+
+// FVIS-DEFAULT: @ext = external local_unnamed_addr
+// FVIS-PROTECTED: @ext = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext = external hidden local_unnamed_addr
+extern int ext;
+// FVIS-DEFAULT: @ext_hidden = external hidden local_unnamed_addr
+// FVIS-PROTECTED: @ext_hidden = external hidden local_unnamed_addr
+// FVIS-HIDDEN: @ext_hidden = external hidden local_unnamed_addr
+__attribute__((visibility("hidden"))) extern int ext_hidden;
+// FVIS-DEFAULT: @ext_protected = external protected local_unnamed_addr
+// FVIS-PROTECTED: @ext_protected = external protected local_unnamed_addr
+// FVIS-HIDDEN: @ext_protected = external protected local_unnamed_addr
+__attribute__((visibility("protected"))) extern int ext_protected;
+// FVIS-DEFAULT: @ext_default = external local_unnamed_addr
+// FVIS-PROTECTED: @ext_default = external local_unnamed_addr
+// FVIS-HIDDEN: @ext_default = external local_unnamed_addr
+__attribute__((visibility("default"))) extern int ext_default;
+
+// FVIS-DEFAULT: define amdgpu_kernel void @kern()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern()
+kernel void kern() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden()
+__attribute__((visibility("hidden"))) kernel void kern_hidden() {}
+// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected()
+// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected()
+// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected()
+__attribute__((visibility("protected"))) kernel void kern_protected() {}
+// FVIS-DEFAULT: define amdgpu_kernel void @kern_default()
+// FVIS-PROTECTED: define amdgpu_kernel void @kern_default()
+// FVIS-HIDDEN: define amdgpu_kernel void @kern_default()
+__attribute__((visibility("default"))) kernel void kern_default() {}
+
+// FVIS-DEFAULT: define void @func()
+// FVIS-PROTECTED: define protected void @func()
+// FVIS-HIDDEN: define hidden void @func()
+void func() {}
+// FVIS-DEFAULT: define hidden void @func_hidden()
+// FVIS-PROTECTED: define hidden void @func_hidden()
+// FVIS-HIDDEN: define hidden void @func_hidden()
+__attribute__((visibility("hidden"))) void func_hidden() {}
+// FVIS-DEFAULT: define protected void @func_protected()
+// FVIS-PROTECTED: define protected void @func_protected()
+// FVIS-HIDDEN: define protected void @func_protected()
+__attribute__((visibility("protected"))) void func_protected() {}
+// FVIS-DEFAULT: define void @func_default()
+// FVIS-PROTECTED: define void @func_default()
+// FVIS-HIDDEN: define void @func_default()
+__attribute__((visibility("default"))) void func_default() {}
+
+extern kernel void ext_kern();
+__attribute__((visibility("hidden"))) extern kernel void ext_kern_hidden();
+__attribute__((visibility("protected"))) extern kernel void ext_kern_protected();
+__attribute__((visibility("default"))) extern kernel void ext_kern_default();
+
+extern void ext_func();
+__attribute__((visibility("hidden"))) extern void ext_func_hidden();
+__attribute__((visibility("protected"))) extern void ext_func_protected();
+__attribute__((visibility("default"))) extern void ext_func_default();
+
+void use() {
+ glob = ext + ext_hidden + ext_protected + ext_default;
+ ext_kern();
+ ext_kern_hidden();
+ ext_kern_protected();
+ ext_kern_default();
+ ext_func();
+ ext_func_hidden();
+ ext_func_protected();
+ ext_func_default();
+}
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
+
+// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
+// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
+
+// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
+// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
+
+
+// FVIS-DEFAULT: declare void @ext_func()
+// FVIS-PROTECTED: declare protected void @ext_func()
+// FVIS-HIDDEN: declare hidden void @ext_func()
+
+// FVIS-DEFAULT: declare hidden void @ext_func_hidden()
+// FVIS-PROTECTED: declare hidden void @ext_func_hidden()
+// FVIS-HIDDEN: declare hidden void @ext_func_hidden()
+
+// FVIS-DEFAULT: declare protected void @ext_func_protected()
+// FVIS-PROTECTED: declare protected void @ext_func_protected()
+// FVIS-HIDDEN: declare protected void @ext_func_protected()
+
+// FVIS-DEFAULT: declare void @ext_func_default()
+// FVIS-PROTECTED: declare void @ext_func_default()
+// FVIS-HIDDEN: declare void @ext_func_default()