| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py |
| // RUN: %clang_cc1 %s -O0 -ffreestanding -triple=x86_64-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK64 |
| // RUN: %clang_cc1 %s -O0 -ffreestanding -triple=i386-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK32 |
| |
| #include <x86intrin.h> |
| |
| // CHECK64-LABEL: @test_loadiwkey( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__CTL_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[__INTKEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__ENKEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__ENKEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[CTL_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[INTKEY_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[ENKEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[ENKEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: store i32 [[CTL:%.*]], i32* [[CTL_ADDR]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[INTKEY:%.*]], <2 x i64>* [[INTKEY_ADDR]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[ENKEY_LO:%.*]], <2 x i64>* [[ENKEY_LO_ADDR]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[ENKEY_HI:%.*]], <2 x i64>* [[ENKEY_HI_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CTL_ADDR]], align 4 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[INTKEY_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_LO_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_HI_ADDR]], align 16 |
| // CHECK64-NEXT: store i32 [[TMP0]], i32* [[__CTL_ADDR_I]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__INTKEY_ADDR_I]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__INTKEY_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = load i32, i32* [[__CTL_ADDR_I]], align 4 |
| // CHECK64-NEXT: call void @llvm.x86.loadiwkey(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], i32 [[TMP7]]) #[[ATTR1:[0-9]+]] |
| // CHECK64-NEXT: ret void |
| // |
| // CHECK32-LABEL: @test_loadiwkey( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__CTL_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[__INTKEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__ENKEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__ENKEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[CTL_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[INTKEY_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[ENKEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[ENKEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: store i32 [[CTL:%.*]], i32* [[CTL_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[INTKEY:%.*]], <2 x i64>* [[INTKEY_ADDR]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[ENKEY_LO:%.*]], <2 x i64>* [[ENKEY_LO_ADDR]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[ENKEY_HI:%.*]], <2 x i64>* [[ENKEY_HI_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[CTL_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[INTKEY_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_LO_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_HI_ADDR]], align 16 |
| // CHECK32-NEXT: store i32 [[TMP0]], i32* [[__CTL_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__INTKEY_ADDR_I]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__INTKEY_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = load i32, i32* [[__CTL_ADDR_I]], align 4 |
| // CHECK32-NEXT: call void @llvm.x86.loadiwkey(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], i32 [[TMP7]]) #[[ATTR1:[0-9]+]] |
| // CHECK32-NEXT: ret void |
| // |
| void test_loadiwkey(unsigned int ctl, __m128i intkey, __m128i enkey_lo, __m128i enkey_hi) { /* Codegen test for _mm_loadiwkey: the FileCheck lines above pin the -O0 IR for both the x86_64 and i386 RUN lines. */ |
| _mm_loadiwkey(ctl, intkey, enkey_lo, enkey_hi); /* Expected IR: a void call to @llvm.x86.loadiwkey whose operands are (intkey, enkey_lo, enkey_hi, ctl), i.e. ctl is passed last. */ |
| } |
| |
| // CHECK64-LABEL: @test_encodekey128_u32( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[__KEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[KEY_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[KEY:%.*]], <2 x i64>* [[KEY_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 [[TMP3]], <2 x i64> [[TMP4]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 1 |
| // CHECK64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP5]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 1 |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 2 |
| // CHECK64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP5]], i32 16 |
| // CHECK64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 1 |
| // CHECK64-NEXT: [[TMP12:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 3 |
| // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 |
| // CHECK64-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 |
| // CHECK64-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 |
| // CHECK64-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 |
| // CHECK64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 |
| // CHECK64-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 |
| // CHECK64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 |
| // CHECK64-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 |
| // CHECK64-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 |
| // CHECK64-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 |
| // CHECK64-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 |
| // CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: ret i32 [[TMP24]] |
| // |
| // CHECK32-LABEL: @test_encodekey128_u32( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[__KEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[KEY_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[KEY:%.*]], <2 x i64>* [[KEY_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 [[TMP3]], <2 x i64> [[TMP4]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 1 |
| // CHECK32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP5]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 1 |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 2 |
| // CHECK32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP5]], i32 16 |
| // CHECK32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 1 |
| // CHECK32-NEXT: [[TMP12:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 3 |
| // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 |
| // CHECK32-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 |
| // CHECK32-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 |
| // CHECK32-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 |
| // CHECK32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 |
| // CHECK32-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 |
| // CHECK32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 |
| // CHECK32-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 |
| // CHECK32-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 |
| // CHECK32-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 |
| // CHECK32-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 |
| // CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: ret i32 [[TMP24]] |
| // |
| unsigned int test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { /* Codegen test for _mm_encodekey128_u32: the FileCheck lines above pin the -O0 IR for both the x86_64 and i386 RUN lines. */ |
| return _mm_encodekey128_u32(htype, key, h); /* Expected IR: call @llvm.x86.encodekey128(htype, key); result elements 1..6 are stored (align 1) to h at byte offsets 0, 16, 32, 48, 64, 80, and element 0 (i32) is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test_encodekey256_u32( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[__KEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__KEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK64-NEXT: [[KEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[KEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[KEY_LO:%.*]], <2 x i64>* [[KEY_LO_ADDR]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[KEY_HI:%.*]], <2 x i64>* [[KEY_HI_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_LO_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_HI_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 |
| // CHECK64-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP3]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP8:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 1 |
| // CHECK64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP7]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 1 |
| // CHECK64-NEXT: [[TMP11:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 2 |
| // CHECK64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP7]], i32 16 |
| // CHECK64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP13]], align 1 |
| // CHECK64-NEXT: [[TMP14:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 3 |
| // CHECK64-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP7]], i32 32 |
| // CHECK64-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 |
| // CHECK64-NEXT: [[TMP17:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 4 |
| // CHECK64-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 |
| // CHECK64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 |
| // CHECK64-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 |
| // CHECK64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 |
| // CHECK64-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 |
| // CHECK64-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 |
| // CHECK64-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 |
| // CHECK64-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 |
| // CHECK64-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 |
| // CHECK64-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 |
| // CHECK64-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* |
| // CHECK64-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 |
| // CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 |
| // CHECK64-NEXT: ret i32 [[TMP29]] |
| // |
| // CHECK32-LABEL: @test_encodekey256_u32( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[__KEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__KEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 |
| // CHECK32-NEXT: [[KEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[KEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[KEY_LO:%.*]], <2 x i64>* [[KEY_LO_ADDR]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[KEY_HI:%.*]], <2 x i64>* [[KEY_HI_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_LO_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_HI_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 |
| // CHECK32-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP3]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP8:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 1 |
| // CHECK32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP7]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 1 |
| // CHECK32-NEXT: [[TMP11:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 2 |
| // CHECK32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP7]], i32 16 |
| // CHECK32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP13]], align 1 |
| // CHECK32-NEXT: [[TMP14:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 3 |
| // CHECK32-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP7]], i32 32 |
| // CHECK32-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 |
| // CHECK32-NEXT: [[TMP17:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 4 |
| // CHECK32-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 |
| // CHECK32-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 |
| // CHECK32-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 |
| // CHECK32-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 |
| // CHECK32-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 |
| // CHECK32-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 |
| // CHECK32-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 |
| // CHECK32-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 |
| // CHECK32-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 |
| // CHECK32-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 |
| // CHECK32-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* |
| // CHECK32-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 |
| // CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 |
| // CHECK32-NEXT: ret i32 [[TMP29]] |
| // |
| unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { /* Codegen test for _mm_encodekey256_u32: the FileCheck lines above pin the -O0 IR for both the x86_64 and i386 RUN lines. */ |
| return _mm_encodekey256_u32(htype, key_lo, key_hi, h); /* Expected IR: call @llvm.x86.encodekey256(htype, key_lo, key_hi); result elements 1..7 are stored (align 1) to h at byte offsets 0, 16, ..., 96, and element 0 (i32) is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test_mm_aesenc256kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK64-NEXT: br i1 [[TMP8]], label [[AESENC256KL_NO_ERROR_I:%.*]], label [[AESENC256KL_ERROR_I:%.*]] |
| // CHECK64: aesenc256kl_no_error.i: |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENC256KL_U8_EXIT:%.*]] |
| // CHECK64: aesenc256kl_error.i: |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENC256KL_U8_EXIT]] |
| // CHECK64: _mm_aesenc256kl_u8.exit: |
| // CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP10]] |
| // |
| // CHECK32-LABEL: @test_mm_aesenc256kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK32-NEXT: br i1 [[TMP8]], label [[AESENC256KL_NO_ERROR_I:%.*]], label [[AESENC256KL_ERROR_I:%.*]] |
| // CHECK32: aesenc256kl_no_error.i: |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENC256KL_U8_EXIT:%.*]] |
| // CHECK32: aesenc256kl_error.i: |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENC256KL_U8_EXIT]] |
| // CHECK32: _mm_aesenc256kl_u8.exit: |
| // CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP10]] |
| // |
| unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) { /* Codegen test for _mm_aesenc256kl_u8: the FileCheck lines above pin the -O0 IR for both the x86_64 and i386 RUN lines. */ |
| return _mm_aesenc256kl_u8(odata, idata, h); /* Expected IR: call @llvm.x86.aesenc256kl(idata, h) yielding { i8, <2 x i64> }; element 0 truncated to i1 selects the branch: true stores element 1 to *odata, false stores zeroinitializer; element 0 is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test_mm_aesdec256kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK64-NEXT: br i1 [[TMP8]], label [[AESDEC256KL_NO_ERROR_I:%.*]], label [[AESDEC256KL_ERROR_I:%.*]] |
| // CHECK64: aesdec256kl_no_error.i: |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT:%.*]] |
| // CHECK64: aesdec256kl_error.i: |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT]] |
| // CHECK64: _mm_aesdec256kl_u8.exit: |
| // CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP10]] |
| // |
| // CHECK32-LABEL: @test_mm_aesdec256kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK32-NEXT: br i1 [[TMP8]], label [[AESDEC256KL_NO_ERROR_I:%.*]], label [[AESDEC256KL_ERROR_I:%.*]] |
| // CHECK32: aesdec256kl_no_error.i: |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT:%.*]] |
| // CHECK32: aesdec256kl_error.i: |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT]] |
| // CHECK32: _mm_aesdec256kl_u8.exit: |
| // CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP10]] |
| // |
| unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) { /* Codegen test: forwards its arguments unchanged to _mm_aesdec256kl_u8; the FileCheck assertions above pin the -O0 IR for the x86_64 and i386 RUN lines. Keep the body a single return so the alloca/load/store sequence stays as asserted. */ |
| return _mm_aesdec256kl_u8(odata, idata, h); /* Asserted IR: call @llvm.x86.aesdec256kl(idata, h); field 0 truncated to i1 picks the no_error/error branch; field 1 (no_error) or zeroinitializer (error) is stored through odata; field 0 is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test_mm_aesenc128kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK64-NEXT: br i1 [[TMP8]], label [[AESENC128KL_NO_ERROR_I:%.*]], label [[AESENC128KL_ERROR_I:%.*]] |
| // CHECK64: aesenc128kl_no_error.i: |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENC128KL_U8_EXIT:%.*]] |
| // CHECK64: aesenc128kl_error.i: |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENC128KL_U8_EXIT]] |
| // CHECK64: _mm_aesenc128kl_u8.exit: |
| // CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP10]] |
| // |
| // CHECK32-LABEL: @test_mm_aesenc128kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK32-NEXT: br i1 [[TMP8]], label [[AESENC128KL_NO_ERROR_I:%.*]], label [[AESENC128KL_ERROR_I:%.*]] |
| // CHECK32: aesenc128kl_no_error.i: |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENC128KL_U8_EXIT:%.*]] |
| // CHECK32: aesenc128kl_error.i: |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENC128KL_U8_EXIT]] |
| // CHECK32: _mm_aesenc128kl_u8.exit: |
| // CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP10]] |
| // |
| unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) { /* Codegen test: forwards its arguments unchanged to _mm_aesenc128kl_u8; the FileCheck assertions above pin the -O0 IR for the x86_64 and i386 RUN lines. Keep the body a single return so the alloca/load/store sequence stays as asserted. */ |
| return _mm_aesenc128kl_u8(odata, idata, h); /* Asserted IR: call @llvm.x86.aesenc128kl(idata, h); field 0 truncated to i1 picks the no_error/error branch; field 1 (no_error) or zeroinitializer (error) is stored through odata; field 0 is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test_mm_aesdec128kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK64-NEXT: br i1 [[TMP8]], label [[AESDEC128KL_NO_ERROR_I:%.*]], label [[AESDEC128KL_ERROR_I:%.*]] |
| // CHECK64: aesdec128kl_no_error.i: |
| // CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT:%.*]] |
| // CHECK64: aesdec128kl_error.i: |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT]] |
| // CHECK64: _mm_aesdec128kl_u8.exit: |
| // CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP10]] |
| // |
| // CHECK32-LABEL: @test_mm_aesdec128kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 |
| // CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 |
| // CHECK32-NEXT: br i1 [[TMP8]], label [[AESDEC128KL_NO_ERROR_I:%.*]], label [[AESDEC128KL_ERROR_I:%.*]] |
| // CHECK32: aesdec128kl_no_error.i: |
| // CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT:%.*]] |
| // CHECK32: aesdec128kl_error.i: |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT]] |
| // CHECK32: _mm_aesdec128kl_u8.exit: |
| // CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP10]] |
| // |
| unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) { /* Codegen test: forwards its arguments unchanged to _mm_aesdec128kl_u8; the FileCheck assertions above pin the -O0 IR for the x86_64 and i386 RUN lines. Keep the body a single return so the alloca/load/store sequence stays as asserted. */ |
| return _mm_aesdec128kl_u8(odata, idata, h); /* Asserted IR: call @llvm.x86.aesdec128kl(idata, h); field 0 truncated to i1 picks the no_error/error branch; field 1 (no_error) or zeroinitializer (error) is stored through odata; field 0 is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test__mm_aesencwide128kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK64-NEXT: br i1 [[TMP23]], label [[AESENCWIDE128KL_NO_ERROR_I:%.*]], label [[AESENCWIDE128KL_ERROR_I:%.*]] |
| // CHECK64: aesencwide128kl_no_error.i: |
| // CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT:%.*]] |
| // CHECK64: aesencwide128kl_error.i: |
| // CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT]] |
| // CHECK64: _mm_aesencwide128kl_u8.exit: |
| // CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP54]] |
| // |
| // CHECK32-LABEL: @test__mm_aesencwide128kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK32-NEXT: br i1 [[TMP23]], label [[AESENCWIDE128KL_NO_ERROR_I:%.*]], label [[AESENCWIDE128KL_ERROR_I:%.*]] |
| // CHECK32: aesencwide128kl_no_error.i: |
| // CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT:%.*]] |
| // CHECK32: aesencwide128kl_error.i: |
| // CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT]] |
| // CHECK32: _mm_aesencwide128kl_u8.exit: |
| // CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP54]] |
| // |
| unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { /* Codegen test: forwards its arguments unchanged to _mm_aesencwide128kl_u8; the FileCheck assertions above pin the -O0 IR for the x86_64 and i386 RUN lines. Keep the body a single return so the alloca/load/store sequence stays as asserted. */ |
| return _mm_aesencwide128kl_u8(odata, idata, h); /* Asserted IR: eight <2 x i64> loads from idata (element offsets 0-7) feed @llvm.x86.aesencwide128kl with h as the first operand; field 0 truncated to i1 picks the branch; no_error stores fields 1-8 to odata offsets 0-7, error stores zeroinitializer to the same eight slots; field 0 is returned. */ |
| } |
| |
| // CHECK64-LABEL: @test__mm_aesdecwide128kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK64-NEXT: br i1 [[TMP23]], label [[AESDECWIDE128KL_NO_ERROR_I:%.*]], label [[AESDECWIDE128KL_ERROR_I:%.*]] |
| // CHECK64: aesdecwide128kl_no_error.i: |
| // CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT:%.*]] |
| // CHECK64: aesdecwide128kl_error.i: |
| // CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT]] |
| // CHECK64: _mm_aesdecwide128kl_u8.exit: |
| // CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP54]] |
| // |
| // CHECK32-LABEL: @test__mm_aesdecwide128kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK32-NEXT: br i1 [[TMP23]], label [[AESDECWIDE128KL_NO_ERROR_I:%.*]], label [[AESDECWIDE128KL_ERROR_I:%.*]] |
| // CHECK32: aesdecwide128kl_no_error.i: |
| // CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT:%.*]] |
| // CHECK32: aesdecwide128kl_error.i: |
| // CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT]] |
| // CHECK32: _mm_aesdecwide128kl_u8.exit: |
| // CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP54]] |
| // |
| unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { /* Codegen test wrapper: passes odata, idata and h unchanged to _mm_aesdecwide128kl_u8. The assertions above expect a call to @llvm.x86.aesdecwide128kl on eight <2 x i64> values loaded from idata, with the eight results (or zeroinitializer on the error branch) stored to odata. */ |
| return _mm_aesdecwide128kl_u8(odata, idata, h); /* Returns the intrinsic's unsigned char result as-is. Keep this a single direct call: the assertions above are autogenerated and match the -O0 IR instruction by instruction, so regenerate them with utils/update_cc_test_checks.py after any change here. */ |
| } |
| |
| // CHECK64-LABEL: @test__mm_aesencwide256kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK64-NEXT: br i1 [[TMP23]], label [[AESENCWIDE256KL_NO_ERROR_I:%.*]], label [[AESENCWIDE256KL_ERROR_I:%.*]] |
| // CHECK64: aesencwide256kl_no_error.i: |
| // CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT:%.*]] |
| // CHECK64: aesencwide256kl_error.i: |
| // CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT]] |
| // CHECK64: _mm_aesencwide256kl_u8.exit: |
| // CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP54]] |
| // |
| // CHECK32-LABEL: @test__mm_aesencwide256kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK32-NEXT: br i1 [[TMP23]], label [[AESENCWIDE256KL_NO_ERROR_I:%.*]], label [[AESENCWIDE256KL_ERROR_I:%.*]] |
| // CHECK32: aesencwide256kl_no_error.i: |
| // CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT:%.*]] |
| // CHECK32: aesencwide256kl_error.i: |
| // CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT]] |
| // CHECK32: _mm_aesencwide256kl_u8.exit: |
| // CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP54]] |
| // Test wrapper: passes odata, idata and h straight to _mm_aesencwide256kl_u8 and returns its unsigned char result. The -O0 IR asserted above is tied to this exact body; rerun utils/update_cc_test_checks.py after any change. |
| unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { |
| return _mm_aesencwide256kl_u8(odata, idata, h); |
| } |
| |
| // CHECK64-LABEL: @test__mm_aesdecwide256kl_u8( |
| // CHECK64-NEXT: entry: |
| // CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 |
| // CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 |
| // CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 |
| // CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK64-NEXT: br i1 [[TMP23]], label [[AESDECWIDE256KL_NO_ERROR_I:%.*]], label [[AESDECWIDE256KL_ERROR_I:%.*]] |
| // CHECK64: aesdecwide256kl_no_error.i: |
| // CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT:%.*]] |
| // CHECK64: aesdecwide256kl_error.i: |
| // CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK64-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT]] |
| // CHECK64: _mm_aesdecwide256kl_u8.exit: |
| // CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK64-NEXT: ret i8 [[TMP54]] |
| // |
| // CHECK32-LABEL: @test__mm_aesdecwide256kl_u8( |
| // CHECK32-NEXT: entry: |
| // CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 |
| // CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 |
| // CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 |
| // CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 |
| // CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 |
| // CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 |
| // CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 |
| // CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 |
| // CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 |
| // CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 |
| // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 |
| // CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 |
| // CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 |
| // CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 |
| // CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 |
| // CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 |
| // CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 |
| // CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 |
| // CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] |
| // CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 |
| // CHECK32-NEXT: br i1 [[TMP23]], label [[AESDECWIDE256KL_NO_ERROR_I:%.*]], label [[AESDECWIDE256KL_ERROR_I:%.*]] |
| // CHECK32: aesdecwide256kl_no_error.i: |
| // CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 |
| // CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 |
| // CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 |
| // CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 |
| // CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 |
| // CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 |
| // CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT:%.*]] |
| // CHECK32: aesdecwide256kl_error.i: |
| // CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 |
| // CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 |
| // CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 |
| // CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 |
| // CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 |
| // CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 |
| // CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 |
| // CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 |
| // CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 |
| // CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 |
| // CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 |
| // CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 |
| // CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 |
| // CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 |
| // CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 |
| // CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 |
| // CHECK32-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT]] |
| // CHECK32: _mm_aesdecwide256kl_u8.exit: |
| // CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 |
| // CHECK32-NEXT: ret i8 [[TMP54]] |
| // Test wrapper: passes odata, idata and h straight to _mm_aesdecwide256kl_u8 and returns its unsigned char result. The assertions above expect a call to @llvm.x86.aesdecwide256kl whose eight result vectors are stored through odata on the no-error branch and replaced with zeroinitializer on the error branch. Rerun utils/update_cc_test_checks.py after any change. |
| unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { |
| return _mm_aesdecwide256kl_u8(odata, idata, h); |
| } |