| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-127 | FileCheck %s |
| ; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 |
| |
; Function @test: input IR for the slp-vectorizer invocations declared at the
; top of this file. Four scalar i64 parameters feed four parallel lanes of
; add/mul/sdiv/sub/shl arithmetic; the autogenerated assertions below expect
; each group of four scalar operations to become one <4 x i64> operation.
; NOTE(review): the assertions are autogenerated and the vectorizer result may
; depend on the exact instruction order, so regenerate the assertions with the
; update script rather than editing them (or the IR) by hand.
| define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { |
| ; CHECK-LABEL: @test( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 4> |
| ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> <i32 poison, i32 poison, i32 1, i32 5> |
| ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i64> [[TMP11]], [[TMP14]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> |
| ; CHECK-NEXT: br label [[BB:%.*]] |
| ; CHECK: bb: |
| ; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP18:%.*]], [[BB]] ], [ [[TMP16]], [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[TMP18]] = trunc <4 x i64> [[TMP8]] to <4 x i32> |
| ; CHECK-NEXT: br label [[BB]] |
| ; |
| ; AVX2-LABEL: @test( |
| ; AVX2-NEXT: entry: |
| ; AVX2-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0 |
| ; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 |
| ; AVX2-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 |
| ; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 |
| ; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] |
| ; AVX2-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] |
| ; AVX2-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] |
| ; AVX2-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] |
| ; AVX2-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] |
| ; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 poison, i32 4> |
| ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 3> |
| ; AVX2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 poison, i32 5> |
| ; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 3> |
| ; AVX2-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]] |
| ; AVX2-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32> |
| ; AVX2-NEXT: br label [[BB:%.*]] |
| ; AVX2: bb: |
| ; AVX2-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ] |
| ; AVX2-NEXT: [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32> |
| ; AVX2-NEXT: br label [[BB]] |
| ; |
| entry: |
; Lane N (N = 0..3) operates on parameter %pN only: aN = pN + pN.
| %a0 = add i64 %p0, %p0 |
| %a1 = add i64 %p1, %p1 |
| %a2 = add i64 %p2, %p2 |
| %a3 = add i64 %p3, %p3 |
; mN = pN * pN.
| %m0 = mul i64 %p0, %p0 |
| %m1 = mul i64 %p1, %p1 |
| %m2 = mul i64 %p2, %p2 |
| %m3 = mul i64 %p3, %p3 |
; dN = pN sdiv pN.
| %d0 = sdiv i64 %p0, %p0 |
| %d1 = sdiv i64 %p1, %p1 |
| %d2 = sdiv i64 %p2, %p2 |
| %d3 = sdiv i64 %p3, %p3 |
; sN = mN - dN.
| %s0 = sub i64 %m0, %d0 |
| %s1 = sub i64 %m1, %d1 |
| %s2 = sub i64 %m2, %d2 |
| %s3 = sub i64 %m3, %d3 |
; Shift each add result left by the matching sub result. These values are
; defined here but only consumed by the truncs inside %bb.
| %shl1 = shl i64 %a0, %s0 |
| %shl2 = shl i64 %a1, %s1 |
| %shl3 = shl i64 %a2, %s2 |
| %shl4 = shl i64 %a3, %s3 |
; Entry values for the phis in %bb: each one ORs the lane-0 and lane-1 results
; of a single operation (add, mul, sdiv, then mul again) and truncates the
; result to i32. The expected output assembles these mixed-source operands
; with shufflevector before a single vector `or`.
| %o0 = or i64 %a0, %a1 |
| %tt0 = trunc i64 %o0 to i32 |
| %o1 = or i64 %m0, %m1 |
| %tt1 = trunc i64 %o1 to i32 |
| %o2 = or i64 %d0, %d1 |
| %tt2 = trunc i64 %o2 to i32 |
; Same operands as %o1. NOTE(review): presumably deliberate, so that the mul
; vector is selected twice in the expected shuffle masks - confirm before
; deduplicating.
| %o3 = or i64 %m0, %m1 |
| %tt3 = trunc i64 %o3 to i32 |
| br label %bb |
| |
| bb: |
; %bb branches unconditionally back to itself, so the function has no exit.
; Each phi takes its %entry value from the truncated ORs above and its
; back-edge value from the trunc defined further down in this same block.
; The phi results have no users inside this function.
| %phi0 = phi i32 [ %t1, %bb ], [ %tt0, %entry ] |
| %phi1 = phi i32 [ %t2, %bb ], [ %tt1, %entry ] |
| %phi2 = phi i32 [ %t3, %bb ], [ %tt2, %entry ] |
| %phi3 = phi i32 [ %t4, %bb ], [ %tt3, %entry ] |
; Back-edge values: the shl results from %entry truncated to i32.
| %t1 = trunc i64 %shl1 to i32 |
| %t2 = trunc i64 %shl2 to i32 |
| %t3 = trunc i64 %shl3 to i32 |
| %t4 = trunc i64 %shl4 to i32 |
| br label %bb |
| } |