; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
| |
; Anchor the checks to this function so a vpaddq from another
; function's output cannot satisfy the match.
; CHECK-LABEL: test_vpaddq:
; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
| |
; CHECK-LABEL: test_vpaddd:
; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
| |
; CHECK-LABEL: test_vpaddw:
; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
| |
; CHECK-LABEL: test_vpaddb:
; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
| |
; CHECK-LABEL: test_vpsubq:
; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
| |
; CHECK-LABEL: test_vpsubd:
; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
| |
; CHECK-LABEL: test_vpsubw:
; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
| |
; CHECK-LABEL: test_vpsubb:
; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
| |
; CHECK-LABEL: test_vpmulld:
; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}
| |
; CHECK-LABEL: test_vpmullw:
; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
| |
; v4i64 multiply has no single AVX2 instruction; it is lowered to the
; 32x32->64 vpmuludq cross-product sequence. Use CHECK-LABEL so the
; vpaddq/vpmuludq checks cannot match inside another function.
; No trailing colon: the '-' in the symbol name may be quoted by the
; darwin assembler printer.
; CHECK-LABEL: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}
| |
; Multiply by 2 should become x+x.
; CHECK-LABEL: mul_const1:
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}
| |
; Multiply by a power of two should become a shift.
; CHECK-LABEL: mul_const2:
; CHECK: vpsllq $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}
| |
; CHECK-LABEL: mul_const3:
; CHECK: vpsllw $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}
| |
; Multiply by -1 should become negation (0 - x).
; CHECK-LABEL: mul_const4:
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}
| |
; Multiply by 0 should fold to a zero vector.
; CHECK-LABEL: mul_const5:
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}
| |
; Non-uniform constant: no shift/add trick applies, keep the multiply.
; CHECK-LABEL: mul_const6:
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}
| |
; v8i64 is split into two 256-bit halves, each lowered as x+x.
; CHECK-LABEL: mul_const7:
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}
| |
; 128-bit variant: still a shift by 3 for multiply-by-8.
; CHECK-LABEL: mul_const8:
; CHECK: vpsllw $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}
| |
; Only lane 0 is non-zero; still a general multiply.
; CHECK-LABEL: mul_const9:
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}
| |
; Non-power-of-two splat (0x01010101) must stay a vpmulld.
; CHECK-LABEL: mul_const10:
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}
| |
; Splat with the sign bit set (0x80808080) must stay a vpmulld.
; CHECK-LABEL: mul_const11:
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}