; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -disable-peephole -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s
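; Stack-folding tests for the AVX-VNNI-INT16 dot-product instructions. In each
; test the inline asm clobbers xmm3-xmm31, so the third intrinsic operand has
; to be spilled to the stack across the asm; the assertions verify that the
; reload is folded into the memory operand of the vpdpw* instruction rather
; than performed with a separate load.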

declare <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)

define <4 x i32> @test_int_x86_avx2_vpdpwsud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x72,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwsud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x76,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwsuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x72,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwsuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x76,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwusd_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x71,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwusd_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x75,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwusds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x71,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwusds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x75,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwuud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

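; The unsigned x unsigned forms (vpdpwuud/vpdpwuuds) are commutable in their
; two multiplicand operands, so the *_commuted tests below pass them in
; swapped order and verify that the spilled operand is still folded from
; memory.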
define <4 x i32> @test_int_x86_avx2_vpdpwuud_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuud_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT: #APP
; CHECK-NEXT: nop # encoding: [0x90]
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT: retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)
  ret <8 x i32> %ret
}