| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s |
| ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s |
| ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE |
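; The first two RUN lines cover streaming-compatible code with SVE and streaming
; code with SME, sharing the CHECK prefix; the third RUN line has neither NEON
; nor SVE available and is checked with the NONEON-NOSVE prefix.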
| |
| |
| target triple = "aarch64-unknown-linux-gnu" |
| |
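; A masked gather of <2 x i64>, with the mask formed by comparing the loaded
; values against zero. In the checked output the gather is expanded into
; per-lane conditional loads (the %cond.load blocks) rather than a single
; gather instruction.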
| define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) { |
| ; CHECK-LABEL: masked_gather_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ptrue p0.d, vl2 |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: index z0.d, #1, #1 |
| ; CHECK-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: and z0.d, z1.d, z0.d |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: uaddv d0, p0, z0.d |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: fmov x8, d0 |
| ; CHECK-NEXT: strb w8, [sp, #12] |
| ; CHECK-NEXT: and w8, w8, #0xff |
| ; CHECK-NEXT: tbz w8, #0, .LBB0_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: fmov x9, d1 |
| ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x9] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB0_3 |
| ; CHECK-NEXT: b .LBB0_4 |
| ; CHECK-NEXT: .LBB0_2: |
| ; CHECK-NEXT: adrp x9, .LCPI0_0 |
| ; CHECK-NEXT: ldr q0, [x9, :lo12:.LCPI0_0] |
| ; CHECK-NEXT: tbz w8, #1, .LBB0_4 |
| ; CHECK-NEXT: .LBB0_3: // %cond.load1 |
| ; CHECK-NEXT: mov w8, #1 // =0x1 |
| ; CHECK-NEXT: index z2.d, #0, #1 |
| ; CHECK-NEXT: mov z1.d, z1.d[1] |
| ; CHECK-NEXT: mov z3.d, x8 |
| ; CHECK-NEXT: fmov x8, d1 |
| ; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d |
| ; CHECK-NEXT: ldr x8, [x8] |
| ; CHECK-NEXT: mov z0.d, p0/m, x8 |
| ; CHECK-NEXT: .LBB0_4: // %else2 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_gather_v2i64: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: sub sp, sp, #144 |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 |
| ; NONEON-NOSVE-NEXT: ldr q0, [x0] |
| ; NONEON-NOSVE-NEXT: ldr q1, [x1] |
| ; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2 |
| ; NONEON-NOSVE-NEXT: str q0, [sp, #112] |
| ; NONEON-NOSVE-NEXT: ldp x10, x9, [sp, #112] |
| ; NONEON-NOSVE-NEXT: cmp x9, #0 |
| ; NONEON-NOSVE-NEXT: csel x8, x8, xzr, eq |
| ; NONEON-NOSVE-NEXT: cmp x10, #0 |
| ; NONEON-NOSVE-NEXT: csetm x9, eq |
| ; NONEON-NOSVE-NEXT: sub w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: strb w8, [sp, #140] |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0xff |
| ; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load |
| ; NONEON-NOSVE-NEXT: str q1, [sp, #96] |
| ; NONEON-NOSVE-NEXT: ldr x9, [sp, #96] |
| ; NONEON-NOSVE-NEXT: ldr x9, [x9] |
| ; NONEON-NOSVE-NEXT: str x9, [sp, #80] |
| ; NONEON-NOSVE-NEXT: ldr q0, [sp, #80] |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3 |
| ; NONEON-NOSVE-NEXT: b .LBB0_4 |
| ; NONEON-NOSVE-NEXT: .LBB0_2: |
| ; NONEON-NOSVE-NEXT: adrp x9, .LCPI0_0 |
| ; NONEON-NOSVE-NEXT: ldr q0, [x9, :lo12:.LCPI0_0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4 |
| ; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1 |
| ; NONEON-NOSVE-NEXT: str q1, [sp, #64] |
| ; NONEON-NOSVE-NEXT: ldr x8, [sp, #72] |
| ; NONEON-NOSVE-NEXT: ldr x8, [x8] |
| ; NONEON-NOSVE-NEXT: str q0, [sp] |
| ; NONEON-NOSVE-NEXT: ldr x9, [sp] |
| ; NONEON-NOSVE-NEXT: str x8, [sp, #48] |
| ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] |
| ; NONEON-NOSVE-NEXT: str q0, [sp, #16] |
| ; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] |
| ; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32] |
| ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] |
| ; NONEON-NOSVE-NEXT: .LBB0_4: // %else2 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #144 |
| ; NONEON-NOSVE-NEXT: ret |
| %vals = load <2 x i64>, ptr %a |
| %ptrs = load <2 x ptr>, ptr %b |
| %mask = icmp eq <2 x i64> %vals, zeroinitializer |
| %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison) |
| ret <2 x i64> %res |
| } |
| |
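; The matching masked scatter of <2 x i64>, using the same zero-compare mask.
; The checked output likewise expands the scatter into per-lane conditional
; stores (the %cond.store blocks).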
| define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) { |
| ; CHECK-LABEL: masked_scatter_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ptrue p0.d, vl2 |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: index z1.d, #1, #1 |
| ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: and z1.d, z2.d, z1.d |
| ; CHECK-NEXT: uaddv d1, p0, z1.d |
| ; CHECK-NEXT: fmov x8, d1 |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: strb w8, [sp, #12] |
| ; CHECK-NEXT: and w8, w8, #0xff |
| ; CHECK-NEXT: tbnz w8, #0, .LBB1_3 |
| ; CHECK-NEXT: // %bb.1: // %else |
| ; CHECK-NEXT: tbnz w8, #1, .LBB1_4 |
| ; CHECK-NEXT: .LBB1_2: // %else2 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_3: // %cond.store |
| ; CHECK-NEXT: fmov x9, d0 |
| ; CHECK-NEXT: fmov x10, d1 |
| ; CHECK-NEXT: str x9, [x10] |
| ; CHECK-NEXT: tbz w8, #1, .LBB1_2 |
| ; CHECK-NEXT: .LBB1_4: // %cond.store1 |
| ; CHECK-NEXT: mov z0.d, z0.d[1] |
| ; CHECK-NEXT: mov z1.d, z1.d[1] |
| ; CHECK-NEXT: fmov x8, d0 |
| ; CHECK-NEXT: fmov x9, d1 |
| ; CHECK-NEXT: str x8, [x9] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_scatter_v2i64: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: sub sp, sp, #96 |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 |
| ; NONEON-NOSVE-NEXT: ldr q1, [x0] |
| ; NONEON-NOSVE-NEXT: ldr q0, [x1] |
| ; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2 |
| ; NONEON-NOSVE-NEXT: str q1, [sp, #64] |
| ; NONEON-NOSVE-NEXT: ldp x10, x9, [sp, #64] |
| ; NONEON-NOSVE-NEXT: cmp x9, #0 |
| ; NONEON-NOSVE-NEXT: csel x8, x8, xzr, eq |
| ; NONEON-NOSVE-NEXT: cmp x10, #0 |
| ; NONEON-NOSVE-NEXT: csetm x9, eq |
| ; NONEON-NOSVE-NEXT: sub w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: strb w8, [sp, #92] |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0xff |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB1_3 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_4 |
| ; NONEON-NOSVE-NEXT: .LBB1_2: // %else2 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #96 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.store |
| ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] |
| ; NONEON-NOSVE-NEXT: ldr x9, [sp, #32] |
| ; NONEON-NOSVE-NEXT: ldr x10, [sp, #48] |
| ; NONEON-NOSVE-NEXT: str x9, [x10] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2 |
| ; NONEON-NOSVE-NEXT: .LBB1_4: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: stp q1, q0, [sp] |
| ; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] |
| ; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] |
| ; NONEON-NOSVE-NEXT: str x8, [x9] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #96 |
| ; NONEON-NOSVE-NEXT: ret |
| %vals = load <2 x i64>, ptr %a |
| %ptrs = load <2 x ptr>, ptr %b |
| %mask = icmp eq <2 x i64> %vals, zeroinitializer |
| call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>) |
| declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>) |