| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s |
| ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE |
| |
| |
| target triple = "aarch64-unknown-linux-gnu" |
| |
| ; |
| ; Masked Store |
| ; |
| |
| define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v4i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.h, vl4 |
| ; CHECK-NEXT: lsl z0.h, z0.h, #15 |
| ; CHECK-NEXT: asr z0.h, z0.h, #15 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, #0 // =0x0 |
| ; CHECK-NEXT: st1b { z0.h }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; masked_store_v4i8: masked store of a zero <4 x i8> to %dst under %mask. Above (llc with +sve): the mask in 16-bit lanes becomes predicate p0 via lsl/asr/cmpne and one predicated st1b writes the zero vector. Below (llc without +sve): the mask is spilled, its lane bits are packed into w8, and each lane gets a tbz/tbnz-guarded strb of wzr. |
| ; NONEON-NOSVE-LABEL: masked_store_v4i8: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] |
| ; NONEON-NOSVE-NEXT: ldrh w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x4 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x8 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB0_5 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_6 |
| ; NONEON-NOSVE-NEXT: .LBB0_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7 |
| ; NONEON-NOSVE-NEXT: .LBB0_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8 |
| ; NONEON-NOSVE-NEXT: .LBB0_4: // %else6 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB0_5: // %cond.store |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_2 |
| ; NONEON-NOSVE-NEXT: .LBB0_6: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB0_3 |
| ; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_4 |
| ; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v8i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.b, vl8 |
| ; CHECK-NEXT: lsl z0.b, z0.b, #7 |
| ; CHECK-NEXT: asr z0.b, z0.b, #7 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, #0 // =0x0 |
| ; CHECK-NEXT: st1b { z0.b }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; masked_store_v8i8: masked store of a zero <8 x i8> to %dst under %mask. Above (llc with +sve): the byte-lane mask becomes predicate p0 via lsl/asr/cmpne and one predicated st1b writes the zero vector. Below (llc without +sve): the eight mask bytes are spilled and reloaded, their low bits are packed into w8 (masked to 0xff), and each lane gets a tbz/tbnz-guarded strb of wzr. |
| ; NONEON-NOSVE-LABEL: masked_store_v8i8: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] |
| ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] |
| ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] |
| ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldrb w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x4 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x8 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w14, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w10, w8 |
| ; NONEON-NOSVE-NEXT: orr w10, w11, w12 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w10 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x80 |
| ; NONEON-NOSVE-NEXT: add w9, w8, w9 |
| ; NONEON-NOSVE-NEXT: and w8, w9, #0xff |
| ; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB1_9 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_10 |
| ; NONEON-NOSVE-NEXT: .LBB1_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11 |
| ; NONEON-NOSVE-NEXT: .LBB1_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12 |
| ; NONEON-NOSVE-NEXT: .LBB1_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13 |
| ; NONEON-NOSVE-NEXT: .LBB1_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14 |
| ; NONEON-NOSVE-NEXT: .LBB1_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15 |
| ; NONEON-NOSVE-NEXT: .LBB1_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16 |
| ; NONEON-NOSVE-NEXT: .LBB1_8: // %else14 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB1_9: // %cond.store |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2 |
| ; NONEON-NOSVE-NEXT: .LBB1_10: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB1_3 |
| ; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_4 |
| ; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_5 |
| ; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_6 |
| ; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_7 |
| ; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_8 |
| ; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-NEXT: ptrue p0.b, vl16 |
| ; CHECK-NEXT: lsl z0.b, z0.b, #7 |
| ; CHECK-NEXT: asr z0.b, z0.b, #7 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, #0 // =0x0 |
| ; CHECK-NEXT: st1b { z0.b }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; masked_store_v16i8: masked store of a zero <16 x i8> to %dst under %mask. Above (llc with +sve): mask -> predicate p0 via lsl/asr/cmpne, then one predicated st1b. Below (llc without +sve): mask bits are packed into w8 and each of the 16 lanes gets a tbz/tbnz-guarded strb of wzr. NOTE(review): only mask bytes [sp]..[sp, #7] are loaded below; bits 7 and up of w8 all come from byte 7 (sbfx + and #0xffffff80), so lanes 8-15 follow lane 7 - confirm this is the intended expectation. |
| ; NONEON-NOSVE-LABEL: masked_store_v16i8: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] |
| ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] |
| ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] |
| ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldrb w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x4 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x8 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w14, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w10, w8 |
| ; NONEON-NOSVE-NEXT: orr w10, w11, w12 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w10 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0xffffff80 |
| ; NONEON-NOSVE-NEXT: add w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18 |
| ; NONEON-NOSVE-NEXT: .LBB2_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19 |
| ; NONEON-NOSVE-NEXT: .LBB2_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20 |
| ; NONEON-NOSVE-NEXT: .LBB2_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21 |
| ; NONEON-NOSVE-NEXT: .LBB2_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22 |
| ; NONEON-NOSVE-NEXT: .LBB2_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23 |
| ; NONEON-NOSVE-NEXT: .LBB2_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24 |
| ; NONEON-NOSVE-NEXT: .LBB2_8: // %else14 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25 |
| ; NONEON-NOSVE-NEXT: .LBB2_9: // %else16 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26 |
| ; NONEON-NOSVE-NEXT: .LBB2_10: // %else18 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27 |
| ; NONEON-NOSVE-NEXT: .LBB2_11: // %else20 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28 |
| ; NONEON-NOSVE-NEXT: .LBB2_12: // %else22 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29 |
| ; NONEON-NOSVE-NEXT: .LBB2_13: // %else24 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30 |
| ; NONEON-NOSVE-NEXT: .LBB2_14: // %else26 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31 |
| ; NONEON-NOSVE-NEXT: .LBB2_15: // %else28 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32 |
| ; NONEON-NOSVE-NEXT: .LBB2_16: // %else30 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.store |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2 |
| ; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3 |
| ; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4 |
| ; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5 |
| ; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6 |
| ; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7 |
| ; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8 |
| ; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] |
| ; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9 |
| ; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.store15 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10 |
| ; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.store17 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] |
| ; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11 |
| ; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.store19 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] |
| ; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12 |
| ; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.store21 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #11] |
| ; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13 |
| ; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.store23 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] |
| ; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14 |
| ; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.store25 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] |
| ; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15 |
| ; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.store27 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] |
| ; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16 |
| ; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.store29 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v32i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: ldr w8, [sp, #96] |
| ; CHECK-NEXT: ldr w9, [sp, #88] |
| ; CHECK-NEXT: ptrue p0.b, vl16 |
| ; CHECK-NEXT: ldr w10, [sp, #120] |
| ; CHECK-NEXT: strb w7, [sp, #6] |
| ; CHECK-NEXT: strb w8, [sp, #15] |
| ; CHECK-NEXT: ldr w8, [sp, #80] |
| ; CHECK-NEXT: strb w9, [sp, #14] |
| ; CHECK-NEXT: ldr w9, [sp, #72] |
| ; CHECK-NEXT: strb w8, [sp, #13] |
| ; CHECK-NEXT: ldr w8, [sp, #64] |
| ; CHECK-NEXT: strb w9, [sp, #12] |
| ; CHECK-NEXT: ldr w9, [sp, #56] |
| ; CHECK-NEXT: strb w8, [sp, #11] |
| ; CHECK-NEXT: ldr w8, [sp, #48] |
| ; CHECK-NEXT: strb w9, [sp, #10] |
| ; CHECK-NEXT: ldr w9, [sp, #40] |
| ; CHECK-NEXT: strb w8, [sp, #9] |
| ; CHECK-NEXT: ldr w8, [sp, #32] |
| ; CHECK-NEXT: strb w9, [sp, #8] |
| ; CHECK-NEXT: ldr w9, [sp, #216] |
| ; CHECK-NEXT: strb w8, [sp, #7] |
| ; CHECK-NEXT: ldr w8, [sp, #224] |
| ; CHECK-NEXT: strb w9, [sp, #30] |
| ; CHECK-NEXT: ldr w9, [sp, #200] |
| ; CHECK-NEXT: strb w8, [sp, #31] |
| ; CHECK-NEXT: ldr w8, [sp, #208] |
| ; CHECK-NEXT: strb w9, [sp, #28] |
| ; CHECK-NEXT: ldr w9, [sp, #184] |
| ; CHECK-NEXT: strb w8, [sp, #29] |
| ; CHECK-NEXT: ldr w8, [sp, #192] |
| ; CHECK-NEXT: strb w9, [sp, #26] |
| ; CHECK-NEXT: ldr w9, [sp, #168] |
| ; CHECK-NEXT: strb w8, [sp, #27] |
| ; CHECK-NEXT: ldr w8, [sp, #176] |
| ; CHECK-NEXT: strb w9, [sp, #24] |
| ; CHECK-NEXT: ldr w9, [sp, #152] |
| ; CHECK-NEXT: strb w8, [sp, #25] |
| ; CHECK-NEXT: ldr w8, [sp, #160] |
| ; CHECK-NEXT: strb w9, [sp, #22] |
| ; CHECK-NEXT: ldr w9, [sp, #136] |
| ; CHECK-NEXT: strb w8, [sp, #23] |
| ; CHECK-NEXT: ldr w8, [sp, #144] |
| ; CHECK-NEXT: strb w9, [sp, #20] |
| ; CHECK-NEXT: ldr w9, [sp, #112] |
| ; CHECK-NEXT: strb w8, [sp, #21] |
| ; CHECK-NEXT: ldr w8, [sp, #128] |
| ; CHECK-NEXT: strb w6, [sp, #5] |
| ; CHECK-NEXT: strb w8, [sp, #19] |
| ; CHECK-NEXT: ldr w8, [sp, #104] |
| ; CHECK-NEXT: strb w5, [sp, #4] |
| ; CHECK-NEXT: strb w4, [sp, #3] |
| ; CHECK-NEXT: strb w3, [sp, #2] |
| ; CHECK-NEXT: strb w2, [sp, #1] |
| ; CHECK-NEXT: strb w1, [sp] |
| ; CHECK-NEXT: strb w10, [sp, #18] |
| ; CHECK-NEXT: ldrb w9, [sp, #17] |
| ; CHECK-NEXT: strb w8, [sp, #16] |
| ; CHECK-NEXT: mov w8, #16 // =0x10 |
| ; CHECK-NEXT: ldp q1, q0, [sp] |
| ; CHECK-NEXT: lsl z0.b, z0.b, #7 |
| ; CHECK-NEXT: lsl z1.b, z1.b, #7 |
| ; CHECK-NEXT: asr z0.b, z0.b, #7 |
| ; CHECK-NEXT: asr z1.b, z1.b, #7 |
| ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 |
| ; CHECK-NEXT: mov z0.b, #0 // =0x0 |
| ; CHECK-NEXT: st1b { z0.b }, p1, [x0, x8] |
| ; CHECK-NEXT: st1b { z0.b }, p0, [x0] |
| ; CHECK-NEXT: add sp, sp, #32 |
| ; CHECK-NEXT: ret |
| ; masked_store_v32i8: masked store of a zero <32 x i8> to %dst under %mask. Above (llc with +sve): the i1 mask arguments (w1-w7 plus incoming stack slots) are written byte-wise to a 32-byte stack buffer, reloaded as q1/q0, turned into two predicates, and stored with two st1b (at [x0] and at [x0, x8] with x8 = 16). Below (llc without +sve): a 32-bit mask is assembled in w8 and each lane gets a tbz/tbnz-guarded strb of wzr. NOTE(review): below, only w1-w7, [sp] and [sp, #72]..[sp, #128] are read, and bits 7-15 / 23-31 of w8 each come from one sign-extended bit (and #0xff80, bfi) - confirm this is the intended expectation. |
| ; NONEON-NOSVE-LABEL: masked_store_v32i8: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] |
| ; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] |
| ; NONEON-NOSVE-NEXT: sbfx w15, w7, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] |
| ; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] |
| ; NONEON-NOSVE-NEXT: ldr w11, [sp, #72] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldr w13, [sp, #120] |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldr w14, [sp, #128] |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x4 |
| ; NONEON-NOSVE-NEXT: ldr w16, [sp] |
| ; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldr w11, [sp, #112] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x8 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: and w10, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: sbfx w12, w4, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w5, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x8 |
| ; NONEON-NOSVE-NEXT: and w9, w11, #0x20 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w11, w3, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w2, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w11, #0x4 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: orr w11, w11, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w13, #0x10 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w6, #0, #1 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w1, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w11, w11, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: and w13, w15, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w15, w16, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w10, w11 |
| ; NONEON-NOSVE-NEXT: orr w10, w12, w13 |
| ; NONEON-NOSVE-NEXT: and w11, w14, #0xff80 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: and w10, w15, #0xff80 |
| ; NONEON-NOSVE-NEXT: add w11, w8, w11 |
| ; NONEON-NOSVE-NEXT: add w8, w9, w10 |
| ; NONEON-NOSVE-NEXT: bfi w8, w11, #16, #16 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_33 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_34 |
| ; NONEON-NOSVE-NEXT: .LBB3_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_35 |
| ; NONEON-NOSVE-NEXT: .LBB3_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_36 |
| ; NONEON-NOSVE-NEXT: .LBB3_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_37 |
| ; NONEON-NOSVE-NEXT: .LBB3_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_38 |
| ; NONEON-NOSVE-NEXT: .LBB3_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_39 |
| ; NONEON-NOSVE-NEXT: .LBB3_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_40 |
| ; NONEON-NOSVE-NEXT: .LBB3_8: // %else14 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_41 |
| ; NONEON-NOSVE-NEXT: .LBB3_9: // %else16 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_42 |
| ; NONEON-NOSVE-NEXT: .LBB3_10: // %else18 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_43 |
| ; NONEON-NOSVE-NEXT: .LBB3_11: // %else20 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_44 |
| ; NONEON-NOSVE-NEXT: .LBB3_12: // %else22 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_45 |
| ; NONEON-NOSVE-NEXT: .LBB3_13: // %else24 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_46 |
| ; NONEON-NOSVE-NEXT: .LBB3_14: // %else26 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_47 |
| ; NONEON-NOSVE-NEXT: .LBB3_15: // %else28 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_48 |
| ; NONEON-NOSVE-NEXT: .LBB3_16: // %else30 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_49 |
| ; NONEON-NOSVE-NEXT: .LBB3_17: // %else32 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_50 |
| ; NONEON-NOSVE-NEXT: .LBB3_18: // %else34 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_51 |
| ; NONEON-NOSVE-NEXT: .LBB3_19: // %else36 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_52 |
| ; NONEON-NOSVE-NEXT: .LBB3_20: // %else38 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_53 |
| ; NONEON-NOSVE-NEXT: .LBB3_21: // %else40 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_54 |
| ; NONEON-NOSVE-NEXT: .LBB3_22: // %else42 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_55 |
| ; NONEON-NOSVE-NEXT: .LBB3_23: // %else44 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_56 |
| ; NONEON-NOSVE-NEXT: .LBB3_24: // %else46 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_57 |
| ; NONEON-NOSVE-NEXT: .LBB3_25: // %else48 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_58 |
| ; NONEON-NOSVE-NEXT: .LBB3_26: // %else50 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_59 |
| ; NONEON-NOSVE-NEXT: .LBB3_27: // %else52 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_60 |
| ; NONEON-NOSVE-NEXT: .LBB3_28: // %else54 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_61 |
| ; NONEON-NOSVE-NEXT: .LBB3_29: // %else56 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_62 |
| ; NONEON-NOSVE-NEXT: .LBB3_30: // %else58 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_63 |
| ; NONEON-NOSVE-NEXT: .LBB3_31: // %else60 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_64 |
| ; NONEON-NOSVE-NEXT: .LBB3_32: // %else62 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB3_33: // %cond.store |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2 |
| ; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3 |
| ; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4 |
| ; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5 |
| ; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6 |
| ; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7 |
| ; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8 |
| ; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] |
| ; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9 |
| ; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.store15 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10 |
| ; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.store17 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] |
| ; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11 |
| ; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.store19 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] |
| ; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12 |
| ; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.store21 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #11] |
| ; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13 |
| ; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.store23 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] |
| ; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14 |
| ; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.store25 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] |
| ; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15 |
| ; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.store27 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] |
| ; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16 |
| ; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.store29 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] |
| ; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17 |
| ; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.store31 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #16] |
| ; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18 |
| ; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.store33 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #17] |
| ; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19 |
| ; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.store35 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #18] |
| ; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20 |
| ; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.store37 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #19] |
| ; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21 |
| ; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.store39 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #20] |
| ; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22 |
| ; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.store41 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #21] |
| ; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23 |
| ; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.store43 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #22] |
| ; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_24 |
| ; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.store45 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #23] |
| ; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25 |
| ; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.store47 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #24] |
| ; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26 |
| ; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.store49 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #25] |
| ; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27 |
| ; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.store51 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #26] |
| ; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28 |
| ; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.store53 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #27] |
| ; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29 |
| ; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.store55 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #28] |
| ; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30 |
| ; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.store57 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #29] |
| ; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31 |
| ; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.store59 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #30] |
| ; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_32 |
| ; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.store61 |
| ; NONEON-NOSVE-NEXT: strb wzr, [x0, #31] |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v2f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: mov z1.s, z0.s[1] |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: str wzr, [sp, #12] |
| ; CHECK-NEXT: ptrue p0.h, vl4 |
| ; CHECK-NEXT: strh w8, [sp, #8] |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: strh w8, [sp, #10] |
| ; CHECK-NEXT: ldr d0, [sp, #8] |
| ; CHECK-NEXT: lsl z0.h, z0.h, #15 |
| ; CHECK-NEXT: asr z0.h, z0.h, #15 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, #0 // =0x0 |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x0] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; masked_store_v2f16: masked store of a zero <2 x half> to %dst under %mask. Above (llc with +sve): the two mask elements are repacked through the stack into 16-bit lanes with the upper two lanes zeroed (str wzr), converted to predicate p0 via lsl/asr/cmpne, and stored with one predicated st1h. Below (llc without +sve): the two mask bits are packed into w8 and each lane gets a tbz/tbnz-guarded str h0 of zero. |
| ; NONEON-NOSVE-LABEL: masked_store_v2f16: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4 |
| ; NONEON-NOSVE-NEXT: .LBB4_2: // %else2 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.store |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_2 |
| ; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #2] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v4f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.h, vl4 |
| ; CHECK-NEXT: lsl z0.h, z0.h, #15 |
| ; CHECK-NEXT: asr z0.h, z0.h, #15 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, #0 // =0x0 |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; masked_store_v4f16: masked store of a zero <4 x half> to %dst under %mask. Above (llc with +sve): the mask in 16-bit lanes becomes predicate p0 via lsl/asr/cmpne and one predicated st1h writes the zero vector. Below (llc without +sve): the mask is spilled, its lane bits are packed into w8, and each lane gets a tbz/tbnz-guarded str h0 of zero (fmov s0, wzr) at a 2-byte stride. |
| ; NONEON-NOSVE-LABEL: masked_store_v4f16: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] |
| ; NONEON-NOSVE-NEXT: ldrh w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x4 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x8 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6 |
| ; NONEON-NOSVE-NEXT: .LBB5_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7 |
| ; NONEON-NOSVE-NEXT: .LBB5_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8 |
| ; NONEON-NOSVE-NEXT: .LBB5_4: // %else6 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.store |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2 |
| ; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3 |
| ; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4 |
| ; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #6] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) |
| ret void |
| } |
| |
| define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v8f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.h, vl8 |
| ; CHECK-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-NEXT: lsl z0.h, z0.h, #15 |
| ; CHECK-NEXT: asr z0.h, z0.h, #15 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, #0 // =0x0 |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v8f16: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] |
| ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] |
| ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] |
| ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldrb w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x4 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x8 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w14, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w10, w8 |
| ; NONEON-NOSVE-NEXT: orr w10, w11, w12 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w10 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x80 |
| ; NONEON-NOSVE-NEXT: add w9, w8, w9 |
| ; NONEON-NOSVE-NEXT: and w8, w9, #0xff |
| ; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB6_9 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10 |
| ; NONEON-NOSVE-NEXT: .LBB6_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11 |
| ; NONEON-NOSVE-NEXT: .LBB6_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12 |
| ; NONEON-NOSVE-NEXT: .LBB6_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13 |
| ; NONEON-NOSVE-NEXT: .LBB6_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14 |
| ; NONEON-NOSVE-NEXT: .LBB6_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15 |
| ; NONEON-NOSVE-NEXT: .LBB6_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16 |
| ; NONEON-NOSVE-NEXT: .LBB6_8: // %else14 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.store |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2 |
| ; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3 |
| ; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4 |
| ; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #6] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5 |
| ; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6 |
| ; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #10] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_7 |
| ; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #12] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8 |
| ; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #14] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) |
| ret void |
| } |
| |
| ; Masked store of a zeroinitializer <16 x half> to %dst under a <16 x i1> mask. |
| ; CHECK (+sve run): the mask is split into two halves with uunpklo/ext, each |
| ; half becomes a predicate under ptrue p0.h, vl8, and two predicated st1h |
| ; stores are expected (upper half at x0 + 8 elements via x8, lsl #1). |
| ; NONEON-NOSVE (run without +sve): the mask is spilled to the stack, packed |
| ; into w8, and a tbnz/tbz branch chain performs one scalar str h0 per lane. |
| ; NOTE(review): the NONEON-NOSVE sequence only loads mask bytes [sp]..[sp, #7] |
| ; yet branches on bits 8-15 of w8; confirm this reflects the intended lowering |
| ; before relying on these checks. |
| define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v16f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-NEXT: uunpklo z1.h, z0.b |
| ; CHECK-NEXT: ptrue p0.h, vl8 |
| ; CHECK-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 |
| ; CHECK-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-NEXT: lsl z1.h, z1.h, #15 |
| ; CHECK-NEXT: asr z1.h, z1.h, #15 |
| ; CHECK-NEXT: lsl z0.h, z0.h, #15 |
| ; CHECK-NEXT: asr z0.h, z0.h, #15 |
| ; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 |
| ; CHECK-NEXT: mov z0.h, #0 // =0x0 |
| ; CHECK-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1] |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v16f16: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] |
| ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] |
| ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] |
| ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldrb w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x4 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x8 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w14, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w10, w8 |
| ; NONEON-NOSVE-NEXT: orr w10, w11, w12 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w10 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0xffffff80 |
| ; NONEON-NOSVE-NEXT: add w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18 |
| ; NONEON-NOSVE-NEXT: .LBB7_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19 |
| ; NONEON-NOSVE-NEXT: .LBB7_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20 |
| ; NONEON-NOSVE-NEXT: .LBB7_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21 |
| ; NONEON-NOSVE-NEXT: .LBB7_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22 |
| ; NONEON-NOSVE-NEXT: .LBB7_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23 |
| ; NONEON-NOSVE-NEXT: .LBB7_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24 |
| ; NONEON-NOSVE-NEXT: .LBB7_8: // %else14 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25 |
| ; NONEON-NOSVE-NEXT: .LBB7_9: // %else16 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26 |
| ; NONEON-NOSVE-NEXT: .LBB7_10: // %else18 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27 |
| ; NONEON-NOSVE-NEXT: .LBB7_11: // %else20 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28 |
| ; NONEON-NOSVE-NEXT: .LBB7_12: // %else22 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29 |
| ; NONEON-NOSVE-NEXT: .LBB7_13: // %else24 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30 |
| ; NONEON-NOSVE-NEXT: .LBB7_14: // %else26 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31 |
| ; NONEON-NOSVE-NEXT: .LBB7_15: // %else28 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32 |
| ; NONEON-NOSVE-NEXT: .LBB7_16: // %else30 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.store |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2 |
| ; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #2] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3 |
| ; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4 |
| ; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #6] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5 |
| ; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6 |
| ; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #10] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7 |
| ; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #12] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8 |
| ; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #14] |
| ; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9 |
| ; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.store15 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #16] |
| ; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10 |
| ; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.store17 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #18] |
| ; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11 |
| ; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.store19 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #20] |
| ; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12 |
| ; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.store21 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #22] |
| ; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13 |
| ; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.store23 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #24] |
| ; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14 |
| ; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.store25 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #26] |
| ; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15 |
| ; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.store27 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #28] |
| ; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16 |
| ; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.store29 |
| ; NONEON-NOSVE-NEXT: fmov s0, wzr |
| ; NONEON-NOSVE-NEXT: str h0, [x0, #30] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) |
| ret void |
| } |
| |
| ; Masked store of a zeroinitializer <4 x float> to %dst under a <4 x i1> mask. |
| ; CHECK (+sve run): the mask is widened to .s lanes, turned into a predicate |
| ; under ptrue p0.s, vl4, and a single predicated st1w is expected. |
| ; NONEON-NOSVE (run without +sve): the mask halfwords are reloaded from the |
| ; stack, packed into w8, and each set bit triggers a scalar str wzr at a |
| ; 4-byte stride from x0. |
| define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v4f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.s, vl4 |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: lsl z0.s, z0.s, #31 |
| ; CHECK-NEXT: asr z0.s, z0.s, #31 |
| ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 |
| ; CHECK-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v4f32: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] |
| ; NONEON-NOSVE-NEXT: ldrh w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x4 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x8 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB8_5 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_6 |
| ; NONEON-NOSVE-NEXT: .LBB8_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB8_7 |
| ; NONEON-NOSVE-NEXT: .LBB8_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB8_8 |
| ; NONEON-NOSVE-NEXT: .LBB8_4: // %else6 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB8_5: // %cond.store |
| ; NONEON-NOSVE-NEXT: str wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2 |
| ; NONEON-NOSVE-NEXT: .LBB8_6: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB8_3 |
| ; NONEON-NOSVE-NEXT: .LBB8_7: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB8_4 |
| ; NONEON-NOSVE-NEXT: .LBB8_8: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #12] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Masked store of a zeroinitializer <8 x float> to %dst under an <8 x i1> mask. |
| ; CHECK (+sve run): the mask bytes are extracted lane by lane |
| ; (mov zN.b, z0.b[i]) and written as halfwords into a 16-byte stack slot, then |
| ; reloaded as two 64-bit halves. Each half is widened to .s lanes and converted |
| ; to a predicate under ptrue p0.s, vl4, giving two predicated st1w stores (the |
| ; upper half at x0 + 4 elements via x8, lsl #2). |
| ; NONEON-NOSVE (run without +sve): the mask bytes are reloaded from the stack, |
| ; packed into an 8-bit value in w8, and each set bit triggers a scalar |
| ; str wzr at a 4-byte stride from x0. |
| define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v8f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: mov z1.b, z0.b[7] |
| ; CHECK-NEXT: mov z2.b, z0.b[6] |
| ; CHECK-NEXT: mov z3.b, z0.b[5] |
| ; CHECK-NEXT: mov z4.b, z0.b[4] |
| ; CHECK-NEXT: ptrue p0.s, vl4 |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[3] |
| ; CHECK-NEXT: strh w8, [sp, #14] |
| ; CHECK-NEXT: fmov w8, s3 |
| ; CHECK-NEXT: mov z3.b, z0.b[2] |
| ; CHECK-NEXT: strh w9, [sp, #12] |
| ; CHECK-NEXT: fmov w9, s4 |
| ; CHECK-NEXT: mov z4.b, z0.b[1] |
| ; CHECK-NEXT: strh w8, [sp, #10] |
| ; CHECK-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-NEXT: strh w9, [sp, #8] |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: ldr d1, [sp, #8] |
| ; CHECK-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-NEXT: lsl z1.s, z1.s, #31 |
| ; CHECK-NEXT: asr z1.s, z1.s, #31 |
| ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 |
| ; CHECK-NEXT: mov z1.s, #0 // =0x0 |
| ; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] |
| ; CHECK-NEXT: fmov w8, s2 |
| ; CHECK-NEXT: strh w9, [sp] |
| ; CHECK-NEXT: strh w8, [sp, #6] |
| ; CHECK-NEXT: fmov w8, s3 |
| ; CHECK-NEXT: strh w8, [sp, #4] |
| ; CHECK-NEXT: fmov w8, s4 |
| ; CHECK-NEXT: strh w8, [sp, #2] |
| ; CHECK-NEXT: ldr d0, [sp] |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: lsl z0.s, z0.s, #31 |
| ; CHECK-NEXT: asr z0.s, z0.s, #31 |
| ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 |
| ; CHECK-NEXT: st1w { z1.s }, p0, [x0] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v8f32: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] |
| ; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] |
| ; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] |
| ; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: ldrb w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x4 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x8 |
| ; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x2 |
| ; NONEON-NOSVE-NEXT: and w12, w12, #0x10 |
| ; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w11, w13, #0x20 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w12 |
| ; NONEON-NOSVE-NEXT: and w12, w14, #0x40 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w8, w10, w8 |
| ; NONEON-NOSVE-NEXT: orr w10, w11, w12 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w10 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x80 |
| ; NONEON-NOSVE-NEXT: add w9, w8, w9 |
| ; NONEON-NOSVE-NEXT: and w8, w9, #0xff |
| ; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB9_9 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_10 |
| ; NONEON-NOSVE-NEXT: .LBB9_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_11 |
| ; NONEON-NOSVE-NEXT: .LBB9_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_12 |
| ; NONEON-NOSVE-NEXT: .LBB9_4: // %else6 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB9_13 |
| ; NONEON-NOSVE-NEXT: .LBB9_5: // %else8 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB9_14 |
| ; NONEON-NOSVE-NEXT: .LBB9_6: // %else10 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB9_15 |
| ; NONEON-NOSVE-NEXT: .LBB9_7: // %else12 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB9_16 |
| ; NONEON-NOSVE-NEXT: .LBB9_8: // %else14 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB9_9: // %cond.store |
| ; NONEON-NOSVE-NEXT: str wzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2 |
| ; NONEON-NOSVE-NEXT: .LBB9_10: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #4] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3 |
| ; NONEON-NOSVE-NEXT: .LBB9_11: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4 |
| ; NONEON-NOSVE-NEXT: .LBB9_12: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #12] |
| ; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB9_5 |
| ; NONEON-NOSVE-NEXT: .LBB9_13: // %cond.store7 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #16] |
| ; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB9_6 |
| ; NONEON-NOSVE-NEXT: .LBB9_14: // %cond.store9 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #20] |
| ; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB9_7 |
| ; NONEON-NOSVE-NEXT: .LBB9_15: // %cond.store11 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #24] |
| ; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB9_8 |
| ; NONEON-NOSVE-NEXT: .LBB9_16: // %cond.store13 |
| ; NONEON-NOSVE-NEXT: str wzr, [x0, #28] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) |
| ret void |
| } |
| |
| ; Masked store of a zeroinitializer <2 x double> to %dst under a <2 x i1> mask. |
| ; CHECK (+sve run): the mask is widened to .d lanes, turned into a predicate |
| ; under ptrue p0.d, vl2, and a single predicated st1d is expected. |
| ; NONEON-NOSVE (run without +sve): the two mask lanes are reloaded from the |
| ; stack, packed into bits 0-1 of w8, and each set bit triggers a scalar |
| ; str xzr at an 8-byte stride from x0. |
| define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v2f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.d, vl2 |
| ; CHECK-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-NEXT: lsl z0.d, z0.d, #63 |
| ; CHECK-NEXT: asr z0.d, z0.d, #63 |
| ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: mov z0.d, #0 // =0x0 |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v2f64: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrb w9, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_3 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_4 |
| ; NONEON-NOSVE-NEXT: .LBB10_2: // %else2 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.store |
| ; NONEON-NOSVE-NEXT: str xzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2 |
| ; NONEON-NOSVE-NEXT: .LBB10_4: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: str xzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) |
| ret void |
| } |
| |
| ; Masked store of a zeroinitializer <4 x double> to %dst under a <4 x i1> mask. |
| ; CHECK (+sve run): the mask is widened to .s lanes, split into two halves |
| ; with ext, and each half is widened to .d lanes and converted to a predicate |
| ; under ptrue p0.d, vl2. Two predicated st1d stores are expected (the upper |
| ; half at x0 + 2 elements via x8, lsl #3). |
| ; NONEON-NOSVE (run without +sve): the mask halfwords are reloaded from the |
| ; stack, packed into bits 0-3 of w8, and each set bit triggers a scalar |
| ; str xzr at an 8-byte stride from x0. |
| define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) { |
| ; CHECK-LABEL: masked_store_v4f64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: ptrue p0.d, vl2 |
| ; CHECK-NEXT: mov x8, #2 // =0x2 |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: uunpklo z1.d, z0.s |
| ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 |
| ; CHECK-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-NEXT: lsl z1.d, z1.d, #63 |
| ; CHECK-NEXT: lsl z0.d, z0.d, #63 |
| ; CHECK-NEXT: asr z1.d, z1.d, #63 |
| ; CHECK-NEXT: asr z0.d, z0.d, #63 |
| ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 |
| ; CHECK-NEXT: mov z0.d, #0 // =0x0 |
| ; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3] |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; NONEON-NOSVE-LABEL: masked_store_v4f64: |
| ; NONEON-NOSVE: // %bb.0: |
| ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! |
| ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 |
| ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] |
| ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] |
| ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] |
| ; NONEON-NOSVE-NEXT: ldrh w11, [sp] |
| ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 |
| ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 |
| ; NONEON-NOSVE-NEXT: and w8, w8, #0x2 |
| ; NONEON-NOSVE-NEXT: and w9, w9, #0x4 |
| ; NONEON-NOSVE-NEXT: and w10, w10, #0x8 |
| ; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 |
| ; NONEON-NOSVE-NEXT: orr w9, w9, w10 |
| ; NONEON-NOSVE-NEXT: orr w8, w8, w9 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_5 |
| ; NONEON-NOSVE-NEXT: // %bb.1: // %else |
| ; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_6 |
| ; NONEON-NOSVE-NEXT: .LBB11_2: // %else2 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB11_7 |
| ; NONEON-NOSVE-NEXT: .LBB11_3: // %else4 |
| ; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB11_8 |
| ; NONEON-NOSVE-NEXT: .LBB11_4: // %else6 |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| ; NONEON-NOSVE-NEXT: .LBB11_5: // %cond.store |
| ; NONEON-NOSVE-NEXT: str xzr, [x0] |
| ; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2 |
| ; NONEON-NOSVE-NEXT: .LBB11_6: // %cond.store1 |
| ; NONEON-NOSVE-NEXT: str xzr, [x0, #8] |
| ; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB11_3 |
| ; NONEON-NOSVE-NEXT: .LBB11_7: // %cond.store3 |
| ; NONEON-NOSVE-NEXT: str xzr, [x0, #16] |
| ; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB11_4 |
| ; NONEON-NOSVE-NEXT: .LBB11_8: // %cond.store5 |
| ; NONEON-NOSVE-NEXT: str xzr, [x0, #24] |
| ; NONEON-NOSVE-NEXT: add sp, sp, #16 |
| ; NONEON-NOSVE-NEXT: ret |
| call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Declarations of the llvm.masked.store overloads called by the tests in this |
| ; file, one per stored vector type. Operand order, as used at the call sites: |
| ; value vector, destination pointer, i32 alignment, <N x i1> lane mask. |
| declare void @llvm.masked.store.v4i8(<4 x i8>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v8i8(<8 x i8>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>) |
| declare void @llvm.masked.store.v2f16(<2 x half>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v4f16(<4 x half>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v8f16(<8 x half>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v16f16(<16 x half>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v8f32(<8 x float>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v2f64(<2 x double>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v4f64(<4 x double>, ptr, i32, <4 x i1>) |