; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -O0 -mattr=+sme < %s | FileCheck %s

target triple = "aarch64-linux-gnu"

declare void @llvm.trap() #0

; This test checks that we don't assert/crash due to being unable to reach the
; emergency spill slot: for streaming functions a base pointer (BP, x19 below)
; is set up so that spill slots stay addressable while sp keeps moving for the
; dynamic and scalable allocations.
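;
; A minimal sketch of the pattern being verified, lifted from the CHECK lines
; below (x19 is the base pointer; #16 is one spill-slot offset from it):
;
;   mov x29, sp          ; frame pointer, after the callee-save spills
;   sub sp, sp, #384     ; fixed-size locals
;   addvl sp, sp, #-1    ; scalable (SVE) locals
;   mov x19, sp          ; base pointer anchoring the spill area
;   ...
;   str x14, [x19, #16]  ; spills address via x19 rather than sp/x29, so they
;                        ; remain reachable as sp moves for the allocas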

define void @quux() #1 {
; CHECK-LABEL: quux:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #384
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB0_2
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_2: // %bb
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: // implicit-def: $x8
; CHECK-NEXT: mov w8, w9
; CHECK-NEXT: mov x9, x8
; CHECK-NEXT: incd x9
; CHECK-NEXT: mov w0, w9
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x14, x9, #0x70
; CHECK-NEXT: str x14, [x19, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #24] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #32] // 8-byte Folded Spill
; CHECK-NEXT: addvl x9, x8, #1
; CHECK-NEXT: mov w0, w9
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x10, x9, #0x3f0
; CHECK-NEXT: str x10, [x19, #40] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #48] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #56] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #64] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #72] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #88] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #104] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #112] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #120] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #128] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #136] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #144] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str x9, [x19, #152] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: addvl x9, x8, #2
; CHECK-NEXT: mov w0, w9
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x9, x9, #0x7f0
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: mov x2, sp
; CHECK-NEXT: subs x10, x2, #16
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #160] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x11, x10, x14
; CHECK-NEXT: mov sp, x11
; CHECK-NEXT: mov x10, x11
; CHECK-NEXT: str x10, [x19, #168] // 8-byte Folded Spill
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: subs x10, x0, #16
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #176] // 8-byte Folded Spill
; CHECK-NEXT: mov x17, sp
; CHECK-NEXT: subs x10, x17, #16
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #184] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x14
; CHECK-NEXT: str x10, [x19, #360] // 8-byte Folded Spill
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #192] // 8-byte Folded Spill
; CHECK-NEXT: mov x15, sp
; CHECK-NEXT: subs x10, x15, #16
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #200] // 8-byte Folded Spill
; CHECK-NEXT: mov x13, sp
; CHECK-NEXT: subs x10, x13, #16
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: str x10, [x19, #208] // 8-byte Folded Spill
; CHECK-NEXT: incw x8
; CHECK-NEXT: mov w1, w8
; CHECK-NEXT: // implicit-def: $x8
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: and x12, x8, #0xf0
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x10, x8, x12
; CHECK-NEXT: mov sp, x10
; CHECK-NEXT: mov x8, x10
; CHECK-NEXT: str x8, [x19, #216] // 8-byte Folded Spill
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x8, x8, x12
; CHECK-NEXT: str x8, [x19, #368] // 8-byte Folded Spill
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str x8, [x19, #224] // 8-byte Folded Spill
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str x8, [x19, #232] // 8-byte Folded Spill
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str x8, [x19, #240] // 8-byte Folded Spill
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #336] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #344] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x24, sp
; CHECK-NEXT: subs x8, x24, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x7, sp
; CHECK-NEXT: subs x8, x7, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x27, sp
; CHECK-NEXT: subs x8, x27, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x26, sp
; CHECK-NEXT: subs x8, x26, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x1, sp
; CHECK-NEXT: subs x8, x1, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x8, x9, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x20, sp
; CHECK-NEXT: subs x8, x20, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x16, sp
; CHECK-NEXT: subs x8, x16, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #248] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x5, sp
; CHECK-NEXT: subs x8, x5, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x12, sp
; CHECK-NEXT: subs x8, x12, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x22, sp
; CHECK-NEXT: subs x8, x22, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x25, sp
; CHECK-NEXT: subs x8, x25, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x30, sp
; CHECK-NEXT: subs x8, x30, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #296] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #328] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #264] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #256] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #272] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #312] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #280] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #304] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x6, sp
; CHECK-NEXT: subs x8, x6, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x21, sp
; CHECK-NEXT: subs x8, x21, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x19, #352] // 8-byte Folded Spill
; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x28, sp
; CHECK-NEXT: subs x8, x28, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x4, x8, x14
; CHECK-NEXT: mov sp, x4
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: subs x3, x8, x14
; CHECK-NEXT: mov sp, x3
; CHECK-NEXT: mov x23, sp
; CHECK-NEXT: subs x8, x23, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x18, sp
; CHECK-NEXT: subs x8, x18, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov x14, sp
; CHECK-NEXT: subs x8, x14, #16
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: sturb w8, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #248] // 8-byte Folded Reload
; CHECK-NEXT: sturb w8, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #296] // 8-byte Folded Reload
; CHECK-NEXT: sturb w8, [x30, #-16]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: str x8, [x19, #376] // 8-byte Folded Spill
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x20, #-16]
; CHECK-NEXT: ldur x9, [x27, #-16]
; CHECK-NEXT: add x30, x8, x9, lsl #2
; CHECK-NEXT: ldur x8, [x1, #-16]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: ldur x9, [x16, #-16]
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: ldr x9, [x19, #328] // 8-byte Folded Reload
; CHECK-NEXT: add x30, x30, x8, lsl #2
; CHECK-NEXT: ldr x8, [x19, #296] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x5, #-16]
; CHECK-NEXT: ldur x9, [x26, #-16]
; CHECK-NEXT: add x30, x8, x9, lsl #2
; CHECK-NEXT: ldur x8, [x1, #-16]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: ldur x9, [x12, #-16]
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: ldr x9, [x19, #264] // 8-byte Folded Reload
; CHECK-NEXT: add x30, x30, x8, lsl #2
; CHECK-NEXT: ldr x8, [x19, #328] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x22, #-16]
; CHECK-NEXT: ldur x9, [x27, #-16]
; CHECK-NEXT: add x30, x8, x9, lsl #2
; CHECK-NEXT: ldur x8, [x26, #-16]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: ldur x9, [x25, #-16]
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: ldr x9, [x19, #256] // 8-byte Folded Reload
; CHECK-NEXT: add x30, x30, x8, lsl #2
; CHECK-NEXT: ldr x8, [x19, #264] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #272] // 8-byte Folded Reload
; CHECK-NEXT: mov w30, #32 // =0x20
; CHECK-NEXT: // kill: def $lr killed $w30
; CHECK-NEXT: stur x30, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #312] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x1, #-16]
; CHECK-NEXT: lsl x8, x8, #5
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #280] // 8-byte Folded Reload
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x16, #-16]
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x27, #-16]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: lsr x8, x8, #5
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x20, #-16]
; CHECK-NEXT: str x8, [x19, #288] // 8-byte Folded Spill
; CHECK-NEXT: ldr x8, [x19, #312] // 8-byte Folded Reload
; CHECK-NEXT: ldur x9, [x9, #-16]
; CHECK-NEXT: ldur x8, [x8, #-16]
; CHECK-NEXT: mul x9, x9, x8
; CHECK-NEXT: ldr x8, [x19, #288] // 8-byte Folded Reload
; CHECK-NEXT: add x8, x8, x9, lsl #2
; CHECK-NEXT: ldr x9, [x19, #296] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #304] // 8-byte Folded Reload
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x12, #-16]
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x26, #-16]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: lsr x8, x8, #5
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x5, #-16]
; CHECK-NEXT: str x8, [x19, #320] // 8-byte Folded Spill
; CHECK-NEXT: ldr x8, [x19, #312] // 8-byte Folded Reload
; CHECK-NEXT: ldur x9, [x9, #-16]
; CHECK-NEXT: ldur x8, [x8, #-16]
; CHECK-NEXT: mul x9, x9, x8
; CHECK-NEXT: ldr x8, [x19, #320] // 8-byte Folded Reload
; CHECK-NEXT: add x8, x8, x9, lsl #2
; CHECK-NEXT: ldr x9, [x19, #328] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldr x9, [x19, #352] // 8-byte Folded Reload
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x6, #-16]
; CHECK-NEXT: stur x8, [x6, #-16]
; CHECK-NEXT: stur x8, [x21, #-16]
; CHECK-NEXT: stur x8, [x21, #-16]
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldur x8, [x27, #-16]
; CHECK-NEXT: ldur x9, [x21, #-16]
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: ldr x9, [x19, #336] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x9, #-16]
; CHECK-NEXT: ldr x8, [x19, #344] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #352] // 8-byte Folded Reload
; CHECK-NEXT: ldur x9, [x9, #-16]
; CHECK-NEXT: stur x9, [x8, #-16]
; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x28, #-16]
; CHECK-NEXT: ldur x8, [x26, #-16]
; CHECK-NEXT: ldur x9, [x6, #-16]
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: ldr x9, [x19, #360] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x24, #-16]
; CHECK-NEXT: ldr x8, [x19, #368] // 8-byte Folded Reload
; CHECK-NEXT: stur x30, [x7, #-16]
; CHECK-NEXT: ldr x7, [x19, #376] // 8-byte Folded Reload
; CHECK-NEXT: ldur x24, [x24, #-16]
; CHECK-NEXT: stur x24, [x28, #-16]
; CHECK-NEXT: ldur x24, [x21, #-16]
; CHECK-NEXT: ldur x27, [x27, #-16]
; CHECK-NEXT: whilelt pn8.s, x24, x27, vlx2
; CHECK-NEXT: str pn8, [x4]
; CHECK-NEXT: ldur x24, [x6, #-16]
; CHECK-NEXT: ldur x26, [x26, #-16]
; CHECK-NEXT: whilelt pn8.s, x24, x26, vlx2
; CHECK-NEXT: str pn8, [x3]
; CHECK-NEXT: stur x7, [x23, #-16]
; CHECK-NEXT: ldur x22, [x22, #-16]
; CHECK-NEXT: ldur x24, [x21, #-16]
; CHECK-NEXT: add x22, x22, x24, lsl #2
; CHECK-NEXT: ldur x24, [x6, #-16]
; CHECK-NEXT: ldur x25, [x25, #-16]
; CHECK-NEXT: mul x24, x24, x25
; CHECK-NEXT: add x22, x22, x24, lsl #2
; CHECK-NEXT: stur x22, [x23, #-16]
; CHECK-NEXT: zero {za}
; CHECK-NEXT: stur x7, [x18, #-16]
; CHECK-NEXT: ldur x20, [x20, #-16]
; CHECK-NEXT: ldur x21, [x21, #-16]
; CHECK-NEXT: ldur x22, [x1, #-16]
; CHECK-NEXT: mul x21, x21, x22
; CHECK-NEXT: add x20, x20, x21, lsl #2
; CHECK-NEXT: stur x20, [x18, #-16]
; CHECK-NEXT: stur x7, [x14, #-16]
; CHECK-NEXT: ldur x5, [x5, #-16]
; CHECK-NEXT: ldur x6, [x6, #-16]
; CHECK-NEXT: ldur x7, [x1, #-16]
; CHECK-NEXT: mul x6, x6, x7
; CHECK-NEXT: add x5, x5, x6, lsl #2
; CHECK-NEXT: stur x5, [x14, #-16]
; CHECK-NEXT: ldur x1, [x1, #-16]
; CHECK-NEXT: ldr p1, [x4]
; CHECK-NEXT: ldur x18, [x18, #-16]
; CHECK-NEXT: ldur x16, [x16, #-16]
; CHECK-NEXT: lsr x16, x16, #2
; CHECK-NEXT: ldr p0, [x3]
; CHECK-NEXT: ldur x14, [x14, #-16]
; CHECK-NEXT: ldur x12, [x12, #-16]
; CHECK-NEXT: lsr x12, x12, #2
; CHECK-NEXT: stur x1, [x2, #-16]
; CHECK-NEXT: str p1, [x11]
; CHECK-NEXT: stur x18, [x0, #-16]
; CHECK-NEXT: stur x16, [x17, #-16]
; CHECK-NEXT: str p0, [x9]
; CHECK-NEXT: stur x14, [x15, #-16]
; CHECK-NEXT: stur x12, [x13, #-16]
; CHECK-NEXT: ldr p0, [x11]
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: pext { p3.s, p4.s }, pn8[0]
; CHECK-NEXT: mov p0.b, p3.b
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: mov p1.b, p4.b
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov x11, x10
; CHECK-NEXT: incd x11
; CHECK-NEXT: str p1, [x11]
; CHECK-NEXT: str p0, [x10]
; CHECK-NEXT: ldr p0, [x9]
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: pext { p3.s, p4.s }, pn8[0]
; CHECK-NEXT: mov p0.b, p3.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: mov p1.b, p4.b
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov x9, x8
; CHECK-NEXT: incd x9
; CHECK-NEXT: str p1, [x9]
; CHECK-NEXT: str p0, [x8]
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_3: // %bb178
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x9, [x19, #160] // 8-byte Folded Reload
; CHECK-NEXT: ldr x8, [x19, #56] // 8-byte Folded Reload
; CHECK-NEXT: ldr x10, [x19, #48] // 8-byte Folded Reload
; CHECK-NEXT: ldr x11, [x19, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldr x12, [x19, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldr x13, [x19, #240] // 8-byte Folded Reload
; CHECK-NEXT: ldr x14, [x19, #232] // 8-byte Folded Reload
; CHECK-NEXT: ldr x17, [x19, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldr x18, [x19, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldr x0, [x19, #72] // 8-byte Folded Reload
; CHECK-NEXT: ldr x1, [x19, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldr x2, [x19, #216] // 8-byte Folded Reload
; CHECK-NEXT: ldr x3, [x19, #120] // 8-byte Folded Reload
; CHECK-NEXT: ldr x4, [x19, #112] // 8-byte Folded Reload
; CHECK-NEXT: ldr x5, [x19, #104] // 8-byte Folded Reload
; CHECK-NEXT: ldr x6, [x19, #96] // 8-byte Folded Reload
; CHECK-NEXT: ldr x7, [x19, #224] // 8-byte Folded Reload
; CHECK-NEXT: ldr x20, [x19, #152] // 8-byte Folded Reload
; CHECK-NEXT: ldr x21, [x19, #144] // 8-byte Folded Reload
; CHECK-NEXT: ldr x22, [x19, #136] // 8-byte Folded Reload
; CHECK-NEXT: ldr x23, [x19, #128] // 8-byte Folded Reload
; CHECK-NEXT: ldr x16, [x19, #200] // 8-byte Folded Reload
; CHECK-NEXT: ldr x15, [x19, #208] // 8-byte Folded Reload
; CHECK-NEXT: ldr x24, [x19, #192] // 8-byte Folded Reload
; CHECK-NEXT: ldr x26, [x19, #176] // 8-byte Folded Reload
; CHECK-NEXT: ldr x25, [x19, #184] // 8-byte Folded Reload
; CHECK-NEXT: ldr x27, [x19, #168] // 8-byte Folded Reload
; CHECK-NEXT: ldr p0, [x27]
; CHECK-NEXT: ldr x27, [x26]
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x27]
; CHECK-NEXT: mov z0.d, z16.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: str p2, [x29, #-1, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: st1w { z1.s }, p2, [x14, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p2, [x14]
; CHECK-NEXT: ldr x27, [x25]
; CHECK-NEXT: ldr x25, [x26]
; CHECK-NEXT: add x25, x25, x27, lsl #2
; CHECK-NEXT: str x25, [x26]
; CHECK-NEXT: ldr p0, [x24]
; CHECK-NEXT: ldr x24, [x16]
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x24]
; CHECK-NEXT: mov z0.d, z16.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: st1w { z1.s }, p2, [x13, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p2, [x13]
; CHECK-NEXT: ldr x24, [x15]
; CHECK-NEXT: ldr x15, [x16]
; CHECK-NEXT: add x15, x15, x24, lsl #2
; CHECK-NEXT: str x15, [x16]
; CHECK-NEXT: mov x16, x2
; CHECK-NEXT: incd x16
; CHECK-NEXT: ldr p1, [x2]
; CHECK-NEXT: mov x15, x7
; CHECK-NEXT: incd x15
; CHECK-NEXT: ldr p0, [x7]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
; CHECK-NEXT: str p1, [x23]
; CHECK-NEXT: str p0, [x22]
; CHECK-NEXT: st1w { z1.s }, p2, [x21]
; CHECK-NEXT: st1w { z0.s }, p2, [x20]
; CHECK-NEXT: ldr p0, [x23]
; CHECK-NEXT: ldr p1, [x22]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x21]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x20]
; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ldr p1, [x16]
; CHECK-NEXT: ldr p0, [x7]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
; CHECK-NEXT: str p1, [x6]
; CHECK-NEXT: str p0, [x5]
; CHECK-NEXT: st1w { z1.s }, p2, [x4]
; CHECK-NEXT: st1w { z0.s }, p2, [x3]
; CHECK-NEXT: ldr p0, [x6]
; CHECK-NEXT: ldr p1, [x5]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x4]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x3]
; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ldr p1, [x2]
; CHECK-NEXT: ldr p0, [x15]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]
; CHECK-NEXT: str p1, [x1]
; CHECK-NEXT: str p0, [x0]
; CHECK-NEXT: st1w { z1.s }, p2, [x18]
; CHECK-NEXT: st1w { z0.s }, p2, [x17]
; CHECK-NEXT: ldr p0, [x1]
; CHECK-NEXT: ldr p1, [x0]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x18]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x17]
; CHECK-NEXT: fmopa za2.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ldr p1, [x16]
; CHECK-NEXT: ldr p0, [x15]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]
; CHECK-NEXT: str p1, [x12]
; CHECK-NEXT: str p0, [x11]
; CHECK-NEXT: st1w { z1.s }, p2, [x10]
; CHECK-NEXT: st1w { z0.s }, p2, [x8]
; CHECK-NEXT: ldr p0, [x12]
; CHECK-NEXT: ldr p1, [x11]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x10]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x8]
; CHECK-NEXT: fmopa za3.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ldr x8, [x9]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: str x8, [x9]
; CHECK-NEXT: b .LBB0_3
bb:
%alloca = alloca <vscale x 16 x i1>, align 2
%alloca1 = alloca <vscale x 16 x i1>, align 2
%alloca2 = alloca <vscale x 4 x float>, align 16
%alloca3 = alloca <vscale x 4 x float>, align 16
%alloca4 = alloca <vscale x 16 x i1>, align 2
%alloca5 = alloca <vscale x 16 x i1>, align 2
%alloca6 = alloca <vscale x 4 x float>, align 16
%alloca7 = alloca <vscale x 4 x float>, align 16
%alloca8 = alloca <vscale x 16 x i1>, align 2
%alloca9 = alloca <vscale x 16 x i1>, align 2
%alloca10 = alloca <vscale x 4 x float>, align 16
%alloca11 = alloca <vscale x 4 x float>, align 16
%alloca12 = alloca <vscale x 16 x i1>, align 2
%alloca13 = alloca <vscale x 16 x i1>, align 2
%alloca14 = alloca <vscale x 4 x float>, align 16
%alloca15 = alloca <vscale x 4 x float>, align 16
%alloca16 = alloca i64, align 8
%alloca17 = alloca i64, align 8
%alloca18 = alloca ptr, align 8
%alloca19 = alloca i64, align 8
%alloca20 = alloca i64, align 8
%alloca21 = alloca target("aarch64.svcount"), align 2
%alloca22 = alloca i32, align 4
%alloca23 = alloca <vscale x 32 x i8>, align 16
%alloca24 = alloca i64, align 8
%alloca25 = alloca target("aarch64.svcount"), align 2
%alloca26 = alloca ptr, align 8
%alloca27 = alloca i64, align 8
%alloca28 = alloca target("aarch64.svcount"), align 2
%alloca29 = alloca ptr, align 8
%alloca30 = alloca i64, align 8
%alloca31 = alloca <vscale x 32 x i1>, align 2
%alloca32 = alloca <vscale x 32 x i1>, align 2
%alloca33 = alloca <vscale x 8 x float>, align 16
%alloca34 = alloca <vscale x 8 x float>, align 16
%alloca35 = alloca i64, align 8
%alloca36 = alloca i64, align 8
%alloca37 = alloca i64, align 8
%alloca38 = alloca i64, align 8
%alloca39 = alloca i64, align 8
%alloca40 = alloca i64, align 8
%alloca41 = alloca i64, align 8
%alloca42 = alloca i8, align 1
%alloca43 = alloca ptr, align 8
%alloca44 = alloca i64, align 8
%alloca45 = alloca i8, align 1
%alloca46 = alloca ptr, align 8
%alloca47 = alloca i64, align 8
%alloca48 = alloca ptr, align 8
%alloca49 = alloca i64, align 8
%alloca50 = alloca i8, align 1
%alloca51 = alloca ptr, align 8
%alloca52 = alloca ptr, align 8
%alloca53 = alloca ptr, align 8
%alloca54 = alloca i64, align 8
%alloca55 = alloca i64, align 8
%alloca56 = alloca i64, align 8
%alloca57 = alloca i64, align 8
%alloca58 = alloca i64, align 8
%alloca59 = alloca i64, align 8
%alloca60 = alloca i64, align 8
%alloca61 = alloca i64, align 8
%alloca62 = alloca i64, align 8
%alloca63 = alloca target("aarch64.svcount"), align 2
%alloca64 = alloca target("aarch64.svcount"), align 2
%alloca65 = alloca ptr, align 8
%alloca66 = alloca ptr, align 8
%alloca67 = alloca ptr, align 8
store i8 0, ptr %alloca42, align 1
store i8 0, ptr %alloca45, align 1
store i8 0, ptr %alloca50, align 1
store ptr null, ptr %alloca51, align 8
%load = load ptr, ptr %alloca43, align 8
%load68 = load i64, ptr %alloca39, align 8
%getelementptr = getelementptr inbounds float, ptr %load, i64 %load68
%load69 = load i64, ptr %alloca41, align 8
%sub = sub i64 %load69, 1
%load70 = load i64, ptr %alloca44, align 8
%mul = mul i64 %sub, %load70
%getelementptr71 = getelementptr inbounds float, ptr %getelementptr, i64 %mul
store ptr %getelementptr71, ptr %alloca51, align 8
store ptr null, ptr %alloca52, align 8
%load72 = load ptr, ptr %alloca46, align 8
%load73 = load i64, ptr %alloca40, align 8
%getelementptr74 = getelementptr inbounds float, ptr %load72, i64 %load73
%load75 = load i64, ptr %alloca41, align 8
%sub76 = sub i64 %load75, 1
%load77 = load i64, ptr %alloca47, align 8
%mul78 = mul i64 %sub76, %load77
%getelementptr79 = getelementptr inbounds float, ptr %getelementptr74, i64 %mul78
store ptr %getelementptr79, ptr %alloca52, align 8
store ptr null, ptr %alloca53, align 8
%load80 = load ptr, ptr %alloca48, align 8
%load81 = load i64, ptr %alloca39, align 8
%getelementptr82 = getelementptr inbounds float, ptr %load80, i64 %load81
%load83 = load i64, ptr %alloca40, align 8
%sub84 = sub i64 %load83, 1
%load85 = load i64, ptr %alloca49, align 8
%mul86 = mul i64 %sub84, %load85
%getelementptr87 = getelementptr inbounds float, ptr %getelementptr82, i64 %mul86
store ptr %getelementptr87, ptr %alloca53, align 8
store i64 32, ptr %alloca54, align 8
store i64 32, ptr %alloca55, align 8
store i64 0, ptr %alloca56, align 8
%load88 = load i64, ptr %alloca41, align 8
%mul89 = mul i64 32, %load88
store i64 %mul89, ptr %alloca56, align 8
%load90 = load i8, ptr %alloca42, align 1
%trunc = trunc i8 %load90 to i1
store i64 32, ptr %alloca44, align 8
store i64 0, ptr %alloca57, align 8
%load91 = load i64, ptr %alloca39, align 8
%sub92 = sub i64 %load91, 1
%udiv = udiv i64 %sub92, 32
%add = add i64 %udiv, 1
store i64 %add, ptr %alloca57, align 8
%load93 = load ptr, ptr %alloca43, align 8
%load94 = load i64, ptr %alloca57, align 8
%load95 = load i64, ptr %alloca56, align 8
%mul96 = mul i64 %load94, %load95
%getelementptr97 = getelementptr inbounds float, ptr %load93, i64 %mul96
store ptr %getelementptr97, ptr %alloca51, align 8
%load98 = load i8, ptr %alloca45, align 1
%trunc99 = trunc i8 %load98 to i1
store i64 32, ptr %alloca47, align 8
store i64 0, ptr %alloca58, align 8
%load100 = load i64, ptr %alloca40, align 8
%sub101 = sub i64 %load100, 1
%udiv102 = udiv i64 %sub101, 32
%add103 = add i64 %udiv102, 1
store i64 %add103, ptr %alloca58, align 8
%load104 = load ptr, ptr %alloca46, align 8
%load105 = load i64, ptr %alloca58, align 8
%load106 = load i64, ptr %alloca56, align 8
%mul107 = mul i64 %load105, %load106
%getelementptr108 = getelementptr inbounds float, ptr %load104, i64 %mul107
store ptr %getelementptr108, ptr %alloca52, align 8
store i64 0, ptr %alloca59, align 8
store i64 0, ptr %alloca59, align 8
%load109 = load i64, ptr %alloca59, align 8
%load110 = load i64, ptr %alloca40, align 8
%icmp = icmp ult i64 %load109, %load110
store i64 0, ptr %alloca60, align 8
store i64 0, ptr %alloca60, align 8
%load111 = load i64, ptr %alloca60, align 8
%load112 = load i64, ptr %alloca39, align 8
%icmp113 = icmp ult i64 %load111, %load112
store i64 0, ptr %alloca61, align 8
%load114 = load i64, ptr %alloca39, align 8
%load115 = load i64, ptr %alloca60, align 8
%sub116 = sub i64 %load114, %load115
store i64 %sub116, ptr %alloca35, align 8
store i64 32, ptr %alloca36, align 8
%load117 = load i64, ptr %alloca35, align 8
%load118 = load i64, ptr %alloca36, align 8
%icmp119 = icmp ult i64 %load117, %load118
%load120 = load i64, ptr %alloca35, align 8
store i64 %load120, ptr %alloca61, align 8
store i64 0, ptr %alloca62, align 8
%load121 = load i64, ptr %alloca40, align 8
%load122 = load i64, ptr %alloca59, align 8
%sub123 = sub i64 %load121, %load122
store i64 %sub123, ptr %alloca37, align 8
store i64 32, ptr %alloca38, align 8
%load124 = load i64, ptr %alloca37, align 8
%load125 = load i64, ptr %alloca38, align 8
%icmp126 = icmp ult i64 %load124, %load125
%load127 = load i64, ptr %alloca37, align 8
store i64 %load127, ptr %alloca62, align 8
%load128 = load i64, ptr %alloca60, align 8
%load129 = load i64, ptr %alloca39, align 8
%call = call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 %load128, i64 %load129, i32 2)
store target("aarch64.svcount") %call, ptr %alloca63, align 2
%load130 = load i64, ptr %alloca59, align 8
%load131 = load i64, ptr %alloca40, align 8
%call132 = call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 %load130, i64 %load131, i32 2)
store target("aarch64.svcount") %call132, ptr %alloca64, align 2
store ptr null, ptr %alloca65, align 8
%load133 = load ptr, ptr %alloca48, align 8
%load134 = load i64, ptr %alloca60, align 8
%getelementptr135 = getelementptr inbounds float, ptr %load133, i64 %load134
%load136 = load i64, ptr %alloca59, align 8
%load137 = load i64, ptr %alloca49, align 8
%mul138 = mul i64 %load136, %load137
%getelementptr139 = getelementptr inbounds float, ptr %getelementptr135, i64 %mul138
store ptr %getelementptr139, ptr %alloca65, align 8
call void @llvm.aarch64.sme.zero(i32 255)
store ptr null, ptr %alloca66, align 8
%load140 = load i8, ptr %alloca42, align 1
%trunc141 = trunc i8 %load140 to i1
%load142 = load ptr, ptr %alloca43, align 8
%load143 = load i64, ptr %alloca60, align 8
%load144 = load i64, ptr %alloca41, align 8
%mul145 = mul i64 %load143, %load144
%getelementptr146 = getelementptr inbounds float, ptr %load142, i64 %mul145
store ptr %getelementptr146, ptr %alloca66, align 8
store ptr null, ptr %alloca67, align 8
%load147 = load i8, ptr %alloca45, align 1
%trunc148 = trunc i8 %load147 to i1
%load149 = load ptr, ptr %alloca46, align 8
%load150 = load i64, ptr %alloca59, align 8
%load151 = load i64, ptr %alloca41, align 8
%mul152 = mul i64 %load150, %load151
%getelementptr153 = getelementptr inbounds float, ptr %load149, i64 %mul152
store ptr %getelementptr153, ptr %alloca67, align 8
%load154 = load i64, ptr %alloca41, align 8
%load155 = load target("aarch64.svcount"), ptr %alloca63, align 2
%load156 = load ptr, ptr %alloca66, align 8
%load157 = load i64, ptr %alloca44, align 8
%udiv158 = udiv i64 %load157, 4
%load159 = load target("aarch64.svcount"), ptr %alloca64, align 2
%load160 = load ptr, ptr %alloca67, align 8
%load161 = load i64, ptr %alloca47, align 8
%udiv162 = udiv i64 %load161, 4
store i64 %load154, ptr %alloca24, align 8
store target("aarch64.svcount") %load155, ptr %alloca25, align 2
store ptr %load156, ptr %alloca26, align 8
store i64 %udiv158, ptr %alloca27, align 8
store target("aarch64.svcount") %load159, ptr %alloca28, align 2
store ptr %load160, ptr %alloca29, align 8
store i64 %udiv162, ptr %alloca30, align 8
%load163 = load target("aarch64.svcount"), ptr %alloca25, align 2
%call164 = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %load163, i32 0)
%extractvalue = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call164, 0
%call165 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue)
%call166 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %call165, i64 0)
%extractvalue167 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call164, 1
%call168 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue167)
%call169 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %call166, <vscale x 16 x i1> %call168, i64 16)
store <vscale x 32 x i1> %call169, ptr %alloca31, align 2
%load170 = load target("aarch64.svcount"), ptr %alloca28, align 2
%call171 = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %load170, i32 0)
%extractvalue172 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call171, 0
%call173 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue172)
%call174 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %call173, i64 0)
%extractvalue175 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call171, 1
%call176 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue175)
%call177 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %call174, <vscale x 16 x i1> %call176, i64 16)
store <vscale x 32 x i1> %call177, ptr %alloca32, align 2
br label %bb178

bb178: ; preds = %bb178, %bb
%load179 = load i64, ptr %alloca24, align 8
%icmp180 = icmp ugt i64 %load179, 0
%load181 = load target("aarch64.svcount"), ptr %alloca25, align 2
%load182 = load ptr, ptr %alloca26, align 8
%call183 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") %load181, ptr %load182)
%extractvalue184 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call183, 0
%call185 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> %extractvalue184, i64 0)
%extractvalue186 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call183, 1
%call187 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> %call185, <vscale x 4 x float> %extractvalue186, i64 4)
store <vscale x 8 x float> %call187, ptr %alloca33, align 16
%load188 = load i64, ptr %alloca27, align 8
%load189 = load ptr, ptr %alloca26, align 8
%getelementptr190 = getelementptr inbounds float, ptr %load189, i64 %load188
store ptr %getelementptr190, ptr %alloca26, align 8
%load191 = load target("aarch64.svcount"), ptr %alloca28, align 2
%load192 = load ptr, ptr %alloca29, align 8
%call193 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") %load191, ptr %load192)
%extractvalue194 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call193, 0
%call195 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> %extractvalue194, i64 0)
%extractvalue196 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call193, 1
%call197 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> %call195, <vscale x 4 x float> %extractvalue196, i64 4)
store <vscale x 8 x float> %call197, ptr %alloca34, align 16
%load198 = load i64, ptr %alloca30, align 8
%load199 = load ptr, ptr %alloca29, align 8
%getelementptr200 = getelementptr inbounds float, ptr %load199, i64 %load198
store ptr %getelementptr200, ptr %alloca29, align 8
%load201 = load <vscale x 32 x i1>, ptr %alloca31, align 2
%call202 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load201, i64 0)
%load203 = load <vscale x 32 x i1>, ptr %alloca32, align 2
%call204 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load203, i64 0)
%load205 = load <vscale x 8 x float>, ptr %alloca33, align 16
%call206 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load205, i64 0)
%load207 = load <vscale x 8 x float>, ptr %alloca34, align 16
%call208 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load207, i64 0)
store <vscale x 16 x i1> %call202, ptr %alloca12, align 2
store <vscale x 16 x i1> %call204, ptr %alloca13, align 2
store <vscale x 4 x float> %call206, ptr %alloca14, align 16
store <vscale x 4 x float> %call208, ptr %alloca15, align 16
%load209 = load <vscale x 16 x i1>, ptr %alloca12, align 2
%load210 = load <vscale x 16 x i1>, ptr %alloca13, align 2
%load211 = load <vscale x 4 x float>, ptr %alloca14, align 16
%load212 = load <vscale x 4 x float>, ptr %alloca15, align 16
%call213 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load209)
%call214 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load210)
call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %call213, <vscale x 4 x i1> %call214, <vscale x 4 x float> %load211, <vscale x 4 x float> %load212)
%load215 = load <vscale x 32 x i1>, ptr %alloca31, align 2
%call216 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load215, i64 16)
%load217 = load <vscale x 32 x i1>, ptr %alloca32, align 2
%call218 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load217, i64 0)
%load219 = load <vscale x 8 x float>, ptr %alloca33, align 16
%call220 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load219, i64 4)
%load221 = load <vscale x 8 x float>, ptr %alloca34, align 16
%call222 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load221, i64 0)
store <vscale x 16 x i1> %call216, ptr %alloca8, align 2
store <vscale x 16 x i1> %call218, ptr %alloca9, align 2
store <vscale x 4 x float> %call220, ptr %alloca10, align 16
store <vscale x 4 x float> %call222, ptr %alloca11, align 16
%load223 = load <vscale x 16 x i1>, ptr %alloca8, align 2
%load224 = load <vscale x 16 x i1>, ptr %alloca9, align 2
%load225 = load <vscale x 4 x float>, ptr %alloca10, align 16
%load226 = load <vscale x 4 x float>, ptr %alloca11, align 16
%call227 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load223)
%call228 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load224)
call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> %call227, <vscale x 4 x i1> %call228, <vscale x 4 x float> %load225, <vscale x 4 x float> %load226)
%load229 = load <vscale x 32 x i1>, ptr %alloca31, align 2
%call230 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load229, i64 0)
%load231 = load <vscale x 32 x i1>, ptr %alloca32, align 2
%call232 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load231, i64 16)
%load233 = load <vscale x 8 x float>, ptr %alloca33, align 16
%call234 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load233, i64 0)
%load235 = load <vscale x 8 x float>, ptr %alloca34, align 16
%call236 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load235, i64 4)
store <vscale x 16 x i1> %call230, ptr %alloca4, align 2
store <vscale x 16 x i1> %call232, ptr %alloca5, align 2
store <vscale x 4 x float> %call234, ptr %alloca6, align 16
store <vscale x 4 x float> %call236, ptr %alloca7, align 16
%load237 = load <vscale x 16 x i1>, ptr %alloca4, align 2
%load238 = load <vscale x 16 x i1>, ptr %alloca5, align 2
%load239 = load <vscale x 4 x float>, ptr %alloca6, align 16
%load240 = load <vscale x 4 x float>, ptr %alloca7, align 16
%call241 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load237)
%call242 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load238)
call void @llvm.aarch64.sme.mopa.nxv4f32(i32 2, <vscale x 4 x i1> %call241, <vscale x 4 x i1> %call242, <vscale x 4 x float> %load239, <vscale x 4 x float> %load240)
%load243 = load <vscale x 32 x i1>, ptr %alloca31, align 2
%call244 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load243, i64 16)
%load245 = load <vscale x 32 x i1>, ptr %alloca32, align 2
%call246 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load245, i64 16)
%load247 = load <vscale x 8 x float>, ptr %alloca33, align 16
%call248 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load247, i64 4)
%load249 = load <vscale x 8 x float>, ptr %alloca34, align 16
%call250 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load249, i64 4)
store <vscale x 16 x i1> %call244, ptr %alloca, align 2
store <vscale x 16 x i1> %call246, ptr %alloca1, align 2
store <vscale x 4 x float> %call248, ptr %alloca2, align 16
store <vscale x 4 x float> %call250, ptr %alloca3, align 16
%load251 = load <vscale x 16 x i1>, ptr %alloca, align 2
%load252 = load <vscale x 16 x i1>, ptr %alloca1, align 2
%load253 = load <vscale x 4 x float>, ptr %alloca2, align 16
%load254 = load <vscale x 4 x float>, ptr %alloca3, align 16
%call255 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load251)
%call256 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load252)
call void @llvm.aarch64.sme.mopa.nxv4f32(i32 3, <vscale x 4 x i1> %call255, <vscale x 4 x i1> %call256, <vscale x 4 x float> %load253, <vscale x 4 x float> %load254)
%load257 = load i64, ptr %alloca24, align 8
%add258 = add i64 %load257, -1
store i64 %add258, ptr %alloca24, align 8
br label %bb178
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64, i64, i32 immarg) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.aarch64.sme.zero(i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount"), i32 immarg) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>) #2

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1>, <vscale x 16 x i1>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount"), ptr) #5

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float>, <vscale x 4 x float>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.aarch64.sme.mopa.nxv4f32(i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c8(i64, i64, i32 immarg) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32, i32) #3

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>, <vscale x 16 x i8>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8>, i64 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr) #6

attributes #0 = { cold noreturn nounwind }
attributes #1 = { mustprogress noinline optnone ssp uwtable(sync) vscale_range(1,16) "aarch64_new_za" "aarch64_pstate_sm_enabled" "frame-pointer"="non-leaf" "target-features"="+fp-armv8,+fullfp16,+sme,+sme-f64f64,+sme2" }
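; Note: attribute set #1 is what exercises the base-pointer path above:
; "aarch64_pstate_sm_enabled" makes @quux a streaming function, "aarch64_new_za"
; gives it ZA state, and vscale_range(1,16) together with the scalable allocas
; makes the frame size depend on the SVE vector length.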
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind willreturn }
attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }