; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s

; Scalable offset first (vscale x 16 bytes = 1 VL), then a fixed +4 bytes.
; The fixed part cannot use the "mul vl" immediate form, so it is materialised
; in a register and the load uses the scalar-plus-scalar addressing mode.
define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

; Fixed +4 bytes first, then the scalable offset: the scalable part folds into
; the "#1, mul vl" immediate form of ld1b.
define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

; One vector (vscale x 8 i16 elements = vscale x 16 bytes) plus a fixed +8
; bytes; the fixed offset is emitted as 4 elements with an lsl #1 register
; offset.
define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; Fixed +8 bytes first, then one vector of i16: the scalable part folds into
; the "#1, mul vl" immediate form of ld1h.
define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; Two vectors (vscale x 16 i16 elements = vscale x 32 bytes, rdvl #2) plus a
; fixed +8 bytes via a register element offset.
define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; Fixed +8 bytes first, then two vectors of i16: the scalable part folds into
; the "#2, mul vl" immediate form of ld1h.
define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; One vector (vscale x 4 i32 elements = vscale x 16 bytes) plus a fixed +16
; bytes, emitted as 4 elements with an lsl #2 register offset.
define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

; Fixed +16 bytes first, then vscale x 4 i32 elements (= one vector), folded
; into the "#1, mul vl" immediate form.
; NOTE(review): the name says "2v" but the scalable offset here is one vector
; (vscale << 2 i32 elements = vscale x 16 bytes) — confirm intended naming.
define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
; CHECK-LABEL: i32_16s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

; One vector (vscale x 2 i64 elements = vscale x 16 bytes) plus a fixed +32
; bytes, emitted as 4 elements with an lsl #3 register offset.
define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

; Fixed +32 bytes first, then vscale x 2 i64 elements (= one vector), folded
; into the "#1, mul vl" immediate form.
; NOTE(review): the name says "2v" but the scalable offset here is one vector
; (vscale << 1 i64 elements = vscale x 16 bytes) — confirm intended naming.
define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
; CHECK-LABEL: i64_32s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}


; Negative scalable offset (vscale * -32 bytes = -2 VLs) first, then a fixed
; +4 bytes; the scalable amount is computed with cnth ... mul #4 and
; subtracted, the fixed offset uses a register offset.
define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

; Fixed +4 bytes first, then vscale * -32 bytes: the negative scalable part
; folds into the "#-2, mul vl" immediate form of ld1b.
define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

; -2 VLs (vscale * -32 bytes) then a fixed +8 bytes, loaded as i16 with a
; 4-element lsl #1 register offset.
define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; Fixed +8 bytes first, then vscale * -32 bytes: folds into the "#-2, mul vl"
; immediate form of ld1h.
define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

; -2 VLs (vscale * -32 bytes) then a fixed +16 bytes, loaded as i32 with a
; 4-element lsl #2 register offset.
define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

; Fixed +16 bytes first, then vscale * -32 bytes: folds into the "#-2, mul vl"
; immediate form of ld1w.
define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

; -2 VLs (vscale * -32 bytes) then a fixed +32 bytes, loaded as i64 with a
; 4-element lsl #3 register offset.
define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

; Fixed +32 bytes first, then vscale * -32 bytes: folds into the "#-2, mul vl"
; immediate form of ld1d.
define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

declare i64 @llvm.vscale.i64()