| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck --check-prefixes=CHECK,LIMIT-DEFAULT %s |
| ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -vector-combine-max-scan-instrs=2 -S %s | FileCheck --check-prefixes=CHECK,LIMIT2 %s |
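| ; This file exercises VectorCombine's scalarization of a vector load feeding an |
| ; extractelement: when the lane index is known to be in bounds and no clobbering |
| ; access sits between the load and the extract, the pair is rewritten as a |
| ; getelementptr to the lane plus a scalar load. The second RUN line lowers |
| ; -vector-combine-max-scan-instrs to 2, so cases with many instructions between |
| ; the load and the extract differ between the LIMIT-DEFAULT and LIMIT2 prefixes. |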
| |
| define i32 @load_extract_idx_0(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_0( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 3 |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_0(ptr %x) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_0( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i32 0 |
| ret i32 %r |
| } |
| |
| ; If the original load has a smaller alignment than the natural alignment of |
| ; the scalar type, the smaller alignment should be used. |
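| ; In general the scalarized load appears to take the common alignment of the |
| ; original load's alignment and the lane's byte offset (e.g. lane 2 of an |
| ; align-16 <4 x i32> load becomes an align-8 scalar load). |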
| define i32 @load_extract_idx_0_small_alignment(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_0_small_alignment( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 2 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x, align 2 |
| %r = extractelement <4 x i32> %lv, i32 3 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_1(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_1( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 1 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 1 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_2(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_2( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_2(ptr %x) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_2( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_3(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_3( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 3 |
| ret i32 %r |
| } |
| |
| ; Out-of-bounds index for the extractelement; the load should not be converted |
| ; to a narrow load, because that would introduce a dereference of a poison |
| ; pointer. |
| define i32 @load_extract_idx_4(ptr %x) { |
| ; CHECK-LABEL: @load_extract_idx_4( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 4 |
| ret i32 %r |
| } |
| |
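| ; For the scalable variant below, lane 4 is only in range when vscale >= 2, |
| ; which is not known here, so the extract is likewise left unscalarized. |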
| define i32 @vscale_load_extract_idx_4(ptr %x) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_4( |
| ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i32 4 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX:%.*]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| declare void @maythrow() readnone |
| |
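| ; A variable index can still be scalarized when an llvm.assume shows it is less |
| ; than the number of elements. The assume has to be known to hold at the vector |
| ; load, where the scalar load would be inserted; the *_after_load and |
| ; *_dominating_block variants below exercise that placement requirement. |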
| define i32 @load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: call void @maythrow() |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 4 |
| call void @llvm.assume(i1 %cmp) |
| %lv = load <4 x i32>, ptr %x |
| call void @maythrow() |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_assume( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: call void @maythrow() |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 4 |
| call void @llvm.assume(i1 %cmp) |
| %lv = load <vscale x 4 x i32>, ptr %x |
| call void @maythrow() |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| declare i1 @cond() |
| |
| define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(ptr %x, i64 %idx, i1 %c.1) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: call void @maythrow() |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() |
| ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: ret i32 [[P]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 4 |
| call void @llvm.assume(i1 %cmp) |
| br i1 %c.1, label %loop, label %exit |
| |
| loop: |
| %lv = load <4 x i32>, ptr %x |
| call void @maythrow() |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| %c.2 = call i1 @cond() |
| br i1 %c.2, label %loop, label %exit |
| |
| exit: |
| %p = phi i32 [ %r, %loop ], [ 0, %entry ] |
| ret i32 %p |
| } |
| |
| define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(ptr %x, i64 %idx, i1 %c.1, i1 %c.2) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ASSUME_CHECK:%.*]], label [[LOOP:%.*]] |
| ; CHECK: assume_check: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: call void @maythrow() |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] |
| ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() |
| ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP]], label [[EXIT]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ASSUME_CHECK]] ] |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| entry: |
| br i1 %c.1, label %assume_check, label %loop |
| |
| assume_check: |
| %cmp = icmp ult i64 %idx, 4 |
| call void @llvm.assume(i1 %cmp) |
| br i1 %c.2, label %loop, label %exit |
| |
| loop: |
| %lv = load <4 x i32>, ptr %x |
| call void @maythrow() |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| %c.3 = call i1 @cond() |
| br i1 %c.3, label %loop, label %exit |
| |
| exit: |
| %p = phi i32 [ %r, %loop ], [ 0, %assume_check ] |
| ret i32 0 |
| } |
| |
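| ; Here the assume only appears after the load, so the index is not known to be |
| ; valid at the point where the scalar load would be inserted and the IR is left |
| ; unchanged. |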
| define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume_after_load( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: call void @maythrow() |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 4 |
| %lv = load <4 x i32>, ptr %x |
| call void @maythrow() |
| call void @llvm.assume(i1 %cmp) |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 5 |
| call void @llvm.assume(i1 %cmp) |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %cmp = icmp ult i64 %idx, 5 |
| call void @llvm.assume(i1 %cmp) |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[VS:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-NEXT: [[VM:%.*]] = mul i64 [[VS]], 4 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], [[VM]] |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) |
| ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %vs = call i64 @llvm.vscale.i64() |
| %vm = mul i64 %vs, 4 |
| %cmp = icmp ult i64 %idx, %vm |
| call void @llvm.assume(i1 %cmp) |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx |
| ret i32 %r |
| } |
| |
| declare i64 @llvm.vscale.i64() |
| declare void @llvm.assume(i1) |
| |
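| ; An index masked by a suitable `and` or `urem` constant is also known to be in |
| ; bounds. Note that the transform freezes the index first unless it is already |
| ; noundef (compare the *_noundef variants), presumably so that an undef or |
| ; poison index cannot be turned into an out-of-bounds address. |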
| define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = and i64 %idx, 3 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_and( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = and i64 %idx, 3 |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(ptr %x, i64 noundef %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = and i64 %idx, 3 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_and( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = and i64 %idx, 4 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_and( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = and i64 %idx, 4 |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX_FROZEN]], 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = urem i64 %idx, 4 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_urem( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX_FROZEN]], 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = urem i64 %idx, 4 |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(ptr %x, i64 noundef %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = urem i64 %idx, 4 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_urem( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = urem i64 %idx, 5 |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) { |
| ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_urem( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 |
| ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| entry: |
| %idx.clamped = urem i64 %idx, 5 |
| %lv = load <vscale x 4 x i32>, ptr %x |
| %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_idx_var_i32(ptr %x, i32 %idx) { |
| ; CHECK-LABEL: @load_extract_idx_var_i32( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 [[IDX:%.*]] |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 %idx |
| ret i32 %r |
| } |
| |
| declare void @clobber() |
| |
| define i32 @load_extract_clobber_call_before(ptr %x) { |
| ; CHECK-LABEL: @load_extract_clobber_call_before( |
| ; CHECK-NEXT: call void @clobber() |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| call void @clobber() |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_clobber_call_between(ptr %x) { |
| ; CHECK-LABEL: @load_extract_clobber_call_between( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: call void @clobber() |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| call void @clobber() |
| %r = extractelement <4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_clobber_call_after(ptr %x) { |
| ; CHECK-LABEL: @load_extract_clobber_call_after( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: call void @clobber() |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 2 |
| call void @clobber() |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_clobber_store_before(ptr %x, ptr %y) { |
| ; CHECK-LABEL: @load_extract_clobber_store_before( |
| ; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| store i8 0, ptr %y |
| %lv = load <4 x i32>, ptr %x |
| %r = extractelement <4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) { |
| ; CHECK-LABEL: @load_extract_clobber_store_between( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| store i8 0, ptr %y |
| %r = extractelement <4 x i32> %lv, i32 2 |
| ret i32 %r |
| } |
| |
| define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) { |
| ; CHECK-LABEL: @load_extract_clobber_store_between_limit( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 |
| ; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 |
| ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] |
| ; CHECK-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] |
| ; CHECK-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] |
| ; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] |
| ; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 |
| ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] |
| ; CHECK-NEXT: ret i32 [[ADD_4]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %z.0 = extractelement <8 x i32> %z, i32 0 |
| %z.1 = extractelement <8 x i32> %z, i32 1 |
| %add.0 = add i32 %z.0, %z.1 |
| %z.2 = extractelement <8 x i32> %z, i32 2 |
| %add.1 = add i32 %add.0, %z.2 |
| %z.3 = extractelement <8 x i32> %z, i32 3 |
| %add.2 = add i32 %add.1, %z.3 |
| %z.4 = extractelement <8 x i32> %z, i32 4 |
| %add.3 = add i32 %add.2, %z.4 |
| store i8 0, ptr %y |
| %r = extractelement <4 x i32> %lv, i32 2 |
| %add.4 = add i32 %add.3, %r |
| ret i32 %add.4 |
| } |
| |
| define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) { |
| ; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit( |
| ; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 |
| ; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 |
| ; LIMIT-DEFAULT-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] |
| ; LIMIT-DEFAULT-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 |
| ; LIMIT-DEFAULT-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] |
| ; LIMIT-DEFAULT-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 |
| ; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] |
| ; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 |
| ; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] |
| ; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 |
| ; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 |
| ; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1 |
| ; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] |
| ; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]] |
| ; |
| ; LIMIT2-LABEL: @load_extract_clobber_store_after_limit( |
| ; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 |
| ; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 |
| ; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] |
| ; LIMIT2-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 |
| ; LIMIT2-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] |
| ; LIMIT2-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 |
| ; LIMIT2-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] |
| ; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 |
| ; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] |
| ; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 |
| ; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1 |
| ; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] |
| ; LIMIT2-NEXT: ret i32 [[ADD_4]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %z.0 = extractelement <8 x i32> %z, i32 0 |
| %z.1 = extractelement <8 x i32> %z, i32 1 |
| %add.0 = add i32 %z.0, %z.1 |
| %z.2 = extractelement <8 x i32> %z, i32 2 |
| %add.1 = add i32 %add.0, %z.2 |
| %z.3 = extractelement <8 x i32> %z, i32 3 |
| %add.2 = add i32 %add.1, %z.3 |
| %z.4 = extractelement <8 x i32> %z, i32 4 |
| %add.3 = add i32 %add.2, %z.4 |
| %r = extractelement <4 x i32> %lv, i32 2 |
| store i8 0, ptr %y |
| %add.4 = add i32 %add.3, %r |
| ret i32 %add.4 |
| } |
| |
| declare void @use.v4i32(<4 x i32>) |
| |
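| ; The vector load below also feeds @use.v4i32, so scalarizing the extract would |
| ; not make the vector load dead; nothing is changed. |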
| define i32 @load_extract_idx_different_bbs(ptr %x, i1 %c) { |
| ; CHECK-LABEL: @load_extract_idx_different_bbs( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] |
| ; CHECK: then: |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 1 |
| ; CHECK-NEXT: ret i32 [[R]] |
| ; CHECK: else: |
| ; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[LV]]) |
| ; CHECK-NEXT: ret i32 20 |
| ; |
| %lv = load <4 x i32>, ptr %x |
| br i1 %c, label %then, label %else |
| |
| then: |
| %r = extractelement <4 x i32> %lv, i32 1 |
| ret i32 %r |
| |
| else: |
| call void @use.v4i32(<4 x i32> %lv) |
| ret i32 20 |
| } |
| |
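| ; Loads of vectors whose element type does not occupy a whole number of bytes |
| ; (i31, i1) are not scalarized. |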
| define i31 @load_with_non_power_of_2_element_type(ptr %x) { |
| ; CHECK-LABEL: @load_with_non_power_of_2_element_type( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i31>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i31> [[LV]], i32 1 |
| ; CHECK-NEXT: ret i31 [[R]] |
| ; |
| %lv = load <4 x i31>, ptr %x |
| %r = extractelement <4 x i31> %lv, i32 1 |
| ret i31 %r |
| } |
| |
| define i1 @load_with_non_power_of_2_element_type_2(ptr %x) { |
| ; CHECK-LABEL: @load_with_non_power_of_2_element_type_2( |
| ; CHECK-NEXT: [[LV:%.*]] = load <8 x i1>, ptr [[X:%.*]], align 1 |
| ; CHECK-NEXT: [[R:%.*]] = extractelement <8 x i1> [[LV]], i32 1 |
| ; CHECK-NEXT: ret i1 [[R]] |
| ; |
| %lv = load <8 x i1>, ptr %x |
| %r = extractelement <8 x i1> %lv, i32 1 |
| ret i1 %r |
| } |
| |
| ; Scalarizing the load for multiple constant indices may not be profitable. |
| define i32 @load_multiple_extracts_with_constant_idx(ptr %x) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i32 0 |
| ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i32 1 |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %e.0 = extractelement <4 x i32> %lv, i32 0 |
| %e.1 = extractelement <4 x i32> %lv, i32 1 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| ; Scalarizing the load for multiple extracts is profitable in this case, |
| ; because the large vector requires 2 vector registers. |
| define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable( |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, ptr [[X:%.*]], i32 0, i32 0 |
| ; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i32>, ptr [[X]], i32 0, i32 6 |
| ; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 8 |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %lv = load <8 x i32>, ptr %x, align 16 |
| %e.0 = extractelement <8 x i32> %lv, i32 0 |
| %e.1 = extractelement <8 x i32> %lv, i32 6 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| ; Scalarizing may or may not be profitable, depending on the target. |
| define i32 @load_multiple_2_with_variable_indices(ptr %x, i64 %idx.0, i64 %idx.1) { |
| ; CHECK-LABEL: @load_multiple_2_with_variable_indices( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] |
| ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 |
| %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_4_extracts_with_variable_indices_short_vector(ptr %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) { |
| ; CHECK-LABEL: @load_4_extracts_with_variable_indices_short_vector( |
| ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 |
| ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] |
| ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] |
| ; CHECK-NEXT: [[E_2:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_2:%.*]] |
| ; CHECK-NEXT: [[E_3:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_3:%.*]] |
| ; CHECK-NEXT: [[RES_0:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: [[RES_1:%.*]] = add i32 [[RES_0]], [[E_2]] |
| ; CHECK-NEXT: [[RES_2:%.*]] = add i32 [[RES_1]], [[E_3]] |
| ; CHECK-NEXT: ret i32 [[RES_2]] |
| ; |
| %lv = load <4 x i32>, ptr %x |
| %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 |
| %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 |
| %e.2 = extractelement <4 x i32> %lv, i64 %idx.2 |
| %e.3 = extractelement <4 x i32> %lv, i64 %idx.3 |
| %res.0 = add i32 %e.0, %e.1 |
| %res.1 = add i32 %res.0, %e.2 |
| %res.2 = add i32 %res.1, %e.3 |
| ret i32 %res.2 |
| } |
| |
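| ; When several extracts share one load, every variable index needs to be known |
| ; valid before the load is scalarized; the tests below mix provably valid and |
| ; unknown indices, via assumes and via masking. |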
| define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(ptr %x, i64 %idx.0, i64 %idx.1) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid( |
| ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) |
| ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 |
| ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0]] |
| ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %cmp.idx.0 = icmp ult i64 %idx.0, 16 |
| call void @llvm.assume(i1 %cmp.idx.0) |
| |
| %lv = load <16 x i32>, ptr %x |
| %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 |
| %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(ptr %x, i64 %idx.0, i64 %idx.1) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid( |
| ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) |
| ; CHECK-NEXT: [[CMP_IDX_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 16 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_1]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0]] |
| ; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1]] |
| ; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %cmp.idx.0 = icmp ult i64 %idx.0, 16 |
| call void @llvm.assume(i1 %cmp.idx.0) |
| %cmp.idx.1 = icmp ult i64 %idx.1, 16 |
| call void @llvm.assume(i1 %cmp.idx.1) |
| |
| %lv = load <16 x i32>, ptr %x |
| %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 |
| %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and( |
| ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 |
| ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 |
| ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] |
| ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %idx.0.clamped = and i64 %idx.0, 15 |
| |
| %lv = load <16 x i32>, ptr %x |
| %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped |
| %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and( |
| ; CHECK-NEXT: [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]] |
| ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15 |
| ; CHECK-NEXT: [[IDX_1_FROZEN:%.*]] = freeze i64 [[IDX_1:%.*]] |
| ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1_FROZEN]], 15 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]] |
| ; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]] |
| ; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %idx.0.clamped = and i64 %idx.0, 15 |
| %idx.1.clamped = and i64 %idx.1, 15 |
| |
| %lv = load <16 x i32>, ptr %x |
| %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped |
| %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and(ptr %x, ptr %y, i64 %idx) { |
| ; LIMIT-DEFAULT-LABEL: @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and( |
| ; LIMIT-DEFAULT-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; LIMIT-DEFAULT-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15 |
| ; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; LIMIT-DEFAULT-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[Y:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; LIMIT-DEFAULT-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 |
| ; LIMIT-DEFAULT-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; LIMIT-DEFAULT-NEXT: ret i32 [[RES]] |
| ; |
| ; LIMIT2-LABEL: @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and( |
| ; LIMIT2-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] |
| ; LIMIT2-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15 |
| ; LIMIT2-NEXT: [[LY:%.*]] = load <16 x i32>, ptr [[Y:%.*]], align 64 |
| ; LIMIT2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] |
| ; LIMIT2-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; LIMIT2-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LY]], i64 [[IDX_CLAMPED]] |
| ; LIMIT2-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; LIMIT2-NEXT: ret i32 [[RES]] |
| ; |
| %idx.clamped = and i64 %idx, 15 |
| |
| %lx = load <16 x i32>, ptr %x |
| %ly = load <16 x i32>, ptr %y |
| %e.0 = extractelement <16 x i32> %lx, i64 %idx.clamped |
| %e.1 = extractelement <16 x i32> %ly, i64 %idx.clamped |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(ptr %x, i64 %idx.0, i64 noundef %idx.1) { |
| ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef( |
| ; CHECK-NEXT: [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]] |
| ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15 |
| ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]] |
| ; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]] |
| ; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] |
| ; CHECK-NEXT: ret i32 [[RES]] |
| ; |
| %idx.0.clamped = and i64 %idx.0, 15 |
| %idx.1.clamped = and i64 %idx.1, 15 |
| |
| %lv = load <16 x i32>, ptr %x |
| %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped |
| %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped |
| %res = add i32 %e.0, %e.1 |
| ret i32 %res |
| } |
| |
| ; Test case from PR51992. |
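| ; Branching on a compare of %ext.idx means %ext.idx cannot be poison in %then, |
| ; so using the masked value as a lane index is safe; the load and extract end |
| ; up dead here and are removed. |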
| define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, ptr %src) { |
| ; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[EXT_IDX]], 99 |
| ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[EXIT:%.*]] |
| ; CHECK: then: |
| ; CHECK-NEXT: br label [[EXIT]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret i8 0 |
| ; |
| entry: |
| %ext.idx = extractelement <8 x i8> %in, i32 0 |
| %ext.idx.i32 = zext i8 %ext.idx to i32 |
| %cmp = icmp ult i8 %ext.idx, 99 |
| br i1 %cmp, label %then, label %exit |
| |
| then: |
| %load = load <16 x i8>, ptr %src, align 16 |
| %and = and i32 %ext.idx.i32, 15 |
| %ext = extractelement <16 x i8> %load, i32 %and |
| br label %exit |
| |
| exit: |
| %p = phi i8 [ 0, %entry ], [ %ext, %then ] |
| ret i8 0 |
| } |
| |
| declare void @use(...) |
| |
| ; Make sure we don't assert. |
| define void @pr69820(ptr %p, i32 %arg) { |
| ; CHECK-LABEL: @pr69820( |
| ; CHECK-NEXT: [[V:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 |
| ; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3 |
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x float> [[V]], i32 [[AND]] |
| ; CHECK-NEXT: call void @use(<4 x float> [[V]], float [[EXT]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %v = load <4 x float>, ptr %p, align 16 |
| %and = and i32 %arg, 3 |
| %ext = extractelement <4 x float> %v, i32 %and |
| call void @use(<4 x float> %v, float %ext) |
| ret void |
| |
| ; uselistorder directives |
| uselistorder <4 x float> %v, { 1, 0 } |
| } |