| ; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s |
| |
| target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" |
| |
| ; We should be able to flatten the loops and turn the two geps into one. |
| ; CHECK-LABEL: test1 |
| define void @test1(i32 %N, ptr %A) { |
| entry: |
| %cmp3 = icmp ult i32 0, %N |
| br i1 %cmp3, label %for.outer.preheader, label %for.end |
| |
| ; CHECK-LABEL: for.outer.preheader: |
| ; CHECK: %flatten.tripcount = mul i32 %N, %N |
| for.outer.preheader: |
| br label %for.inner.preheader |
| |
| ; CHECK-LABEL: for.inner.preheader: |
| ; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| for.inner.preheader: |
| %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ] |
| br label %for.inner |
| |
| ; CHECK-LABEL: for.inner: |
| ; CHECK: store i32 0, ptr %flatten.arrayidx, align 4 |
| ; CHECK: br label %for.outer |
| for.inner: |
| %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ] |
| %mul = mul i32 %i, %N |
| %gep = getelementptr inbounds i32, ptr %A, i32 %mul |
| %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j |
| store i32 0, ptr %arrayidx, align 4 |
| %inc1 = add nuw i32 %j, 1 |
| %cmp2 = icmp ult i32 %inc1, %N |
| br i1 %cmp2, label %for.inner, label %for.outer |
| |
| ; CHECK-LABEL: for.outer: |
| ; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount |
| for.outer: |
| %inc2 = add i32 %i, 1 |
| %cmp1 = icmp ult i32 %inc2, %N |
| br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit |
| |
| for.end.loopexit: |
| br label %for.end |
| |
| for.end: |
| ret void |
| } |
| |
| ; We can flatten, but the flattened gep has to be inserted after the load it |
| ; depends on. |
| ; CHECK-LABEL: test2 |
| define void @test2(i32 %N, ptr %A) { |
| entry: |
| %cmp3 = icmp ult i32 0, %N |
| br i1 %cmp3, label %for.outer.preheader, label %for.end |
| |
| ; CHECK-LABEL: for.outer.preheader: |
| ; CHECK: %flatten.tripcount = mul i32 %N, %N |
| for.outer.preheader: |
| br label %for.inner.preheader |
| |
| ; CHECK-LABEL: for.inner.preheader: |
| ; CHECK-NOT: getelementptr inbounds i32, ptr %ptr, i32 %i |
| for.inner.preheader: |
| %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ] |
| br label %for.inner |
| |
| ; CHECK-LABEL: for.inner: |
| ; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i |
| ; CHECK: store i32 0, ptr %flatten.arrayidx, align 4 |
| ; CHECK: br label %for.outer |
| for.inner: |
| %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ] |
| %ptr = load volatile ptr, ptr %A, align 4 |
| %mul = mul i32 %i, %N |
| %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul |
| %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j |
| store i32 0, ptr %arrayidx, align 4 |
| %inc1 = add nuw i32 %j, 1 |
| %cmp2 = icmp ult i32 %inc1, %N |
| br i1 %cmp2, label %for.inner, label %for.outer |
| |
| ; CHECK-LABEL: for.outer: |
| ; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount |
| for.outer: |
| %inc2 = add i32 %i, 1 |
| %cmp1 = icmp ult i32 %inc2, %N |
| br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit |
| |
| for.end.loopexit: |
| br label %for.end |
| |
| for.end: |
| ret void |
| } |
| |
| ; We can't flatten if the gep offset is smaller than the pointer size. |
| ; CHECK-LABEL: test3 |
| define void @test3(i16 %N, ptr %A) { |
| entry: |
| %cmp3 = icmp ult i16 0, %N |
| br i1 %cmp3, label %for.outer.preheader, label %for.end |
| |
| for.outer.preheader: |
| br label %for.inner.preheader |
| |
| ; CHECK-LABEL: for.inner.preheader: |
| ; CHECK-NOT: getelementptr i32, ptr %A, i16 %i |
| for.inner.preheader: |
| %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ] |
| br label %for.inner |
| |
| ; CHECK-LABEL: for.inner: |
| ; CHECK-NOT: getelementptr i32, ptr %A, i16 %i |
| ; CHECK: br i1 %cmp2, label %for.inner, label %for.outer |
| for.inner: |
| %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ] |
| %mul = mul i16 %i, %N |
| %gep = getelementptr inbounds i32, ptr %A, i16 %mul |
| %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j |
| store i32 0, ptr %arrayidx, align 4 |
| %inc1 = add nuw i16 %j, 1 |
| %cmp2 = icmp ult i16 %inc1, %N |
| br i1 %cmp2, label %for.inner, label %for.outer |
| |
| for.outer: |
| %inc2 = add i16 %i, 1 |
| %cmp1 = icmp ult i16 %inc2, %N |
| br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit |
| |
| for.end.loopexit: |
| br label %for.end |
| |
| for.end: |
| ret void |
| } |