Check in LLVM r95781.
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
new file mode 100644
index 0000000..a0235f7
--- /dev/null
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+%struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] }
+@ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1]
+
+define void @main() {
+entry:
+ br i1 false, label %bb169.i, label %cond_true11
+
+bb169.i: ; preds = %entry
+ ret void
+
+cond_true11: ; preds = %entry
+ %tmp.i32 = load %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2]
+ %tmp3.i35 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2]
+ %tmp.i36 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 2 ; <i8**> [#uses=1]
+ store i8* %tmp3.i35, i8** %tmp.i36
+ store i8* %tmp3.i35, i8** null
+ ret void
+}
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
new file mode 100644
index 0000000..81483cb4
--- /dev/null
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+
+@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
+@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
+@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1]
+
+define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
+entry:
+ %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
+ br label %cond_next489
+
+cond_next489: ; preds = %cond_false, %bb471
+ %j.7.in = load i8* null ; <i8> [#uses=1]
+ %i.8.in = load i8* null ; <i8> [#uses=1]
+ %i.8 = zext i8 %i.8.in to i32 ; <i32> [#uses=4]
+ %j.7 = zext i8 %j.7.in to i32 ; <i32> [#uses=4]
+ %tmp495 = getelementptr [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=2]
+ %tmp496 = load i32* %tmp495 ; <i32> [#uses=2]
+ %tmp502 = load i32* null ; <i32> [#uses=1]
+ %tmp542 = getelementptr [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp543 = load i32* %tmp542 ; <i32> [#uses=1]
+ %tmp548 = ashr i32 0, 0 ; <i32> [#uses=3]
+ %tmp561 = sub i32 0, %tmp496 ; <i32> [#uses=3]
+ %abscond563 = icmp sgt i32 %tmp561, -1 ; <i1> [#uses=1]
+ %abs564 = select i1 %abscond563, i32 %tmp561, i32 0 ; <i32> [#uses=1]
+ %tmp572 = mul i32 %abs564, %tmp543 ; <i32> [#uses=1]
+ %tmp574 = add i32 %tmp572, 0 ; <i32> [#uses=1]
+ %tmp576 = ashr i32 %tmp574, 0 ; <i32> [#uses=7]
+ %tmp579 = icmp eq i32 %tmp548, %tmp576 ; <i1> [#uses=1]
+ br i1 %tmp579, label %bb712, label %cond_next589
+
+cond_next589: ; preds = %cond_next489
+ %tmp605 = getelementptr [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp606 = load i32* %tmp605 ; <i32> [#uses=1]
+ %tmp612 = load i32* null ; <i32> [#uses=1]
+ %tmp629 = load i32* null ; <i32> [#uses=1]
+ %tmp629a = sitofp i32 %tmp629 to double ; <double> [#uses=1]
+ %tmp631 = fmul double %tmp629a, 0.000000e+00 ; <double> [#uses=1]
+ %tmp632 = fadd double 0.000000e+00, %tmp631 ; <double> [#uses=1]
+ %tmp642 = call fastcc i32 @sign( i32 %tmp576, i32 %tmp561 ) ; <i32> [#uses=1]
+ %tmp650 = mul i32 %tmp606, %tmp642 ; <i32> [#uses=1]
+ %tmp656 = mul i32 %tmp650, %tmp612 ; <i32> [#uses=1]
+ %tmp658 = shl i32 %tmp656, 0 ; <i32> [#uses=1]
+ %tmp659 = ashr i32 %tmp658, 6 ; <i32> [#uses=1]
+ %tmp660 = sub i32 0, %tmp659 ; <i32> [#uses=1]
+ %tmp666 = sub i32 %tmp660, %tmp496 ; <i32> [#uses=1]
+ %tmp667 = sitofp i32 %tmp666 to double ; <double> [#uses=2]
+ call void @levrun_linfo_inter( i32 %tmp576, i32 0, i32* null, i32* null )
+ %tmp671 = fmul double %tmp667, %tmp667 ; <double> [#uses=1]
+ %tmp675 = fadd double %tmp671, 0.000000e+00 ; <double> [#uses=1]
+ %tmp678 = fcmp oeq double %tmp632, %tmp675 ; <i1> [#uses=1]
+ br i1 %tmp678, label %cond_true679, label %cond_false693
+
+cond_true679: ; preds = %cond_next589
+ %abscond681 = icmp sgt i32 %tmp548, -1 ; <i1> [#uses=1]
+ %abs682 = select i1 %abscond681, i32 %tmp548, i32 0 ; <i32> [#uses=1]
+ %abscond684 = icmp sgt i32 %tmp576, -1 ; <i1> [#uses=1]
+ %abs685 = select i1 %abscond684, i32 %tmp576, i32 0 ; <i32> [#uses=1]
+ %tmp686 = icmp slt i32 %abs682, %abs685 ; <i1> [#uses=1]
+ br i1 %tmp686, label %cond_next702, label %cond_false689
+
+cond_false689: ; preds = %cond_true679
+ %tmp739 = icmp eq i32 %tmp576, 0 ; <i1> [#uses=1]
+ br i1 %tmp579, label %bb737, label %cond_false708
+
+cond_false693: ; preds = %cond_next589
+ ret i32 0
+
+cond_next702: ; preds = %cond_true679
+ ret i32 0
+
+cond_false708: ; preds = %cond_false689
+ ret i32 0
+
+bb712: ; preds = %cond_next489
+ ret i32 0
+
+bb737: ; preds = %cond_false689
+ br i1 %tmp739, label %cond_next791, label %cond_true740
+
+cond_true740: ; preds = %bb737
+ %tmp761 = call fastcc i32 @sign( i32 %tmp576, i32 0 ) ; <i32> [#uses=1]
+ %tmp780 = load i32* null ; <i32> [#uses=1]
+ %tmp785 = getelementptr [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp786 = load i32* %tmp785 ; <i32> [#uses=1]
+ %tmp781 = mul i32 %tmp780, %tmp761 ; <i32> [#uses=1]
+ %tmp787 = mul i32 %tmp781, %tmp786 ; <i32> [#uses=1]
+ %tmp789 = shl i32 %tmp787, 0 ; <i32> [#uses=1]
+ %tmp790 = ashr i32 %tmp789, 6 ; <i32> [#uses=1]
+ br label %cond_next791
+
+cond_next791: ; preds = %cond_true740, %bb737
+ %ilev.1 = phi i32 [ %tmp790, %cond_true740 ], [ 0, %bb737 ] ; <i32> [#uses=1]
+ %tmp796 = load i32* %tmp495 ; <i32> [#uses=1]
+ %tmp798 = add i32 %tmp796, %ilev.1 ; <i32> [#uses=1]
+ %tmp812 = mul i32 0, %tmp502 ; <i32> [#uses=0]
+ %tmp818 = call fastcc i32 @sign( i32 0, i32 %tmp798 ) ; <i32> [#uses=0]
+ unreachable
+}
+
+declare i32 @sign(i32, i32)
+
+declare void @levrun_linfo_inter(i32, i32, i32*, i32*)
diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
new file mode 100644
index 0000000..83b26d3
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+
+define fastcc i8* @read_sleb128(i8* %p, i32* %val) {
+ br label %bb
+
+bb: ; preds = %bb, %0
+ %p_addr.0 = getelementptr i8* %p, i32 0 ; <i8*> [#uses=1]
+ %tmp2 = load i8* %p_addr.0 ; <i8> [#uses=2]
+ %tmp4.rec = add i32 0, 1 ; <i32> [#uses=1]
+ %tmp4 = getelementptr i8* %p, i32 %tmp4.rec ; <i8*> [#uses=1]
+ %tmp56 = zext i8 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp7 = and i32 %tmp56, 127 ; <i32> [#uses=1]
+ %tmp9 = shl i32 %tmp7, 0 ; <i32> [#uses=1]
+ %tmp11 = or i32 %tmp9, 0 ; <i32> [#uses=1]
+ icmp slt i8 %tmp2, 0 ; <i1>:1 [#uses=1]
+ br i1 %1, label %bb, label %cond_next28
+
+cond_next28: ; preds = %bb
+ store i32 %tmp11, i32* %val
+ ret i8* %tmp4
+}
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
new file mode 100644
index 0000000..33f935e
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: -mattr=+v6 | grep r9
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
+; | grep 35
+
+define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
+newFuncRoot:
+ br label %bb74
+
+bb78.exitStub: ; preds = %bb74
+ store i32 %d2.1, i32* %d2.1.out
+ store i32 %d3.1, i32* %d3.1.out
+ store i32 %d0.1, i32* %d0.1.out
+ store i32 %d1.1, i32* %d1.1.out
+ ret void
+
+bb74: ; preds = %bb26, %newFuncRoot
+ %fp.1.rec = phi i32 [ 0, %newFuncRoot ], [ %tmp71.rec, %bb26 ] ; <i32> [#uses=3]
+ %fm.1.in = phi i32* [ %tmp71, %bb26 ], [ %tmp1011, %newFuncRoot ] ; <i32*> [#uses=1]
+ %d0.1 = phi i32 [ %tmp44, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
+ %d1.1 = phi i32 [ %tmp54, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
+ %d2.1 = phi i32 [ %tmp64, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
+ %d3.1 = phi i32 [ %tmp69, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
+ %fm.1 = load i32* %fm.1.in ; <i32> [#uses=4]
+ icmp eq i32 %fp.1.rec, %tmp8 ; <i1>:0 [#uses=1]
+ br i1 %0, label %bb78.exitStub, label %bb26
+
+bb26: ; preds = %bb74
+ %tmp28 = getelementptr i32** %tmp1, i32 %fp.1.rec ; <i32**> [#uses=1]
+ %tmp30 = load i32** %tmp28 ; <i32*> [#uses=4]
+ %tmp33 = getelementptr i32* %tmp30, i32 %i.0196.0.ph ; <i32*> [#uses=1]
+ %tmp34 = load i32* %tmp33 ; <i32> [#uses=1]
+ %tmp38 = getelementptr i32* %tmp30, i32 %tmp36224 ; <i32*> [#uses=1]
+ %tmp39 = load i32* %tmp38 ; <i32> [#uses=1]
+ %tmp42 = mul i32 %tmp34, %fm.1 ; <i32> [#uses=1]
+ %tmp44 = add i32 %tmp42, %d0.1 ; <i32> [#uses=1]
+ %tmp48 = getelementptr i32* %tmp30, i32 %tmp46223 ; <i32*> [#uses=1]
+ %tmp49 = load i32* %tmp48 ; <i32> [#uses=1]
+ %tmp52 = mul i32 %tmp39, %fm.1 ; <i32> [#uses=1]
+ %tmp54 = add i32 %tmp52, %d1.1 ; <i32> [#uses=1]
+ %tmp58 = getelementptr i32* %tmp30, i32 %tmp56222 ; <i32*> [#uses=1]
+ %tmp59 = load i32* %tmp58 ; <i32> [#uses=1]
+ %tmp62 = mul i32 %tmp49, %fm.1 ; <i32> [#uses=1]
+ %tmp64 = add i32 %tmp62, %d2.1 ; <i32> [#uses=1]
+ %tmp67 = mul i32 %tmp59, %fm.1 ; <i32> [#uses=1]
+ %tmp69 = add i32 %tmp67, %d3.1 ; <i32> [#uses=1]
+ %tmp71.rec = add i32 %fp.1.rec, 1 ; <i32> [#uses=2]
+ %tmp71 = getelementptr i32* %tmp1011, i32 %tmp71.rec ; <i32*> [#uses=1]
+ br label %bb74
+}
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
new file mode 100644
index 0000000..b0953dc
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; PR1257
+
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
+ %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 }
+ %struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 }
+ %struct.c_language_function = type { %struct.stmt_tree_s }
+ %struct.c_switch = type opaque
+ %struct.eh_status = type opaque
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+ %struct.ht_identifier = type { i8*, i32, i32 }
+ %struct.initial_value_struct = type opaque
+ %struct.lang_decl = type { i8 }
+ %struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 }
+ %struct.location_t = type { i8*, i32 }
+ %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, i32, i32, [14 x %struct.rtx_def*] }
+ %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+ %struct.stmt_tree_s = type { %struct.tree_node*, i32 }
+ %struct.temp_slot = type opaque
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+ %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_decl_u1 = type { i64 }
+ %struct.tree_decl_u2 = type { %struct.function* }
+ %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.u = type { [1 x i64] }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type opaque
+ %struct.varray_head_tag = type opaque
+ %union.tree_ann_d = type opaque
+
+
+define void @declspecs_add_type(i32 %spec.1) {
+entry:
+ %spec.1961 = zext i32 %spec.1 to i64 ; <i64> [#uses=1]
+ %spec.1961.adj = shl i64 %spec.1961, 32 ; <i64> [#uses=1]
+ %spec.1961.adj.ins = or i64 %spec.1961.adj, 0 ; <i64> [#uses=2]
+ %tmp10959 = lshr i64 %spec.1961.adj.ins, 32 ; <i64> [#uses=2]
+ %tmp1920 = inttoptr i64 %tmp10959 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1]
+ %tmp21 = getelementptr %struct.tree_common* %tmp1920, i32 0, i32 3 ; <i8*> [#uses=1]
+ %tmp2122 = bitcast i8* %tmp21 to i32* ; <i32*> [#uses=1]
+ br i1 false, label %cond_next53, label %cond_true
+
+cond_true: ; preds = %entry
+ ret void
+
+cond_next53: ; preds = %entry
+ br i1 false, label %cond_true63, label %cond_next689
+
+cond_true63: ; preds = %cond_next53
+ ret void
+
+cond_next689: ; preds = %cond_next53
+ br i1 false, label %cond_false841, label %bb743
+
+bb743: ; preds = %cond_next689
+ ret void
+
+cond_false841: ; preds = %cond_next689
+ br i1 false, label %cond_true851, label %cond_true918
+
+cond_true851: ; preds = %cond_false841
+ tail call void @lookup_name( )
+ br i1 false, label %bb866, label %cond_next856
+
+cond_next856: ; preds = %cond_true851
+ ret void
+
+bb866: ; preds = %cond_true851
+ %tmp874 = load i32* %tmp2122 ; <i32> [#uses=1]
+ %tmp876877 = trunc i32 %tmp874 to i8 ; <i8> [#uses=1]
+ icmp eq i8 %tmp876877, 1 ; <i1>:0 [#uses=1]
+ br i1 %0, label %cond_next881, label %cond_true878
+
+cond_true878: ; preds = %bb866
+ unreachable
+
+cond_next881: ; preds = %bb866
+ %tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier* ; <%struct.tree_identifier*> [#uses=1]
+ %tmp887 = getelementptr %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1]
+ %tmp888 = load i8** %tmp887 ; <i8*> [#uses=1]
+ tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 )
+ ret void
+
+cond_true918: ; preds = %cond_false841
+ %tmp920957 = trunc i64 %spec.1961.adj.ins to i32 ; <i32> [#uses=0]
+ ret void
+}
+
+declare void @error(i32, ...)
+
+declare void @lookup_name()
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
new file mode 100644
index 0000000..d741112
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
@@ -0,0 +1,947 @@
+; RUN: llc < %s -march=arm
+; PR1266
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "arm-linux-gnueabi"
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
+ %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+ %struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] }
+ %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+ %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+ %struct.addr_diff_vec_flags = type { i8, i8, i8, i8 }
+ %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 }
+ %struct.attribute_spec = type { i8*, i32, i32, i8, i8, i8, %struct.tree_node* (%struct.tree_node**, %struct.tree_node*, %struct.tree_node*, i32, i8*)* }
+ %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+ %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
+ %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+ %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+ %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+ %struct.cgraph_edge = type { %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.tree_node*, i8*, i8* }
+ %struct.cgraph_global_info = type { %struct.cgraph_node*, i32, i8 }
+ %struct.cgraph_local_info = type { i32, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.cgraph_node = type { %struct.tree_node*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, i8*, %struct.cgraph_local_info, %struct.cgraph_global_info, %struct.cgraph_rtl_info, i32, i8, i8, i8, i8, i8 }
+ %struct.cgraph_rtl_info = type { i32, i8, i8 }
+ %struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.cselib_val_struct = type opaque
+ %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
+ %struct.def_operand_ptr = type { %struct.tree_node** }
+ %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+ %struct.diagnostic_context = type { %struct.pretty_printer*, [8 x i32], i8, i8, i8, void (%struct.diagnostic_context*, %struct.diagnostic_info*)*, void (%struct.diagnostic_context*, %struct.diagnostic_info*)*, void (i8*, i8**)*, %struct.tree_node*, i32, i32 }
+ %struct.diagnostic_info = type { %struct.text_info, %struct.location_t, i32 }
+ %struct.die_struct = type opaque
+ %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+ %struct.edge_def_insns = type { %struct.rtx_def* }
+ %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+ %struct.eh_status = type opaque
+ %struct.elt_list = type opaque
+ %struct.elt_t = type { %struct.tree_node*, %struct.tree_node* }
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+ %struct.et_node = type opaque
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+ %struct.ggc_root_tab = type { i8*, i32, i32, void (i8*)*, void (i8*)* }
+ %struct.gimplify_ctx = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.varray_head_tag*, %struct.htab*, i32, i8, i8 }
+ %struct.gimplify_init_ctor_preeval_data = type { %struct.tree_node*, i32 }
+ %struct.ht_identifier = type { i8*, i32, i32 }
+ %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
+ %struct.initial_value_struct = type opaque
+ %struct.lang_decl = type opaque
+ %struct.lang_hooks = type { i8*, i32, i32 (i32)*, i32 (i32, i8**)*, void (%struct.diagnostic_context*)*, i32 (i32, i8*, i32)*, i8 (i8*, i32) zeroext *, i8 (i8**) zeroext *, i8 () zeroext *, void ()*, void ()*, void (i32)*, void ()*, i64 (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.rtx_def* (%struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.rtx_def**)*, i32 (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, i32 (%struct.rtx_def*, %struct.tree_node*)*, void (%struct.tree_node*)*, i8 (%struct.tree_node*) zeroext *, %struct.tree_node* (%struct.tree_node*)*, void (%struct.tree_node*)*, void (%struct.tree_node*)*, i8 () zeroext *, i8, i8, void ()*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, i8* (%struct.tree_node*, i32)*, i32 (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, void (%struct.diagnostic_context*, i8*)*, %struct.tree_node* (%struct.tree_node*)*, i64 (i64)*, %struct.attribute_spec*, %struct.attribute_spec*, %struct.attribute_spec*, i32 (%struct.tree_node*)*, %struct.lang_hooks_for_functions, %struct.lang_hooks_for_tree_inlining, %struct.lang_hooks_for_callgraph, %struct.lang_hooks_for_tree_dump, %struct.lang_hooks_for_decls, %struct.lang_hooks_for_types, i32 (%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)*, %struct.tree_node* (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (i8*, %struct.tree_node*, i32, i32, i8*, %struct.tree_node*)* }
+ %struct.lang_hooks_for_callgraph = type { %struct.tree_node* (%struct.tree_node**, i32*, %struct.tree_node*)*, void (%struct.tree_node*)* }
+ %struct.lang_hooks_for_decls = type { i32 ()*, void (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* ()*, i8 (%struct.tree_node*) zeroext *, void ()*, void (%struct.tree_node*)*, i8 (%struct.tree_node*) zeroext *, i8* (%struct.tree_node*)* }
+ %struct.lang_hooks_for_functions = type { void (%struct.function*)*, void (%struct.function*)*, void (%struct.function*)*, void (%struct.function*)*, i8 (%struct.tree_node*) zeroext * }
+ %struct.lang_hooks_for_tree_dump = type { i8 (i8*, %struct.tree_node*) zeroext *, i32 (%struct.tree_node*)* }
+ %struct.lang_hooks_for_tree_inlining = type { %struct.tree_node* (%struct.tree_node**, i32*, %struct.tree_node* (%struct.tree_node**, i32*, i8*)*, i8*, %struct.pointer_set_t*)*, i32 (%struct.tree_node**)*, i32 (%struct.tree_node*)*, %struct.tree_node* (i8*, %struct.tree_node*)*, i32 (%struct.tree_node*, %struct.tree_node*)*, i32 (%struct.tree_node*)*, i8 (%struct.tree_node*, %struct.tree_node*) zeroext *, i32 (%struct.tree_node*)*, void (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32)* }
+ %struct.lang_hooks_for_types = type { %struct.tree_node* (i32)*, %struct.tree_node* (i32, i32)*, %struct.tree_node* (i32, i32)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* (i32, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, void (%struct.tree_node*, i8*)*, void (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, i8 }
+ %struct.lang_type = type opaque
+ %struct.language_function = type opaque
+ %struct.location_t = type { i8*, i32 }
+ %struct.loop = type opaque
+ %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, i32, i32, [14 x %struct.rtx_def*] }
+ %struct.mem_attrs = type { i64, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32 }
+ %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+ %struct.output_buffer = type { %struct.obstack, %struct.FILE*, i32, [128 x i8] }
+ %struct.phi_arg_d = type { %struct.tree_node*, i8 }
+ %struct.pointer_set_t = type opaque
+ %struct.pretty_printer = type { %struct.output_buffer*, i8*, i32, i32, i32, i32, i32, i8 (%struct.pretty_printer*, %struct.text_info*) zeroext *, i8, i8 }
+ %struct.ptr_info_def = type { i8, %struct.bitmap_head_def*, %struct.tree_node* }
+ %struct.real_value = type { i8, [3 x i8], [5 x i32] }
+ %struct.reg_attrs = type { %struct.tree_node*, i64 }
+ %struct.reg_info_def = type opaque
+ %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+ %struct.rtunion = type { i32 }
+ %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+ %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
+ %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
+ %struct.temp_slot = type opaque
+ %struct.text_info = type { i8*, i8**, i32 }
+ %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+ %struct.tree_ann_d = type { %struct.stmt_ann_d }
+ %struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree }
+ %struct.tree_block = type { %struct.tree_common, i8, [3 x i8], %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+ %struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+ %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_decl_u1 = type { i64 }
+ %struct.tree_decl_u1_a = type { i32 }
+ %struct.tree_decl_u2 = type { %struct.function* }
+ %struct.tree_exp = type { %struct.tree_common, %struct.location_t*, i32, %struct.tree_node*, [1 x %struct.tree_node*] }
+ %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier }
+ %struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi }
+ %struct.tree_int_cst_lowhi = type { i64, i64 }
+ %struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] }
+ %struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* }
+ %struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, %struct.ptr_info_def*, %struct.tree_node*, i8* }
+ %struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* }
+ %struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* }
+ %struct.tree_stmt_iterator = type { %struct.tree_statement_list_node*, %struct.tree_node* }
+ %struct.tree_string = type { %struct.tree_common, i32, [1 x i8] }
+ %struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
+ %struct.tree_type_symtab = type { i32 }
+ %struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 }
+ %struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+ %struct.tree_vector = type { %struct.tree_common, %struct.tree_node* }
+ %struct.u = type { [1 x i64] }
+ %struct.use_operand_ptr = type { %struct.tree_node** }
+ %struct.use_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+ %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
+ %struct.v_may_def_optype_d = type { i32, [1 x %struct.elt_t] }
+ %struct.v_must_def_optype_d = type { i32, [1 x %struct.elt_t] }
+ %struct.value_set = type opaque
+ %struct.var_ann_d = type { %struct.tree_ann_common_d, i8, i8, %struct.tree_node*, %struct.varray_head_tag*, i32, i32, i32, %struct.tree_node*, %struct.tree_node* }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type opaque
+ %struct.varray_data = type { [1 x i64] }
+ %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+ %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
+@gt_pch_rs_gt_gimplify_h = external global [2 x %struct.ggc_root_tab] ; <[2 x %struct.ggc_root_tab]*> [#uses=0]
+@tmp_var_id_num = external global i32 ; <i32*> [#uses=0]
+@gt_ggc_r_gt_gimplify_h = external global [1 x %struct.ggc_root_tab] ; <[1 x %struct.ggc_root_tab]*> [#uses=0]
+@__FUNCTION__.19956 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
+@str = external global [42 x i8] ; <[42 x i8]*> [#uses=1]
+@__FUNCTION__.19974 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
+@gimplify_ctxp = external global %struct.gimplify_ctx* ; <%struct.gimplify_ctx**> [#uses=0]
+@cl_pf_opts = external global %struct.cl_perfunc_opts ; <%struct.cl_perfunc_opts*> [#uses=0]
+@__FUNCTION__.20030 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
+@__FUNCTION__.20099 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
+@global_trees = external global [47 x %struct.tree_node*] ; <[47 x %struct.tree_node*]*> [#uses=0]
+@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=2]
+@current_function_decl = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=0]
+@str1 = external global [2 x i8] ; <[2 x i8]*> [#uses=0]
+@str2 = external global [7 x i8] ; <[7 x i8]*> [#uses=0]
+@__FUNCTION__.20151 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.20221 = external global [9 x i8] ; <[9 x i8]*> [#uses=0]
+@tree_code_length = external global [0 x i8] ; <[0 x i8]*> [#uses=0]
+@__FUNCTION__.20435 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
+@__FUNCTION__.20496 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@cfun = external global %struct.function* ; <%struct.function**> [#uses=0]
+@__FUNCTION__.20194 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
+@__FUNCTION__.19987 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.20532 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.20583 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.20606 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
+@__FUNCTION__.20644 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
+@__FUNCTION__.20681 = external global [13 x i8] ; <[13 x i8]*> [#uses=0]
+@__FUNCTION__.20700 = external global [13 x i8] ; <[13 x i8]*> [#uses=0]
+@__FUNCTION__.21426 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
+@__FUNCTION__.21471 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
+@__FUNCTION__.21962 = external global [27 x i8] ; <[27 x i8]*> [#uses=0]
+@__FUNCTION__.22992 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.23735 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
+@lang_hooks = external global %struct.lang_hooks ; <%struct.lang_hooks*> [#uses=0]
+@__FUNCTION__.27383 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
+@__FUNCTION__.20776 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.10672 = external global [9 x i8] ; <[9 x i8]*> [#uses=0]
+@str3 = external global [47 x i8] ; <[47 x i8]*> [#uses=0]
+@str4 = external global [7 x i8] ; <[7 x i8]*> [#uses=0]
+@__FUNCTION__.20065 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
+@__FUNCTION__.23256 = external global [16 x i8] ; <[16 x i8]*> [#uses=0]
+@__FUNCTION__.23393 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.20043 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.20729 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@__FUNCTION__.20563 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
+@__FUNCTION__.10663 = external global [10 x i8] ; <[10 x i8]*> [#uses=0]
+@__FUNCTION__.20367 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.20342 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
+@input_location = external global %struct.location_t ; <%struct.location_t*> [#uses=0]
+@__FUNCTION__.24510 = external global [27 x i8] ; <[27 x i8]*> [#uses=0]
+@__FUNCTION__.25097 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
+@__FUNCTION__.24705 = external global [26 x i8] ; <[26 x i8]*> [#uses=0]
+@str5 = external global [2 x i8] ; <[2 x i8]*> [#uses=0]
+@__FUNCTION__.25136 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.24450 = external global [31 x i8] ; <[31 x i8]*> [#uses=0]
+@implicit_built_in_decls = external global [471 x %struct.tree_node*] ; <[471 x %struct.tree_node*]*> [#uses=0]
+@__FUNCTION__.24398 = external global [31 x i8] ; <[31 x i8]*> [#uses=0]
+@__FUNCTION__.26156 = external global [14 x i8] ; <[14 x i8]*> [#uses=1]
+@unknown_location = external global %struct.location_t ; <%struct.location_t*> [#uses=0]
+@__FUNCTION__.23038 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@str6 = external global [43 x i8] ; <[43 x i8]*> [#uses=0]
+@__FUNCTION__.25476 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.22136 = external global [20 x i8] ; <[20 x i8]*> [#uses=1]
+@__FUNCTION__.21997 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@__FUNCTION__.21247 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@built_in_decls = external global [471 x %struct.tree_node*] ; <[471 x %struct.tree_node*]*> [#uses=0]
+@__FUNCTION__.21924 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.21861 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
+@global_dc = external global %struct.diagnostic_context* ; <%struct.diagnostic_context**> [#uses=0]
+@__FUNCTION__.25246 = external global [32 x i8] ; <[32 x i8]*> [#uses=0]
+@str7 = external global [4 x i8] ; <[4 x i8]*> [#uses=0]
+@stderr = external global %struct.FILE* ; <%struct.FILE**> [#uses=0]
+@str8 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
+@str9 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
+@__FUNCTION__.27653 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.27322 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.27139 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
+@__FUNCTION__.22462 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@str10 = external global [6 x i8] ; <[6 x i8]*> [#uses=0]
+@__FUNCTION__.25389 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.25650 = external global [18 x i8] ; <[18 x i8]*> [#uses=0]
+@str11 = external global [32 x i8] ; <[32 x i8]*> [#uses=0]
+@str12 = external global [3 x i8] ; <[3 x i8]*> [#uses=0]
+@str13 = external global [44 x i8] ; <[44 x i8]*> [#uses=0]
+@__FUNCTION__.27444 = external global [14 x i8] ; <[14 x i8]*> [#uses=0]
+@timevar_enable = external global i8 ; <i8*> [#uses=0]
+@__FUNCTION__.27533 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@flag_instrument_function_entry_exit = external global i32 ; <i32*> [#uses=0]
+@__FUNCTION__.25331 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@__FUNCTION__.20965 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@str14 = external global [12 x i8] ; <[12 x i8]*> [#uses=0]
+@__FUNCTION__.26053 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.26004 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
+@str15 = external global [8 x i8] ; <[8 x i8]*> [#uses=0]
+@__FUNCTION__.21584 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+@str16 = external global [12 x i8] ; <[12 x i8]*> [#uses=0]
+@__FUNCTION__.25903 = external global [28 x i8] ; <[28 x i8]*> [#uses=0]
+@__FUNCTION__.22930 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
+@__FUNCTION__.23832 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@str17 = external global [6 x i8] ; <[6 x i8]*> [#uses=0]
+@__FUNCTION__.24620 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
+@__FUNCTION__.24582 = external global [30 x i8] ; <[30 x i8]*> [#uses=0]
+@__FUNCTION__.21382 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+@__FUNCTION__.21117 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
+
+
+declare void @push_gimplify_context()
+
+declare i32 @gimple_tree_hash(i8*)
+
+declare i32 @iterative_hash_expr(%struct.tree_node*, i32)
+
+declare i32 @gimple_tree_eq(i8*, i8*)
+
+declare i32 @operand_equal_p(%struct.tree_node*, %struct.tree_node*, i32)
+
+declare void @fancy_abort(i8*, i32, i8*)
+
+declare i8* @xcalloc(i32, i32)
+
+declare %struct.htab* @htab_create(i32, i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*)
+
+declare void @free(i8*)
+
+declare void @gimple_push_bind_expr(%struct.tree_node*)
+
+declare void @gimple_pop_bind_expr()
+
+declare %struct.tree_node* @gimple_current_bind_expr()
+
+declare fastcc void @gimple_push_condition()
+
+declare %struct.tree_node* @create_artificial_label()
+
+declare %struct.tree_node* @build_decl_stat(i32, %struct.tree_node*, %struct.tree_node*)
+
+declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*)
+
+declare %struct.tree_node* @create_tmp_var_name(i8*)
+
+declare i32 @strlen(i8*)
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+declare %struct.tree_node* @get_identifier(i8*)
+
+declare %struct.tree_node* @create_tmp_var_raw(%struct.tree_node*, i8*)
+
+declare %struct.tree_node* @build_qualified_type(%struct.tree_node*, i32)
+
+declare i8* @get_name(%struct.tree_node*)
+
+declare void @tree_operand_check_failed(i32, i32, i8*, i32, i8*)
+
+declare void @tree_check_failed(%struct.tree_node*, i8*, i32, i8*, ...)
+
+declare void @declare_tmp_vars(%struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @nreverse(%struct.tree_node*)
+
+declare void @gimple_add_tmp_var(%struct.tree_node*)
+
+declare void @record_vars(%struct.tree_node*)
+
+declare %struct.tree_node* @create_tmp_var(%struct.tree_node*, i8*)
+
+declare void @pop_gimplify_context(%struct.tree_node*)
+
+declare void @htab_delete(%struct.htab*)
+
+declare fastcc void @annotate_one_with_locus(%struct.tree_node*, i32, i32)
+
+declare void @annotate_with_locus(%struct.tree_node*, i32, i32)
+
+declare %struct.tree_node* @mostly_copy_tree_r(%struct.tree_node**, i32*, i8*)
+
+declare %struct.tree_node* @copy_tree_r(%struct.tree_node**, i32*, i8*)
+
+declare %struct.tree_node* @mark_decls_volatile_r(%struct.tree_node**, i32*, i8*)
+
+declare %struct.tree_node* @copy_if_shared_r(%struct.tree_node**, i32*, i8*)
+
+declare %struct.tree_node* @walk_tree(%struct.tree_node**, %struct.tree_node* (%struct.tree_node**, i32*, i8*)*, i8*, %struct.pointer_set_t*)
+
+declare %struct.tree_node* @unmark_visited_r(%struct.tree_node**, i32*, i8*)
+
+declare fastcc void @unshare_body(%struct.tree_node**, %struct.tree_node*)
+
+declare %struct.cgraph_node* @cgraph_node(%struct.tree_node*)
+
+declare fastcc void @unvisit_body(%struct.tree_node**, %struct.tree_node*)
+
+declare void @unshare_all_trees(%struct.tree_node*)
+
+declare %struct.tree_node* @unshare_expr(%struct.tree_node*)
+
+declare %struct.tree_node* @build_and_jump(%struct.tree_node**)
+
+declare %struct.tree_node* @build1_stat(i32, %struct.tree_node*, %struct.tree_node*)
+
+declare i32 @compare_case_labels(i8*, i8*)
+
+declare i32 @tree_int_cst_compare(%struct.tree_node*, %struct.tree_node*)
+
+declare void @sort_case_labels(%struct.tree_node*)
+
+declare void @tree_vec_elt_check_failed(i32, i32, i8*, i32, i8*)
+
+declare void @qsort(i8*, i32, i32, i32 (i8*, i8*)*)
+
+declare %struct.tree_node* @force_labels_r(%struct.tree_node**, i32*, i8*)
+
+declare fastcc void @canonicalize_component_ref(%struct.tree_node**)
+
+declare %struct.tree_node* @get_unwidened(%struct.tree_node*, %struct.tree_node*)
+
+declare fastcc void @maybe_with_size_expr(%struct.tree_node**)
+
+declare %struct.tree_node* @substitute_placeholder_in_expr(%struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @build2_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare fastcc %struct.tree_node* @gimple_boolify(%struct.tree_node*)
+
+declare %struct.tree_node* @convert(%struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @gimplify_init_ctor_preeval_1(%struct.tree_node**, i32*, i8*)
+
+declare i64 @get_alias_set(%struct.tree_node*)
+
+declare i32 @alias_sets_conflict_p(i64, i64)
+
+declare fastcc i8 @cpt_same_type(%struct.tree_node*, %struct.tree_node*) zeroext
+
+declare %struct.tree_node* @check_pointer_types_r(%struct.tree_node**, i32*, i8*)
+
+declare %struct.tree_node* @voidify_wrapper_expr(%struct.tree_node*, %struct.tree_node*)
+
+declare i32 @integer_zerop(%struct.tree_node*)
+
+declare fastcc void @append_to_statement_list_1(%struct.tree_node*, %struct.tree_node**)
+
+declare %struct.tree_node* @alloc_stmt_list()
+
+declare void @tsi_link_after(%struct.tree_stmt_iterator*, %struct.tree_node*, i32)
+
+declare void @append_to_statement_list_force(%struct.tree_node*, %struct.tree_node**)
+
+declare void @append_to_statement_list(%struct.tree_node*, %struct.tree_node**)
+
+declare fastcc %struct.tree_node* @shortcut_cond_r(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**)
+
+declare %struct.tree_node* @build3_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare fastcc %struct.tree_node* @shortcut_cond_expr(%struct.tree_node*)
+
+declare %struct.tree_node* @expr_last(%struct.tree_node*)
+
+declare i8 @block_may_fallthru(%struct.tree_node*) zeroext
+
+declare fastcc void @gimple_pop_condition(%struct.tree_node**)
+
+declare %struct.tree_node* @gimple_build_eh_filter(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare void @annotate_all_with_locus(%struct.tree_node**, i32, i32)
+
+declare fastcc %struct.tree_node* @internal_get_tmp_var(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
+
+define i32 @gimplify_expr(%struct.tree_node** %expr_p, %struct.tree_node** %pre_p, %struct.tree_node** %post_p, i8 (%struct.tree_node*) zeroext * %gimple_test_f, i32 %fallback) {
+entry:
+ %internal_post = alloca %struct.tree_node*, align 4 ; <%struct.tree_node**> [#uses=2]
+ %pre_p_addr.0 = select i1 false, %struct.tree_node** null, %struct.tree_node** %pre_p ; <%struct.tree_node**> [#uses=7]
+ %post_p_addr.0 = select i1 false, %struct.tree_node** %internal_post, %struct.tree_node** %post_p ; <%struct.tree_node**> [#uses=7]
+ br i1 false, label %bb277, label %bb191
+
+bb191: ; preds = %entry
+ ret i32 0
+
+bb277: ; preds = %entry
+ %tmp283 = call i32 null( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0 ) ; <i32> [#uses=1]
+ switch i32 %tmp283, label %bb7478 [
+ i32 0, label %cond_next289
+ i32 -1, label %cond_next298
+ ]
+
+cond_next289: ; preds = %bb277
+ ret i32 0
+
+cond_next298: ; preds = %bb277
+ switch i32 0, label %bb7444 [
+ i32 24, label %bb7463
+ i32 25, label %bb7463
+ i32 26, label %bb7463
+ i32 27, label %bb7463
+ i32 28, label %bb7463
+ i32 33, label %bb4503
+ i32 39, label %bb397
+ i32 40, label %bb5650
+ i32 41, label %bb4339
+ i32 42, label %bb4350
+ i32 43, label %bb4350
+ i32 44, label %bb319
+ i32 45, label %bb397
+ i32 46, label %bb6124
+ i32 47, label %bb7463
+ i32 49, label %bb5524
+ i32 50, label %bb1283
+ i32 51, label %bb1289
+ i32 52, label %bb1289
+ i32 53, label %bb5969
+ i32 54, label %bb408
+ i32 56, label %bb5079
+ i32 57, label %bb428
+ i32 59, label %bb5965
+ i32 74, label %bb4275
+ i32 75, label %bb4275
+ i32 76, label %bb4275
+ i32 77, label %bb4275
+ i32 91, label %bb1296
+ i32 92, label %bb1296
+ i32 96, label %bb1322
+ i32 112, label %bb2548
+ i32 113, label %bb2548
+ i32 115, label %bb397
+ i32 116, label %bb5645
+ i32 117, label %bb1504
+ i32 121, label %bb397
+ i32 122, label %bb397
+ i32 123, label %bb313
+ i32 124, label %bb313
+ i32 125, label %bb313
+ i32 126, label %bb313
+ i32 127, label %bb2141
+ i32 128, label %cond_next5873
+ i32 129, label %cond_next5873
+ i32 130, label %bb4536
+ i32 131, label %bb5300
+ i32 132, label %bb5170
+ i32 133, label %bb5519
+ i32 134, label %bb5091
+ i32 135, label %bb5083
+ i32 136, label %bb5087
+ i32 137, label %bb5382
+ i32 139, label %bb7463
+ i32 140, label %bb7463
+ i32 142, label %bb5974
+ i32 143, label %bb6049
+ i32 147, label %bb6296
+ i32 151, label %cond_next6474
+ ]
+
+bb313: ; preds = %cond_next298, %cond_next298, %cond_next298, %cond_next298
+ ret i32 0
+
+bb319: ; preds = %cond_next298
+ ret i32 0
+
+bb397: ; preds = %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298
+ ret i32 0
+
+bb408: ; preds = %cond_next298
+ %tmp413 = call fastcc i32 @gimplify_cond_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, %struct.tree_node* null, i32 %fallback ) ; <i32> [#uses=0]
+ ret i32 0
+
+bb428: ; preds = %cond_next298
+ ret i32 0
+
+bb1283: ; preds = %cond_next298
+ ret i32 0
+
+bb1289: ; preds = %cond_next298, %cond_next298
+ ret i32 0
+
+bb1296: ; preds = %cond_next298, %cond_next298
+ ret i32 0
+
+bb1322: ; preds = %cond_next298
+ ret i32 0
+
+bb1504: ; preds = %cond_next298
+ ret i32 0
+
+bb2141: ; preds = %cond_next298
+ ret i32 0
+
+bb2548: ; preds = %cond_next298, %cond_next298
+ %tmp2554 = load %struct.tree_node** %expr_p ; <%struct.tree_node*> [#uses=2]
+ %tmp2562 = and i32 0, 255 ; <i32> [#uses=1]
+ %tmp2569 = add i8 0, -4 ; <i8> [#uses=1]
+ icmp ugt i8 %tmp2569, 5 ; <i1>:0 [#uses=2]
+ %tmp2587 = load i8* null ; <i8> [#uses=1]
+ icmp eq i8 %tmp2587, 0 ; <i1>:1 [#uses=2]
+ %tmp2607 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=2]
+ br i1 false, label %bb2754, label %cond_next2617
+
+cond_next2617: ; preds = %bb2548
+ ret i32 0
+
+bb2754: ; preds = %bb2548
+ br i1 %0, label %cond_true2780, label %cond_next2783
+
+cond_true2780: ; preds = %bb2754
+ call void @tree_class_check_failed( %struct.tree_node* %tmp2554, i32 9, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
+ unreachable
+
+cond_next2783: ; preds = %bb2754
+ %tmp2825 = and i32 0, 255 ; <i32> [#uses=1]
+ %tmp2829 = load i32* null ; <i32> [#uses=1]
+ %tmp28292830 = trunc i32 %tmp2829 to i8 ; <i8> [#uses=1]
+ %tmp2832 = add i8 %tmp28292830, -4 ; <i8> [#uses=1]
+ icmp ugt i8 %tmp2832, 5 ; <i1>:2 [#uses=1]
+ icmp eq i8 0, 0 ; <i1>:3 [#uses=1]
+ %tmp28652866 = bitcast %struct.tree_node* %tmp2607 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=1]
+ %tmp2868 = getelementptr %struct.tree_exp* %tmp28652866, i32 0, i32 4, i32 0 ; <%struct.tree_node**> [#uses=1]
+ %tmp2870 = load %struct.tree_node** %tmp2868 ; <%struct.tree_node*> [#uses=1]
+ br i1 %1, label %cond_true2915, label %cond_next2927
+
+cond_true2915: ; preds = %cond_next2783
+ unreachable
+
+cond_next2927: ; preds = %cond_next2783
+ %tmp2938 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=1]
+ %tmp2944 = load i32* null ; <i32> [#uses=1]
+ %tmp2946 = and i32 %tmp2944, 255 ; <i32> [#uses=1]
+ %tmp2949 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp2946 ; <i32*> [#uses=1]
+ %tmp2950 = load i32* %tmp2949 ; <i32> [#uses=1]
+ icmp eq i32 %tmp2950, 2 ; <i1>:4 [#uses=1]
+ br i1 %4, label %cond_next2954, label %cond_true2951
+
+cond_true2951: ; preds = %cond_next2927
+ call void @tree_class_check_failed( %struct.tree_node* %tmp2938, i32 2, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
+ unreachable
+
+cond_next2954: ; preds = %cond_next2927
+ br i1 %0, label %cond_true2991, label %cond_next2994
+
+cond_true2991: ; preds = %cond_next2954
+ unreachable
+
+cond_next2994: ; preds = %cond_next2954
+ br i1 %1, label %cond_true3009, label %cond_next3021
+
+cond_true3009: ; preds = %cond_next2994
+ call void @tree_operand_check_failed( i32 0, i32 %tmp2562, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
+ unreachable
+
+cond_next3021: ; preds = %cond_next2994
+ br i1 %2, label %cond_true3044, label %cond_next3047
+
+cond_true3044: ; preds = %cond_next3021
+ call void @tree_class_check_failed( %struct.tree_node* %tmp2607, i32 9, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
+ unreachable
+
+cond_next3047: ; preds = %cond_next3021
+ br i1 %3, label %cond_true3062, label %cond_next3074
+
+cond_true3062: ; preds = %cond_next3047
+ call void @tree_operand_check_failed( i32 0, i32 %tmp2825, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
+ unreachable
+
+cond_next3074: ; preds = %cond_next3047
+ %tmp3084 = getelementptr %struct.tree_node* %tmp2870, i32 0, i32 0, i32 0, i32 1 ; <%struct.tree_node**> [#uses=1]
+ %tmp3085 = load %struct.tree_node** %tmp3084 ; <%struct.tree_node*> [#uses=1]
+ %tmp31043105 = bitcast %struct.tree_node* %tmp3085 to %struct.tree_type* ; <%struct.tree_type*> [#uses=1]
+ %tmp3106 = getelementptr %struct.tree_type* %tmp31043105, i32 0, i32 6 ; <i16*> [#uses=1]
+ %tmp31063107 = bitcast i16* %tmp3106 to i32* ; <i32*> [#uses=1]
+ %tmp3108 = load i32* %tmp31063107 ; <i32> [#uses=1]
+ xor i32 %tmp3108, 0 ; <i32>:5 [#uses=1]
+ %tmp81008368 = and i32 %5, 65024 ; <i32> [#uses=1]
+ icmp eq i32 %tmp81008368, 0 ; <i1>:6 [#uses=1]
+ br i1 %6, label %cond_next3113, label %bb3351
+
+cond_next3113: ; preds = %cond_next3074
+ ret i32 0
+
+bb3351: ; preds = %cond_next3074
+ %tmp3354 = call i8 @tree_ssa_useless_type_conversion( %struct.tree_node* %tmp2554 ) zeroext ; <i8> [#uses=1]
+ icmp eq i8 %tmp3354, 0 ; <i1>:7 [#uses=1]
+ %tmp3424 = load i32* null ; <i32> [#uses=1]
+ br i1 %7, label %cond_next3417, label %cond_true3356
+
+cond_true3356: ; preds = %bb3351
+ ret i32 0
+
+cond_next3417: ; preds = %bb3351
+ br i1 false, label %cond_true3429, label %cond_next4266
+
+cond_true3429: ; preds = %cond_next3417
+ %tmp3443 = and i32 %tmp3424, 255 ; <i32> [#uses=0]
+ ret i32 0
+
+cond_next4266: ; preds = %cond_next3417
+ %tmp4268 = load %struct.tree_node** %expr_p ; <%struct.tree_node*> [#uses=1]
+ icmp eq %struct.tree_node* %tmp4268, null ; <i1>:8 [#uses=1]
+ br i1 %8, label %bb4275, label %bb7463
+
+bb4275: ; preds = %cond_next4266, %cond_next298, %cond_next298, %cond_next298, %cond_next298
+ %tmp4289 = and i32 0, 255 ; <i32> [#uses=2]
+ %tmp4292 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp4289 ; <i32*> [#uses=1]
+ %tmp4293 = load i32* %tmp4292 ; <i32> [#uses=1]
+ %tmp42934294 = trunc i32 %tmp4293 to i8 ; <i8> [#uses=1]
+ %tmp4296 = add i8 %tmp42934294, -4 ; <i8> [#uses=1]
+ icmp ugt i8 %tmp4296, 5 ; <i1>:9 [#uses=1]
+ br i1 %9, label %cond_true4297, label %cond_next4300
+
+cond_true4297: ; preds = %bb4275
+ unreachable
+
+cond_next4300: ; preds = %bb4275
+ %tmp4314 = load i8* null ; <i8> [#uses=1]
+ icmp eq i8 %tmp4314, 0 ; <i1>:10 [#uses=1]
+ br i1 %10, label %cond_true4315, label %cond_next4327
+
+cond_true4315: ; preds = %cond_next4300
+ call void @tree_operand_check_failed( i32 0, i32 %tmp4289, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 3997, i8* getelementptr ([14 x i8]* @__FUNCTION__.26156, i32 0, i32 0) )
+ unreachable
+
+cond_next4327: ; preds = %cond_next4300
+ %tmp4336 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, i8 (%struct.tree_node*) zeroext * @is_gimple_val, i32 1 ) ; <i32> [#uses=0]
+ ret i32 0
+
+bb4339: ; preds = %cond_next298
+ ret i32 0
+
+bb4350: ; preds = %cond_next298, %cond_next298
+ ret i32 0
+
+bb4503: ; preds = %cond_next298
+ ret i32 0
+
+bb4536: ; preds = %cond_next298
+ ret i32 0
+
+bb5079: ; preds = %cond_next298
+ ret i32 0
+
+bb5083: ; preds = %cond_next298
+ ret i32 0
+
+bb5087: ; preds = %cond_next298
+ ret i32 0
+
+bb5091: ; preds = %cond_next298
+ ret i32 0
+
+bb5170: ; preds = %cond_next298
+ ret i32 0
+
+bb5300: ; preds = %cond_next298
+ ret i32 0
+
+bb5382: ; preds = %cond_next298
+ ret i32 0
+
+bb5519: ; preds = %cond_next298
+ ret i32 0
+
+bb5524: ; preds = %cond_next298
+ ret i32 0
+
+bb5645: ; preds = %cond_next298
+ ret i32 0
+
+bb5650: ; preds = %cond_next298
+ ret i32 0
+
+cond_next5873: ; preds = %cond_next298, %cond_next298
+ ret i32 0
+
+bb5965: ; preds = %cond_next298
+ %tmp5968 = call fastcc i32 @gimplify_cleanup_point_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0 ) ; <i32> [#uses=0]
+ ret i32 0
+
+bb5969: ; preds = %cond_next298
+ %tmp5973 = call fastcc i32 @gimplify_target_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0 ) ; <i32> [#uses=0]
+ ret i32 0
+
+bb5974: ; preds = %cond_next298
+ ret i32 0
+
+bb6049: ; preds = %cond_next298
+ ret i32 0
+
+bb6124: ; preds = %cond_next298
+ ret i32 0
+
+bb6296: ; preds = %cond_next298
+ ret i32 0
+
+cond_next6474: ; preds = %cond_next298
+ icmp eq %struct.tree_node** %internal_post, %post_p_addr.0 ; <i1>:11 [#uses=1]
+ %iftmp.381.0 = select i1 %11, %struct.tree_node** null, %struct.tree_node** %post_p_addr.0 ; <%struct.tree_node**> [#uses=1]
+ %tmp6490 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %iftmp.381.0, i8 (%struct.tree_node*) zeroext * %gimple_test_f, i32 %fallback ) ; <i32> [#uses=0]
+ %tmp6551 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, i8 (%struct.tree_node*) zeroext * @is_gimple_val, i32 1 ) ; <i32> [#uses=0]
+ ret i32 0
+
+bb7444: ; preds = %cond_next298
+ ret i32 0
+
+bb7463: ; preds = %cond_next4266, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298
+ ret i32 0
+
+bb7478: ; preds = %bb277
+ ret i32 0
+}
+
+declare i8 @is_gimple_formal_tmp_rhs(%struct.tree_node*) zeroext
+
+declare void @gimplify_and_add(%struct.tree_node*, %struct.tree_node**)
+
+declare %struct.tree_node* @get_initialized_tmp_var(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**)
+
+declare %struct.tree_node* @get_formal_tmp_var(%struct.tree_node*, %struct.tree_node**)
+
+declare fastcc void @gimplify_init_ctor_preeval(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.gimplify_init_ctor_preeval_data*)
+
+declare i8 @type_contains_placeholder_p(%struct.tree_node*) zeroext
+
+declare i8 @is_gimple_mem_rhs(%struct.tree_node*) zeroext
+
+declare fastcc i32 @gimplify_modify_expr_rhs(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
+
+declare %struct.tree_node* @fold_indirect_ref(%struct.tree_node*)
+
+declare fastcc i32 @gimplify_compound_expr(%struct.tree_node**, %struct.tree_node**, i8 zeroext )
+
+declare i8 @is_gimple_lvalue(%struct.tree_node*) zeroext
+
+declare void @categorize_ctor_elements(%struct.tree_node*, i64*, i64*, i64*, i8*)
+
+declare void @lhd_set_decl_assembler_name(%struct.tree_node*)
+
+declare i64 @int_size_in_bytes(%struct.tree_node*)
+
+declare i32 @can_move_by_pieces(i64, i32)
+
+declare i64 @count_type_elements(%struct.tree_node*)
+
+declare void @gimplify_stmt(%struct.tree_node**)
+
+declare %struct.tree_node* @get_base_address(%struct.tree_node*)
+
+declare fastcc void @gimplify_init_ctor_eval(%struct.tree_node*, %struct.tree_node*, %struct.tree_node**, i8 zeroext )
+
+declare %struct.tree_node* @build_complex(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare i8 (%struct.tree_node*) zeroext * @rhs_predicate_for(%struct.tree_node*)
+
+declare %struct.tree_node* @build_vector(%struct.tree_node*, %struct.tree_node*)
+
+declare i8 @is_gimple_val(%struct.tree_node*) zeroext
+
+declare i8 @is_gimple_reg_type(%struct.tree_node*) zeroext
+
+declare fastcc i32 @gimplify_cond_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node*, i32)
+
+declare fastcc i32 @gimplify_modify_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
+
+declare %struct.tree_node* @tree_cons_stat(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @build_fold_addr_expr(%struct.tree_node*)
+
+declare %struct.tree_node* @build_function_call_expr(%struct.tree_node*, %struct.tree_node*)
+
+declare i8 @is_gimple_addressable(%struct.tree_node*) zeroext
+
+declare i8 @is_gimple_reg(%struct.tree_node*) zeroext
+
+declare %struct.tree_node* @make_ssa_name(%struct.tree_node*, %struct.tree_node*)
+
+declare i8 @tree_ssa_useless_type_conversion(%struct.tree_node*) zeroext
+
+declare fastcc i32 @gimplify_self_mod_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
+
+declare fastcc i32 @gimplify_compound_lval(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i32)
+
+declare %struct.tree_node* @get_callee_fndecl(%struct.tree_node*)
+
+declare %struct.tree_node* @fold_builtin(%struct.tree_node*, i8 zeroext )
+
+declare void @error(i8*, ...)
+
+declare %struct.tree_node* @build_empty_stmt()
+
+declare i8 @fold_builtin_next_arg(%struct.tree_node*) zeroext
+
+declare fastcc i32 @gimplify_arg(%struct.tree_node**, %struct.tree_node**)
+
+declare i8 @is_gimple_call_addr(%struct.tree_node*) zeroext
+
+declare i32 @call_expr_flags(%struct.tree_node*)
+
+declare void @recalculate_side_effects(%struct.tree_node*)
+
+declare %struct.tree_node* @fold_convert(%struct.tree_node*, %struct.tree_node*)
+
+declare void @recompute_tree_invarant_for_addr_expr(%struct.tree_node*)
+
+declare i32 @gimplify_va_arg_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
+
+declare %struct.tree_node* @size_int_kind(i64, i32)
+
+declare %struct.tree_node* @size_binop(i32, %struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @build4_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
+
+declare void @gimplify_type_sizes(%struct.tree_node*, %struct.tree_node**)
+
+declare void @gimplify_one_sizepos(%struct.tree_node**, %struct.tree_node**)
+
+declare %struct.tree_node* @build_pointer_type(%struct.tree_node*)
+
+declare %struct.tree_node* @build_fold_indirect_ref(%struct.tree_node*)
+
+declare fastcc i32 @gimplify_bind_expr(%struct.tree_node**, %struct.tree_node*, %struct.tree_node**)
+
+declare fastcc void @gimplify_loop_expr(%struct.tree_node**, %struct.tree_node**)
+
+declare fastcc i32 @gimplify_switch_expr(%struct.tree_node**, %struct.tree_node**)
+
+declare %struct.tree_node* @decl_function_context(%struct.tree_node*)
+
+declare %struct.varray_head_tag* @varray_grow(%struct.varray_head_tag*, i32)
+
+declare fastcc void @gimplify_return_expr(%struct.tree_node*, %struct.tree_node**)
+
+declare fastcc i32 @gimplify_save_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
+
+declare fastcc i32 @gimplify_asm_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
+
+declare void @gimplify_to_stmt_list(%struct.tree_node**)
+
+declare fastcc i32 @gimplify_cleanup_point_expr(%struct.tree_node**, %struct.tree_node**)
+
+declare fastcc i32 @gimplify_target_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
+
+declare void @tsi_delink(%struct.tree_stmt_iterator*)
+
+declare void @tsi_link_before(%struct.tree_stmt_iterator*, %struct.tree_node*, i32)
+
+declare i8 @is_gimple_stmt(%struct.tree_node*) zeroext
+
+declare void @print_generic_expr(%struct.FILE*, %struct.tree_node*, i32)
+
+declare void @debug_tree(%struct.tree_node*)
+
+declare void @internal_error(i8*, ...)
+
+declare %struct.tree_node* @force_gimple_operand(%struct.tree_node*, %struct.tree_node**, i8 zeroext , %struct.tree_node*)
+
+declare i8 @is_gimple_reg_rhs(%struct.tree_node*) zeroext
+
+declare void @add_referenced_tmp_var(%struct.tree_node*)
+
+declare i8 @contains_placeholder_p(%struct.tree_node*) zeroext
+
+declare %struct.varray_head_tag* @varray_init(i32, i32, i8*)
+
+declare i32 @handled_component_p(%struct.tree_node*)
+
+declare void @varray_check_failed(%struct.varray_head_tag*, i32, i8*, i32, i8*)
+
+declare %struct.tree_node* @array_ref_low_bound(%struct.tree_node*)
+
+declare i8 @is_gimple_min_invariant(%struct.tree_node*) zeroext
+
+declare i8 @is_gimple_formal_tmp_reg(%struct.tree_node*) zeroext
+
+declare %struct.tree_node* @array_ref_element_size(%struct.tree_node*)
+
+declare %struct.tree_node* @component_ref_field_offset(%struct.tree_node*)
+
+declare i8 @is_gimple_min_lval(%struct.tree_node*) zeroext
+
+declare void @varray_underflow(%struct.varray_head_tag*, i8*, i32, i8*)
+
+declare i32 @list_length(%struct.tree_node*)
+
+declare i8 @parse_output_constraint(i8**, i32, i32, i32, i8*, i8*, i8*) zeroext
+
+declare i8* @xstrdup(i8*)
+
+declare %struct.tree_node* @build_string(i32, i8*)
+
+declare i8* @strchr(i8*, i32)
+
+declare %struct.tree_node* @build_tree_list_stat(%struct.tree_node*, %struct.tree_node*)
+
+declare %struct.tree_node* @chainon(%struct.tree_node*, %struct.tree_node*)
+
+declare i8 @parse_input_constraint(i8**, i32, i32, i32, i32, i8**, i8*, i8*) zeroext
+
+declare i8 @is_gimple_asm_val(%struct.tree_node*) zeroext
+
+declare void @gimplify_body(%struct.tree_node**, %struct.tree_node*, i8 zeroext )
+
+declare void @timevar_push_1(i32)
+
+declare %struct.tree_node* @gimplify_parameters()
+
+declare %struct.tree_node* @expr_only(%struct.tree_node*)
+
+declare void @timevar_pop_1(i32)
+
+declare void @gimplify_function_tree(%struct.tree_node*)
+
+declare void @allocate_struct_function(%struct.tree_node*)
+
+declare %struct.tree_node* @make_tree_vec_stat(i32)
+
+declare %struct.tree_node* @tsi_split_statement_list_after(%struct.tree_stmt_iterator*)
+
+declare i8 @is_gimple_condexpr(%struct.tree_node*) zeroext
+
+declare %struct.tree_node* @invert_truthvalue(%struct.tree_node*)
+
+declare i8 @initializer_zerop(%struct.tree_node*) zeroext
+
+declare i32 @simple_cst_equal(%struct.tree_node*, %struct.tree_node*)
+
+declare i32 @aggregate_value_p(%struct.tree_node*, %struct.tree_node*)
+
+declare i32 @fwrite(i8*, i32, i32, %struct.FILE*)
diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
new file mode 100644
index 0000000..e4635f5
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
+; PR1279
+
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.u = type { [1 x i64] }
+
+define fastcc void @find_reloads_address(%struct.rtx_def** %loc) {
+entry:
+ %ad_addr = alloca %struct.rtx_def* ; <%struct.rtx_def**> [#uses=2]
+ br i1 false, label %cond_next416, label %cond_true340
+
+cond_true340: ; preds = %entry
+ ret void
+
+cond_next416: ; preds = %entry
+ %tmp1085 = load %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1]
+ br i1 false, label %bb1084, label %cond_true418
+
+cond_true418: ; preds = %cond_next416
+ ret void
+
+bb1084: ; preds = %cond_next416
+ br i1 false, label %cond_true1092, label %cond_next1102
+
+cond_true1092: ; preds = %bb1084
+ %tmp1094 = getelementptr %struct.rtx_def* %tmp1085, i32 0, i32 3 ; <%struct.u*> [#uses=1]
+ %tmp10981099 = bitcast %struct.u* %tmp1094 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=2]
+ %tmp1101 = load %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1]
+ store %struct.rtx_def* %tmp1101, %struct.rtx_def** %ad_addr
+ br label %cond_next1102
+
+cond_next1102: ; preds = %cond_true1092, %bb1084
+ %loc_addr.0 = phi %struct.rtx_def** [ %tmp10981099, %cond_true1092 ], [ %loc, %bb1084 ] ; <%struct.rtx_def**> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
new file mode 100644
index 0000000..ea27676
--- /dev/null
+++ b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
+; PR1279
+
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
+ %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 }
+ %struct.eh_status = type opaque
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+ %struct.initial_value_struct = type opaque
+ %struct.lang_decl = type opaque
+ %struct.language_function = type opaque
+ %struct.location_t = type { i8*, i32 }
+ %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, i32, i32, [14 x %struct.rtx_def*] }
+ %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+ %struct.temp_slot = type opaque
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+ %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_decl_u1 = type { i64 }
+ %struct.tree_decl_u2 = type { %struct.function* }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.u = type { [1 x i64] }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type opaque
+ %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+ %union.tree_ann_d = type opaque
+@str469 = external global [42 x i8] ; <[42 x i8]*> [#uses=0]
+@__FUNCTION__.24265 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
+
+declare void @fancy_abort()
+
+define fastcc void @fold_builtin_bitop() {
+entry:
+ br i1 false, label %cond_true105, label %UnifiedReturnBlock
+
+cond_true105: ; preds = %entry
+ br i1 false, label %cond_true134, label %UnifiedReturnBlock
+
+cond_true134: ; preds = %cond_true105
+ switch i32 0, label %bb479 [
+ i32 378, label %bb313
+ i32 380, label %bb313
+ i32 381, label %bb313
+ i32 383, label %bb366
+ i32 385, label %bb366
+ i32 386, label %bb366
+ i32 403, label %bb250
+ i32 405, label %bb250
+ i32 406, label %bb250
+ i32 434, label %bb464
+ i32 436, label %bb464
+ i32 437, label %bb464
+ i32 438, label %bb441
+ i32 440, label %bb441
+ i32 441, label %bb441
+ ]
+
+bb250: ; preds = %cond_true134, %cond_true134, %cond_true134
+ ret void
+
+bb313: ; preds = %cond_true134, %cond_true134, %cond_true134
+ ret void
+
+bb366: ; preds = %cond_true134, %cond_true134, %cond_true134
+ ret void
+
+bb441: ; preds = %cond_true134, %cond_true134, %cond_true134
+ ret void
+
+bb457: ; preds = %bb464, %bb457
+ %tmp459 = add i64 0, 1 ; <i64> [#uses=1]
+ br i1 false, label %bb474.preheader, label %bb457
+
+bb464: ; preds = %cond_true134, %cond_true134, %cond_true134
+ br i1 false, label %bb474.preheader, label %bb457
+
+bb474.preheader: ; preds = %bb464, %bb457
+ %result.5.ph = phi i64 [ 0, %bb464 ], [ %tmp459, %bb457 ] ; <i64> [#uses=1]
+ br label %bb474
+
+bb467: ; preds = %bb474
+ %indvar.next586 = add i64 %indvar585, 1 ; <i64> [#uses=1]
+ br label %bb474
+
+bb474: ; preds = %bb467, %bb474.preheader
+ %indvar585 = phi i64 [ 0, %bb474.preheader ], [ %indvar.next586, %bb467 ] ; <i64> [#uses=2]
+ br i1 false, label %bb476, label %bb467
+
+bb476: ; preds = %bb474
+ %result.5 = add i64 %indvar585, %result.5.ph ; <i64> [#uses=0]
+ ret void
+
+bb479: ; preds = %cond_true134
+ tail call void @fancy_abort( )
+ unreachable
+
+UnifiedReturnBlock: ; preds = %cond_true105, %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
new file mode 100644
index 0000000..f24def3
--- /dev/null
+++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin
+
+ %struct.H_TBL = type { [17 x i8], [256 x i8], i32 }
+ %struct.Q_TBL = type { [64 x i16], i32 }
+ %struct.anon = type { [80 x i8] }
+ %struct.X_c_coef_ccler = type { void (%struct.X_Y*, i32)*, i32 (%struct.X_Y*, i8***)* }
+ %struct.X_c_main_ccler = type { void (%struct.X_Y*, i32)*, void (%struct.X_Y*, i8**, i32*, i32)* }
+ %struct.X_c_prep_ccler = type { void (%struct.X_Y*, i32)*, void (%struct.X_Y*, i8**, i32*, i32, i8***, i32*, i32)* }
+ %struct.X_color_converter = type { void (%struct.X_Y*)*, void (%struct.X_Y*, i8**, i8***, i32, i32)* }
+ %struct.X_common_struct = type { %struct.X_error_mgr*, %struct.X_memory_mgr*, %struct.X_progress_mgr*, i8*, i32, i32 }
+ %struct.X_comp_master = type { void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, i32, i32 }
+ %struct.X_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Q_TBL*, i8* }
+ %struct.X_Y = type { %struct.X_error_mgr*, %struct.X_memory_mgr*, %struct.X_progress_mgr*, i8*, i32, i32, %struct.X_destination_mgr*, i32, i32, i32, i32, double, i32, i32, i32, %struct.X_component_info*, [4 x %struct.Q_TBL*], [4 x %struct.H_TBL*], [4 x %struct.H_TBL*], [16 x i8], [16 x i8], [16 x i8], i32, %struct.X_scan_info*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i16, i16, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.X_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, %struct.X_comp_master*, %struct.X_c_main_ccler*, %struct.X_c_prep_ccler*, %struct.X_c_coef_ccler*, %struct.X_marker_writer*, %struct.X_color_converter*, %struct.X_downssr*, %struct.X_forward_D*, %struct.X_entropy_en*, %struct.X_scan_info*, i32 }
+ %struct.X_destination_mgr = type { i8*, i32, void (%struct.X_Y*)*, i32 (%struct.X_Y*)*, void (%struct.X_Y*)* }
+ %struct.X_downssr = type { void (%struct.X_Y*)*, void (%struct.X_Y*, i8***, i32, i8***, i32)*, i32 }
+ %struct.X_entropy_en = type { void (%struct.X_Y*, i32)*, i32 (%struct.X_Y*, [64 x i16]**)*, void (%struct.X_Y*)* }
+ %struct.X_error_mgr = type { void (%struct.X_common_struct*)*, void (%struct.X_common_struct*, i32)*, void (%struct.X_common_struct*)*, void (%struct.X_common_struct*, i8*)*, void (%struct.X_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+ %struct.X_forward_D = type { void (%struct.X_Y*)*, void (%struct.X_Y*, %struct.X_component_info*, i8**, [64 x i16]*, i32, i32, i32)* }
+ %struct.X_marker_writer = type { void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*, i32, i32)*, void (%struct.X_Y*, i32)* }
+ %struct.X_memory_mgr = type { i8* (%struct.X_common_struct*, i32, i32)*, i8* (%struct.X_common_struct*, i32, i32)*, i8** (%struct.X_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.X_common_struct*, i32, i32, i32)*, %struct.jvirt_sAY_cc* (%struct.X_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_bAY_cc* (%struct.X_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.X_common_struct*)*, i8** (%struct.X_common_struct*, %struct.jvirt_sAY_cc*, i32, i32, i32)*, [64 x i16]** (%struct.X_common_struct*, %struct.jvirt_bAY_cc*, i32, i32, i32)*, void (%struct.X_common_struct*, i32)*, void (%struct.X_common_struct*)*, i32, i32 }
+ %struct.X_progress_mgr = type { void (%struct.X_common_struct*)*, i32, i32, i32, i32 }
+ %struct.X_scan_info = type { i32, [4 x i32], i32, i32, i32, i32 }
+ %struct.jvirt_bAY_cc = type opaque
+ %struct.jvirt_sAY_cc = type opaque
+
+define void @test(%struct.X_Y* %cinfo) {
+entry:
+ br i1 false, label %bb.preheader, label %return
+
+bb.preheader: ; preds = %entry
+ %tbl.014.us = load i32* null ; <i32> [#uses=1]
+ br i1 false, label %cond_next.us, label %bb
+
+cond_next51.us: ; preds = %cond_next.us, %cond_true33.us.cond_true46.us_crit_edge
+ %htblptr.019.1.us = phi %struct.H_TBL** [ %tmp37.us, %cond_true33.us.cond_true46.us_crit_edge ], [ %tmp37.us, %cond_next.us ] ; <%struct.H_TBL**> [#uses=0]
+ ret void
+
+cond_true33.us.cond_true46.us_crit_edge: ; preds = %cond_next.us
+ call void @_C_X_a_HT( )
+ br label %cond_next51.us
+
+cond_next.us: ; preds = %bb.preheader
+ %tmp37.us = getelementptr %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us ; <%struct.H_TBL**> [#uses=3]
+ %tmp4524.us = load %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1]
+ icmp eq %struct.H_TBL* %tmp4524.us, null ; <i1>:0 [#uses=1]
+ br i1 %0, label %cond_true33.us.cond_true46.us_crit_edge, label %cond_next51.us
+
+bb: ; preds = %bb.preheader
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+declare void @_C_X_a_HT()
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
new file mode 100644
index 0000000..b543c57
--- /dev/null
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm | not grep {add.*#0}
+
+define i32 @foo() {
+entry:
+ %A = alloca [1123 x i32], align 16 ; <[1123 x i32]*> [#uses=1]
+ %B = alloca [3123 x i32], align 16 ; <[3123 x i32]*> [#uses=1]
+ %C = alloca [12312 x i32], align 16 ; <[12312 x i32]*> [#uses=1]
+ %tmp = call i32 (...)* @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0]
+ ret i32 undef
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
new file mode 100644
index 0000000..e001cde
--- /dev/null
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: not grep LPC9
+
+ %struct.B = type { i32 }
+ %struct.anon = type { void (%struct.B*)*, i32 }
+@str = internal constant [7 x i8] c"i, %d\0A\00" ; <[7 x i8]*> [#uses=1]
+@str1 = internal constant [7 x i8] c"j, %d\0A\00" ; <[7 x i8]*> [#uses=1]
+
+define internal void @_ZN1B1iEv(%struct.B* %this) {
+entry:
+ %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
+ %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+define internal void @_ZN1B1jEv(%struct.B* %this) {
+entry:
+ %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
+ %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ ret void
+}
+
+define i32 @main() {
+entry:
+ %b.i29 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
+ %b.i1 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
+ %b.i = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
+ %tmp2.i = getelementptr %struct.B* %b.i, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 4, i32* %tmp2.i
+ br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit, label %cond_true.i
+
+cond_true.i: ; preds = %entry
+ %b2.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1]
+ %ctg23.i = getelementptr i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp121314.i = bitcast i8* %ctg23.i to i32 (...)*** ; <i32 (...)***> [#uses=1]
+ %tmp15.i = load i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1]
+ %tmp151.i = bitcast i32 (...)** %tmp15.i to i8* ; <i8*> [#uses=1]
+ %ctg2.i = getelementptr i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
+ %tmp2021.i = bitcast i8* %ctg2.i to i32 (...)** ; <i32 (...)**> [#uses=1]
+ %tmp22.i = load i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1]
+ %tmp2223.i = bitcast i32 (...)* %tmp22.i to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
+ br label %_Z3fooiM1BFvvE.exit
+
+_Z3fooiM1BFvvE.exit: ; preds = %cond_true.i, %entry
+ %iftmp.2.0.i = phi void (%struct.B*)* [ %tmp2223.i, %cond_true.i ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %entry ] ; <void (%struct.B*)*> [#uses=1]
+ %b4.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1]
+ %ctg25.i = getelementptr i8* %b4.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp3031.i = bitcast i8* %ctg25.i to %struct.B* ; <%struct.B*> [#uses=1]
+ call void %iftmp.2.0.i( %struct.B* %tmp3031.i )
+ %tmp2.i30 = getelementptr %struct.B* %b.i29, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 6, i32* %tmp2.i30
+ br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit56, label %cond_true.i46
+
+cond_true.i46: ; preds = %_Z3fooiM1BFvvE.exit
+ %b2.i35 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1]
+ %ctg23.i36 = getelementptr i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp121314.i37 = bitcast i8* %ctg23.i36 to i32 (...)*** ; <i32 (...)***> [#uses=1]
+ %tmp15.i38 = load i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1]
+ %tmp151.i41 = bitcast i32 (...)** %tmp15.i38 to i8* ; <i8*> [#uses=1]
+ %ctg2.i42 = getelementptr i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) ; <i8*> [#uses=1]
+ %tmp2021.i43 = bitcast i8* %ctg2.i42 to i32 (...)** ; <i32 (...)**> [#uses=1]
+ %tmp22.i44 = load i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1]
+ %tmp2223.i45 = bitcast i32 (...)* %tmp22.i44 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
+ br label %_Z3fooiM1BFvvE.exit56
+
+_Z3fooiM1BFvvE.exit56: ; preds = %cond_true.i46, %_Z3fooiM1BFvvE.exit
+ %iftmp.2.0.i49 = phi void (%struct.B*)* [ %tmp2223.i45, %cond_true.i46 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit ] ; <void (%struct.B*)*> [#uses=1]
+ %b4.i53 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1]
+ %ctg25.i54 = getelementptr i8* %b4.i53, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp3031.i55 = bitcast i8* %ctg25.i54 to %struct.B* ; <%struct.B*> [#uses=1]
+ call void %iftmp.2.0.i49( %struct.B* %tmp3031.i55 )
+ %tmp2.i2 = getelementptr %struct.B* %b.i1, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 -1, i32* %tmp2.i2
+ br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit28, label %cond_true.i18
+
+cond_true.i18: ; preds = %_Z3fooiM1BFvvE.exit56
+ %b2.i7 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1]
+ %ctg23.i8 = getelementptr i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp121314.i9 = bitcast i8* %ctg23.i8 to i32 (...)*** ; <i32 (...)***> [#uses=1]
+ %tmp15.i10 = load i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1]
+ %tmp151.i13 = bitcast i32 (...)** %tmp15.i10 to i8* ; <i8*> [#uses=1]
+ %ctg2.i14 = getelementptr i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
+ %tmp2021.i15 = bitcast i8* %ctg2.i14 to i32 (...)** ; <i32 (...)**> [#uses=1]
+ %tmp22.i16 = load i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1]
+ %tmp2223.i17 = bitcast i32 (...)* %tmp22.i16 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
+ br label %_Z3fooiM1BFvvE.exit28
+
+_Z3fooiM1BFvvE.exit28: ; preds = %cond_true.i18, %_Z3fooiM1BFvvE.exit56
+ %iftmp.2.0.i21 = phi void (%struct.B*)* [ %tmp2223.i17, %cond_true.i18 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit56 ] ; <void (%struct.B*)*> [#uses=1]
+ %b4.i25 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1]
+ %ctg25.i26 = getelementptr i8* %b4.i25, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %tmp3031.i27 = bitcast i8* %ctg25.i26 to %struct.B* ; <%struct.B*> [#uses=1]
+ call void %iftmp.2.0.i21( %struct.B* %tmp3031.i27 )
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
new file mode 100644
index 0000000..a89e937
--- /dev/null
+++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "arm-apple-darwin8"
+ %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
+@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=3]
+@file_mask = external global [8 x i64] ; <[8 x i64]*> [#uses=1]
+@rank_mask.1.b = external global i1 ; <i1*> [#uses=1]
+
+define fastcc void @EvaluateDevelopment() {
+entry:
+ %tmp7 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1]
+ %tmp50 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1]
+ %tmp52 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1]
+ %tmp53 = or i64 %tmp52, %tmp50 ; <i64> [#uses=1]
+ %tmp57.b = load i1* @rank_mask.1.b ; <i1> [#uses=1]
+ %tmp57 = select i1 %tmp57.b, i64 71776119061217280, i64 0 ; <i64> [#uses=1]
+ %tmp58 = and i64 %tmp57, %tmp7 ; <i64> [#uses=1]
+ %tmp59 = lshr i64 %tmp58, 8 ; <i64> [#uses=1]
+ %tmp63 = load i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1]
+ %tmp64 = or i64 %tmp63, 0 ; <i64> [#uses=1]
+ %tmp65 = and i64 %tmp59, %tmp53 ; <i64> [#uses=1]
+ %tmp66 = and i64 %tmp65, %tmp64 ; <i64> [#uses=1]
+ %tmp67 = icmp eq i64 %tmp66, 0 ; <i1> [#uses=1]
+ br i1 %tmp67, label %cond_next145, label %cond_true70
+
+cond_true70: ; preds = %entry
+ ret void
+
+cond_next145: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
new file mode 100644
index 0000000..c73b6793
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+ %struct.Connection = type { i32, [10 x i8], i32 }
+ %struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 }
+ %struct.Point = type { i8*, %struct.cppobjtype, i16 (%struct.Point*) signext *, i16 (%struct.Point*) signext *, double (%struct.Point*)*, double (%struct.Point*)* }
+ %struct.RefPoint = type { %struct.Point*, %struct.cppobjtype }
+ %struct.ShortArray = type { %struct.cppobjtype, i32, i16* }
+ %struct.TestObj = type { i8*, %struct.cppobjtype, i8, [32 x i8], i8*, i8**, i16, i16, i32, i32, i32, i32, float, double, %struct.cppobjtype, i32, i16*, i16**, i8**, i32, %struct.XyPoint, [3 x %struct.Connection], %struct.Point*, %struct.XyPoint*, i32, i8*, i8*, i16*, %struct.ShortArray, %struct.IntChunk, %struct.cppobjtype, %struct.cppobjtype, %struct.RefPoint, i32, %struct.cppobjtype, %struct.cppobjtype }
+ %struct.XyPoint = type { i16, i16 }
+ %struct.cppobjtype = type { i32, i16, i16 }
+@Msg = external global [256 x i8] ; <[256 x i8]*> [#uses=1]
[email protected] = external constant [48 x i8] ; <[48 x i8]*> [#uses=1]
[email protected] = external global i1 ; <i1*> [#uses=1]
+
+define fastcc void @Draw7(i32 %Option, i32* %Status) {
+entry:
+ %tmp115.b = load i1* @FirstTime.4637.b ; <i1> [#uses=1]
+ br i1 %tmp115.b, label %cond_next239, label %cond_next.i
+
+cond_next.i: ; preds = %entry
+ ret void
+
+cond_next239: ; preds = %entry
+ %tmp242 = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ br i1 %tmp242, label %cond_next253, label %cond_next296
+
+cond_next253: ; preds = %cond_next239
+ switch i32 %Option, label %bb1326 [
+ i32 3, label %cond_true258
+ i32 4, label %cond_true268
+ i32 2, label %cond_true279
+ i32 1, label %cond_next315
+ ]
+
+cond_true258: ; preds = %cond_next253
+ ret void
+
+cond_true268: ; preds = %cond_next253
+ ret void
+
+cond_true279: ; preds = %cond_next253
+ ret void
+
+cond_next296: ; preds = %cond_next239
+ ret void
+
+cond_next315: ; preds = %cond_next253
+ %tmp1140 = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ br i1 %tmp1140, label %cond_true1143, label %bb1326
+
+cond_true1143: ; preds = %cond_next315
+ %tmp1148 = icmp eq i32 0, 0 ; <i1> [#uses=4]
+ br i1 %tmp1148, label %cond_next1153, label %cond_true1151
+
+cond_true1151: ; preds = %cond_true1143
+ ret void
+
+cond_next1153: ; preds = %cond_true1143
+ %tmp8.i.i185 = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ br i1 %tmp8.i.i185, label %TestObj_new1.exit, label %cond_true.i.i187
+
+cond_true.i.i187: ; preds = %cond_next1153
+ ret void
+
+TestObj_new1.exit: ; preds = %cond_next1153
+ %tmp1167 = icmp eq i16 0, 0 ; <i1> [#uses=1]
+ %tmp1178 = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp1167, %tmp1178 ; <i1> [#uses=1]
+ br i1 %bothcond, label %bb1199, label %bb1181
+
+bb1181: ; preds = %TestObj_new1.exit
+ ret void
+
+bb1199: ; preds = %TestObj_new1.exit
+ br i1 %tmp1148, label %cond_next1235, label %Object_Dump.exit302
+
+Object_Dump.exit302: ; preds = %bb1199
+ ret void
+
+cond_next1235: ; preds = %bb1199
+ %bothcond10485 = or i1 false, %tmp1148 ; <i1> [#uses=1]
+ br i1 %bothcond10485, label %cond_next1267, label %cond_true1248
+
+cond_true1248: ; preds = %cond_next1235
+ ret void
+
+cond_next1267: ; preds = %cond_next1235
+ br i1 %tmp1148, label %cond_next1275, label %cond_true1272
+
+cond_true1272: ; preds = %cond_next1267
+ %tmp1273 = load %struct.TestObj** null ; <%struct.TestObj*> [#uses=2]
+ %tmp2930.i = ptrtoint %struct.TestObj* %tmp1273 to i32 ; <i32> [#uses=1]
+ %tmp42.i348 = sub i32 0, %tmp2930.i ; <i32> [#uses=1]
+ %tmp45.i = getelementptr %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2]
+ %tmp48.i = load i8** %tmp45.i ; <i8*> [#uses=1]
+ %tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0]
+ br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i
+
+cond_true.i632.i: ; preds = %cond_true1272
+ ret void
+
+Ut_TraceMsg.exit648.i: ; preds = %cond_true1272
+ %tmp57.i = getelementptr i8* null, i32 %tmp42.i348 ; <i8*> [#uses=0]
+ ret void
+
+cond_next1275: ; preds = %cond_next1267
+ ret void
+
+bb1326: ; preds = %cond_next315, %cond_next253
+ ret void
+}
+
+declare i32 @sprintf(i8*, i8*, ...)
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
new file mode 100644
index 0000000..26864f1
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s | not grep 1_0
+; This used to create an extra branch to 'entry', LBB1_0.
+
+; ModuleID = 'bug.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "arm-apple-darwin8"
+ %struct.HexxagonMove = type { i8, i8, i32 }
+ %struct.HexxagonMoveList = type { i32, %struct.HexxagonMove* }
+
+define void @_ZN16HexxagonMoveList8sortListEv(%struct.HexxagonMoveList* %this) {
+entry:
+ %tmp51 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 1 ; <%struct.HexxagonMove**> [#uses=2]
+ br label %bb49
+
+bb1: ; preds = %bb49
+ %tmp3 = load %struct.HexxagonMove** %tmp2 ; <%struct.HexxagonMove*> [#uses=5]
+ %tmp6 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 2 ; <i32*> [#uses=1]
+ %tmp7 = load i32* %tmp6 ; <i32> [#uses=2]
+ %tmp12 = add i32 %i.1, 1 ; <i32> [#uses=7]
+ %tmp14 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 2 ; <i32*> [#uses=1]
+ %tmp15 = load i32* %tmp14 ; <i32> [#uses=1]
+ %tmp16 = icmp slt i32 %tmp7, %tmp15 ; <i1> [#uses=1]
+ br i1 %tmp16, label %cond_true, label %bb49
+
+cond_true: ; preds = %bb1
+ %tmp23.0 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 0 ; <i8*> [#uses=2]
+ %tmp67 = load i8* %tmp23.0 ; <i8> [#uses=1]
+ %tmp23.1 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 1 ; <i8*> [#uses=1]
+ %tmp68 = load i8* %tmp23.1 ; <i8> [#uses=1]
+ %tmp3638 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 0 ; <i8*> [#uses=1]
+ tail call void @llvm.memcpy.i32( i8* %tmp23.0, i8* %tmp3638, i32 8, i32 4 )
+ %tmp41 = load %struct.HexxagonMove** %tmp2 ; <%struct.HexxagonMove*> [#uses=3]
+ %tmp44.0 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 0 ; <i8*> [#uses=1]
+ store i8 %tmp67, i8* %tmp44.0
+ %tmp44.1 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 1 ; <i8*> [#uses=1]
+ store i8 %tmp68, i8* %tmp44.1
+ %tmp44.2 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 2 ; <i32*> [#uses=1]
+ store i32 %tmp7, i32* %tmp44.2
+ br label %bb49
+
+bb49: ; preds = %bb59, %cond_true, %bb1, %entry
+ %i.1 = phi i32 [ 0, %entry ], [ %tmp12, %bb1 ], [ %tmp12, %cond_true ], [ 0, %bb59 ] ; <i32> [#uses=5]
+ %move.2 = phi i32 [ 0, %entry ], [ 1, %cond_true ], [ %move.2, %bb1 ], [ 0, %bb59 ] ; <i32> [#uses=2]
+ %tmp52 = load i32* %tmp51 ; <i32> [#uses=1]
+ %tmp53 = add i32 %tmp52, -1 ; <i32> [#uses=1]
+ %tmp55 = icmp sgt i32 %tmp53, %i.1 ; <i1> [#uses=1]
+ br i1 %tmp55, label %bb1, label %bb59
+
+bb59: ; preds = %bb49
+ %tmp61 = icmp eq i32 %move.2, 0 ; <i1> [#uses=1]
+ br i1 %tmp61, label %return, label %bb49
+
+return: ; preds = %bb59
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
new file mode 100644
index 0000000..f2a8ee1
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; Check that calls to baz and quux are tail-merged.
+; PR1628
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+ %i_addr = alloca i32 ; <i32*> [#uses=2]
+ %q_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %i, i32* %i_addr
+ store i32 %q, i32* %q_addr
+ %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ br label %cond_next
+
+cond_false: ; preds = %entry
+ %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ br label %cond_next
+
+cond_next: ; preds = %cond_false, %cond_true
+ %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
+ %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
+ %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
+ br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_true11: ; preds = %cond_next
+ %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_false15: ; preds = %cond_next
+ %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_next18: ; preds = %cond_false15, %cond_true11
+ %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %cond_next18
+ %retval20 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
new file mode 100644
index 0000000..2758505
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; Check that calls to baz and quux are tail-merged.
+; PR1628
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+ %i_addr = alloca i32 ; <i32*> [#uses=2]
+ %q_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %i, i32* %i_addr
+ store i32 %q, i32* %q_addr
+ %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
+ %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
+ %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
+ br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_false: ; preds = %entry
+ %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
+ %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
+ %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
+ br i1 %toBool210, label %cond_true11, label %cond_false15
+
+cond_true11: ; preds = %cond_next
+ %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_false15: ; preds = %cond_next
+ %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_next18: ; preds = %cond_false15, %cond_true11
+ %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %cond_next18
+ %retval20 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)
diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
new file mode 100644
index 0000000..b3b07693
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define i32 @test3() {
+ tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
+ ret i32 11
+}
diff --git a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
new file mode 100644
index 0000000..7b15ded
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
+; PR1406
+
+ %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
+ %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
+ %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
+ %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] }
+ %struct.AVOption = type opaque
+ %struct.AVPaletteControl = type { i32, [256 x i32] }
+ %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
+ %struct.AVRational = type { i32, i32 }
+ %struct.RcOverride = type { i32, i32, i32, float }
+
+define i32 @decode_init(%struct.AVCodecContext* %avctx) {
+entry:
+ br i1 false, label %bb, label %cond_next789
+
+bb: ; preds = %bb, %entry
+ br i1 false, label %bb59, label %bb
+
+bb59: ; preds = %bb
+ %tmp68 = sdiv i64 0, 0 ; <i64> [#uses=1]
+ %tmp6869 = trunc i64 %tmp68 to i32 ; <i32> [#uses=2]
+ %tmp81 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 13316085, i32 23, i32 9 ) ; <i32> [#uses=0]
+ %tmp90 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 10568984, i32 23, i32 9 ) ; <i32> [#uses=0]
+ unreachable
+
+cond_next789: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
new file mode 100644
index 0000000..061bf5e
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=arm | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
+; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
+; PR1628
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+ %i_addr = alloca i32 ; <i32*> [#uses=2]
+ %q_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %i, i32* %i_addr
+ store i32 %q, i32* %q_addr
+ %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
+ %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
+ %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
+ br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_false: ; preds = %entry
+ %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
+ %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
+ %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
+ br i1 %toBool210, label %cond_true11, label %cond_false15
+
+cond_true11: ; preds = %cond_next
+ %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_false15: ; preds = %cond_next
+ %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_next18: ; preds = %cond_false15, %cond_true11
+ %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %cond_next18
+ %retval20 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
new file mode 100644
index 0000000..d2eb85d
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=arm | not grep {str.*\\!}
+
+ %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
+ %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
+ %struct.shape_pool_t = type { i8* (%struct.shape_pool_t*, i8*, i32)*, i8* (%struct.shape_pool_t*, i32)*, void (%struct.shape_pool_t*, i8*)* }
+
+define %struct.shape_path_t* @shape_path_alloc(%struct.shape_pool_t* %pool, i32* %shape) {
+entry:
+ br i1 false, label %cond_false, label %bb45
+
+bb45: ; preds = %entry
+ ret %struct.shape_path_t* null
+
+cond_false: ; preds = %entry
+ br i1 false, label %bb140, label %bb174
+
+bb140: ; preds = %bb140, %cond_false
+ %indvar = phi i32 [ 0, %cond_false ], [ %indvar.next, %bb140 ] ; <i32> [#uses=2]
+ %edge.230.0.rec = shl i32 %indvar, 1 ; <i32> [#uses=3]
+ %edge.230.0 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.rec ; <%struct.shape_edge_t*> [#uses=1]
+ %edge.230.0.sum6970 = or i32 %edge.230.0.rec, 1 ; <i32> [#uses=2]
+ %tmp154 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970 ; <%struct.shape_edge_t*> [#uses=1]
+ %tmp11.i5 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970, i32 0 ; <%struct.shape_edge_t**> [#uses=1]
+ store %struct.shape_edge_t* %edge.230.0, %struct.shape_edge_t** %tmp11.i5
+ store %struct.shape_edge_t* %tmp154, %struct.shape_edge_t** null
+ %tmp16254.0.rec = add i32 %edge.230.0.rec, 2 ; <i32> [#uses=1]
+ %xp.350.sum = add i32 0, %tmp16254.0.rec ; <i32> [#uses=1]
+ %tmp168 = icmp slt i32 %xp.350.sum, 0 ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp168, label %bb140, label %bb174
+
+bb174: ; preds = %bb140, %cond_false
+ ret %struct.shape_path_t* null
+}
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
new file mode 100644
index 0000000..030486a
--- /dev/null
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s
+; PR1424
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "arm-linux-gnueabi"
+ %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
+ %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
+ %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
+ %struct.AVEvalExpr = type opaque
+ %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] }
+ %struct.AVOption = type opaque
+ %struct.AVPaletteControl = type { i32, [256 x i32] }
+ %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
+ %struct.AVRational = type { i32, i32 }
+ %struct.BlockNode = type { i16, i16, i8, [3 x i8], i8, i8 }
+ %struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (float*, float*, i32)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32, i32, i32)*, void (i16*, float*, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i32*, i32*, i32*, i32*, i32*, i32*, i32)*, void (i32*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i16*)*, void (i16*, i32)*, void (i16*, i32)*, void (i16*, i32)*, void (i8*, i32)*, void (i8*, i32)*, [16 x void (i8*, i8*, i32, i32)*] }
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+ %struct.GetBitContext = type { i8*, i8*, i32*, i32, i32, i32, i32 }
+ %struct.MJpegContext = type opaque
+ %struct.MotionEstContext = type { %struct.AVCodecContext*, i32, [4 x [2 x i32]], [4 x [2 x i32]], i8*, i8*, [2 x i8*], i8*, i32, i32*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x [4 x i8*]], [4 x [4 x i8*]], i32, i32, i32, i32, i32, [4 x void (i8*, i8*, i32, i32)*]*, [4 x void (i8*, i8*, i32, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [4097 x i8]*, i8*, i32 (%struct.MpegEncContext*, i32*, i32*, i32, i32, i32, i32, i32)* }
+ %struct.MpegEncContext = type { %struct.AVCodecContext*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Picture*, %struct.Picture**, %struct.Picture**, i32, i32, [8 x %struct.MpegEncContext*], %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture*, %struct.Picture*, %struct.Picture*, [3 x i8*], [3 x i32], i16*, [3 x i16*], [20 x i16], i32, i32, i8*, i8*, i8*, i8*, i8*, [16 x i16]*, [3 x [16 x i16]*], i32, i8*, i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, [5 x i32], i32, i32, i32, i32, %struct.DSPContext, i32, i32, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i8*], [2 x [2 x i8*]], i32, i32, i32, [2 x [4 x [2 x i32]]], [2 x [2 x i32]], [2 x [2 x [2 x i32]]], i8*, [2 x [64 x i16]], %struct.MotionEstContext, i32, i32, i32, i32, i32, i32, i16*, [6 x i32], [6 x i32], [3 x i8*], i32*, [64 x i16], [64 x i16], [64 x i16], [64 x i16], i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [8 x i32], [64 x i32]*, [64 x i32]*, [2 x [64 x i16]]*, [2 x [64 x i16]]*, [12 x i32], %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, [64 x i32]*, [2 x i32], [64 x i16]*, i8*, i64, i64, i32, i32, %struct.RateControlContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, %struct.GetBitContext, i32, i32, i32, %struct.ParseContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [2 x i32]], [2 x [2 x i32]], [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, [3 x i32], %struct.MJpegContext*, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [65 x [65 x [2 x i32]]]]*, i32, i32, %struct.GetBitContext, i32, i32, i32, i8*, i32, [2 x [2 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32, i32, i8*, i32, [12 x i16*], [64 x i16]*, [8 x [64 x i16]]*, i32 (%struct.MpegEncContext*, [64 x i16]*)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, void (%struct.MpegEncContext*, i16*)* }
+ %struct.ParseContext = type { i8*, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.Picture = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], [3 x i8*], [2 x [2 x i16]*], i32*, [2 x i32], i32, i32, i32, i32, [2 x [16 x i32]], [2 x i32], i32, i32, i16*, i16*, i8*, i32*, i32 }
+ %struct.Plane = type { i32, i32, [8 x [4 x %struct.SubBand]] }
+ %struct.Predictor = type { double, double, double }
+ %struct.PutBitContext = type { i32, i32, i8*, i8*, i8* }
+ %struct.RangeCoder = type { i32, i32, i32, i32, [256 x i8], [256 x i8], i8*, i8*, i8* }
+ %struct.RateControlContext = type { %struct.FILE*, i32, %struct.RateControlEntry*, double, [5 x %struct.Predictor], double, double, double, double, double, [5 x double], i32, i32, [5 x i64], [5 x i64], [5 x i64], [5 x i64], [5 x i32], i32, i8*, float, i32, %struct.AVEvalExpr* }
+ %struct.RateControlEntry = type { i32, float, i32, i32, i32, i32, i32, i64, i32, float, i32, i32, i32, i32, i32, i32 }
+ %struct.RcOverride = type { i32, i32, i32, float }
+ %struct.ScanTable = type { i8*, [64 x i8], [64 x i8] }
+ %struct.SnowContext = type { %struct.AVCodecContext*, %struct.RangeCoder, %struct.DSPContext, %struct.AVFrame, %struct.AVFrame, %struct.AVFrame, [8 x %struct.AVFrame], %struct.AVFrame, [32 x i8], [4224 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [8 x [2 x i16]*], [8 x i32*], i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.Plane], %struct.BlockNode*, [1024 x i32], i32, %struct.slice_buffer, %struct.MpegEncContext }
+ %struct.SubBand = type { i32, i32, i32, i32, i32, i32*, i32, i32, i32, %struct.x_and_coeff*, %struct.SubBand*, [519 x [32 x i8]] }
+ %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+ %struct.slice_buffer = type { i32**, i32**, i32, i32, i32, i32, i32* }
+ %struct.x_and_coeff = type { i16, i16 }
+
+define fastcc void @iterative_me(%struct.SnowContext* %s) {
+entry:
+ %state = alloca [4224 x i8], align 8 ; <[4224 x i8]*> [#uses=0]
+ %best_rd4233 = alloca i32, align 4 ; <i32*> [#uses=0]
+ %tmp21 = getelementptr %struct.SnowContext* %s, i32 0, i32 36 ; <i32*> [#uses=2]
+ br label %bb4198
+
+bb79: ; preds = %bb4189.preheader
+ br i1 false, label %cond_next239, label %cond_true
+
+cond_true: ; preds = %bb79
+ ret void
+
+cond_next239: ; preds = %bb79
+ %tmp286 = alloca i8, i32 0 ; <i8*> [#uses=0]
+ ret void
+
+bb4198: ; preds = %bb4189.preheader, %entry
+ br i1 false, label %bb4189.preheader, label %bb4204
+
+bb4189.preheader: ; preds = %bb4198
+ br i1 false, label %bb79, label %bb4198
+
+bb4204: ; preds = %bb4198
+ br i1 false, label %bb4221, label %cond_next4213
+
+cond_next4213: ; preds = %bb4204
+ ret void
+
+bb4221: ; preds = %bb4204
+ br i1 false, label %bb5242.preheader, label %UnifiedReturnBlock
+
+bb5242.preheader: ; preds = %bb4221
+ br label %bb5242
+
+bb4231: ; preds = %bb5233
+ %tmp4254.sum = add i32 0, 1 ; <i32> [#uses=2]
+ br i1 false, label %bb4559, label %cond_next4622
+
+bb4559: ; preds = %bb4231
+ ret void
+
+cond_next4622: ; preds = %bb4231
+ %tmp4637 = load i16* null ; <i16> [#uses=1]
+ %tmp46374638 = sext i16 %tmp4637 to i32 ; <i32> [#uses=1]
+ %tmp4642 = load i16* null ; <i16> [#uses=1]
+ %tmp46424643 = sext i16 %tmp4642 to i32 ; <i32> [#uses=1]
+ %tmp4648 = load i16* null ; <i16> [#uses=1]
+ %tmp46484649 = sext i16 %tmp4648 to i32 ; <i32> [#uses=1]
+ %tmp4653 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 0 ; <i16*> [#uses=1]
+ %tmp4654 = load i16* %tmp4653 ; <i16> [#uses=1]
+ %tmp46544655 = sext i16 %tmp4654 to i32 ; <i32> [#uses=1]
+ %tmp4644 = add i32 %tmp46374638, 2 ; <i32> [#uses=1]
+ %tmp4650 = add i32 %tmp4644, %tmp46424643 ; <i32> [#uses=1]
+ %tmp4656 = add i32 %tmp4650, %tmp46484649 ; <i32> [#uses=1]
+ %tmp4657 = add i32 %tmp4656, %tmp46544655 ; <i32> [#uses=2]
+ %tmp4658 = ashr i32 %tmp4657, 2 ; <i32> [#uses=1]
+ %tmp4662 = load i16* null ; <i16> [#uses=1]
+ %tmp46624663 = sext i16 %tmp4662 to i32 ; <i32> [#uses=1]
+ %tmp4672 = getelementptr %struct.BlockNode* null, i32 0, i32 1 ; <i16*> [#uses=1]
+ %tmp4673 = load i16* %tmp4672 ; <i16> [#uses=1]
+ %tmp46734674 = sext i16 %tmp4673 to i32 ; <i32> [#uses=1]
+ %tmp4678 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 1 ; <i16*> [#uses=1]
+ %tmp4679 = load i16* %tmp4678 ; <i16> [#uses=1]
+ %tmp46794680 = sext i16 %tmp4679 to i32 ; <i32> [#uses=1]
+ %tmp4669 = add i32 %tmp46624663, 2 ; <i32> [#uses=1]
+ %tmp4675 = add i32 %tmp4669, 0 ; <i32> [#uses=1]
+ %tmp4681 = add i32 %tmp4675, %tmp46734674 ; <i32> [#uses=1]
+ %tmp4682 = add i32 %tmp4681, %tmp46794680 ; <i32> [#uses=2]
+ %tmp4683 = ashr i32 %tmp4682, 2 ; <i32> [#uses=1]
+ %tmp4703 = load i32* %tmp21 ; <i32> [#uses=1]
+ %tmp4707 = shl i32 %tmp4703, 0 ; <i32> [#uses=4]
+ %tmp4710 = load %struct.BlockNode** null ; <%struct.BlockNode*> [#uses=6]
+ %tmp4713 = mul i32 %tmp4707, %mb_y.4 ; <i32> [#uses=1]
+ %tmp4715 = add i32 %tmp4713, %mb_x.7 ; <i32> [#uses=7]
+ store i8 0, i8* null
+ store i8 0, i8* null
+ %tmp47594761 = bitcast %struct.BlockNode* null to i8* ; <i8*> [#uses=2]
+ call void @llvm.memcpy.i32( i8* null, i8* %tmp47594761, i32 10, i32 0 )
+ %tmp4716.sum5775 = add i32 %tmp4715, 1 ; <i32> [#uses=1]
+ %tmp4764 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5775 ; <%struct.BlockNode*> [#uses=1]
+ %tmp47644766 = bitcast %struct.BlockNode* %tmp4764 to i8* ; <i8*> [#uses=1]
+ %tmp4716.sum5774 = add i32 %tmp4715, %tmp4707 ; <i32> [#uses=0]
+ %tmp47704772 = bitcast %struct.BlockNode* null to i8* ; <i8*> [#uses=1]
+ %tmp4774 = add i32 %tmp4707, 1 ; <i32> [#uses=1]
+ %tmp4716.sum5773 = add i32 %tmp4774, %tmp4715 ; <i32> [#uses=1]
+ %tmp4777 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5773 ; <%struct.BlockNode*> [#uses=1]
+ %tmp47774779 = bitcast %struct.BlockNode* %tmp4777 to i8* ; <i8*> [#uses=1]
+ %tmp4781 = icmp slt i32 %mb_x.7, 0 ; <i1> [#uses=1]
+ %tmp4788 = or i1 %tmp4781, %tmp4784 ; <i1> [#uses=2]
+ br i1 %tmp4788, label %cond_true4791, label %cond_next4794
+
+cond_true4791: ; preds = %cond_next4622
+ unreachable
+
+cond_next4794: ; preds = %cond_next4622
+ %tmp4797 = icmp slt i32 %mb_x.7, %tmp4707 ; <i1> [#uses=1]
+ br i1 %tmp4797, label %cond_next4803, label %cond_true4800
+
+cond_true4800: ; preds = %cond_next4794
+ unreachable
+
+cond_next4803: ; preds = %cond_next4794
+ %tmp4825 = ashr i32 %tmp4657, 12 ; <i32> [#uses=1]
+ shl i32 %tmp4682, 4 ; <i32>:0 [#uses=1]
+ %tmp4828 = and i32 %0, -64 ; <i32> [#uses=1]
+ %tmp4831 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 2 ; <i8*> [#uses=0]
+ %tmp4826 = add i32 %tmp4828, %tmp4825 ; <i32> [#uses=1]
+ %tmp4829 = add i32 %tmp4826, 0 ; <i32> [#uses=1]
+ %tmp4835 = add i32 %tmp4829, 0 ; <i32> [#uses=1]
+ store i32 %tmp4835, i32* null
+ %tmp48534854 = trunc i32 %tmp4658 to i16 ; <i16> [#uses=1]
+ %tmp4856 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 0 ; <i16*> [#uses=1]
+ store i16 %tmp48534854, i16* %tmp4856
+ %tmp48574858 = trunc i32 %tmp4683 to i16 ; <i16> [#uses=1]
+ %tmp4860 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 1 ; <i16*> [#uses=1]
+ store i16 %tmp48574858, i16* %tmp4860
+ %tmp4866 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 4 ; <i8*> [#uses=0]
+ br i1 false, label %bb4933, label %cond_false4906
+
+cond_false4906: ; preds = %cond_next4803
+ call void @llvm.memcpy.i32( i8* %tmp47594761, i8* null, i32 10, i32 0 )
+ call void @llvm.memcpy.i32( i8* %tmp47644766, i8* null, i32 10, i32 0 )
+ call void @llvm.memcpy.i32( i8* %tmp47704772, i8* null, i32 10, i32 0 )
+ call void @llvm.memcpy.i32( i8* %tmp47774779, i8* null, i32 10, i32 0 )
+ br label %bb5215
+
+bb4933: ; preds = %bb5215, %cond_next4803
+ br i1 false, label %cond_true4944, label %bb5215
+
+cond_true4944: ; preds = %bb4933
+ %tmp4982 = load i32* %tmp21 ; <i32> [#uses=1]
+ %tmp4986 = shl i32 %tmp4982, 0 ; <i32> [#uses=2]
+ %tmp4992 = mul i32 %tmp4986, %mb_y.4 ; <i32> [#uses=1]
+ %tmp4994 = add i32 %tmp4992, %mb_x.7 ; <i32> [#uses=5]
+ %tmp4995.sum5765 = add i32 %tmp4994, 1 ; <i32> [#uses=1]
+ %tmp5043 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5765 ; <%struct.BlockNode*> [#uses=1]
+ %tmp50435045 = bitcast %struct.BlockNode* %tmp5043 to i8* ; <i8*> [#uses=2]
+ call void @llvm.memcpy.i32( i8* null, i8* %tmp50435045, i32 10, i32 0 )
+ %tmp4995.sum5764 = add i32 %tmp4994, %tmp4986 ; <i32> [#uses=1]
+ %tmp5049 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5764 ; <%struct.BlockNode*> [#uses=1]
+ %tmp50495051 = bitcast %struct.BlockNode* %tmp5049 to i8* ; <i8*> [#uses=2]
+ call void @llvm.memcpy.i32( i8* null, i8* %tmp50495051, i32 10, i32 0 )
+ %tmp4995.sum5763 = add i32 0, %tmp4994 ; <i32> [#uses=1]
+ %tmp5056 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5763 ; <%struct.BlockNode*> [#uses=1]
+ %tmp50565058 = bitcast %struct.BlockNode* %tmp5056 to i8* ; <i8*> [#uses=1]
+ br i1 %tmp4788, label %cond_true5070, label %cond_next5073
+
+cond_true5070: ; preds = %cond_true4944
+ unreachable
+
+cond_next5073: ; preds = %cond_true4944
+ %tmp5139 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 1 ; <i16*> [#uses=0]
+ %tmp5145 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 4 ; <i8*> [#uses=0]
+ call void @llvm.memcpy.i32( i8* %tmp50435045, i8* null, i32 10, i32 0 )
+ call void @llvm.memcpy.i32( i8* %tmp50495051, i8* null, i32 10, i32 0 )
+ call void @llvm.memcpy.i32( i8* %tmp50565058, i8* null, i32 10, i32 0 )
+ br label %bb5215
+
+bb5215: ; preds = %cond_next5073, %bb4933, %cond_false4906
+ %i4232.3 = phi i32 [ 0, %cond_false4906 ], [ 0, %cond_next5073 ], [ 0, %bb4933 ] ; <i32> [#uses=1]
+ %tmp5217 = icmp slt i32 %i4232.3, 4 ; <i1> [#uses=1]
+ br i1 %tmp5217, label %bb4933, label %bb5220
+
+bb5220: ; preds = %bb5215
+ br i1 false, label %bb5230, label %cond_true5226
+
+cond_true5226: ; preds = %bb5220
+ ret void
+
+bb5230: ; preds = %bb5220
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb5233
+
+bb5233: ; preds = %bb5233.preheader, %bb5230
+ %indvar = phi i32 [ 0, %bb5233.preheader ], [ %indvar.next, %bb5230 ] ; <i32> [#uses=2]
+ %mb_x.7 = shl i32 %indvar, 1 ; <i32> [#uses=4]
+ br i1 false, label %bb4231, label %bb5239
+
+bb5239: ; preds = %bb5233
+ %indvar.next37882 = add i32 %indvar37881, 1 ; <i32> [#uses=1]
+ br label %bb5242
+
+bb5242: ; preds = %bb5239, %bb5242.preheader
+ %indvar37881 = phi i32 [ 0, %bb5242.preheader ], [ %indvar.next37882, %bb5239 ] ; <i32> [#uses=2]
+ %mb_y.4 = shl i32 %indvar37881, 1 ; <i32> [#uses=3]
+ br i1 false, label %bb5233.preheader, label %bb5248
+
+bb5233.preheader: ; preds = %bb5242
+ %tmp4784 = icmp slt i32 %mb_y.4, 0 ; <i1> [#uses=1]
+ br label %bb5233
+
+bb5248: ; preds = %bb5242
+ ret void
+
+UnifiedReturnBlock: ; preds = %bb4221
+ ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
new file mode 100644
index 0000000..30b72e0
--- /dev/null
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
+; PR1609
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+@_C_nextcmd = external global i32 ; <i32*> [#uses=2]
+@_C_cmds = external global [100 x i8*] ; <[100 x i8*]*> [#uses=2]
[email protected] = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ br label %cond_next212.i
+
+bb21.i: ; preds = %cond_next212.i
+ br label %cond_next212.i
+
+bb24.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb27.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb30.i: ; preds = %cond_next212.i
+ %tmp205399.i = add i32 %argc_addr.2358.0.i, -1 ; <i32> [#uses=1]
+ br label %cond_next212.i
+
+bb33.i: ; preds = %cond_next212.i
+ ret i32 0
+
+cond_next73.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb75.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb77.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb79.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb102.i: ; preds = %cond_next212.i
+ br i1 false, label %cond_true110.i, label %cond_next123.i
+
+cond_true110.i: ; preds = %bb102.i
+ %tmp116.i = getelementptr i8** %argv_addr.2321.0.i, i32 2 ; <i8**> [#uses=1]
+ %tmp117.i = load i8** %tmp116.i ; <i8*> [#uses=1]
+ %tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8]* @.str44, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=0]
+ ret i32 0
+
+cond_next123.i: ; preds = %bb102.i
+ %tmp122.i = getelementptr i8* %tmp215.i, i32 2 ; <i8*> [#uses=0]
+ ret i32 0
+
+bb162.i: ; preds = %cond_next212.i
+ ret i32 0
+
+C_addcmd.exit120.i: ; preds = %cond_next212.i
+ %tmp3.i.i.i.i105.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1]
+ %tmp1.i108.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 0 ; <i8**> [#uses=1]
+ store i8* %tmp3.i.i.i.i105.i, i8** %tmp1.i108.i, align 4
+ %tmp.i91.i = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
+ store i32 0, i32* @_C_nextcmd, align 4
+ %tmp3.i.i.i.i95.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1]
+ %tmp1.i98.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i ; <i8**> [#uses=1]
+ store i8* %tmp3.i.i.i.i95.i, i8** %tmp1.i98.i, align 4
+ br label %cond_next212.i
+
+bb174.i: ; preds = %cond_next212.i
+ ret i32 0
+
+bb192.i: ; preds = %cond_next212.i
+ br label %cond_next212.i
+
+cond_next212.i: ; preds = %cond_next212.i, %cond_next212.i, %cond_next212.i, %cond_next212.i, %bb192.i, %C_addcmd.exit120.i, %bb30.i, %bb21.i, %entry
+ %max_d.3 = phi i32 [ -1, %entry ], [ %max_d.3, %bb30.i ], [ %max_d.3, %bb21.i ], [ %max_d.3, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ] ; <i32> [#uses=7]
+ %argv_addr.2321.0.i = phi i8** [ %argv, %entry ], [ %tmp214.i, %bb192.i ], [ %tmp214.i, %C_addcmd.exit120.i ], [ %tmp214.i, %bb30.i ], [ %tmp214.i, %bb21.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ] ; <i8**> [#uses=2]
+ %argc_addr.2358.0.i = phi i32 [ %argc, %entry ], [ %tmp205399.i, %bb30.i ], [ 0, %bb21.i ], [ 0, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ] ; <i32> [#uses=1]
+ %tmp214.i = getelementptr i8** %argv_addr.2321.0.i, i32 1 ; <i8**> [#uses=9]
+ %tmp215.i = load i8** %tmp214.i ; <i8*> [#uses=1]
+ %tmp1314.i = sext i8 0 to i32 ; <i32> [#uses=1]
+ switch i32 %tmp1314.i, label %bb192.i [
+ i32 76, label %C_addcmd.exit120.i
+ i32 77, label %bb174.i
+ i32 83, label %bb162.i
+ i32 97, label %bb33.i
+ i32 98, label %bb21.i
+ i32 99, label %bb24.i
+ i32 100, label %bb27.i
+ i32 101, label %cond_next212.i
+ i32 102, label %bb102.i
+ i32 105, label %bb75.i
+ i32 109, label %bb30.i
+ i32 113, label %cond_next212.i
+ i32 114, label %cond_next73.i
+ i32 115, label %bb79.i
+ i32 116, label %cond_next212.i
+ i32 118, label %bb77.i
+ i32 119, label %cond_next212.i
+ ]
+}
+
+declare %struct.FILE* @fopen(i8*, i8*)
+
+declare i8* @calloc(i32, i32)
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
new file mode 100644
index 0000000..ff01506
--- /dev/null
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
+; PR1925
+
+ %struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
+ %struct.encode_aux_pigeonhole = type { float, float, i32, i32, i32*, i32, i32*, i32*, i32* }
+ %struct.encode_aux_threshmatch = type { float*, i32*, i32, i32 }
+ %struct.oggpack_buffer = type { i32, i32, i8*, i8*, i32 }
+ %struct.static_codebook = type { i32, i32, i32*, i32, i32, i32, i32, i32, i32*, %struct.encode_aux_nearestmatch*, %struct.encode_aux_threshmatch*, %struct.encode_aux_pigeonhole*, i32 }
+
+define i32 @vorbis_staticbook_pack(%struct.static_codebook* %c, %struct.oggpack_buffer* %opb) {
+entry:
+ %opb_addr = alloca %struct.oggpack_buffer* ; <%struct.oggpack_buffer**> [#uses=1]
+ %tmp1 = load %struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1]
+ call void @oggpack_write( %struct.oggpack_buffer* %tmp1, i32 5653314, i32 24 ) nounwind
+ call void @oggpack_write( %struct.oggpack_buffer* null, i32 0, i32 24 ) nounwind
+ unreachable
+}
+
+declare void @oggpack_write(%struct.oggpack_buffer*, i32, i32)
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
new file mode 100644
index 0000000..06bc987
--- /dev/null
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
+; PR1925
+
+ %"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
+ %"struct.kc::impl_ID" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_casestring__Str"*, i32, %"struct.kc::impl_casestring__Str"* }
+ %"struct.kc::impl_abstract_phylum" = type { i32 (...)** }
+ %"struct.kc::impl_casestring__Str" = type { %"struct.kc::impl_abstract_phylum", i8* }
+
+define %"struct.kc::impl_ID"* @_ZN2kc18f_typeofunpsubtermEPNS_15impl_unpsubtermEPNS_7impl_IDE(%"struct.kc::impl_Ccode_option"* %a_unpsubterm, %"struct.kc::impl_ID"* %a_operator) {
+entry:
+ %tmp8 = getelementptr %"struct.kc::impl_Ccode_option"* %a_unpsubterm, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=0]
+ br i1 false, label %bb41, label %bb55
+
+bb41: ; preds = %entry
+ ret %"struct.kc::impl_ID"* null
+
+bb55: ; preds = %entry
+ %tmp67 = tail call i32 null( %"struct.kc::impl_abstract_phylum"* null ) ; <i32> [#uses=0]
+ %tmp97 = tail call i32 null( %"struct.kc::impl_abstract_phylum"* null ) ; <i32> [#uses=0]
+ ret %"struct.kc::impl_ID"* null
+}
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
new file mode 100644
index 0000000..a604c5c
--- /dev/null
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ br label %bb1
+bb1: ; preds = %entry
+ %tmp3.i.i = load i8* null, align 1 ; <i8> [#uses=1]
+ %tmp4.i.i = icmp slt i8 %tmp3.i.i, 0 ; <i1> [#uses=1]
+ br i1 %tmp4.i.i, label %bb2, label %bb3
+bb2: ; preds = %bb1
+ ret i32 1
+bb3: ; preds = %bb1
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
new file mode 100644
index 0000000..78c6222
--- /dev/null
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+
+@accum = external global { double, double } ; <{ double, double }*> [#uses=1]
[email protected] = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
+
+define i32 @main() {
+entry:
+ br label %bb74.i
+bb74.i: ; preds = %bb88.i, %bb74.i, %entry
+ br i1 false, label %bb88.i, label %bb74.i
+bb88.i: ; preds = %bb74.i
+ br i1 false, label %mandel.exit, label %bb74.i
+mandel.exit: ; preds = %bb88.i
+ %tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+ %tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
new file mode 100644
index 0000000..234c7b6
--- /dev/null
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+@numBinsY = external global i32 ; <i32*> [#uses=1]
+
+declare double @pow(double, double)
+
+define void @main(i32 %argc, i8** %argv) noreturn nounwind {
+entry:
+ br i1 false, label %bb34.outer.i.i.i, label %cond_false674
+bb34.outer.i.i.i: ; preds = %entry
+ br i1 false, label %bb2.i.i.i, label %bb47.i.i.i
+bb2.i.i.i: ; preds = %bb34.outer.i.i.i
+ %tmp24.i.i.i = call double @pow( double 0.000000e+00, double 2.000000e+00 ) ; <double> [#uses=0]
+ ret void
+bb47.i.i.i: ; preds = %bb34.outer.i.i.i
+ br i1 false, label %bb220.i.i.i, label %bb62.preheader.i.i.i
+bb62.preheader.i.i.i: ; preds = %bb47.i.i.i
+ ret void
+bb220.i.i.i: ; preds = %bb47.i.i.i
+ br i1 false, label %bb248.i.i.i, label %cond_next232.i.i.i
+cond_next232.i.i.i: ; preds = %bb220.i.i.i
+ ret void
+bb248.i.i.i: ; preds = %bb220.i.i.i
+ br i1 false, label %bb300.i.i.i, label %cond_false256.i.i.i
+cond_false256.i.i.i: ; preds = %bb248.i.i.i
+ ret void
+bb300.i.i.i: ; preds = %bb248.i.i.i
+ store i32 undef, i32* @numBinsY, align 4
+ ret void
+cond_false674: ; preds = %entry
+ ret void
+}
+
+ %struct.anon = type { %struct.rnode*, %struct.rnode* }
+ %struct.ch_set = type { { i8, i8 }*, %struct.ch_set* }
+ %struct.pat_list = type { i32, %struct.pat_list* }
+ %struct.rnode = type { i16, { %struct.anon }, i16, %struct.pat_list*, %struct.pat_list* }
+
+define fastcc { i16, %struct.rnode* }* @get_token(i8** %s) nounwind {
+entry:
+ br i1 false, label %bb42, label %bb78
+bb42: ; preds = %entry
+ br label %cond_next119.i
+bb17.i: ; preds = %cond_next119.i
+ br i1 false, label %cond_true53.i, label %cond_false99.i
+cond_true53.i: ; preds = %bb17.i
+ ret { i16, %struct.rnode* }* null
+cond_false99.i: ; preds = %bb17.i
+ %tmp106.i = malloc %struct.ch_set ; <%struct.ch_set*> [#uses=1]
+ br i1 false, label %bb126.i, label %cond_next119.i
+cond_next119.i: ; preds = %cond_false99.i, %bb42
+ %curr_ptr.0.reg2mem.0.i = phi %struct.ch_set* [ %tmp106.i, %cond_false99.i ], [ null, %bb42 ] ; <%struct.ch_set*> [#uses=2]
+ %prev_ptr.0.reg2mem.0.i = phi %struct.ch_set* [ %curr_ptr.0.reg2mem.0.i, %cond_false99.i ], [ undef, %bb42 ] ; <%struct.ch_set*> [#uses=1]
+ br i1 false, label %bb126.i, label %bb17.i
+bb126.i: ; preds = %cond_next119.i, %cond_false99.i
+ %prev_ptr.0.reg2mem.1.i = phi %struct.ch_set* [ %prev_ptr.0.reg2mem.0.i, %cond_next119.i ], [ %curr_ptr.0.reg2mem.0.i, %cond_false99.i ] ; <%struct.ch_set*> [#uses=0]
+ ret { i16, %struct.rnode* }* null
+bb78: ; preds = %entry
+ ret { i16, %struct.rnode* }* null
+}
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
new file mode 100644
index 0000000..77418be
--- /dev/null
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -0,0 +1,258 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+ %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.LOCBOX = type { i32, i32, i32, i32 }
+ %struct.SIDEBOX = type { i32, i32 }
+ %struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+ %struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*], %struct.SIDEBOX* }
+ %struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }
+ %struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.termbox*, %struct.LOCBOX* }
[email protected] = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
[email protected] = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
[email protected] = external constant [9 x i8] ; <[9 x i8]*> [#uses=1]
+
+declare %struct.FILE* @fopen(i8*, i8*)
+
+declare i32 @strcmp(i8*, i8*)
+
+declare i32 @fscanf(%struct.FILE*, i8*, ...)
+
+define void @main(i32 %argc, i8** %argv) noreturn {
+entry:
+ br i1 false, label %cond_next48, label %cond_false674
+cond_next48: ; preds = %entry
+ %tmp61 = call %struct.FILE* @fopen( i8* null, i8* getelementptr ([2 x i8]* @.str127, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=2]
+ br i1 false, label %bb220.i.i.i, label %bb62.preheader.i.i.i
+bb62.preheader.i.i.i: ; preds = %cond_next48
+ ret void
+bb220.i.i.i: ; preds = %cond_next48
+ br i1 false, label %bb248.i.i.i, label %cond_next232.i.i.i
+cond_next232.i.i.i: ; preds = %bb220.i.i.i
+ ret void
+bb248.i.i.i: ; preds = %bb220.i.i.i
+ br i1 false, label %bb300.i.i.i, label %cond_false256.i.i.i
+cond_false256.i.i.i: ; preds = %bb248.i.i.i
+ ret void
+bb300.i.i.i: ; preds = %bb248.i.i.i
+ br label %bb.i.i347.i
+bb.i.i347.i: ; preds = %bb.i.i347.i, %bb300.i.i.i
+ br i1 false, label %bb894.loopexit.i.i, label %bb.i.i347.i
+bb.i350.i: ; preds = %bb894.i.i
+ br i1 false, label %bb24.i.i, label %cond_false373.i.i
+bb24.i.i: ; preds = %bb24.i.i, %bb.i350.i
+ br i1 false, label %bb40.i.i, label %bb24.i.i
+bb40.i.i: ; preds = %bb24.i.i
+ br i1 false, label %bb177.i393.i, label %bb82.i.i
+bb82.i.i: ; preds = %bb40.i.i
+ ret void
+bb177.i393.i: ; preds = %bb40.i.i
+ br i1 false, label %bb894.i.i, label %bb192.i.i
+bb192.i.i: ; preds = %bb177.i393.i
+ ret void
+cond_false373.i.i: ; preds = %bb.i350.i
+ %tmp376.i.i = call i32 @strcmp( i8* null, i8* getelementptr ([9 x i8]* @.str8115, i32 0, i32 0) ) ; <i32> [#uses=0]
+ br i1 false, label %cond_true380.i.i, label %cond_next602.i.i
+cond_true380.i.i: ; preds = %cond_false373.i.i
+ %tmp394.i418.i = add i32 %cell.0.i.i, 1 ; <i32> [#uses=1]
+ %tmp397.i420.i = load %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1]
+ br label %bb398.i.i
+bb398.i.i: ; preds = %bb398.i.i, %cond_true380.i.i
+ br i1 false, label %bb414.i.i, label %bb398.i.i
+bb414.i.i: ; preds = %bb398.i.i
+ br i1 false, label %bb581.i.i, label %bb455.i442.i
+bb455.i442.i: ; preds = %bb414.i.i
+ ret void
+bb581.i.i: ; preds = %bb581.i.i, %bb414.i.i
+ br i1 false, label %bb894.i.i, label %bb581.i.i
+cond_next602.i.i: ; preds = %cond_false373.i.i
+ br i1 false, label %bb609.i.i, label %bb661.i.i
+bb609.i.i: ; preds = %cond_next602.i.i
+ br label %bb620.i.i
+bb620.i.i: ; preds = %bb620.i.i, %bb609.i.i
+ %indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ] ; <i32> [#uses=1]
+ %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %tmp648.i.i = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i ; <i1> [#uses=1]
+ %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %indvar.next167.i.i = add i32 %indvar166.i465.i, 1 ; <i32> [#uses=1]
+ br i1 %tmp650.i468.i, label %bb653.i.i.loopexit, label %bb620.i.i
+bb653.i.i.loopexit: ; preds = %bb620.i.i
+ %tmp642.i466.i = add i32 0, 1 ; <i32> [#uses=1]
+ br label %bb894.i.i
+bb661.i.i: ; preds = %cond_next602.i.i
+ ret void
+bb894.loopexit.i.i: ; preds = %bb.i.i347.i
+ br label %bb894.i.i
+bb894.i.i: ; preds = %bb894.loopexit.i.i, %bb653.i.i.loopexit, %bb581.i.i, %bb177.i393.i
+ %pinctr.0.i.i = phi i32 [ 0, %bb894.loopexit.i.i ], [ %tmp642.i466.i, %bb653.i.i.loopexit ], [ %pinctr.0.i.i, %bb177.i393.i ], [ %pinctr.0.i.i, %bb581.i.i ] ; <i32> [#uses=2]
+ %soft.0.i.i = phi i32 [ undef, %bb894.loopexit.i.i ], [ %soft.0.i.i, %bb653.i.i.loopexit ], [ 0, %bb177.i393.i ], [ 1, %bb581.i.i ] ; <i32> [#uses=1]
+ %cell.0.i.i = phi i32 [ 0, %bb894.loopexit.i.i ], [ %cell.0.i.i, %bb653.i.i.loopexit ], [ 0, %bb177.i393.i ], [ %tmp394.i418.i, %bb581.i.i ] ; <i32> [#uses=2]
+ %ptr.0.i.i = phi %struct.cellbox* [ undef, %bb894.loopexit.i.i ], [ %ptr.0.i.i, %bb653.i.i.loopexit ], [ null, %bb177.i393.i ], [ %tmp397.i420.i, %bb581.i.i ] ; <%struct.cellbox*> [#uses=1]
+ br i1 false, label %bb.i350.i, label %bb902.i502.i
+bb902.i502.i: ; preds = %bb894.i.i
+ ret void
+cond_false674: ; preds = %entry
+ ret void
+}
+
+ %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
+ %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
+ %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
+ %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+@scalefac_band.1 = external global [14 x i32] ; <[14 x i32]*> [#uses=2]
+
+declare fastcc i32 @init_outer_loop(%struct.lame_global_flags*, double*, %struct.gr_info*)
+
+define fastcc void @outer_loop(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
+entry:
+ %cod_info.182 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 1 ; <i32*> [#uses=1]
+ br label %bb
+bb: ; preds = %bb226, %entry
+ %save_cod_info.1.1 = phi i32 [ undef, %entry ], [ %save_cod_info.1.1, %bb226 ] ; <i32> [#uses=2]
+ br i1 false, label %cond_next, label %cond_true
+cond_true: ; preds = %bb
+ ret void
+cond_next: ; preds = %bb
+ br i1 false, label %cond_next144, label %cond_false
+cond_false: ; preds = %cond_next
+ ret void
+cond_next144: ; preds = %cond_next
+ br i1 false, label %cond_next205, label %cond_true163
+cond_true163: ; preds = %cond_next144
+ br i1 false, label %bb34.i, label %bb.i53
+bb.i53: ; preds = %cond_true163
+ ret void
+bb34.i: ; preds = %cond_true163
+ %tmp37.i55 = load i32* null, align 4 ; <i32> [#uses=1]
+ br i1 false, label %bb65.preheader.i, label %bb78.i
+bb65.preheader.i: ; preds = %bb34.i
+ br label %bb65.outer.us.i
+bb65.outer.us.i: ; preds = %bb65.outer.us.i, %bb65.preheader.i
+ br i1 false, label %bb78.i, label %bb65.outer.us.i
+bb78.i: ; preds = %bb65.outer.us.i, %bb34.i
+ br i1 false, label %bb151.i.preheader, label %bb90.i
+bb90.i: ; preds = %bb78.i
+ ret void
+bb151.i.preheader: ; preds = %bb78.i
+ br label %bb151.i
+bb151.i: ; preds = %bb226.backedge.i, %bb151.i.preheader
+ %i.154.i = phi i32 [ %tmp15747.i, %bb226.backedge.i ], [ 0, %bb151.i.preheader ] ; <i32> [#uses=2]
+ %tmp15747.i = add i32 %i.154.i, 1 ; <i32> [#uses=3]
+ br i1 false, label %bb155.i, label %bb226.backedge.i
+bb226.backedge.i: ; preds = %cond_next215.i, %bb151.i
+ %tmp228.i71 = icmp slt i32 %tmp15747.i, 3 ; <i1> [#uses=1]
+ br i1 %tmp228.i71, label %bb151.i, label %amp_scalefac_bands.exit
+bb155.i: ; preds = %cond_next215.i, %bb151.i
+ %indvar90.i = phi i32 [ %indvar.next91.i, %cond_next215.i ], [ 0, %bb151.i ] ; <i32> [#uses=2]
+ %sfb.3.reg2mem.0.i = add i32 %indvar90.i, %tmp37.i55 ; <i32> [#uses=4]
+ %tmp161.i = getelementptr [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i ; <double*> [#uses=1]
+ %tmp162.i74 = load double* %tmp161.i, align 4 ; <double> [#uses=0]
+ br i1 false, label %cond_true167.i, label %cond_next215.i
+cond_true167.i: ; preds = %bb155.i
+ %tmp173.i = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp173.i, align 4
+ %tmp182.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i ; <i32*> [#uses=0]
+ %tmp185.i78 = add i32 %sfb.3.reg2mem.0.i, 1 ; <i32> [#uses=1]
+ %tmp187.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78 ; <i32*> [#uses=1]
+ %tmp188.i = load i32* %tmp187.1.i, align 4 ; <i32> [#uses=1]
+ %tmp21153.i = icmp slt i32 0, %tmp188.i ; <i1> [#uses=1]
+ br i1 %tmp21153.i, label %bb190.preheader.i, label %cond_next215.i
+bb190.preheader.i: ; preds = %cond_true167.i
+ ret void
+cond_next215.i: ; preds = %cond_true167.i, %bb155.i
+ %indvar.next91.i = add i32 %indvar90.i, 1 ; <i32> [#uses=2]
+ %exitcond99.i87 = icmp eq i32 %indvar.next91.i, 0 ; <i1> [#uses=1]
+ br i1 %exitcond99.i87, label %bb226.backedge.i, label %bb155.i
+amp_scalefac_bands.exit: ; preds = %bb226.backedge.i
+ br i1 false, label %bb19.i, label %bb.i16
+bb.i16: ; preds = %amp_scalefac_bands.exit
+ ret void
+bb19.i: ; preds = %amp_scalefac_bands.exit
+ br i1 false, label %bb40.outer.i, label %cond_next205
+bb40.outer.i: ; preds = %bb19.i
+ ret void
+cond_next205: ; preds = %bb19.i, %cond_next144
+ br i1 false, label %bb226, label %cond_true210
+cond_true210: ; preds = %cond_next205
+ br i1 false, label %bb226, label %cond_true217
+cond_true217: ; preds = %cond_true210
+ %tmp221 = call fastcc i32 @init_outer_loop( %struct.lame_global_flags* %gfp, double* %xr, %struct.gr_info* %cod_info ) ; <i32> [#uses=0]
+ ret void
+bb226: ; preds = %cond_true210, %cond_next205
+ br i1 false, label %bb231, label %bb
+bb231: ; preds = %bb226
+ store i32 %save_cod_info.1.1, i32* %cod_info.182
+ ret void
+}
+
+ %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
+ %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
+ %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
+ %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define fastcc void @outer_loop2(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
+entry:
+ %cod_info.20128.1 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 1 ; <i32*> [#uses=1]
+ %cod_info.20128.2 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 2 ; <i32*> [#uses=1]
+ %cod_info.20128.3 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 3 ; <i32*> [#uses=1]
+ br label %bb
+bb: ; preds = %bb226, %entry
+ %save_cod_info.19.1 = phi i32* [ undef, %entry ], [ %save_cod_info.19.0, %bb226 ] ; <i32*> [#uses=1]
+ %save_cod_info.0.1 = phi i32 [ undef, %entry ], [ %save_cod_info.0.0, %bb226 ] ; <i32> [#uses=1]
+ br i1 false, label %cond_next144, label %cond_false
+cond_false: ; preds = %bb
+ br i1 false, label %cond_true56, label %cond_false78
+cond_true56: ; preds = %cond_false
+ br i1 false, label %inner_loop.exit, label %cond_next85
+inner_loop.exit: ; preds = %cond_true56
+ br i1 false, label %cond_next104, label %cond_false96
+cond_false78: ; preds = %cond_false
+ ret void
+cond_next85: ; preds = %cond_true56
+ ret void
+cond_false96: ; preds = %inner_loop.exit
+ ret void
+cond_next104: ; preds = %inner_loop.exit
+ br i1 false, label %cond_next144, label %cond_false110
+cond_false110: ; preds = %cond_next104
+ ret void
+cond_next144: ; preds = %cond_next104, %bb
+ %save_cod_info.19.0 = phi i32* [ %save_cod_info.19.1, %bb ], [ null, %cond_next104 ] ; <i32*> [#uses=1]
+ %save_cod_info.4.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %save_cod_info.3.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %save_cod_info.2.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %save_cod_info.1.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %save_cod_info.0.0 = phi i32 [ %save_cod_info.0.1, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %over.1 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %best_over.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %notdone.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
+ %tmp147 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp148 = icmp eq i32 %tmp147, 0 ; <i1> [#uses=1]
+ %tmp153 = icmp eq i32 %over.1, 0 ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp148, %tmp153 ; <i1> [#uses=1]
+ %notdone.2 = select i1 %bothcond, i32 0, i32 %notdone.0 ; <i32> [#uses=1]
+ br i1 false, label %cond_next205, label %cond_true163
+cond_true163: ; preds = %cond_next144
+ ret void
+cond_next205: ; preds = %cond_next144
+ br i1 false, label %bb226, label %cond_true210
+cond_true210: ; preds = %cond_next205
+ ret void
+bb226: ; preds = %cond_next205
+ %tmp228 = icmp eq i32 %notdone.2, 0 ; <i1> [#uses=1]
+ br i1 %tmp228, label %bb231, label %bb
+bb231: ; preds = %bb226
+ store i32 %save_cod_info.1.0, i32* null
+ store i32 %save_cod_info.2.0, i32* null
+ store i32 %save_cod_info.3.0, i32* null
+ store i32 %save_cod_info.4.0, i32* null
+ store i32 0, i32* %cod_info.20128.1
+ store i32 0, i32* %cod_info.20128.2
+ store i32 0, i32* %cod_info.20128.3
+ %tmp244245 = sitofp i32 %best_over.0 to double ; <double> [#uses=1]
+ store double %tmp244245, double* %best_noise, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
new file mode 100644
index 0000000..33bd4de
--- /dev/null
+++ b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -0,0 +1,3544 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
+
+define void @t() nounwind {
+ br label %1
+; <label>:1 ; preds = %0
+ br label %bb4351.i
+bb4351.i: ; preds = %1
+ switch i32 0, label %bb4411.i [
+ i32 1, label %bb4354.i
+ i32 2, label %bb4369.i
+ ]
+bb4354.i: ; preds = %bb4351.i
+ br label %t.exit
+bb4369.i: ; preds = %bb4351.i
+ br label %bb4374.i
+bb4374.i: ; preds = %bb4369.i
+ br label %bb4411.i
+bb4411.i: ; preds = %bb4374.i, %bb4351.i
+ %sf4083.0.i = phi i32 [ 0, %bb4374.i ], [ 6, %bb4351.i ] ; <i32> [#uses=8]
+ br label %bb4498.i
+bb4498.i: ; preds = %bb4411.i
+ %sfComp4077.1.i = phi i32 [ undef, %bb4411.i ] ; <i32> [#uses=2]
+ %stComp4075.1.i = phi i32 [ undef, %bb4411.i ] ; <i32> [#uses=1]
+ switch i32 0, label %bb4553.i [
+ i32 1, label %bb4501.i
+ i32 2, label %bb4521.i
+ ]
+bb4501.i: ; preds = %bb4498.i
+ %sfComp4077.1.reg2mem.0.i = phi i32 [ %sfComp4077.1.i, %bb4498.i ] ; <i32> [#uses=1]
+ call void @foo( i8* null, i8* null, i32 %sfComp4077.1.reg2mem.0.i, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0 ) nounwind
+ br i1 false, label %UnifiedReturnBlock.i, label %bb4517.i
+bb4517.i: ; preds = %bb4501.i
+ br label %t.exit
+bb4521.i: ; preds = %bb4498.i
+ br label %bb4526.i
+bb4526.i: ; preds = %bb4521.i
+ switch i32 0, label %bb4529.i [
+ i32 6, label %bb4530.i
+ i32 7, label %bb4530.i
+ ]
+bb4529.i: ; preds = %bb4526.i
+ br label %bb4530.i
+bb4530.i: ; preds = %bb4529.i, %bb4526.i, %bb4526.i
+ br label %bb4553.i
+bb4553.i: ; preds = %bb4530.i, %bb4498.i
+ %dt4080.0.i = phi i32 [ %stComp4075.1.i, %bb4530.i ], [ 7, %bb4498.i ] ; <i32> [#uses=32]
+ %df4081.0.i = phi i32 [ %sfComp4077.1.i, %bb4530.i ], [ 8, %bb4498.i ] ; <i32> [#uses=17]
+ switch i32 %sf4083.0.i, label %bb4559.i [
+ i32 0, label %bb4558.i
+ i32 1, label %bb4558.i
+ i32 2, label %bb4558.i
+ i32 5, label %bb4561.i
+ i32 6, label %bb4561.i
+ i32 7, label %bb4561.i
+ i32 9, label %bb4557.i
+ ]
+bb4557.i: ; preds = %bb4553.i
+ switch i32 %df4081.0.i, label %bb4569.i [
+ i32 0, label %bb4568.i
+ i32 1, label %bb4568.i
+ i32 2, label %bb4568.i
+ i32 5, label %bb4571.i
+ i32 6, label %bb4571.i
+ i32 7, label %bb4571.i
+ i32 9, label %bb4567.i
+ ]
+bb4558.i: ; preds = %bb4553.i, %bb4553.i, %bb4553.i
+ switch i32 %df4081.0.i, label %bb4569.i [
+ i32 0, label %bb4568.i
+ i32 1, label %bb4568.i
+ i32 2, label %bb4568.i
+ i32 5, label %bb4571.i
+ i32 6, label %bb4571.i
+ i32 7, label %bb4571.i
+ i32 9, label %bb4567.i
+ ]
+bb4559.i: ; preds = %bb4553.i
+ br label %bb4561.i
+bb4561.i: ; preds = %bb4559.i, %bb4553.i, %bb4553.i, %bb4553.i
+ switch i32 %df4081.0.i, label %bb4569.i [
+ i32 0, label %bb4568.i
+ i32 1, label %bb4568.i
+ i32 2, label %bb4568.i
+ i32 5, label %bb4571.i
+ i32 6, label %bb4571.i
+ i32 7, label %bb4571.i
+ i32 9, label %bb4567.i
+ ]
+bb4567.i: ; preds = %bb4561.i, %bb4558.i, %bb4557.i
+ br label %bb4580.i
+bb4568.i: ; preds = %bb4561.i, %bb4561.i, %bb4561.i, %bb4558.i, %bb4558.i, %bb4558.i, %bb4557.i, %bb4557.i, %bb4557.i
+ br label %bb4580.i
+bb4569.i: ; preds = %bb4561.i, %bb4558.i, %bb4557.i
+ br label %bb4571.i
+bb4571.i: ; preds = %bb4569.i, %bb4561.i, %bb4561.i, %bb4561.i, %bb4558.i, %bb4558.i, %bb4558.i, %bb4557.i, %bb4557.i, %bb4557.i
+ br label %bb4580.i
+bb4580.i: ; preds = %bb4571.i, %bb4568.i, %bb4567.i
+ br i1 false, label %bb4611.i, label %bb4593.i
+bb4593.i: ; preds = %bb4580.i
+ br i1 false, label %bb4610.i, label %bb4611.i
+bb4610.i: ; preds = %bb4593.i
+ br label %bb4611.i
+bb4611.i: ; preds = %bb4610.i, %bb4593.i, %bb4580.i
+ br i1 false, label %bb4776.i, label %bb4620.i
+bb4620.i: ; preds = %bb4611.i
+ switch i32 0, label %bb4776.i [
+ i32 0, label %bb4691.i
+ i32 2, label %bb4740.i
+ i32 4, label %bb4755.i
+ i32 8, label %bb4622.i
+ i32 9, label %bb4622.i
+ i32 10, label %bb4629.i
+ i32 11, label %bb4629.i
+ i32 12, label %bb4651.i
+ i32 13, label %bb4651.i
+ i32 14, label %bb4665.i
+ i32 15, label %bb4665.i
+ i32 16, label %bb4691.i
+ i32 17, label %bb4691.i
+ i32 18, label %bb4712.i
+ i32 19, label %bb4712.i
+ i32 22, label %bb4733.i
+ i32 23, label %bb4733.i
+ ]
+bb4622.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4628.i, label %bb4776.i
+bb4628.i: ; preds = %bb4622.i
+ br label %bb4776.i
+bb4629.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4644.i
+bb4644.i: ; preds = %bb4629.i
+ br i1 false, label %bb4650.i, label %bb4776.i
+bb4650.i: ; preds = %bb4644.i
+ br label %bb4776.i
+bb4651.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4658.i
+bb4658.i: ; preds = %bb4651.i
+ br i1 false, label %bb4664.i, label %bb4776.i
+bb4664.i: ; preds = %bb4658.i
+ br label %bb4776.i
+bb4665.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4684.i
+bb4684.i: ; preds = %bb4665.i
+ br i1 false, label %bb4690.i, label %bb4776.i
+bb4690.i: ; preds = %bb4684.i
+ br label %bb4776.i
+bb4691.i: ; preds = %bb4620.i, %bb4620.i, %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4698.i
+bb4698.i: ; preds = %bb4691.i
+ br i1 false, label %bb4711.i, label %bb4776.i
+bb4711.i: ; preds = %bb4698.i
+ br label %bb4776.i
+bb4712.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4726.i
+bb4726.i: ; preds = %bb4712.i
+ br i1 false, label %bb4732.i, label %bb4776.i
+bb4732.i: ; preds = %bb4726.i
+ br label %bb4776.i
+bb4733.i: ; preds = %bb4620.i, %bb4620.i
+ br i1 false, label %bb4739.i, label %bb4776.i
+bb4739.i: ; preds = %bb4733.i
+ br label %bb4776.i
+bb4740.i: ; preds = %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4754.i
+bb4754.i: ; preds = %bb4740.i
+ br label %bb4776.i
+bb4755.i: ; preds = %bb4620.i
+ br i1 false, label %bb4776.i, label %bb4774.i
+bb4774.i: ; preds = %bb4755.i
+ br label %bb4776.i
+bb4776.i: ; preds = %bb4774.i, %bb4755.i, %bb4754.i, %bb4740.i, %bb4739.i, %bb4733.i, %bb4732.i, %bb4726.i, %bb4712.i, %bb4711.i, %bb4698.i, %bb4691.i, %bb4690.i, %bb4684.i, %bb4665.i, %bb4664.i, %bb4658.i, %bb4651.i, %bb4650.i, %bb4644.i, %bb4629.i, %bb4628.i, %bb4622.i, %bb4620.i, %bb4611.i
+ switch i32 0, label %bb4790.i [
+ i32 0, label %bb4786.i
+ i32 1, label %bb4784.i
+ i32 3, label %bb4784.i
+ i32 5, label %bb4784.i
+ i32 6, label %bb4785.i
+ i32 7, label %bb4785.i
+ i32 8, label %bb4791.i
+ i32 9, label %bb4791.i
+ i32 10, label %bb4791.i
+ i32 11, label %bb4791.i
+ i32 12, label %bb4791.i
+ i32 13, label %bb4791.i
+ i32 14, label %bb4791.i
+ i32 15, label %bb4791.i
+ i32 16, label %bb4791.i
+ i32 17, label %bb4791.i
+ i32 18, label %bb4791.i
+ i32 19, label %bb4791.i
+ ]
+bb4784.i: ; preds = %bb4776.i, %bb4776.i, %bb4776.i
+ br label %bb4791.i
+bb4785.i: ; preds = %bb4776.i, %bb4776.i
+ br label %bb4791.i
+bb4786.i: ; preds = %bb4776.i
+ br label %bb4791.i
+bb4790.i: ; preds = %bb4776.i
+ br label %bb4791.i
+bb4791.i: ; preds = %bb4790.i, %bb4786.i, %bb4785.i, %bb4784.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i
+ switch i32 %dt4080.0.i, label %bb4803.i [
+ i32 0, label %bb4799.i
+ i32 6, label %bb4794.i
+ i32 7, label %bb4794.i
+ i32 8, label %bb4804.i
+ i32 9, label %bb4804.i
+ i32 10, label %bb4804.i
+ i32 11, label %bb4804.i
+ i32 12, label %bb4804.i
+ i32 13, label %bb4804.i
+ i32 14, label %bb4804.i
+ i32 15, label %bb4804.i
+ i32 16, label %bb4804.i
+ i32 17, label %bb4804.i
+ i32 18, label %bb4804.i
+ i32 19, label %bb4804.i
+ ]
+bb4794.i: ; preds = %bb4791.i, %bb4791.i
+ br i1 false, label %bb4809.i, label %bb4819.i
+bb4799.i: ; preds = %bb4791.i
+ br i1 false, label %bb4809.i, label %bb4819.i
+bb4803.i: ; preds = %bb4791.i
+ br label %bb4804.i
+bb4804.i: ; preds = %bb4803.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i
+ br i1 false, label %bb4809.i, label %bb4819.i
+bb4809.i: ; preds = %bb4804.i, %bb4799.i, %bb4794.i
+ switch i32 %df4081.0.i, label %bb71.i.i [
+ i32 3, label %bb61.i.i
+ i32 4, label %bb.i.i
+ i32 5, label %bb.i.i
+ i32 6, label %bb.i.i
+ i32 7, label %bb.i.i
+ i32 8, label %bb38.i.i
+ i32 9, label %bb38.i.i
+ i32 10, label %bb50.i.i
+ i32 11, label %bb40.i.i
+ i32 16, label %bb38.i.i
+ ]
+bb.i.i: ; preds = %bb4809.i, %bb4809.i, %bb4809.i, %bb4809.i
+ br label %bb403.i.i
+bb38.i.i: ; preds = %bb4809.i, %bb4809.i, %bb4809.i
+ br label %bb403.i.i
+bb40.i.i: ; preds = %bb4809.i
+ br label %bb403.i.i
+bb50.i.i: ; preds = %bb4809.i
+ br label %bb403.i.i
+bb61.i.i: ; preds = %bb4809.i
+ br label %bb403.i.i
+bb71.i.i: ; preds = %bb4809.i
+ br label %bb403.i.i
+bb403.i.i: ; preds = %bb71.i.i, %bb61.i.i, %bb50.i.i, %bb40.i.i, %bb38.i.i, %bb.i.i
+ br i1 false, label %bb408.i.i, label %bb502.i.i
+bb408.i.i: ; preds = %bb403.i.i
+ br label %bb708.i.i
+bb502.i.i: ; preds = %bb403.i.i
+ br label %bb708.i.i
+bb708.i.i: ; preds = %bb502.i.i, %bb408.i.i
+ switch i32 0, label %bb758.i.i [
+ i32 0, label %bb710.i.i
+ i32 1, label %bb713.i.i
+ i32 2, label %bb718.i.i
+ i32 3, label %bb721.i.i
+ i32 4, label %bb726.i.i
+ i32 5, label %bb729.i.i
+ i32 8, label %bb732.i.i
+ i32 9, label %bb732.i.i
+ i32 10, label %bb737.i.i
+ i32 11, label %bb737.i.i
+ i32 12, label %bb742.i.i
+ i32 13, label %bb742.i.i
+ i32 14, label %bb745.i.i
+ i32 15, label %bb745.i.i
+ i32 16, label %bb750.i.i
+ i32 17, label %bb750.i.i
+ i32 18, label %bb753.i.i
+ i32 19, label %bb753.i.i
+ i32 22, label %bb750.i.i
+ i32 23, label %bb750.i.i
+ ]
+bb710.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb713.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb718.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb721.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb726.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb729.i.i: ; preds = %bb708.i.i
+ br label %bb758.i.i
+bb732.i.i: ; preds = %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb737.i.i: ; preds = %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb742.i.i: ; preds = %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb745.i.i: ; preds = %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb750.i.i: ; preds = %bb708.i.i, %bb708.i.i, %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb753.i.i: ; preds = %bb708.i.i, %bb708.i.i
+ br label %bb758.i.i
+bb758.i.i: ; preds = %bb753.i.i, %bb750.i.i, %bb745.i.i, %bb742.i.i, %bb737.i.i, %bb732.i.i, %bb729.i.i, %bb726.i.i, %bb721.i.i, %bb718.i.i, %bb713.i.i, %bb710.i.i, %bb708.i.i
+ switch i32 %dt4080.0.i, label %bb808.i.i [
+ i32 0, label %bb760.i.i
+ i32 1, label %bb763.i.i
+ i32 2, label %bb768.i.i
+ i32 3, label %bb771.i.i
+ i32 4, label %bb776.i.i
+ i32 5, label %bb779.i.i
+ i32 8, label %bb782.i.i
+ i32 9, label %bb782.i.i
+ i32 10, label %bb787.i.i
+ i32 11, label %bb787.i.i
+ i32 12, label %bb792.i.i
+ i32 13, label %bb792.i.i
+ i32 14, label %bb795.i.i
+ i32 15, label %bb795.i.i
+ i32 16, label %bb800.i.i
+ i32 17, label %bb800.i.i
+ i32 18, label %bb803.i.i
+ i32 19, label %bb803.i.i
+ i32 22, label %bb800.i.i
+ i32 23, label %bb800.i.i
+ ]
+bb760.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb763.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb768.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb771.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb776.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb779.i.i: ; preds = %bb758.i.i
+ br label %bb811.i.i
+bb782.i.i: ; preds = %bb758.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb787.i.i: ; preds = %bb758.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb792.i.i: ; preds = %bb758.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb795.i.i: ; preds = %bb758.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb800.i.i: ; preds = %bb758.i.i, %bb758.i.i, %bb758.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb803.i.i: ; preds = %bb758.i.i, %bb758.i.i
+ br label %bb808.i.i
+bb808.i.i: ; preds = %bb803.i.i, %bb758.i.i
+ br label %bb811.i.i
+bb811.i.i: ; preds = %bb808.i.i, %bb800.i.i, %bb795.i.i, %bb792.i.i, %bb787.i.i, %bb782.i.i, %bb779.i.i, %bb776.i.i, %bb771.i.i, %bb768.i.i, %bb763.i.i, %bb760.i.i
+ switch i32 0, label %bb928.i.i [
+ i32 0, label %bb813.i.i
+ i32 1, label %bb833.i.i
+ i32 2, label %bb813.i.i
+ i32 3, label %bb833.i.i
+ i32 4, label %bb813.i.i
+ i32 5, label %bb813.i.i
+ i32 8, label %bb872.i.i
+ i32 9, label %bb872.i.i
+ i32 10, label %bb890.i.i
+ i32 11, label %bb890.i.i
+ i32 12, label %bb813.i.i
+ i32 13, label %bb813.i.i
+ i32 14, label %bb908.i.i
+ i32 15, label %bb908.i.i
+ i32 16, label %bb813.i.i
+ i32 17, label %bb813.i.i
+ i32 18, label %bb908.i.i
+ i32 19, label %bb908.i.i
+ i32 22, label %bb813.i.i
+ i32 23, label %bb813.i.i
+ ]
+bb813.i.i: ; preds = %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i
+ switch i32 %dt4080.0.i, label %bb1065.i.i [
+ i32 0, label %bb930.i.i
+ i32 1, label %bb950.i.i
+ i32 2, label %bb930.i.i
+ i32 3, label %bb950.i.i
+ i32 4, label %bb989.i.i
+ i32 5, label %bb989.i.i
+ i32 8, label %bb1009.i.i
+ i32 9, label %bb1009.i.i
+ i32 10, label %bb1027.i.i
+ i32 11, label %bb1027.i.i
+ i32 12, label %bb930.i.i
+ i32 13, label %bb930.i.i
+ i32 14, label %bb1045.i.i
+ i32 15, label %bb1045.i.i
+ i32 16, label %bb930.i.i
+ i32 17, label %bb930.i.i
+ i32 18, label %bb1045.i.i
+ i32 19, label %bb1045.i.i
+ i32 22, label %bb930.i.i
+ i32 23, label %bb930.i.i
+ ]
+bb833.i.i: ; preds = %bb811.i.i, %bb811.i.i
+ switch i32 %dt4080.0.i, label %bb1065.i.i [
+ i32 0, label %bb930.i.i
+ i32 1, label %bb950.i.i
+ i32 2, label %bb930.i.i
+ i32 3, label %bb950.i.i
+ i32 4, label %bb989.i.i
+ i32 5, label %bb989.i.i
+ i32 8, label %bb1009.i.i
+ i32 9, label %bb1009.i.i
+ i32 10, label %bb1027.i.i
+ i32 11, label %bb1027.i.i
+ i32 12, label %bb930.i.i
+ i32 13, label %bb930.i.i
+ i32 14, label %bb1045.i.i
+ i32 15, label %bb1045.i.i
+ i32 16, label %bb930.i.i
+ i32 17, label %bb930.i.i
+ i32 18, label %bb1045.i.i
+ i32 19, label %bb1045.i.i
+ i32 22, label %bb930.i.i
+ i32 23, label %bb930.i.i
+ ]
+bb872.i.i: ; preds = %bb811.i.i, %bb811.i.i
+ switch i32 %dt4080.0.i, label %bb1065.i.i [
+ i32 0, label %bb930.i.i
+ i32 1, label %bb950.i.i
+ i32 2, label %bb930.i.i
+ i32 3, label %bb950.i.i
+ i32 4, label %bb989.i.i
+ i32 5, label %bb989.i.i
+ i32 8, label %bb1009.i.i
+ i32 9, label %bb1009.i.i
+ i32 10, label %bb1027.i.i
+ i32 11, label %bb1027.i.i
+ i32 12, label %bb930.i.i
+ i32 13, label %bb930.i.i
+ i32 14, label %bb1045.i.i
+ i32 15, label %bb1045.i.i
+ i32 16, label %bb930.i.i
+ i32 17, label %bb930.i.i
+ i32 18, label %bb1045.i.i
+ i32 19, label %bb1045.i.i
+ i32 22, label %bb930.i.i
+ i32 23, label %bb930.i.i
+ ]
+bb890.i.i: ; preds = %bb811.i.i, %bb811.i.i
+ switch i32 %dt4080.0.i, label %bb1065.i.i [
+ i32 0, label %bb930.i.i
+ i32 1, label %bb950.i.i
+ i32 2, label %bb930.i.i
+ i32 3, label %bb950.i.i
+ i32 4, label %bb989.i.i
+ i32 5, label %bb989.i.i
+ i32 8, label %bb1009.i.i
+ i32 9, label %bb1009.i.i
+ i32 10, label %bb1027.i.i
+ i32 11, label %bb1027.i.i
+ i32 12, label %bb930.i.i
+ i32 13, label %bb930.i.i
+ i32 14, label %bb1045.i.i
+ i32 15, label %bb1045.i.i
+ i32 16, label %bb930.i.i
+ i32 17, label %bb930.i.i
+ i32 18, label %bb1045.i.i
+ i32 19, label %bb1045.i.i
+ i32 22, label %bb930.i.i
+ i32 23, label %bb930.i.i
+ ]
+bb908.i.i: ; preds = %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i
+ br label %bb928.i.i
+bb928.i.i: ; preds = %bb908.i.i, %bb811.i.i
+ switch i32 %dt4080.0.i, label %bb1065.i.i [
+ i32 0, label %bb930.i.i
+ i32 1, label %bb950.i.i
+ i32 2, label %bb930.i.i
+ i32 3, label %bb950.i.i
+ i32 4, label %bb989.i.i
+ i32 5, label %bb989.i.i
+ i32 8, label %bb1009.i.i
+ i32 9, label %bb1009.i.i
+ i32 10, label %bb1027.i.i
+ i32 11, label %bb1027.i.i
+ i32 12, label %bb930.i.i
+ i32 13, label %bb930.i.i
+ i32 14, label %bb1045.i.i
+ i32 15, label %bb1045.i.i
+ i32 16, label %bb930.i.i
+ i32 17, label %bb930.i.i
+ i32 18, label %bb1045.i.i
+ i32 19, label %bb1045.i.i
+ i32 22, label %bb930.i.i
+ i32 23, label %bb930.i.i
+ ]
+bb930.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb5235.i
+bb950.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb5235.i
+bb989.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb5235.i
+bb1009.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb5235.i
+bb1027.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb5235.i
+bb1045.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i
+ br label %bb1065.i.i
+bb1065.i.i: ; preds = %bb1045.i.i, %bb928.i.i, %bb890.i.i, %bb872.i.i, %bb833.i.i, %bb813.i.i
+ br label %bb5235.i
+bb4819.i: ; preds = %bb4804.i, %bb4799.i, %bb4794.i
+ br i1 false, label %bb5208.i, label %bb5011.i
+bb5011.i: ; preds = %bb4819.i
+ switch i32 0, label %bb5039.i [
+ i32 10, label %bb5016.i
+ i32 3, label %bb5103.i
+ ]
+bb5016.i: ; preds = %bb5011.i
+ br i1 false, label %bb5103.i, label %bb5039.i
+bb5039.i: ; preds = %bb5016.i, %bb5011.i
+ switch i32 0, label %bb5052.i [
+ i32 3, label %bb5103.i
+ i32 10, label %bb5103.i
+ ]
+bb5052.i: ; preds = %bb5039.i
+ br i1 false, label %bb5103.i, label %bb5065.i
+bb5065.i: ; preds = %bb5052.i
+ br i1 false, label %bb5078.i, label %bb5103.i
+bb5078.i: ; preds = %bb5065.i
+ br i1 false, label %bb5103.i, label %bb5084.i
+bb5084.i: ; preds = %bb5078.i
+ br i1 false, label %bb5103.i, label %bb5090.i
+bb5090.i: ; preds = %bb5084.i
+ br i1 false, label %bb5103.i, label %bb5096.i
+bb5096.i: ; preds = %bb5090.i
+ br i1 false, label %bb5103.i, label %bb5102.i
+bb5102.i: ; preds = %bb5096.i
+ br label %bb5103.i
+bb5103.i: ; preds = %bb5102.i, %bb5096.i, %bb5090.i, %bb5084.i, %bb5078.i, %bb5065.i, %bb5052.i, %bb5039.i, %bb5039.i, %bb5016.i, %bb5011.i
+ switch i32 0, label %bb5208.i [
+ i32 0, label %bb5133.i
+ i32 2, label %bb5162.i
+ i32 4, label %bb5182.i
+ i32 10, label %bb5113.i
+ i32 11, label %bb5113.i
+ i32 12, label %bb5121.i
+ i32 13, label %bb5121.i
+ i32 14, label %bb5125.i
+ i32 15, label %bb5125.i
+ i32 16, label %bb5133.i
+ i32 17, label %bb5133.i
+ i32 18, label %bb5146.i
+ i32 19, label %bb5146.i
+ ]
+bb5113.i: ; preds = %bb5103.i, %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 8, label %bb5115.i
+ i32 9, label %bb5115.i
+ i32 12, label %bb5117.i
+ i32 13, label %bb5117.i
+ i32 14, label %bb5119.i
+ i32 15, label %bb5119.i
+ ]
+bb5115.i: ; preds = %bb5113.i, %bb5113.i
+ br label %bb5208.i
+bb5117.i: ; preds = %bb5113.i, %bb5113.i
+ br label %bb5208.i
+bb5119.i: ; preds = %bb5113.i, %bb5113.i
+ br label %bb5208.i
+bb5121.i: ; preds = %bb5103.i, %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 8, label %bb5123.i
+ i32 9, label %bb5123.i
+ ]
+bb5123.i: ; preds = %bb5121.i, %bb5121.i
+ br label %bb5208.i
+bb5125.i: ; preds = %bb5103.i, %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 8, label %bb5127.i
+ i32 9, label %bb5127.i
+ i32 12, label %bb5129.i
+ i32 13, label %bb5129.i
+ ]
+bb5127.i: ; preds = %bb5125.i, %bb5125.i
+ br label %bb5208.i
+bb5129.i: ; preds = %bb5125.i, %bb5125.i
+ br label %bb5208.i
+bb5133.i: ; preds = %bb5103.i, %bb5103.i, %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 8, label %bb5135.i
+ i32 9, label %bb5135.i
+ i32 10, label %bb5137.i
+ i32 11, label %bb5137.i
+ i32 12, label %bb5139.i
+ i32 13, label %bb5139.i
+ i32 14, label %bb5143.i
+ i32 15, label %bb5143.i
+ ]
+bb5135.i: ; preds = %bb5133.i, %bb5133.i
+ br label %bb5208.i
+bb5137.i: ; preds = %bb5133.i, %bb5133.i
+ br label %bb5208.i
+bb5139.i: ; preds = %bb5133.i, %bb5133.i
+ br label %bb5208.i
+bb5143.i: ; preds = %bb5133.i, %bb5133.i
+ br label %bb5208.i
+bb5146.i: ; preds = %bb5103.i, %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 0, label %bb5158.i
+ i32 8, label %bb5148.i
+ i32 9, label %bb5148.i
+ i32 10, label %bb5150.i
+ i32 11, label %bb5150.i
+ i32 12, label %bb5152.i
+ i32 13, label %bb5152.i
+ i32 14, label %bb5155.i
+ i32 15, label %bb5155.i
+ i32 16, label %bb5158.i
+ i32 17, label %bb5158.i
+ ]
+bb5148.i: ; preds = %bb5146.i, %bb5146.i
+ br label %bb5208.i
+bb5150.i: ; preds = %bb5146.i, %bb5146.i
+ br label %bb5208.i
+bb5152.i: ; preds = %bb5146.i, %bb5146.i
+ br label %bb5208.i
+bb5155.i: ; preds = %bb5146.i, %bb5146.i
+ br label %bb5208.i
+bb5158.i: ; preds = %bb5146.i, %bb5146.i, %bb5146.i
+ br label %bb5208.i
+bb5162.i: ; preds = %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 0, label %bb5175.i
+ i32 8, label %bb5164.i
+ i32 9, label %bb5164.i
+ i32 10, label %bb5166.i
+ i32 11, label %bb5166.i
+ i32 12, label %bb5168.i
+ i32 13, label %bb5168.i
+ i32 14, label %bb5172.i
+ i32 15, label %bb5172.i
+ i32 16, label %bb5175.i
+ i32 17, label %bb5175.i
+ i32 18, label %bb5179.i
+ i32 19, label %bb5179.i
+ ]
+bb5164.i: ; preds = %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5166.i: ; preds = %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5168.i: ; preds = %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5172.i: ; preds = %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5175.i: ; preds = %bb5162.i, %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5179.i: ; preds = %bb5162.i, %bb5162.i
+ br label %bb5208.i
+bb5182.i: ; preds = %bb5103.i
+ switch i32 %dt4080.0.i, label %bb5208.i [
+ i32 0, label %bb5195.i
+ i32 2, label %bb5202.i
+ i32 8, label %bb5184.i
+ i32 9, label %bb5184.i
+ i32 10, label %bb5186.i
+ i32 11, label %bb5186.i
+ i32 12, label %bb5188.i
+ i32 13, label %bb5188.i
+ i32 14, label %bb5192.i
+ i32 15, label %bb5192.i
+ i32 16, label %bb5195.i
+ i32 17, label %bb5195.i
+ i32 18, label %bb5199.i
+ i32 19, label %bb5199.i
+ ]
+bb5184.i: ; preds = %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5186.i: ; preds = %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5188.i: ; preds = %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5192.i: ; preds = %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5195.i: ; preds = %bb5182.i, %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5199.i: ; preds = %bb5182.i, %bb5182.i
+ br label %bb5208.i
+bb5202.i: ; preds = %bb5182.i
+ br label %bb5208.i
+bb5208.i: ; preds = %bb5202.i, %bb5199.i, %bb5195.i, %bb5192.i, %bb5188.i, %bb5186.i, %bb5184.i, %bb5182.i, %bb5179.i, %bb5175.i, %bb5172.i, %bb5168.i, %bb5166.i, %bb5164.i, %bb5162.i, %bb5158.i, %bb5155.i, %bb5152.i, %bb5150.i, %bb5148.i, %bb5146.i, %bb5143.i, %bb5139.i, %bb5137.i, %bb5135.i, %bb5133.i, %bb5129.i, %bb5127.i, %bb5125.i, %bb5123.i, %bb5121.i, %bb5119.i, %bb5117.i, %bb5115.i, %bb5113.i, %bb5103.i, %bb4819.i
+ switch i32 0, label %bb5221.i [
+ i32 0, label %bb5210.i
+ i32 1, label %bb5211.i
+ i32 2, label %bb5212.i
+ i32 3, label %bb5213.i
+ i32 4, label %bb5214.i
+ i32 5, label %bb5215.i
+ i32 6, label %bb5217.i
+ i32 7, label %bb5216.i
+ i32 12, label %bb5218.i
+ i32 13, label %bb5218.i
+ i32 14, label %bb5219.i
+ i32 15, label %bb5219.i
+ i32 16, label %bb5210.i
+ i32 17, label %bb5210.i
+ i32 22, label %bb5210.i
+ i32 23, label %bb5210.i
+ ]
+bb5210.i: ; preds = %bb5208.i, %bb5208.i, %bb5208.i, %bb5208.i, %bb5208.i
+ br label %bb5224.i
+bb5211.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5212.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5213.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5214.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5215.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5216.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5217.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5218.i: ; preds = %bb5208.i, %bb5208.i
+ br label %bb5224.i
+bb5219.i: ; preds = %bb5208.i, %bb5208.i
+ br label %bb5224.i
+bb5221.i: ; preds = %bb5208.i
+ br label %bb5224.i
+bb5224.i: ; preds = %bb5221.i, %bb5219.i, %bb5218.i, %bb5217.i, %bb5216.i, %bb5215.i, %bb5214.i, %bb5213.i, %bb5212.i, %bb5211.i, %bb5210.i
+ br label %bb5235.i
+bb5235.i: ; preds = %bb5224.i, %bb1065.i.i, %bb1027.i.i, %bb1009.i.i, %bb989.i.i, %bb950.i.i, %bb930.i.i
+ br label %bb5272.i
+bb5272.i: ; preds = %bb5235.i
+ br label %bb5276.i
+bb5276.i: ; preds = %bb19808.i, %bb5272.i
+ br label %bb16607.i
+bb5295.i: ; preds = %bb5295.preheader.i, %storeVecColor_RGB_UI.exit
+ br label %loadVecColor_BGRA_UI8888R.exit
+loadVecColor_BGRA_UI8888R.exit: ; preds = %bb5295.i
+ br i1 false, label %bb5325.i, label %bb5351.i
+bb5325.i: ; preds = %loadVecColor_BGRA_UI8888R.exit
+ br i1 false, label %bb4527.i, label %bb.i
+bb.i: ; preds = %bb5325.i
+ switch i32 0, label %bb4527.i [
+ i32 4, label %bb4362.i
+ i32 8, label %bb4448.i
+ ]
+bb4362.i: ; preds = %bb.i
+ br i1 false, label %bb4532.i, label %bb5556.i
+bb4448.i: ; preds = %bb.i
+ br label %bb4527.i
+bb4527.i: ; preds = %bb4448.i, %bb.i, %bb5325.i
+ br i1 false, label %bb4532.i, label %bb5556.i
+bb4532.i: ; preds = %bb4527.i, %bb4362.i
+ switch i32 0, label %bb4997.i [
+ i32 6, label %bb4534.i
+ i32 7, label %bb4982.i
+ ]
+bb4534.i: ; preds = %bb4532.i
+ br i1 false, label %bb4875.i, label %bb4619.i
+bb4619.i: ; preds = %bb4534.i
+ br i1 false, label %bb4875.i, label %bb4663.i
+bb4663.i: ; preds = %bb4619.i
+ br label %bb4855.i
+bb4759.i: ; preds = %bb4855.i
+ br label %bb4855.i
+bb4855.i: ; preds = %bb4759.i, %bb4663.i
+ br i1 false, label %bb4866.i, label %bb4759.i
+bb4866.i: ; preds = %bb4855.i
+ br label %bb4875.i
+bb4875.i: ; preds = %bb4866.i, %bb4619.i, %bb4534.i
+ br i1 false, label %bb4973.i, label %bb4922.i
+bb4922.i: ; preds = %bb4875.i
+ br label %bb4973.i
+bb4973.i: ; preds = %bb4922.i, %bb4875.i
+ br label %bb4982.i
+bb4982.i: ; preds = %bb4973.i, %bb4532.i
+ br label %bb5041.i
+bb4997.i: ; preds = %bb4532.i
+ br label %bb5041.i
+bb5041.i: ; preds = %bb4997.i, %bb4982.i
+ switch i32 0, label %bb5464.i [
+ i32 0, label %bb5344.i
+ i32 1, label %bb5374.i
+ i32 2, label %bb5404.i
+ i32 3, label %bb5434.i
+ i32 11, label %bb5263.i
+ ]
+bb5263.i: ; preds = %bb5041.i
+ br i1 false, label %bb12038.i, label %bb5467.i
+bb5344.i: ; preds = %bb5041.i
+ br i1 false, label %bb12038.i, label %bb5467.i
+bb5374.i: ; preds = %bb5041.i
+ br i1 false, label %bb12038.i, label %bb5467.i
+bb5404.i: ; preds = %bb5041.i
+ br i1 false, label %bb12038.i, label %bb5467.i
+bb5434.i: ; preds = %bb5041.i
+ br label %bb5464.i
+bb5464.i: ; preds = %bb5434.i, %bb5041.i
+ br i1 false, label %bb12038.i, label %bb5467.i
+bb5467.i: ; preds = %bb5464.i, %bb5404.i, %bb5374.i, %bb5344.i, %bb5263.i
+ switch i32 0, label %bb15866.i [
+ i32 3, label %bb13016.i
+ i32 4, label %bb12040.i
+ i32 8, label %bb12514.i
+ i32 10, label %bb12903.i
+ i32 11, label %bb12553.i
+ i32 16, label %bb12514.i
+ ]
+bb5556.i: ; preds = %bb4527.i, %bb4362.i
+ switch i32 0, label %bb8990.i [
+ i32 3, label %bb6403.i
+ i32 4, label %bb6924.i
+ i32 8, label %bb6924.i
+ i32 10, label %bb6403.i
+ i32 11, label %bb5882.i
+ i32 16, label %bb5558.i
+ ]
+bb5558.i: ; preds = %bb5556.i
+ br label %bb8990.i
+bb5882.i: ; preds = %bb5556.i
+ switch i32 0, label %bb6387.i [
+ i32 1, label %bb6332.i
+ i32 3, label %bb6332.i
+ i32 4, label %bb6352.i
+ i32 6, label %bb5884.i
+ i32 7, label %bb8990.i
+ ]
+bb5884.i: ; preds = %bb5882.i
+ br i1 false, label %bb6225.i, label %bb5969.i
+bb5969.i: ; preds = %bb5884.i
+ br i1 false, label %bb6225.i, label %bb6013.i
+bb6013.i: ; preds = %bb5969.i
+ br label %bb6205.i
+bb6109.i: ; preds = %bb6205.i
+ br label %bb6205.i
+bb6205.i: ; preds = %bb6109.i, %bb6013.i
+ br i1 false, label %bb6216.i, label %bb6109.i
+bb6216.i: ; preds = %bb6205.i
+ br label %bb6225.i
+bb6225.i: ; preds = %bb6216.i, %bb5969.i, %bb5884.i
+ br i1 false, label %bb6323.i, label %bb6272.i
+bb6272.i: ; preds = %bb6225.i
+ switch i32 0, label %bb6908.i [
+ i32 1, label %bb6853.i48
+ i32 3, label %bb6853.i48
+ i32 4, label %bb6873.i
+ i32 6, label %bb6405.i
+ i32 7, label %bb8990.i
+ ]
+bb6323.i: ; preds = %bb6225.i
+ switch i32 0, label %bb6908.i [
+ i32 1, label %bb6853.i48
+ i32 3, label %bb6853.i48
+ i32 4, label %bb6873.i
+ i32 6, label %bb6405.i
+ i32 7, label %bb8990.i
+ ]
+bb6332.i: ; preds = %bb5882.i, %bb5882.i
+ switch i32 0, label %bb6908.i [
+ i32 1, label %bb6853.i48
+ i32 3, label %bb6853.i48
+ i32 4, label %bb6873.i
+ i32 6, label %bb6405.i
+ i32 7, label %bb8990.i
+ ]
+bb6352.i: ; preds = %bb5882.i
+ br label %bb6873.i
+bb6387.i: ; preds = %bb5882.i
+ br label %bb6403.i
+bb6403.i: ; preds = %bb6387.i, %bb5556.i, %bb5556.i
+ switch i32 0, label %bb6908.i [
+ i32 1, label %bb6853.i48
+ i32 3, label %bb6853.i48
+ i32 4, label %bb6873.i
+ i32 6, label %bb6405.i
+ i32 7, label %bb8990.i
+ ]
+bb6405.i: ; preds = %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i
+ br i1 false, label %bb6746.i, label %bb6490.i
+bb6490.i: ; preds = %bb6405.i
+ br i1 false, label %bb6746.i, label %bb6534.i
+bb6534.i: ; preds = %bb6490.i
+ br label %bb6726.i
+bb6630.i: ; preds = %bb6726.i
+ br label %bb6726.i
+bb6726.i: ; preds = %bb6630.i, %bb6534.i
+ br i1 false, label %bb6737.i, label %bb6630.i
+bb6737.i: ; preds = %bb6726.i
+ br label %bb6746.i
+bb6746.i: ; preds = %bb6737.i, %bb6490.i, %bb6405.i
+ br i1 false, label %bb6844.i, label %bb6793.i
+bb6793.i: ; preds = %bb6746.i
+ br label %bb8990.i
+bb6844.i: ; preds = %bb6746.i
+ br label %bb8990.i
+bb6853.i48: ; preds = %bb6403.i, %bb6403.i, %bb6332.i, %bb6332.i, %bb6323.i, %bb6323.i, %bb6272.i, %bb6272.i
+ br label %bb8990.i
+bb6873.i: ; preds = %bb6403.i, %bb6352.i, %bb6332.i, %bb6323.i, %bb6272.i
+ br label %bb8990.i
+bb6908.i: ; preds = %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i
+ br label %bb8990.i
+bb6924.i: ; preds = %bb5556.i, %bb5556.i
+ switch i32 0, label %bb8929.i [
+ i32 1, label %bb8715.i
+ i32 3, label %bb8715.i
+ i32 4, label %bb8792.i
+ i32 6, label %bb6926.i
+ i32 7, label %bb8990.i
+ ]
+bb6926.i: ; preds = %bb6924.i
+ br i1 false, label %bb7267.i, label %bb7011.i
+bb7011.i: ; preds = %bb6926.i
+ br i1 false, label %bb7267.i, label %bb7055.i
+bb7055.i: ; preds = %bb7011.i
+ br label %bb7247.i
+bb7151.i: ; preds = %bb7247.i
+ br label %bb7247.i
+bb7247.i: ; preds = %bb7151.i, %bb7055.i
+ br i1 false, label %bb7258.i, label %bb7151.i
+bb7258.i: ; preds = %bb7247.i
+ br label %bb7267.i
+bb7267.i: ; preds = %bb7258.i, %bb7011.i, %bb6926.i
+ br i1 false, label %bb7365.i, label %bb7314.i
+bb7314.i: ; preds = %bb7267.i
+ br label %bb7365.i
+bb7365.i: ; preds = %bb7314.i, %bb7267.i
+ br i1 false, label %bb7714.i, label %bb7458.i
+bb7458.i: ; preds = %bb7365.i
+ br i1 false, label %bb7714.i, label %bb7502.i
+bb7502.i: ; preds = %bb7458.i
+ br label %bb7694.i
+bb7598.i: ; preds = %bb7694.i
+ br label %bb7694.i
+bb7694.i: ; preds = %bb7598.i, %bb7502.i
+ br i1 false, label %bb7705.i, label %bb7598.i
+bb7705.i: ; preds = %bb7694.i
+ br label %bb7714.i
+bb7714.i: ; preds = %bb7705.i, %bb7458.i, %bb7365.i
+ br i1 false, label %bb7812.i, label %bb7761.i
+bb7761.i: ; preds = %bb7714.i
+ br label %bb7812.i
+bb7812.i: ; preds = %bb7761.i, %bb7714.i
+ br i1 false, label %bb8161.i, label %bb7905.i
+bb7905.i: ; preds = %bb7812.i
+ br i1 false, label %bb8161.i, label %bb7949.i
+bb7949.i: ; preds = %bb7905.i
+ br label %bb8141.i
+bb8045.i: ; preds = %bb8141.i
+ br label %bb8141.i
+bb8141.i: ; preds = %bb8045.i, %bb7949.i
+ br i1 false, label %bb8152.i, label %bb8045.i
+bb8152.i: ; preds = %bb8141.i
+ br label %bb8161.i
+bb8161.i: ; preds = %bb8152.i, %bb7905.i, %bb7812.i
+ br i1 false, label %bb8259.i, label %bb8208.i
+bb8208.i: ; preds = %bb8161.i
+ br label %bb8259.i
+bb8259.i: ; preds = %bb8208.i, %bb8161.i
+ br i1 false, label %bb8608.i, label %bb8352.i
+bb8352.i: ; preds = %bb8259.i
+ br i1 false, label %bb8608.i, label %bb8396.i
+bb8396.i: ; preds = %bb8352.i
+ br label %bb8588.i63
+bb8492.i: ; preds = %bb8588.i63
+ br label %bb8588.i63
+bb8588.i63: ; preds = %bb8492.i, %bb8396.i
+ br i1 false, label %bb8599.i, label %bb8492.i
+bb8599.i: ; preds = %bb8588.i63
+ br label %bb8608.i
+bb8608.i: ; preds = %bb8599.i, %bb8352.i, %bb8259.i
+ br i1 false, label %bb8706.i, label %bb8655.i
+bb8655.i: ; preds = %bb8608.i
+ br label %bb8990.i
+bb8706.i: ; preds = %bb8608.i
+ br label %bb8990.i
+bb8715.i: ; preds = %bb6924.i, %bb6924.i
+ br label %bb8990.i
+bb8792.i: ; preds = %bb6924.i
+ br label %bb8990.i
+bb8929.i: ; preds = %bb6924.i
+ br label %bb8990.i
+bb8990.i: ; preds = %bb8929.i, %bb8792.i, %bb8715.i, %bb8706.i, %bb8655.i, %bb6924.i, %bb6908.i, %bb6873.i, %bb6853.i48, %bb6844.i, %bb6793.i, %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i, %bb5882.i, %bb5558.i, %bb5556.i
+ switch i32 %sf4083.0.i, label %bb11184.i [
+ i32 0, label %bb10372.i
+ i32 1, label %bb10609.i
+ i32 2, label %bb10811.i
+ i32 3, label %bb11013.i
+ i32 4, label %bb8992.i
+ i32 5, label %bb8992.i
+ i32 6, label %bb8992.i
+ i32 7, label %bb8992.i
+ i32 8, label %bb9195.i
+ i32 9, label %bb9195.i
+ i32 10, label %bb9965.i
+ i32 11, label %bb9585.i
+ i32 16, label %bb9195.i
+ ]
+bb8992.i: ; preds = %bb8990.i, %bb8990.i, %bb8990.i, %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb9075.i
+ i32 1, label %bb9105.i
+ i32 2, label %bb9135.i
+ i32 3, label %bb9165.i
+ i32 11, label %bb8994.i
+ ]
+bb8994.i: ; preds = %bb8992.i
+ br label %bb11247.i
+bb9075.i: ; preds = %bb8992.i
+ br label %bb11247.i
+bb9105.i: ; preds = %bb8992.i
+ br label %bb11247.i
+bb9135.i: ; preds = %bb8992.i
+ br label %bb11247.i
+bb9165.i: ; preds = %bb8992.i
+ br label %bb11247.i
+bb9195.i: ; preds = %bb8990.i, %bb8990.i, %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb9491.i
+ i32 1, label %bb9521.i
+ i32 2, label %bb9551.i
+ i32 3, label %bb9581.i
+ i32 4, label %bb9197.i
+ i32 11, label %bb9342.i
+ ]
+bb9197.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9342.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9491.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9521.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9551.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9581.i: ; preds = %bb9195.i
+ br label %bb11247.i
+bb9585.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb9879.i
+ i32 1, label %bb9920.i
+ i32 2, label %bb9920.i
+ i32 3, label %bb9924.i
+ i32 4, label %bb9587.i
+ i32 8, label %bb9587.i
+ ]
+bb9587.i: ; preds = %bb9585.i, %bb9585.i
+ br label %bb11247.i
+bb9879.i: ; preds = %bb9585.i
+ br label %bb11247.i
+bb9920.i: ; preds = %bb9585.i, %bb9585.i
+ br label %bb11247.i
+bb9924.i: ; preds = %bb9585.i
+ br label %bb11247.i
+bb9965.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 1, label %bb10368.i
+ i32 2, label %bb10368.i
+ i32 3, label %bb10364.i
+ i32 4, label %bb9967.i
+ i32 8, label %bb10127.i
+ i32 11, label %bb10287.i
+ ]
+bb9967.i: ; preds = %bb9965.i
+ br label %bb11247.i
+bb10127.i: ; preds = %bb9965.i
+ br label %bb11247.i
+bb10287.i: ; preds = %bb9965.i
+ br label %bb11247.i
+bb10364.i: ; preds = %bb9965.i
+ br label %bb11247.i
+bb10368.i: ; preds = %bb9965.i, %bb9965.i
+ br label %bb11247.i
+bb10372.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 1, label %bb10605.i
+ i32 2, label %bb10605.i
+ i32 3, label %bb10601.i
+ i32 4, label %bb10374.i
+ i32 8, label %bb10449.i
+ i32 11, label %bb10524.i
+ ]
+bb10374.i: ; preds = %bb10372.i
+ br label %bb11247.i
+bb10449.i: ; preds = %bb10372.i
+ br label %bb11247.i
+bb10524.i: ; preds = %bb10372.i
+ br label %bb11247.i
+bb10601.i: ; preds = %bb10372.i
+ br label %bb11247.i
+bb10605.i: ; preds = %bb10372.i, %bb10372.i
+ br label %bb11247.i
+bb10609.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb10807.i
+ i32 2, label %bb10807.i
+ i32 3, label %bb10803.i
+ i32 4, label %bb10611.i
+ i32 8, label %bb10686.i
+ i32 11, label %bb10761.i
+ ]
+bb10611.i: ; preds = %bb10609.i
+ br label %bb11247.i
+bb10686.i: ; preds = %bb10609.i
+ br label %bb11247.i
+bb10761.i: ; preds = %bb10609.i
+ br label %bb11247.i
+bb10803.i: ; preds = %bb10609.i
+ br label %bb11247.i
+bb10807.i: ; preds = %bb10609.i, %bb10609.i
+ br label %bb11247.i
+bb10811.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb11009.i
+ i32 1, label %bb11009.i
+ i32 3, label %bb11005.i
+ i32 4, label %bb10813.i
+ i32 8, label %bb10888.i
+ i32 11, label %bb10963.i
+ ]
+bb10813.i: ; preds = %bb10811.i
+ br label %bb11247.i
+bb10888.i: ; preds = %bb10811.i
+ br label %bb11247.i
+bb10963.i: ; preds = %bb10811.i
+ br label %bb11247.i
+bb11005.i: ; preds = %bb10811.i
+ br label %bb11247.i
+bb11009.i: ; preds = %bb10811.i, %bb10811.i
+ br label %bb11247.i
+bb11013.i: ; preds = %bb8990.i
+ switch i32 0, label %bb11184.i [
+ i32 0, label %bb11180.i
+ i32 1, label %bb11180.i
+ i32 2, label %bb11180.i
+ i32 4, label %bb11015.i
+ i32 8, label %bb11090.i
+ i32 11, label %bb11103.i
+ ]
+bb11015.i: ; preds = %bb11013.i
+ br label %bb11247.i
+bb11090.i: ; preds = %bb11013.i
+ br label %bb11247.i
+bb11103.i: ; preds = %bb11013.i
+ br label %bb11247.i
+bb11180.i: ; preds = %bb11013.i, %bb11013.i, %bb11013.i
+ br label %bb11184.i
+bb11184.i: ; preds = %bb11180.i, %bb11013.i, %bb10811.i, %bb10609.i, %bb10372.i, %bb9965.i, %bb9585.i, %bb9195.i, %bb8992.i, %bb8990.i
+ br label %bb11247.i
+bb11247.i: ; preds = %bb11184.i, %bb11103.i, %bb11090.i, %bb11015.i, %bb11009.i, %bb11005.i, %bb10963.i, %bb10888.i, %bb10813.i, %bb10807.i, %bb10803.i, %bb10761.i, %bb10686.i, %bb10611.i, %bb10605.i, %bb10601.i, %bb10524.i, %bb10449.i, %bb10374.i, %bb10368.i, %bb10364.i, %bb10287.i, %bb10127.i, %bb9967.i, %bb9924.i, %bb9920.i, %bb9879.i, %bb9587.i, %bb9581.i, %bb9551.i, %bb9521.i, %bb9491.i, %bb9342.i, %bb9197.i, %bb9165.i, %bb9135.i, %bb9105.i, %bb9075.i, %bb8994.i
+ br i1 false, label %bb11250.i, label %bb11256.i
+bb11250.i: ; preds = %bb11247.i
+ br label %bb11378.i
+bb11256.i: ; preds = %bb11247.i
+ switch i32 0, label %bb11348.i [
+ i32 4, label %bb11258.i
+ i32 8, label %bb11258.i
+ i32 11, label %bb11318.i
+ ]
+bb11258.i: ; preds = %bb11256.i, %bb11256.i
+ br i1 false, label %bb11273.i, label %bb11261.i
+bb11261.i: ; preds = %bb11258.i
+ br label %bb11273.i
+bb11273.i: ; preds = %bb11261.i, %bb11258.i
+ br i1 false, label %bb11288.i, label %bb11276.i
+bb11276.i: ; preds = %bb11273.i
+ br label %bb11288.i
+bb11288.i: ; preds = %bb11276.i, %bb11273.i
+ br i1 false, label %bb11303.i, label %bb11291.i
+bb11291.i: ; preds = %bb11288.i
+ br label %bb11303.i
+bb11303.i: ; preds = %bb11291.i, %bb11288.i
+ br i1 false, label %bb11318.i, label %bb11306.i
+bb11306.i: ; preds = %bb11303.i
+ br label %bb11318.i
+bb11318.i: ; preds = %bb11306.i, %bb11303.i, %bb11256.i
+ br i1 false, label %bb11333.i, label %bb11321.i
+bb11321.i: ; preds = %bb11318.i
+ br label %bb11333.i
+bb11333.i: ; preds = %bb11321.i, %bb11318.i
+ br i1 false, label %bb11348.i, label %bb11336.i
+bb11336.i: ; preds = %bb11333.i
+ br label %bb11348.i
+bb11348.i: ; preds = %bb11336.i, %bb11333.i, %bb11256.i
+ br i1 false, label %bb11363.i, label %bb11351.i
+bb11351.i: ; preds = %bb11348.i
+ br label %bb11363.i
+bb11363.i: ; preds = %bb11351.i, %bb11348.i
+ br i1 false, label %bb11378.i, label %bb11366.i
+bb11366.i: ; preds = %bb11363.i
+ br label %bb11378.i
+bb11378.i: ; preds = %bb11366.i, %bb11363.i, %bb11250.i
+ br label %bb12038.i
+bb12038.i: ; preds = %bb11378.i, %bb5464.i, %bb5404.i, %bb5374.i, %bb5344.i, %bb5263.i
+ switch i32 0, label %bb15866.i [
+ i32 3, label %bb13016.i
+ i32 4, label %bb12040.i
+ i32 8, label %bb12514.i
+ i32 10, label %bb12903.i
+ i32 11, label %bb12553.i
+ i32 16, label %bb12514.i
+ ]
+bb12040.i: ; preds = %bb12038.i, %bb5467.i
+ br label %bb13026.i
+bb12514.i: ; preds = %bb12038.i, %bb12038.i, %bb5467.i, %bb5467.i
+ br label %bb13026.i
+bb12553.i: ; preds = %bb12038.i, %bb5467.i
+ br i1 false, label %bb12558.i, label %bb12747.i
+bb12558.i: ; preds = %bb12553.i
+ br i1 false, label %bb12666.i, label %bb12654.i
+bb12654.i: ; preds = %bb12558.i
+ br label %bb12666.i
+bb12666.i: ; preds = %bb12654.i, %bb12558.i
+ br label %bb12747.i
+bb12747.i: ; preds = %bb12666.i, %bb12553.i
+ br label %bb13026.i
+bb12903.i: ; preds = %bb12038.i, %bb5467.i
+ br i1 false, label %bb12908.i, label %bb13026.i
+bb12908.i: ; preds = %bb12903.i
+ br i1 false, label %bb13026.i, label %bb13004.i
+bb13004.i: ; preds = %bb12908.i
+ switch i32 0, label %bb15866.i [
+ i32 3, label %bb13752.i
+ i32 4, label %bb14197.i
+ i32 8, label %bb14197.i
+ i32 10, label %bb13752.i
+ i32 11, label %bb13307.i
+ i32 16, label %bb13028.i
+ ]
+bb13016.i: ; preds = %bb12038.i, %bb5467.i
+ br label %bb13026.i
+bb13026.i: ; preds = %bb13016.i, %bb12908.i, %bb12903.i, %bb12747.i, %bb12514.i, %bb12040.i
+ switch i32 0, label %bb15866.i [
+ i32 3, label %bb13752.i
+ i32 4, label %bb14197.i
+ i32 8, label %bb14197.i
+ i32 10, label %bb13752.i
+ i32 11, label %bb13307.i
+ i32 16, label %bb13028.i
+ ]
+bb13028.i: ; preds = %bb13026.i, %bb13004.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb13307.i: ; preds = %bb13026.i, %bb13004.i
+ switch i32 %dt4080.0.i, label %bb13736.i [
+ i32 6, label %bb13312.i
+ i32 1, label %bb13624.i
+ i32 3, label %bb13624.i
+ i32 5, label %bb13649.i
+ i32 4, label %bb13688.i
+ i32 7, label %bb15866.i
+ ]
+bb13312.i: ; preds = %bb13307.i
+ br i1 false, label %bb13483.i, label %bb13400.i
+bb13400.i: ; preds = %bb13312.i
+ br label %bb13483.i
+bb13483.i: ; preds = %bb13400.i, %bb13312.i
+ br i1 false, label %bb13593.i, label %bb13505.i
+bb13505.i: ; preds = %bb13483.i
+ switch i32 %dt4080.0.i, label %bb14181.i [
+ i32 6, label %bb13757.i
+ i32 1, label %bb14069.i
+ i32 3, label %bb14069.i
+ i32 5, label %bb14094.i
+ i32 4, label %bb14133.i
+ i32 7, label %bb15866.i
+ ]
+bb13593.i: ; preds = %bb13483.i
+ switch i32 %dt4080.0.i, label %bb14181.i [
+ i32 6, label %bb13757.i
+ i32 1, label %bb14069.i
+ i32 3, label %bb14069.i
+ i32 5, label %bb14094.i
+ i32 4, label %bb14133.i
+ i32 7, label %bb15866.i
+ ]
+bb13624.i: ; preds = %bb13307.i, %bb13307.i
+ switch i32 %dt4080.0.i, label %bb14181.i [
+ i32 6, label %bb13757.i
+ i32 1, label %bb14069.i
+ i32 3, label %bb14069.i
+ i32 5, label %bb14094.i
+ i32 4, label %bb14133.i
+ i32 7, label %bb15866.i
+ ]
+bb13649.i: ; preds = %bb13307.i
+ br label %bb14094.i
+bb13688.i: ; preds = %bb13307.i
+ br label %bb14133.i
+bb13736.i: ; preds = %bb13307.i
+ br label %bb13752.i
+bb13752.i: ; preds = %bb13736.i, %bb13026.i, %bb13026.i, %bb13004.i, %bb13004.i
+ switch i32 %dt4080.0.i, label %bb14181.i [
+ i32 6, label %bb13757.i
+ i32 1, label %bb14069.i
+ i32 3, label %bb14069.i
+ i32 5, label %bb14094.i
+ i32 4, label %bb14133.i
+ i32 7, label %bb15866.i
+ ]
+bb13757.i: ; preds = %bb13752.i, %bb13624.i, %bb13593.i, %bb13505.i
+ br i1 false, label %bb13928.i, label %bb13845.i
+bb13845.i: ; preds = %bb13757.i
+ br label %bb13928.i
+bb13928.i: ; preds = %bb13845.i, %bb13757.i
+ br i1 false, label %bb14038.i, label %bb13950.i
+bb13950.i: ; preds = %bb13928.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14038.i: ; preds = %bb13928.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14069.i: ; preds = %bb13752.i, %bb13752.i, %bb13624.i, %bb13624.i, %bb13593.i, %bb13593.i, %bb13505.i, %bb13505.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14094.i: ; preds = %bb13752.i, %bb13649.i, %bb13624.i, %bb13593.i, %bb13505.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14133.i: ; preds = %bb13752.i, %bb13688.i, %bb13624.i, %bb13593.i, %bb13505.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14181.i: ; preds = %bb13752.i, %bb13624.i, %bb13593.i, %bb13505.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb14197.i: ; preds = %bb13026.i, %bb13026.i, %bb13004.i, %bb13004.i
+ switch i32 %dt4080.0.i, label %bb15805.i [
+ i32 6, label %bb14202.i
+ i32 1, label %bb15411.i
+ i32 3, label %bb15411.i
+ i32 5, label %bb15493.i
+ i32 4, label %bb15631.i
+ i32 7, label %bb15866.i
+ ]
+bb14202.i: ; preds = %bb14197.i
+ br i1 false, label %bb14373.i, label %bb14290.i
+bb14290.i: ; preds = %bb14202.i
+ br label %bb14373.i
+bb14373.i: ; preds = %bb14290.i, %bb14202.i
+ br i1 false, label %bb14483.i, label %bb14395.i
+bb14395.i: ; preds = %bb14373.i
+ br label %bb14483.i
+bb14483.i: ; preds = %bb14395.i, %bb14373.i
+ br i1 false, label %bb14672.i, label %bb14589.i
+bb14589.i: ; preds = %bb14483.i
+ br label %bb14672.i
+bb14672.i: ; preds = %bb14589.i, %bb14483.i
+ br i1 false, label %bb14782.i, label %bb14694.i
+bb14694.i: ; preds = %bb14672.i
+ br label %bb14782.i
+bb14782.i: ; preds = %bb14694.i, %bb14672.i
+ br i1 false, label %bb14971.i, label %bb14888.i
+bb14888.i: ; preds = %bb14782.i
+ br label %bb14971.i
+bb14971.i: ; preds = %bb14888.i, %bb14782.i
+ br i1 false, label %bb15081.i, label %bb14993.i
+bb14993.i: ; preds = %bb14971.i
+ br label %bb15081.i
+bb15081.i: ; preds = %bb14993.i, %bb14971.i
+ br i1 false, label %bb15270.i, label %bb15187.i
+bb15187.i: ; preds = %bb15081.i
+ br label %bb15270.i
+bb15270.i: ; preds = %bb15187.i, %bb15081.i
+ br i1 false, label %bb15380.i, label %bb15292.i
+bb15292.i: ; preds = %bb15270.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15380.i: ; preds = %bb15270.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15411.i: ; preds = %bb14197.i, %bb14197.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15493.i: ; preds = %bb14197.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15631.i: ; preds = %bb14197.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15805.i: ; preds = %bb14197.i
+ br label %bb15866.i
+bb15866.i: ; preds = %bb15805.i, %bb14197.i, %bb13752.i, %bb13624.i, %bb13593.i, %bb13505.i, %bb13307.i, %bb13026.i, %bb13004.i, %bb12038.i, %bb5467.i
+ br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i
+bb15869.i: ; preds = %bb15866.i, %bb15631.i, %bb15493.i, %bb15411.i, %bb15380.i, %bb15292.i, %bb14181.i, %bb14133.i, %bb14094.i, %bb14069.i, %bb14038.i, %bb13950.i, %bb13028.i
+ switch i32 0, label %UnifiedReturnBlock.i177 [
+ i32 4, label %bb15874.i
+ i32 8, label %bb15960.i
+ ]
+bb15874.i: ; preds = %bb15869.i
+ br label %glgVectorFloatConversion.exit
+bb15960.i: ; preds = %bb15869.i
+ br label %glgVectorFloatConversion.exit
+UnifiedReturnBlock.i177: ; preds = %bb15869.i, %bb15866.i, %bb15631.i, %bb15493.i, %bb15411.i, %bb15380.i, %bb15292.i, %bb14181.i, %bb14133.i, %bb14094.i, %bb14069.i, %bb14038.i, %bb13950.i, %bb13028.i
+ br label %glgVectorFloatConversion.exit
+glgVectorFloatConversion.exit: ; preds = %UnifiedReturnBlock.i177, %bb15960.i, %bb15874.i
+ br label %bb16581.i
+bb5351.i: ; preds = %loadVecColor_BGRA_UI8888R.exit
+ br i1 false, label %bb5359.i, label %bb5586.i
+bb5359.i: ; preds = %bb5351.i
+ switch i32 0, label %bb5586.i [
+ i32 0, label %bb5361.i
+ i32 1, label %bb5511.i
+ i32 2, label %bb5511.i
+ ]
+bb5361.i: ; preds = %bb5359.i
+ br i1 false, label %bb5366.i, label %bb5379.i
+bb5366.i: ; preds = %bb5361.i
+ br label %bb7230.i
+bb5379.i: ; preds = %bb5361.i
+ switch i32 %sf4083.0.i, label %bb5415.i [
+ i32 1, label %bb5384.i
+ i32 2, label %bb5402.i
+ ]
+bb5384.i: ; preds = %bb5379.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5445.i
+ i32 8, label %bb5445.i
+ i32 11, label %bb5445.i
+ ]
+bb5402.i: ; preds = %bb5379.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5445.i
+ i32 8, label %bb5445.i
+ i32 11, label %bb5445.i
+ ]
+bb5415.i: ; preds = %bb5379.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5445.i
+ i32 8, label %bb5445.i
+ i32 11, label %bb5445.i
+ ]
+bb5445.i: ; preds = %bb5415.i, %bb5415.i, %bb5415.i, %bb5402.i, %bb5402.i, %bb5402.i, %bb5384.i, %bb5384.i, %bb5384.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5470.i
+ i32 8, label %bb5470.i
+ i32 11, label %bb6853.i
+ ]
+bb5470.i: ; preds = %bb5445.i, %bb5445.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5498.i
+ i32 8, label %bb5493.i
+ i32 11, label %bb6853.i
+ ]
+bb5493.i: ; preds = %bb5470.i
+ br i1 false, label %bb5498.i, label %bb5586.i
+bb5498.i: ; preds = %bb5493.i, %bb5470.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5591.i
+ i32 8, label %bb6153.i
+ i32 11, label %bb6853.i
+ ]
+bb5511.i: ; preds = %bb5359.i, %bb5359.i
+ br i1 false, label %bb5568.i, label %bb5586.i
+bb5568.i: ; preds = %bb5511.i
+ br label %bb5586.i
+bb5586.i: ; preds = %bb5568.i, %bb5511.i, %bb5493.i, %bb5359.i, %bb5351.i
+ switch i32 0, label %bb7230.i [
+ i32 4, label %bb5591.i
+ i32 8, label %bb6153.i
+ i32 11, label %bb6853.i
+ ]
+bb5591.i: ; preds = %bb5586.i, %bb5498.i
+ switch i32 0, label %bb5995.i [
+ i32 4, label %bb5596.i
+ i32 8, label %bb5680.i
+ i32 11, label %bb5842.i
+ ]
+bb5596.i: ; preds = %bb5591.i
+ br i1 false, label %bb8428.i, label %bb5602.i
+bb5602.i: ; preds = %bb5596.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb5680.i: ; preds = %bb5591.i
+ br i1 false, label %bb5692.i, label %bb5764.i
+bb5692.i: ; preds = %bb5680.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb5764.i: ; preds = %bb5680.i
+ br i1 false, label %bb8428.i, label %bb5772.i
+bb5772.i: ; preds = %bb5764.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb5842.i: ; preds = %bb5591.i
+ br i1 false, label %bb5920.i, label %bb5845.i
+bb5845.i: ; preds = %bb5842.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb5920.i: ; preds = %bb5842.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb5995.i: ; preds = %bb5591.i
+ switch i32 %df4081.0.i, label %bb8428.i [
+ i32 0, label %bb6007.i
+ i32 10, label %bb6007.i
+ i32 1, label %bb6042.i
+ i32 2, label %bb6079.i
+ i32 3, label %bb6116.i
+ ]
+bb6007.i: ; preds = %bb5995.i, %bb5995.i
+ br i1 false, label %bb6012.i, label %bb8428.i
+bb6012.i: ; preds = %bb6007.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6042.i: ; preds = %bb5995.i
+ br i1 false, label %bb6049.i, label %bb6045.i
+bb6045.i: ; preds = %bb6042.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6049.i: ; preds = %bb6042.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6079.i: ; preds = %bb5995.i
+ br i1 false, label %bb6086.i, label %bb6082.i
+bb6082.i: ; preds = %bb6079.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6086.i: ; preds = %bb6079.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6116.i: ; preds = %bb5995.i
+ br i1 false, label %bb6123.i, label %bb6119.i
+bb6119.i: ; preds = %bb6116.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6123.i: ; preds = %bb6116.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6153.i: ; preds = %bb5586.i, %bb5498.i
+ switch i32 0, label %bb6724.i [
+ i32 4, label %bb6158.i
+ i32 8, label %bb6459.i
+ i32 11, label %bb6621.i
+ ]
+bb6158.i: ; preds = %bb6153.i
+ br i1 false, label %bb6242.i, label %bb6161.i
+bb6161.i: ; preds = %bb6158.i
+ br i1 false, label %bb6239.i, label %bb6166.i
+bb6166.i: ; preds = %bb6161.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6239.i: ; preds = %bb6161.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6242.i: ; preds = %bb6158.i
+ br i1 false, label %bb6245.i, label %bb6317.i
+bb6245.i: ; preds = %bb6242.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6317.i: ; preds = %bb6242.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6459.i: ; preds = %bb6153.i
+ br i1 false, label %bb6471.i, label %bb6543.i
+bb6471.i: ; preds = %bb6459.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6543.i: ; preds = %bb6459.i
+ br i1 false, label %bb8428.i, label %bb6551.i
+bb6551.i: ; preds = %bb6543.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6621.i: ; preds = %bb6153.i
+ br i1 false, label %bb6626.i, label %bb6651.i
+bb6626.i: ; preds = %bb6621.i
+ br label %bb6651.i
+bb6651.i: ; preds = %bb6626.i, %bb6621.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6724.i: ; preds = %bb6153.i
+ switch i32 %df4081.0.i, label %bb8428.i [
+ i32 0, label %bb6736.i
+ i32 10, label %bb6736.i
+ i32 1, label %bb6771.i
+ i32 2, label %bb6808.i
+ i32 3, label %bb6845.i
+ ]
+bb6736.i: ; preds = %bb6724.i, %bb6724.i
+ br i1 false, label %bb6741.i, label %bb8428.i
+bb6741.i: ; preds = %bb6736.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6771.i: ; preds = %bb6724.i
+ br i1 false, label %bb6778.i, label %bb6774.i
+bb6774.i: ; preds = %bb6771.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6778.i: ; preds = %bb6771.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6808.i: ; preds = %bb6724.i
+ br i1 false, label %bb6815.i, label %bb6811.i
+bb6811.i: ; preds = %bb6808.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6815.i: ; preds = %bb6808.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6845.i: ; preds = %bb6724.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6853.i: ; preds = %bb5586.i, %bb5498.i, %bb5470.i, %bb5445.i
+ switch i32 0, label %bb8428.i [
+ i32 4, label %bb6858.i
+ i32 8, label %bb7072.i
+ i32 10, label %bb7149.i
+ i32 3, label %bb7192.i
+ ]
+bb6858.i: ; preds = %bb6853.i
+ br i1 false, label %bb6942.i, label %bb6861.i
+bb6861.i: ; preds = %bb6858.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb6942.i: ; preds = %bb6858.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7072.i: ; preds = %bb6853.i
+ br i1 false, label %bb7119.i, label %bb7075.i
+bb7075.i: ; preds = %bb7072.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7119.i: ; preds = %bb7072.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7149.i: ; preds = %bb6853.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7192.i: ; preds = %bb6853.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7230.i: ; preds = %bb5586.i, %bb5498.i, %bb5470.i, %bb5445.i, %bb5415.i, %bb5402.i, %bb5384.i, %bb5366.i
+ switch i32 %sf4083.0.i, label %bb8428.i [
+ i32 10, label %bb7235.i
+ i32 0, label %bb7455.i
+ i32 1, label %bb7725.i
+ i32 2, label %bb7978.i
+ i32 3, label %bb8231.i
+ ]
+bb7235.i: ; preds = %bb7230.i
+ switch i32 0, label %bb7442.i [
+ i32 4, label %bb7240.i
+ i32 8, label %bb7329.i
+ i32 11, label %bb7369.i
+ ]
+bb7240.i: ; preds = %bb7235.i
+ br i1 false, label %bb7252.i, label %bb7243.i
+bb7243.i: ; preds = %bb7240.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7252.i: ; preds = %bb7240.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7329.i: ; preds = %bb7235.i
+ br i1 false, label %bb7339.i, label %bb7332.i
+bb7332.i: ; preds = %bb7329.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7339.i: ; preds = %bb7329.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7369.i: ; preds = %bb7235.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7442.i: ; preds = %bb7235.i
+ br i1 false, label %bb7447.i, label %bb8428.i
+bb7447.i: ; preds = %bb7442.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7455.i: ; preds = %bb7230.i
+ switch i32 0, label %bb7703.i [
+ i32 4, label %bb7460.i
+ i32 8, label %bb7546.i
+ i32 11, label %bb7630.i
+ ]
+bb7460.i: ; preds = %bb7455.i
+ br i1 false, label %bb7471.i, label %bb7463.i
+bb7463.i: ; preds = %bb7460.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7471.i: ; preds = %bb7460.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7546.i: ; preds = %bb7455.i
+ br i1 false, label %bb7555.i, label %bb7549.i
+bb7549.i: ; preds = %bb7546.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7555.i: ; preds = %bb7546.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7630.i: ; preds = %bb7455.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7703.i: ; preds = %bb7455.i
+ br i1 false, label %bb7709.i, label %bb7712.i
+bb7709.i: ; preds = %bb7703.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7712.i: ; preds = %bb7703.i
+ br i1 false, label %bb7717.i, label %bb8428.i
+bb7717.i: ; preds = %bb7712.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7725.i: ; preds = %bb7230.i
+ switch i32 0, label %bb7945.i [
+ i32 4, label %bb7730.i
+ i32 8, label %bb7819.i
+ i32 11, label %bb7906.i
+ ]
+bb7730.i: ; preds = %bb7725.i
+ br i1 false, label %bb7744.i, label %bb7733.i
+bb7733.i: ; preds = %bb7730.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7744.i: ; preds = %bb7730.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7819.i: ; preds = %bb7725.i
+ br i1 false, label %bb7831.i, label %bb7822.i
+bb7822.i: ; preds = %bb7819.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7831.i: ; preds = %bb7819.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7906.i: ; preds = %bb7725.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7945.i: ; preds = %bb7725.i
+ switch i32 %df4081.0.i, label %bb8428.i [
+ i32 0, label %bb7962.i
+ i32 2, label %bb7962.i
+ i32 10, label %bb7962.i
+ i32 3, label %bb7970.i
+ ]
+bb7962.i: ; preds = %bb7945.i, %bb7945.i, %bb7945.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7970.i: ; preds = %bb7945.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7978.i: ; preds = %bb7230.i
+ switch i32 0, label %bb8198.i [
+ i32 4, label %bb7983.i
+ i32 8, label %bb8072.i
+ i32 11, label %bb8159.i
+ ]
+bb7983.i: ; preds = %bb7978.i
+ br i1 false, label %bb7997.i, label %bb7986.i
+bb7986.i: ; preds = %bb7983.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb7997.i: ; preds = %bb7983.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8072.i: ; preds = %bb7978.i
+ br i1 false, label %bb8084.i, label %bb8075.i
+bb8075.i: ; preds = %bb8072.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8084.i: ; preds = %bb8072.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8159.i: ; preds = %bb7978.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8198.i: ; preds = %bb7978.i
+ switch i32 %df4081.0.i, label %bb8428.i [
+ i32 0, label %bb8215.i
+ i32 1, label %bb8215.i
+ i32 10, label %bb8215.i
+ i32 3, label %bb8223.i
+ ]
+bb8215.i: ; preds = %bb8198.i, %bb8198.i, %bb8198.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8223.i: ; preds = %bb8198.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8231.i: ; preds = %bb7230.i
+ switch i32 0, label %bb8428.i [
+ i32 4, label %bb8236.i
+ i32 8, label %bb8326.i
+ i32 11, label %bb8347.i
+ i32 10, label %bb8425.i
+ ]
+bb8236.i: ; preds = %bb8231.i
+ br i1 false, label %bb8251.i, label %bb8239.i
+bb8239.i: ; preds = %bb8236.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8251.i: ; preds = %bb8236.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8326.i: ; preds = %bb8231.i
+ br i1 false, label %bb8339.i, label %bb8428.i
+bb8339.i: ; preds = %bb8326.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8347.i: ; preds = %bb8231.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8425.i: ; preds = %bb8231.i
+ br label %bb8428.i
+bb8428.i: ; preds = %bb8425.i, %bb8326.i, %bb8231.i, %bb8198.i, %bb7945.i, %bb7712.i, %bb7442.i, %bb7230.i, %bb6853.i, %bb6736.i, %bb6724.i, %bb6543.i, %bb6007.i, %bb5995.i, %bb5764.i, %bb5596.i
+ br i1 false, label %bb8668.i, label %bb8434.i
+bb8434.i: ; preds = %bb8428.i, %bb8347.i, %bb8339.i, %bb8251.i, %bb8239.i, %bb8223.i, %bb8215.i, %bb8159.i, %bb8084.i, %bb8075.i, %bb7997.i, %bb7986.i, %bb7970.i, %bb7962.i, %bb7906.i, %bb7831.i, %bb7822.i, %bb7744.i, %bb7733.i, %bb7717.i, %bb7709.i, %bb7630.i, %bb7555.i, %bb7549.i, %bb7471.i, %bb7463.i, %bb7447.i, %bb7369.i, %bb7339.i, %bb7332.i, %bb7252.i, %bb7243.i, %bb7192.i, %bb7149.i, %bb7119.i, %bb7075.i, %bb6942.i, %bb6861.i, %bb6845.i, %bb6815.i, %bb6811.i, %bb6778.i, %bb6774.i, %bb6741.i, %bb6651.i, %bb6551.i, %bb6471.i, %bb6317.i, %bb6245.i, %bb6239.i, %bb6166.i, %bb6123.i, %bb6119.i, %bb6086.i, %bb6082.i, %bb6049.i, %bb6045.i, %bb6012.i, %bb5920.i, %bb5845.i, %bb5772.i, %bb5692.i, %bb5602.i
+ switch i32 0, label %bb8668.i [
+ i32 0, label %bb8436.i
+ i32 1, label %bb8531.i
+ i32 2, label %bb8531.i
+ ]
+bb8436.i: ; preds = %bb8434.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8465.i
+ i32 8, label %bb8465.i
+ i32 11, label %bb8465.i
+ i32 3, label %bb9301.i
+ ]
+bb8465.i: ; preds = %bb8436.i, %bb8436.i, %bb8436.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8490.i
+ i32 8, label %bb8490.i
+ i32 3, label %bb9301.i
+ i32 11, label %bb9153.i
+ ]
+bb8490.i: ; preds = %bb8465.i, %bb8465.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8518.i
+ i32 8, label %bb8513.i
+ i32 3, label %bb9301.i
+ i32 11, label %bb9153.i
+ ]
+bb8513.i: ; preds = %bb8490.i
+ br i1 false, label %bb8518.i, label %bb8668.i
+bb8518.i: ; preds = %bb8513.i, %bb8490.i
+ switch i32 0, label %bb9310.i [
+ i32 3, label %bb9301.i
+ i32 4, label %bb8670.i
+ i32 8, label %bb9112.i
+ i32 11, label %bb9153.i
+ ]
+bb8531.i: ; preds = %bb8434.i, %bb8434.i
+ br i1 false, label %bb8536.i, label %bb8575.i
+bb8536.i: ; preds = %bb8531.i
+ br i1 false, label %bb8557.i, label %bb8588.i
+bb8557.i: ; preds = %bb8536.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8600.i
+ i32 8, label %bb8600.i
+ i32 3, label %bb9301.i
+ i32 11, label %bb9153.i
+ ]
+bb8575.i: ; preds = %bb8531.i
+ br label %bb8588.i
+bb8588.i: ; preds = %bb8575.i, %bb8536.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8600.i
+ i32 8, label %bb8600.i
+ i32 3, label %bb9301.i
+ i32 11, label %bb9153.i
+ ]
+bb8600.i: ; preds = %bb8588.i, %bb8588.i, %bb8557.i, %bb8557.i
+ switch i32 0, label %bb9310.i [
+ i32 4, label %bb8629.i
+ i32 3, label %bb9301.i
+ i32 8, label %bb9112.i
+ i32 11, label %bb9153.i
+ ]
+bb8629.i: ; preds = %bb8600.i
+ br i1 false, label %bb8650.i, label %bb8668.i
+bb8650.i: ; preds = %bb8629.i
+ br label %bb8668.i
+bb8668.i: ; preds = %bb8650.i, %bb8629.i, %bb8513.i, %bb8434.i, %bb8428.i, %bb8347.i, %bb8339.i, %bb8251.i, %bb8239.i, %bb8223.i, %bb8215.i, %bb8159.i, %bb8084.i, %bb8075.i, %bb7997.i, %bb7986.i, %bb7970.i, %bb7962.i, %bb7906.i, %bb7831.i, %bb7822.i, %bb7744.i, %bb7733.i, %bb7717.i, %bb7709.i, %bb7630.i, %bb7555.i, %bb7549.i, %bb7471.i, %bb7463.i, %bb7447.i, %bb7369.i, %bb7339.i, %bb7332.i, %bb7252.i, %bb7243.i, %bb7192.i, %bb7149.i, %bb7119.i, %bb7075.i, %bb6942.i, %bb6861.i, %bb6845.i, %bb6815.i, %bb6811.i, %bb6778.i, %bb6774.i, %bb6741.i, %bb6651.i, %bb6551.i, %bb6471.i, %bb6317.i, %bb6245.i, %bb6239.i, %bb6166.i, %bb6123.i, %bb6119.i, %bb6086.i, %bb6082.i, %bb6049.i, %bb6045.i, %bb6012.i, %bb5920.i, %bb5845.i, %bb5772.i, %bb5692.i, %bb5602.i
+ switch i32 0, label %bb9310.i [
+ i32 3, label %bb9301.i
+ i32 4, label %bb8670.i
+ i32 8, label %bb9112.i
+ i32 11, label %bb9153.i
+ ]
+bb8670.i: ; preds = %bb8668.i, %bb8518.i
+ br label %bb9310.i
+bb9112.i: ; preds = %bb8668.i, %bb8600.i, %bb8518.i
+ br label %bb9310.i
+bb9153.i: ; preds = %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i
+ br label %bb9310.i
+bb9301.i: ; preds = %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i, %bb8436.i
+ br label %bb9310.i
+bb9310.i: ; preds = %bb9301.i, %bb9153.i, %bb9112.i, %bb8670.i, %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i, %bb8436.i
+ br i1 false, label %bb16581.i, label %bb9313.i
+bb9313.i: ; preds = %bb9310.i
+ switch i32 %dt4080.0.i, label %bb16578.i [
+ i32 0, label %bb9315.i
+ i32 1, label %bb9890.i
+ i32 2, label %bb10465.i
+ i32 3, label %bb11040.i
+ i32 4, label %bb11615.i
+ i32 5, label %bb11823.i
+ i32 8, label %bb12398.i
+ i32 9, label %bb12833.i
+ i32 10, label %bb13268.i
+ i32 11, label %bb13268.i
+ i32 12, label %bb13703.i
+ i32 13, label %bb13703.i
+ i32 14, label %bb14278.i
+ i32 15, label %bb14853.i
+ i32 16, label %bb9315.i
+ i32 17, label %bb9315.i
+ i32 18, label %bb15428.i
+ i32 19, label %bb16003.i
+ ]
+bb9315.i: ; preds = %bb9313.i, %bb9313.i, %bb9313.i
+ br i1 false, label %bb9535.i, label %bb9323.i
+bb9323.i: ; preds = %bb9315.i
+ br label %bb9535.i
+bb9535.i: ; preds = %bb9323.i, %bb9315.i
+ br label %bb16581.i
+bb9890.i: ; preds = %bb9313.i
+ br i1 false, label %bb10255.i, label %bb9898.i
+bb9898.i: ; preds = %bb9890.i
+ br label %bb10255.i
+bb10255.i: ; preds = %bb9898.i, %bb9890.i
+ br label %bb16581.i
+bb10465.i: ; preds = %bb9313.i
+ br i1 false, label %bb10685.i, label %bb10473.i
+bb10473.i: ; preds = %bb10465.i
+ br label %bb10685.i
+bb10685.i: ; preds = %bb10473.i, %bb10465.i
+ br label %bb16581.i
+bb11040.i: ; preds = %bb9313.i
+ br i1 false, label %bb11405.i, label %bb11048.i
+bb11048.i: ; preds = %bb11040.i
+ br label %bb11405.i
+bb11405.i: ; preds = %bb11048.i, %bb11040.i
+ br label %bb16581.i
+bb11615.i: ; preds = %bb9313.i
+ br i1 false, label %bb16581.i, label %bb11618.i
+bb11618.i: ; preds = %bb11615.i
+ br label %bb16581.i
+bb11823.i: ; preds = %bb9313.i
+ br i1 false, label %bb12188.i, label %bb11831.i
+bb11831.i: ; preds = %bb11823.i
+ br label %bb12188.i
+bb12188.i: ; preds = %bb11831.i, %bb11823.i
+ br label %bb16581.i
+bb12398.i: ; preds = %bb9313.i
+ br i1 false, label %bb12566.i, label %bb12406.i
+bb12406.i: ; preds = %bb12398.i
+ br label %bb12566.i
+bb12566.i: ; preds = %bb12406.i, %bb12398.i
+ br label %bb16581.i
+bb12833.i: ; preds = %bb9313.i
+ br i1 false, label %bb13001.i, label %bb12841.i
+bb12841.i: ; preds = %bb12833.i
+ br label %bb13001.i
+bb13001.i: ; preds = %bb12841.i, %bb12833.i
+ br label %bb16581.i
+bb13268.i: ; preds = %bb9313.i, %bb9313.i
+ br i1 false, label %bb13436.i, label %bb13276.i
+bb13276.i: ; preds = %bb13268.i
+ br label %bb13436.i
+bb13436.i: ; preds = %bb13276.i, %bb13268.i
+ br label %bb16581.i
+bb13703.i: ; preds = %bb9313.i, %bb9313.i
+ br i1 false, label %bb13923.i, label %bb13711.i
+bb13711.i: ; preds = %bb13703.i
+ br label %bb13923.i
+bb13923.i: ; preds = %bb13711.i, %bb13703.i
+ br label %bb16581.i
+bb14278.i: ; preds = %bb9313.i
+ br i1 false, label %bb14498.i, label %bb14286.i
+bb14286.i: ; preds = %bb14278.i
+ br label %bb14498.i
+bb14498.i: ; preds = %bb14286.i, %bb14278.i
+ br label %bb16581.i
+bb14853.i: ; preds = %bb9313.i
+ br i1 false, label %bb15073.i, label %bb14861.i
+bb14861.i: ; preds = %bb14853.i
+ br label %bb15073.i
+bb15073.i: ; preds = %bb14861.i, %bb14853.i
+ br label %bb16581.i
+bb15428.i: ; preds = %bb9313.i
+ br i1 false, label %bb15648.i, label %bb15436.i
+bb15436.i: ; preds = %bb15428.i
+ br label %bb15648.i
+bb15648.i: ; preds = %bb15436.i, %bb15428.i
+ br label %bb16581.i
+bb16003.i: ; preds = %bb9313.i
+ br i1 false, label %bb16223.i, label %bb16011.i
+bb16011.i: ; preds = %bb16003.i
+ br label %bb16223.i
+bb16223.i: ; preds = %bb16011.i, %bb16003.i
+ br label %bb16581.i
+bb16578.i: ; preds = %bb9313.i
+ unreachable
+bb16581.i: ; preds = %bb16223.i, %bb15648.i, %bb15073.i, %bb14498.i, %bb13923.i, %bb13436.i, %bb13001.i, %bb12566.i, %bb12188.i, %bb11618.i, %bb11615.i, %bb11405.i, %bb10685.i, %bb10255.i, %bb9535.i, %bb9310.i, %glgVectorFloatConversion.exit
+ br label %storeVecColor_RGB_UI.exit
+storeVecColor_RGB_UI.exit: ; preds = %bb16581.i
+ br i1 false, label %bb5295.i, label %bb16621.i
+bb16607.i: ; preds = %bb5276.i
+ br i1 false, label %bb5295.preheader.i, label %bb16621.i
+bb5295.preheader.i: ; preds = %bb16607.i
+ br label %bb5295.i
+bb16621.i: ; preds = %bb16607.i, %storeVecColor_RGB_UI.exit
+ br label %bb16650.outer.i
+bb16650.outer.i: ; preds = %bb16621.i
+ br label %bb16650.i
+bb16650.i: ; preds = %storeColor_RGB_UI.exit, %bb16650.outer.i
+ br label %loadColor_BGRA_UI8888R.exit
+loadColor_BGRA_UI8888R.exit: ; preds = %bb16650.i
+ br i1 false, label %bb16671.i, label %bb16697.i
+bb16671.i: ; preds = %loadColor_BGRA_UI8888R.exit
+ br i1 false, label %bb.i179, label %bb662.i
+bb.i179: ; preds = %bb16671.i
+ switch i32 0, label %bb513.i [
+ i32 7, label %bb418.i
+ i32 6, label %bb433.i
+ ]
+bb418.i: ; preds = %bb.i179
+ br label %bb559.i
+bb433.i: ; preds = %bb.i179
+ switch i32 0, label %bb493.i [
+ i32 31744, label %bb455.i
+ i32 0, label %bb471.i
+ ]
+bb455.i: ; preds = %bb433.i
+ br i1 false, label %bb463.i, label %bb504.i
+bb463.i: ; preds = %bb455.i
+ br label %bb559.i
+bb471.i: ; preds = %bb433.i
+ br i1 false, label %bb497.i, label %bb484.preheader.i
+bb484.preheader.i: ; preds = %bb471.i
+ br i1 false, label %bb479.i, label %bb490.i
+bb479.i: ; preds = %bb479.i, %bb484.preheader.i
+ br i1 false, label %bb479.i, label %bb490.i
+bb490.i: ; preds = %bb479.i, %bb484.preheader.i
+ br label %bb559.i
+bb493.i: ; preds = %bb433.i
+ br label %bb497.i
+bb497.i: ; preds = %bb493.i, %bb471.i
+ br label %bb504.i
+bb504.i: ; preds = %bb497.i, %bb455.i
+ br label %bb513.i
+bb513.i: ; preds = %bb504.i, %bb.i179
+ br label %bb559.i
+bb559.i: ; preds = %bb513.i, %bb490.i, %bb463.i, %bb418.i
+ br i1 false, label %bb2793.i, label %bb614.i
+bb614.i: ; preds = %bb559.i
+ br i1 false, label %bb626.i, label %bb620.i
+bb620.i: ; preds = %bb614.i
+ br i1 false, label %bb625.i, label %bb626.i
+bb625.i: ; preds = %bb620.i
+ br label %bb626.i
+bb626.i: ; preds = %bb625.i, %bb620.i, %bb614.i
+ br i1 false, label %bb638.i, label %bb632.i
+bb632.i: ; preds = %bb626.i
+ br i1 false, label %bb637.i, label %bb638.i
+bb637.i: ; preds = %bb632.i
+ br label %bb638.i
+bb638.i: ; preds = %bb637.i, %bb632.i, %bb626.i
+ br i1 false, label %bb650.i, label %bb644.i
+bb644.i: ; preds = %bb638.i
+ br i1 false, label %bb649.i, label %bb650.i
+bb649.i: ; preds = %bb644.i
+ br label %bb650.i
+bb650.i: ; preds = %bb649.i, %bb644.i, %bb638.i
+ br i1 false, label %bb2793.i, label %bb656.i
+bb656.i: ; preds = %bb650.i
+ br i1 false, label %bb661.i, label %bb2793.i
+bb661.i: ; preds = %bb656.i
+ switch i32 0, label %bb2883.i [
+ i32 3, label %bb2874.i
+ i32 4, label %bb2795.i
+ i32 8, label %bb2810.i
+ i32 10, label %bb2834.i
+ i32 11, label %bb2819.i
+ i32 16, label %bb2810.i
+ ]
+bb662.i: ; preds = %bb16671.i
+ switch i32 0, label %bb1937.i [
+ i32 3, label %bb902.i
+ i32 4, label %bb1416.i
+ i32 8, label %bb1020.i
+ i32 10, label %bb902.i
+ i32 11, label %bb784.i
+ i32 16, label %bb664.i
+ ]
+bb664.i: ; preds = %bb662.i
+ br i1 false, label %bb682.i, label %bb669.i
+bb669.i: ; preds = %bb664.i
+ br label %bb710.i
+bb682.i: ; preds = %bb664.i
+ br label %bb710.i
+bb710.i: ; preds = %bb682.i, %bb669.i
+ br i1 false, label %bb760.i, label %bb754.i
+bb754.i: ; preds = %bb710.i
+ br i1 false, label %bb759.i, label %bb760.i
+bb759.i: ; preds = %bb754.i
+ br label %bb760.i
+bb760.i: ; preds = %bb759.i, %bb754.i, %bb710.i
+ br i1 false, label %bb772.i, label %bb766.i
+bb766.i: ; preds = %bb760.i
+ br i1 false, label %bb771.i, label %bb772.i
+bb771.i: ; preds = %bb766.i
+ br label %bb772.i
+bb772.i: ; preds = %bb771.i, %bb766.i, %bb760.i
+ br i1 false, label %bb1937.i, label %bb778.i
+bb778.i: ; preds = %bb772.i
+ br i1 false, label %bb783.i, label %bb1937.i
+bb783.i: ; preds = %bb778.i
+ br label %bb1937.i
+bb784.i: ; preds = %bb662.i
+ switch i32 0, label %bb892.i [
+ i32 1, label %bb868.i
+ i32 3, label %bb868.i
+ i32 4, label %bb882.i
+ i32 6, label %bb792.i
+ i32 7, label %bb786.i
+ ]
+bb786.i: ; preds = %bb784.i
+ br label %bb904.i
+bb792.i: ; preds = %bb784.i
+ switch i32 0, label %bb852.i [
+ i32 31744, label %bb814.i
+ i32 0, label %bb830.i
+ ]
+bb814.i: ; preds = %bb792.i
+ br i1 false, label %bb822.i, label %bb863.i
+bb822.i: ; preds = %bb814.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb830.i: ; preds = %bb792.i
+ br i1 false, label %bb856.i, label %bb843.preheader.i
+bb843.preheader.i: ; preds = %bb830.i
+ br i1 false, label %bb838.i, label %bb849.i
+bb838.i: ; preds = %bb838.i, %bb843.preheader.i
+ br i1 false, label %bb838.i, label %bb849.i
+bb849.i: ; preds = %bb838.i, %bb843.preheader.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb852.i: ; preds = %bb792.i
+ br label %bb856.i
+bb856.i: ; preds = %bb852.i, %bb830.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb863.i: ; preds = %bb814.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb868.i: ; preds = %bb784.i, %bb784.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb882.i: ; preds = %bb784.i
+ br label %bb1000.i
+bb892.i: ; preds = %bb784.i
+ br label %bb902.i
+bb902.i: ; preds = %bb892.i, %bb662.i, %bb662.i
+ switch i32 0, label %bb1010.i [
+ i32 1, label %bb986.i
+ i32 3, label %bb986.i
+ i32 4, label %bb1000.i
+ i32 6, label %bb910.i
+ i32 7, label %bb904.i
+ ]
+bb904.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i, %bb786.i
+ br label %bb1937.i
+bb910.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i
+ switch i32 0, label %bb970.i [
+ i32 31744, label %bb932.i
+ i32 0, label %bb948.i
+ ]
+bb932.i: ; preds = %bb910.i
+ br i1 false, label %bb940.i, label %bb981.i
+bb940.i: ; preds = %bb932.i
+ br label %bb1937.i
+bb948.i: ; preds = %bb910.i
+ br i1 false, label %bb974.i, label %bb961.preheader.i
+bb961.preheader.i: ; preds = %bb948.i
+ br i1 false, label %bb956.i, label %bb967.i
+bb956.i: ; preds = %bb956.i, %bb961.preheader.i
+ br i1 false, label %bb956.i, label %bb967.i
+bb967.i: ; preds = %bb956.i, %bb961.preheader.i
+ br label %bb1937.i
+bb970.i: ; preds = %bb910.i
+ br label %bb974.i
+bb974.i: ; preds = %bb970.i, %bb948.i
+ br label %bb1937.i
+bb981.i: ; preds = %bb932.i
+ br label %bb1937.i
+bb986.i: ; preds = %bb902.i, %bb902.i, %bb868.i, %bb868.i, %bb863.i, %bb863.i, %bb856.i, %bb856.i, %bb849.i, %bb849.i, %bb822.i, %bb822.i
+ br label %bb1937.i
+bb1000.i: ; preds = %bb902.i, %bb882.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i
+ br label %bb1937.i
+bb1010.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i
+ br label %bb1937.i
+bb1020.i: ; preds = %bb662.i
+ switch i32 0, label %bb1388.i [
+ i32 1, label %bb1264.i
+ i32 3, label %bb1264.i
+ i32 4, label %bb1304.i
+ i32 6, label %bb1038.i
+ i32 7, label %bb1022.i
+ i32 8, label %bb1332.i
+ i32 9, label %bb1332.i
+ i32 10, label %bb1360.i
+ i32 11, label %bb1360.i
+ ]
+bb1022.i: ; preds = %bb1020.i
+ br label %bb1937.i
+bb1038.i: ; preds = %bb1020.i
+ switch i32 0, label %bb1098.i [
+ i32 31744, label %bb1060.i
+ i32 0, label %bb1076.i
+ ]
+bb1060.i: ; preds = %bb1038.i
+ br i1 false, label %bb1068.i, label %bb1109.i
+bb1068.i: ; preds = %bb1060.i
+ br label %bb1109.i
+bb1076.i: ; preds = %bb1038.i
+ br i1 false, label %bb1102.i, label %bb1089.preheader.i
+bb1089.preheader.i: ; preds = %bb1076.i
+ br i1 false, label %bb1084.i, label %bb1095.i
+bb1084.i: ; preds = %bb1084.i, %bb1089.preheader.i
+ br i1 false, label %bb1084.i, label %bb1095.i
+bb1095.i: ; preds = %bb1084.i, %bb1089.preheader.i
+ br label %bb1109.i
+bb1098.i: ; preds = %bb1038.i
+ br label %bb1102.i
+bb1102.i: ; preds = %bb1098.i, %bb1076.i
+ br label %bb1109.i
+bb1109.i: ; preds = %bb1102.i, %bb1095.i, %bb1068.i, %bb1060.i
+ switch i32 0, label %bb1173.i [
+ i32 31744, label %bb1135.i
+ i32 0, label %bb1151.i
+ ]
+bb1135.i: ; preds = %bb1109.i
+ br i1 false, label %bb1143.i, label %bb1184.i
+bb1143.i: ; preds = %bb1135.i
+ br label %bb1184.i
+bb1151.i: ; preds = %bb1109.i
+ br i1 false, label %bb1177.i, label %bb1164.preheader.i
+bb1164.preheader.i: ; preds = %bb1151.i
+ br i1 false, label %bb1159.i, label %bb1170.i
+bb1159.i: ; preds = %bb1159.i, %bb1164.preheader.i
+ br i1 false, label %bb1159.i, label %bb1170.i
+bb1170.i: ; preds = %bb1159.i, %bb1164.preheader.i
+ br label %bb1184.i
+bb1173.i: ; preds = %bb1109.i
+ br label %bb1177.i
+bb1177.i: ; preds = %bb1173.i, %bb1151.i
+ br label %bb1184.i
+bb1184.i: ; preds = %bb1177.i, %bb1170.i, %bb1143.i, %bb1135.i
+ switch i32 0, label %bb1248.i [
+ i32 31744, label %bb1210.i
+ i32 0, label %bb1226.i
+ ]
+bb1210.i: ; preds = %bb1184.i
+ br i1 false, label %bb1218.i, label %bb1259.i
+bb1218.i: ; preds = %bb1210.i
+ br label %bb1937.i
+bb1226.i: ; preds = %bb1184.i
+ br i1 false, label %bb1252.i, label %bb1239.preheader.i
+bb1239.preheader.i: ; preds = %bb1226.i
+ br i1 false, label %bb1234.i, label %bb1245.i
+bb1234.i: ; preds = %bb1234.i, %bb1239.preheader.i
+ br i1 false, label %bb1234.i, label %bb1245.i
+bb1245.i: ; preds = %bb1234.i, %bb1239.preheader.i
+ br label %bb1937.i
+bb1248.i: ; preds = %bb1184.i
+ br label %bb1252.i
+bb1252.i: ; preds = %bb1248.i, %bb1226.i
+ br label %bb1937.i
+bb1259.i: ; preds = %bb1210.i
+ br label %bb1937.i
+bb1264.i: ; preds = %bb1020.i, %bb1020.i
+ br label %bb1937.i
+bb1304.i: ; preds = %bb1020.i
+ br label %bb1937.i
+bb1332.i: ; preds = %bb1020.i, %bb1020.i
+ br label %bb1937.i
+bb1360.i: ; preds = %bb1020.i, %bb1020.i
+ br label %bb1937.i
+bb1388.i: ; preds = %bb1020.i
+ br label %bb1937.i
+bb1416.i: ; preds = %bb662.i
+ switch i32 0, label %bb1900.i [
+ i32 1, label %bb1740.i
+ i32 3, label %bb1740.i
+ i32 4, label %bb1793.i
+ i32 6, label %bb1439.i
+ i32 7, label %bb1418.i
+ i32 14, label %bb1830.i
+ i32 15, label %bb1830.i
+ i32 18, label %bb1863.i
+ i32 19, label %bb1863.i
+ ]
+bb1418.i: ; preds = %bb1416.i
+ br label %bb1937.i
+bb1439.i: ; preds = %bb1416.i
+ switch i32 0, label %bb1499.i [
+ i32 31744, label %bb1461.i
+ i32 0, label %bb1477.i
+ ]
+bb1461.i: ; preds = %bb1439.i
+ br i1 false, label %bb1469.i, label %bb1510.i
+bb1469.i: ; preds = %bb1461.i
+ br label %bb1510.i
+bb1477.i: ; preds = %bb1439.i
+ br i1 false, label %bb1503.i, label %bb1490.preheader.i
+bb1490.preheader.i: ; preds = %bb1477.i
+ br i1 false, label %bb1485.i, label %bb1496.i
+bb1485.i: ; preds = %bb1485.i, %bb1490.preheader.i
+ br i1 false, label %bb1485.i, label %bb1496.i
+bb1496.i: ; preds = %bb1485.i, %bb1490.preheader.i
+ br label %bb1510.i
+bb1499.i: ; preds = %bb1439.i
+ br label %bb1503.i
+bb1503.i: ; preds = %bb1499.i, %bb1477.i
+ br label %bb1510.i
+bb1510.i: ; preds = %bb1503.i, %bb1496.i, %bb1469.i, %bb1461.i
+ switch i32 0, label %bb1574.i [
+ i32 31744, label %bb1536.i
+ i32 0, label %bb1552.i
+ ]
+bb1536.i: ; preds = %bb1510.i
+ br i1 false, label %bb1544.i, label %bb1585.i
+bb1544.i: ; preds = %bb1536.i
+ br label %bb1585.i
+bb1552.i: ; preds = %bb1510.i
+ br i1 false, label %bb1578.i, label %bb1565.preheader.i
+bb1565.preheader.i: ; preds = %bb1552.i
+ br i1 false, label %bb1560.i, label %bb1571.i
+bb1560.i: ; preds = %bb1560.i, %bb1565.preheader.i
+ br i1 false, label %bb1560.i, label %bb1571.i
+bb1571.i: ; preds = %bb1560.i, %bb1565.preheader.i
+ br label %bb1585.i
+bb1574.i: ; preds = %bb1510.i
+ br label %bb1578.i
+bb1578.i: ; preds = %bb1574.i, %bb1552.i
+ br label %bb1585.i
+bb1585.i: ; preds = %bb1578.i, %bb1571.i, %bb1544.i, %bb1536.i
+ switch i32 0, label %bb1649.i [
+ i32 31744, label %bb1611.i
+ i32 0, label %bb1627.i
+ ]
+bb1611.i: ; preds = %bb1585.i
+ br i1 false, label %bb1619.i, label %bb1660.i
+bb1619.i: ; preds = %bb1611.i
+ br label %bb1660.i
+bb1627.i: ; preds = %bb1585.i
+ br i1 false, label %bb1653.i, label %bb1640.preheader.i
+bb1640.preheader.i: ; preds = %bb1627.i
+ br i1 false, label %bb1635.i, label %bb1646.i
+bb1635.i: ; preds = %bb1635.i, %bb1640.preheader.i
+ br i1 false, label %bb1635.i, label %bb1646.i
+bb1646.i: ; preds = %bb1635.i, %bb1640.preheader.i
+ br label %bb1660.i
+bb1649.i: ; preds = %bb1585.i
+ br label %bb1653.i
+bb1653.i: ; preds = %bb1649.i, %bb1627.i
+ br label %bb1660.i
+bb1660.i: ; preds = %bb1653.i, %bb1646.i, %bb1619.i, %bb1611.i
+ switch i32 0, label %bb1724.i [
+ i32 31744, label %bb1686.i
+ i32 0, label %bb1702.i
+ ]
+bb1686.i: ; preds = %bb1660.i
+ br i1 false, label %bb1694.i, label %bb1735.i
+bb1694.i: ; preds = %bb1686.i
+ br label %bb1937.i
+bb1702.i: ; preds = %bb1660.i
+ br i1 false, label %bb1728.i, label %bb1715.preheader.i
+bb1715.preheader.i: ; preds = %bb1702.i
+ br i1 false, label %bb1710.i, label %bb1721.i
+bb1710.i: ; preds = %bb1710.i, %bb1715.preheader.i
+ br i1 false, label %bb1710.i, label %bb1721.i
+bb1721.i: ; preds = %bb1710.i, %bb1715.preheader.i
+ br label %bb1937.i
+bb1724.i: ; preds = %bb1660.i
+ br label %bb1728.i
+bb1728.i: ; preds = %bb1724.i, %bb1702.i
+ br label %bb1937.i
+bb1735.i: ; preds = %bb1686.i
+ br label %bb1937.i
+bb1740.i: ; preds = %bb1416.i, %bb1416.i
+ br label %bb1937.i
+bb1793.i: ; preds = %bb1416.i
+ br label %bb1937.i
+bb1830.i: ; preds = %bb1416.i, %bb1416.i
+ br label %bb1937.i
+bb1863.i: ; preds = %bb1416.i, %bb1416.i
+ br label %bb1937.i
+bb1900.i: ; preds = %bb1416.i
+ br label %bb1937.i
+bb1937.i: ; preds = %bb1900.i, %bb1863.i, %bb1830.i, %bb1793.i, %bb1740.i, %bb1735.i, %bb1728.i, %bb1721.i, %bb1694.i, %bb1418.i, %bb1388.i, %bb1360.i, %bb1332.i, %bb1304.i, %bb1264.i, %bb1259.i, %bb1252.i, %bb1245.i, %bb1218.i, %bb1022.i, %bb1010.i, %bb1000.i, %bb986.i, %bb981.i, %bb974.i, %bb967.i, %bb940.i, %bb904.i, %bb783.i, %bb778.i, %bb772.i, %bb662.i
+ switch i32 %sf4083.0.i, label %bb2321.i [
+ i32 0, label %bb2027.i
+ i32 1, label %bb2081.i
+ i32 2, label %bb2161.i
+ i32 3, label %bb2241.i
+ i32 8, label %bb1939.i
+ i32 9, label %bb1939.i
+ i32 10, label %bb1957.i
+ i32 11, label %bb1975.i
+ i32 16, label %bb1939.i
+ ]
+bb1939.i: ; preds = %bb1937.i, %bb1937.i, %bb1937.i
+ switch i32 0, label %bb2321.i [
+ i32 3, label %bb1956.i
+ i32 4, label %bb1956.i
+ i32 11, label %bb1956.i
+ ]
+bb1956.i: ; preds = %bb1939.i, %bb1939.i, %bb1939.i
+ br label %bb2337.i
+bb1957.i: ; preds = %bb1937.i
+ switch i32 0, label %bb1975.i [
+ i32 3, label %bb1974.i
+ i32 4, label %bb1974.i
+ i32 11, label %bb1974.i
+ ]
+bb1974.i: ; preds = %bb1957.i, %bb1957.i, %bb1957.i
+ br label %bb1975.i
+bb1975.i: ; preds = %bb1974.i, %bb1957.i, %bb1937.i
+ switch i32 0, label %bb2001.i [
+ i32 1, label %bb1992.i
+ i32 4, label %bb1992.i
+ i32 8, label %bb1992.i
+ ]
+bb1992.i: ; preds = %bb1975.i, %bb1975.i, %bb1975.i
+ br label %bb2001.i
+bb2001.i: ; preds = %bb1992.i, %bb1975.i
+ switch i32 0, label %bb2321.i [
+ i32 2, label %bb2018.i
+ i32 4, label %bb2018.i
+ i32 8, label %bb2018.i
+ ]
+bb2018.i: ; preds = %bb2001.i, %bb2001.i, %bb2001.i
+ br label %bb2321.i
+bb2027.i: ; preds = %bb1937.i
+ switch i32 0, label %bb2045.i [
+ i32 1, label %bb2044.i
+ i32 4, label %bb2044.i
+ i32 8, label %bb2044.i
+ ]
+bb2044.i: ; preds = %bb2027.i, %bb2027.i, %bb2027.i
+ br label %bb2045.i
+bb2045.i: ; preds = %bb2044.i, %bb2027.i
+ switch i32 0, label %bb2063.i [
+ i32 2, label %bb2062.i
+ i32 4, label %bb2062.i
+ i32 8, label %bb2062.i
+ ]
+bb2062.i: ; preds = %bb2045.i, %bb2045.i, %bb2045.i
+ br label %bb2063.i
+bb2063.i: ; preds = %bb2062.i, %bb2045.i
+ switch i32 0, label %bb2321.i [
+ i32 3, label %bb2080.i
+ i32 4, label %bb2080.i
+ i32 11, label %bb2080.i
+ ]
+bb2080.i: ; preds = %bb2063.i, %bb2063.i, %bb2063.i
+ br label %bb2321.i
+bb2081.i: ; preds = %bb1937.i
+ switch i32 0, label %bb2100.i [
+ i32 1, label %bb2098.i
+ i32 4, label %bb2098.i
+ i32 8, label %bb2098.i
+ ]
+bb2098.i: ; preds = %bb2081.i, %bb2081.i, %bb2081.i
+ br label %bb2100.i
+bb2100.i: ; preds = %bb2098.i, %bb2081.i
+ switch i32 0, label %bb2125.i [
+ i32 4, label %bb2124.i
+ i32 8, label %bb2124.i
+ i32 0, label %bb2124.i
+ i32 11, label %bb2124.i
+ ]
+bb2124.i: ; preds = %bb2100.i, %bb2100.i, %bb2100.i, %bb2100.i
+ br label %bb2125.i
+bb2125.i: ; preds = %bb2124.i, %bb2100.i
+ switch i32 0, label %bb2143.i [
+ i32 2, label %bb2142.i
+ i32 4, label %bb2142.i
+ i32 8, label %bb2142.i
+ ]
+bb2142.i: ; preds = %bb2125.i, %bb2125.i, %bb2125.i
+ br label %bb2143.i
+bb2143.i: ; preds = %bb2142.i, %bb2125.i
+ switch i32 0, label %bb2321.i [
+ i32 3, label %bb2160.i
+ i32 4, label %bb2160.i
+ i32 11, label %bb2160.i
+ ]
+bb2160.i: ; preds = %bb2143.i, %bb2143.i, %bb2143.i
+ br label %bb2321.i
+bb2161.i: ; preds = %bb1937.i
+ switch i32 0, label %bb2180.i [
+ i32 2, label %bb2178.i
+ i32 4, label %bb2178.i
+ i32 8, label %bb2178.i
+ ]
+bb2178.i: ; preds = %bb2161.i, %bb2161.i, %bb2161.i
+ br label %bb2180.i
+bb2180.i: ; preds = %bb2178.i, %bb2161.i
+ switch i32 0, label %bb2205.i [
+ i32 4, label %bb2204.i
+ i32 8, label %bb2204.i
+ i32 0, label %bb2204.i
+ i32 11, label %bb2204.i
+ ]
+bb2204.i: ; preds = %bb2180.i, %bb2180.i, %bb2180.i, %bb2180.i
+ br label %bb2205.i
+bb2205.i: ; preds = %bb2204.i, %bb2180.i
+ switch i32 0, label %bb2223.i [
+ i32 1, label %bb2222.i
+ i32 4, label %bb2222.i
+ i32 8, label %bb2222.i
+ ]
+bb2222.i: ; preds = %bb2205.i, %bb2205.i, %bb2205.i
+ br label %bb2223.i
+bb2223.i: ; preds = %bb2222.i, %bb2205.i
+ switch i32 0, label %bb2321.i [
+ i32 3, label %bb2240.i
+ i32 4, label %bb2240.i
+ i32 11, label %bb2240.i
+ ]
+bb2240.i: ; preds = %bb2223.i, %bb2223.i, %bb2223.i
+ br label %bb2321.i
+bb2241.i: ; preds = %bb1937.i
+ switch i32 0, label %bb2260.i [
+ i32 3, label %bb2258.i
+ i32 4, label %bb2258.i
+ i32 11, label %bb2258.i
+ ]
+bb2258.i: ; preds = %bb2241.i, %bb2241.i, %bb2241.i
+ br label %bb2260.i
+bb2260.i: ; preds = %bb2258.i, %bb2241.i
+ switch i32 0, label %bb2285.i [
+ i32 4, label %bb2284.i
+ i32 11, label %bb2284.i
+ i32 0, label %bb2284.i
+ i32 8, label %bb2284.i
+ ]
+bb2284.i: ; preds = %bb2260.i, %bb2260.i, %bb2260.i, %bb2260.i
+ br label %bb2285.i
+bb2285.i: ; preds = %bb2284.i, %bb2260.i
+ switch i32 0, label %bb2303.i [
+ i32 1, label %bb2302.i
+ i32 4, label %bb2302.i
+ i32 8, label %bb2302.i
+ ]
+bb2302.i: ; preds = %bb2285.i, %bb2285.i, %bb2285.i
+ br label %bb2303.i
+bb2303.i: ; preds = %bb2302.i, %bb2285.i
+ switch i32 0, label %bb2321.i [
+ i32 2, label %bb2320.i
+ i32 4, label %bb2320.i
+ i32 8, label %bb2320.i
+ ]
+bb2320.i: ; preds = %bb2303.i, %bb2303.i, %bb2303.i
+ br label %bb2321.i
+bb2321.i: ; preds = %bb2320.i, %bb2303.i, %bb2240.i, %bb2223.i, %bb2160.i, %bb2143.i, %bb2080.i, %bb2063.i, %bb2018.i, %bb2001.i, %bb1939.i, %bb1937.i
+ br label %bb2337.i
+bb2337.i: ; preds = %bb2321.i, %bb1956.i
+ br label %bb2353.i
+bb2353.i: ; preds = %bb2337.i
+ br label %bb2369.i
+bb2369.i: ; preds = %bb2353.i
+ br label %bb2385.i
+bb2385.i: ; preds = %bb2369.i
+ br i1 false, label %bb2388.i, label %bb2394.i
+bb2388.i: ; preds = %bb2385.i
+ br label %bb2600.i
+bb2394.i: ; preds = %bb2385.i
+ switch i32 0, label %bb2600.i [
+ i32 0, label %bb2504.i
+ i32 1, label %bb2528.i
+ i32 2, label %bb2552.i
+ i32 3, label %bb2576.i
+ i32 4, label %bb2396.i
+ i32 8, label %bb2420.i
+ i32 11, label %bb2480.i
+ ]
+bb2396.i: ; preds = %bb2394.i
+ br i1 false, label %bb2411.i, label %bb2399.i
+bb2399.i: ; preds = %bb2396.i
+ br i1 false, label %bb2420.i, label %bb2405.i
+bb2405.i: ; preds = %bb2399.i
+ br i1 false, label %bb2410.i, label %bb2420.i
+bb2410.i: ; preds = %bb2405.i
+ br i1 false, label %bb2459.i, label %bb2423.i
+bb2411.i: ; preds = %bb2396.i
+ br i1 false, label %bb2420.i, label %bb2414.i
+bb2414.i: ; preds = %bb2411.i
+ br i1 false, label %bb2419.i, label %bb2420.i
+bb2419.i: ; preds = %bb2414.i
+ br label %bb2420.i
+bb2420.i: ; preds = %bb2419.i, %bb2414.i, %bb2411.i, %bb2405.i, %bb2399.i, %bb2394.i
+ br i1 false, label %bb2459.i, label %bb2423.i
+bb2423.i: ; preds = %bb2420.i, %bb2410.i
+ br i1 false, label %bb2435.i, label %bb2429.i
+bb2429.i: ; preds = %bb2423.i
+ br i1 false, label %bb2434.i, label %bb2435.i
+bb2434.i: ; preds = %bb2429.i
+ br label %bb2435.i
+bb2435.i: ; preds = %bb2434.i, %bb2429.i, %bb2423.i
+ br i1 false, label %bb2447.i, label %bb2441.i
+bb2441.i: ; preds = %bb2435.i
+ br i1 false, label %bb2446.i, label %bb2447.i
+bb2446.i: ; preds = %bb2441.i
+ br label %bb2447.i
+bb2447.i: ; preds = %bb2446.i, %bb2441.i, %bb2435.i
+ br i1 false, label %bb2600.i, label %bb2453.i
+bb2453.i: ; preds = %bb2447.i
+ br i1 false, label %bb2458.i, label %bb2600.i
+bb2458.i: ; preds = %bb2453.i
+ br label %bb2793.i
+bb2459.i: ; preds = %bb2420.i, %bb2410.i
+ br i1 false, label %bb2600.i, label %bb2462.i
+bb2462.i: ; preds = %bb2459.i
+ br i1 false, label %bb2479.i, label %bb2600.i
+bb2479.i: ; preds = %bb2462.i
+ br label %bb2600.i
+bb2480.i: ; preds = %bb2394.i
+ br i1 false, label %bb2495.i, label %bb2483.i
+bb2483.i: ; preds = %bb2480.i
+ br i1 false, label %bb2504.i, label %bb2489.i
+bb2489.i: ; preds = %bb2483.i
+ br i1 false, label %bb2494.i, label %bb2504.i
+bb2494.i: ; preds = %bb2489.i
+ br i1 false, label %bb2519.i, label %bb2507.i
+bb2495.i: ; preds = %bb2480.i
+ br i1 false, label %bb2504.i, label %bb2498.i
+bb2498.i: ; preds = %bb2495.i
+ br i1 false, label %bb2503.i, label %bb2504.i
+bb2503.i: ; preds = %bb2498.i
+ br label %bb2504.i
+bb2504.i: ; preds = %bb2503.i, %bb2498.i, %bb2495.i, %bb2489.i, %bb2483.i, %bb2394.i
+ br i1 false, label %bb2519.i, label %bb2507.i
+bb2507.i: ; preds = %bb2504.i, %bb2494.i
+ br i1 false, label %bb2600.i, label %bb2513.i
+bb2513.i: ; preds = %bb2507.i
+ br i1 false, label %bb2518.i, label %bb2600.i
+bb2518.i: ; preds = %bb2513.i
+ br label %bb2600.i
+bb2519.i: ; preds = %bb2504.i, %bb2494.i
+ br i1 false, label %bb2600.i, label %bb2522.i
+bb2522.i: ; preds = %bb2519.i
+ br i1 false, label %bb2527.i, label %bb2600.i
+bb2527.i: ; preds = %bb2522.i
+ br label %bb2600.i
+bb2528.i: ; preds = %bb2394.i
+ br i1 false, label %bb2543.i, label %bb2531.i
+bb2531.i: ; preds = %bb2528.i
+ br i1 false, label %bb2600.i, label %bb2537.i
+bb2537.i: ; preds = %bb2531.i
+ br i1 false, label %bb2542.i, label %bb2600.i
+bb2542.i: ; preds = %bb2537.i
+ br label %bb2600.i
+bb2543.i: ; preds = %bb2528.i
+ br i1 false, label %bb2600.i, label %bb2546.i
+bb2546.i: ; preds = %bb2543.i
+ br i1 false, label %bb2551.i, label %bb2600.i
+bb2551.i: ; preds = %bb2546.i
+ br label %bb2600.i
+bb2552.i: ; preds = %bb2394.i
+ br i1 false, label %bb2567.i, label %bb2555.i
+bb2555.i: ; preds = %bb2552.i
+ br i1 false, label %bb2600.i, label %bb2561.i
+bb2561.i: ; preds = %bb2555.i
+ br i1 false, label %bb2566.i, label %bb2600.i
+bb2566.i: ; preds = %bb2561.i
+ br label %bb2600.i
+bb2567.i: ; preds = %bb2552.i
+ br i1 false, label %bb2600.i, label %bb2570.i
+bb2570.i: ; preds = %bb2567.i
+ br i1 false, label %bb2575.i, label %bb2600.i
+bb2575.i: ; preds = %bb2570.i
+ br label %bb2600.i
+bb2576.i: ; preds = %bb2394.i
+ br i1 false, label %bb2591.i, label %bb2579.i
+bb2579.i: ; preds = %bb2576.i
+ br i1 false, label %bb2600.i, label %bb2585.i
+bb2585.i: ; preds = %bb2579.i
+ br i1 false, label %bb2590.i, label %bb2600.i
+bb2590.i: ; preds = %bb2585.i
+ br label %bb2600.i
+bb2591.i: ; preds = %bb2576.i
+ br i1 false, label %bb2600.i, label %bb2594.i
+bb2594.i: ; preds = %bb2591.i
+ br i1 false, label %bb2599.i, label %bb2600.i
+bb2599.i: ; preds = %bb2594.i
+ br label %bb2600.i
+bb2600.i: ; preds = %bb2599.i, %bb2594.i, %bb2591.i, %bb2590.i, %bb2585.i, %bb2579.i, %bb2575.i, %bb2570.i, %bb2567.i, %bb2566.i, %bb2561.i, %bb2555.i, %bb2551.i, %bb2546.i, %bb2543.i, %bb2542.i, %bb2537.i, %bb2531.i, %bb2527.i, %bb2522.i, %bb2519.i, %bb2518.i, %bb2513.i, %bb2507.i, %bb2479.i, %bb2462.i, %bb2459.i, %bb2453.i, %bb2447.i, %bb2394.i, %bb2388.i
+ br label %bb2793.i
+bb2793.i: ; preds = %bb2600.i, %bb2458.i, %bb656.i, %bb650.i, %bb559.i
+ switch i32 0, label %bb2883.i [
+ i32 3, label %bb2874.i
+ i32 4, label %bb2795.i
+ i32 8, label %bb2810.i
+ i32 10, label %bb2834.i
+ i32 11, label %bb2819.i
+ i32 16, label %bb2810.i
+ ]
+bb2795.i: ; preds = %bb2793.i, %bb661.i
+ br label %bb2810.i
+bb2810.i: ; preds = %bb2795.i, %bb2793.i, %bb2793.i, %bb661.i, %bb661.i
+ br label %bb2883.i
+bb2819.i: ; preds = %bb2793.i, %bb661.i
+ br label %bb2834.i
+bb2834.i: ; preds = %bb2819.i, %bb2793.i, %bb661.i
+ switch i32 0, label %bb2860.i [
+ i32 4, label %bb2846.i
+ i32 8, label %bb2846.i
+ ]
+bb2846.i: ; preds = %bb2834.i, %bb2834.i
+ br i1 false, label %bb2859.i, label %bb2860.i
+bb2859.i: ; preds = %bb2846.i
+ br label %bb2860.i
+bb2860.i: ; preds = %bb2859.i, %bb2846.i, %bb2834.i
+ switch i32 %df4081.0.i, label %bb2867.bb2883_crit_edge.i [
+ i32 1, label %bb2883.i
+ i32 2, label %bb2872.i
+ ]
+bb2867.bb2883_crit_edge.i: ; preds = %bb2860.i
+ br label %bb2883.i
+bb2872.i: ; preds = %bb2860.i
+ switch i32 0, label %UnifiedReturnBlock.i235 [
+ i32 3, label %bb3253.i
+ i32 4, label %bb4173.i
+ i32 8, label %bb3485.i
+ i32 10, label %bb3253.i
+ i32 11, label %bb3021.i
+ i32 16, label %bb2885.i
+ ]
+bb2874.i: ; preds = %bb2793.i, %bb661.i
+ br label %bb2883.i
+bb2883.i: ; preds = %bb2874.i, %bb2867.bb2883_crit_edge.i, %bb2860.i, %bb2810.i, %bb2793.i, %bb661.i
+ %f_alpha.1.i = phi i32 [ 0, %bb2867.bb2883_crit_edge.i ], [ 0, %bb2874.i ], [ 1065353216, %bb661.i ], [ 0, %bb2793.i ], [ 0, %bb2810.i ], [ 0, %bb2860.i ] ; <i32> [#uses=1]
+ switch i32 0, label %UnifiedReturnBlock.i235 [
+ i32 3, label %bb3253.i
+ i32 4, label %bb4173.i
+ i32 8, label %bb3485.i
+ i32 10, label %bb3253.i
+ i32 11, label %bb3021.i
+ i32 16, label %bb2885.i
+ ]
+bb2885.i: ; preds = %bb2883.i, %bb2872.i
+ br i1 false, label %bb3011.i, label %bb2890.i
+bb2890.i: ; preds = %bb2885.i
+ br i1 false, label %bb2960.i, label %bb2954.i
+bb2954.i: ; preds = %bb2890.i
+ br i1 false, label %bb2959.i, label %bb2960.i
+bb2959.i: ; preds = %bb2954.i
+ br label %bb2960.i
+bb2960.i: ; preds = %bb2959.i, %bb2954.i, %bb2890.i
+ br i1 false, label %bb2972.i, label %bb2966.i
+bb2966.i: ; preds = %bb2960.i
+ br i1 false, label %bb2971.i, label %bb2972.i
+bb2971.i: ; preds = %bb2966.i
+ br label %bb2972.i
+bb2972.i: ; preds = %bb2971.i, %bb2966.i, %bb2960.i
+ br label %glgScalarFloatConversion.exit
+bb3011.i: ; preds = %bb2885.i
+ br label %glgScalarFloatConversion.exit
+bb3021.i: ; preds = %bb2883.i, %bb2872.i
+ switch i32 %dt4080.0.i, label %bb3192.i [
+ i32 7, label %bb3026.i
+ i32 6, label %bb3037.i
+ i32 1, label %bb3125.i
+ i32 3, label %bb3125.i
+ i32 5, label %bb3144.i
+ ]
+bb3026.i: ; preds = %bb3021.i
+ br label %bb3258.i
+bb3037.i: ; preds = %bb3021.i
+ br i1 false, label %bb3052.i, label %bb3074.i
+bb3052.i: ; preds = %bb3037.i
+ br i1 false, label %bb3105.i, label %bb3069.i
+bb3069.i: ; preds = %bb3052.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3074.i: ; preds = %bb3037.i
+ br i1 false, label %bb3079.i, label %bb3092.i
+bb3079.i: ; preds = %bb3074.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3092.i: ; preds = %bb3074.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3105.i: ; preds = %bb3052.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3125.i: ; preds = %bb3021.i, %bb3021.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3144.i: ; preds = %bb3021.i
+ br label %bb3376.i
+bb3192.i: ; preds = %bb3021.i
+ br i1 false, label %bb3197.i, label %bb3243.i
+bb3197.i: ; preds = %bb3192.i
+ br label %bb3424.i
+bb3243.i: ; preds = %bb3192.i
+ br label %bb3253.i
+bb3253.i: ; preds = %bb3243.i, %bb2883.i, %bb2883.i, %bb2872.i, %bb2872.i
+ switch i32 %dt4080.0.i, label %bb3424.i [
+ i32 7, label %bb3258.i
+ i32 6, label %bb3269.i
+ i32 1, label %bb3357.i
+ i32 3, label %bb3357.i
+ i32 5, label %bb3376.i
+ ]
+bb3258.i: ; preds = %bb3253.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i, %bb3026.i
+ br label %glgScalarFloatConversion.exit
+bb3269.i: ; preds = %bb3253.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i
+ br i1 false, label %bb3284.i, label %bb3306.i
+bb3284.i: ; preds = %bb3269.i
+ br i1 false, label %bb3337.i, label %bb3301.i
+bb3301.i: ; preds = %bb3284.i
+ br label %glgScalarFloatConversion.exit
+bb3306.i: ; preds = %bb3269.i
+ br i1 false, label %bb3311.i, label %bb3324.i
+bb3311.i: ; preds = %bb3306.i
+ br label %glgScalarFloatConversion.exit
+bb3324.i: ; preds = %bb3306.i
+ br label %glgScalarFloatConversion.exit
+bb3337.i: ; preds = %bb3284.i
+ br label %glgScalarFloatConversion.exit
+bb3357.i: ; preds = %bb3253.i, %bb3253.i, %bb3125.i, %bb3125.i, %bb3105.i, %bb3105.i, %bb3092.i, %bb3092.i, %bb3079.i, %bb3079.i, %bb3069.i, %bb3069.i
+ br label %glgScalarFloatConversion.exit
+bb3376.i: ; preds = %bb3253.i, %bb3144.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i
+ br label %glgScalarFloatConversion.exit
+bb3424.i: ; preds = %bb3253.i, %bb3197.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i
+ br i1 false, label %bb3429.i, label %bb3475.i
+bb3429.i: ; preds = %bb3424.i
+ br label %glgScalarFloatConversion.exit
+bb3475.i: ; preds = %bb3424.i
+ br label %glgScalarFloatConversion.exit
+bb3485.i: ; preds = %bb2883.i, %bb2872.i
+ switch i32 %dt4080.0.i, label %bb4077.i [
+ i32 7, label %bb3490.i
+ i32 6, label %bb3511.i
+ i32 1, label %bb3749.i
+ i32 3, label %bb3749.i
+ i32 5, label %bb3794.i
+ i32 4, label %bb3941.i
+ ]
+bb3490.i: ; preds = %bb3485.i
+ br label %glgScalarFloatConversion.exit
+bb3511.i: ; preds = %bb3485.i
+ br i1 false, label %bb3526.i, label %bb3548.i
+bb3526.i: ; preds = %bb3511.i
+ br i1 false, label %bb3579.i, label %bb3543.i
+bb3543.i: ; preds = %bb3526.i
+ br label %bb3579.i
+bb3548.i: ; preds = %bb3511.i
+ br i1 false, label %bb3553.i, label %bb3566.i
+bb3553.i: ; preds = %bb3548.i
+ br label %bb3579.i
+bb3566.i: ; preds = %bb3548.i
+ br label %bb3579.i
+bb3579.i: ; preds = %bb3566.i, %bb3553.i, %bb3543.i, %bb3526.i
+ br i1 false, label %bb3601.i, label %bb3623.i
+bb3601.i: ; preds = %bb3579.i
+ br i1 false, label %bb3654.i, label %bb3618.i
+bb3618.i: ; preds = %bb3601.i
+ br label %bb3654.i
+bb3623.i: ; preds = %bb3579.i
+ br i1 false, label %bb3628.i, label %bb3641.i
+bb3628.i: ; preds = %bb3623.i
+ br label %bb3654.i
+bb3641.i: ; preds = %bb3623.i
+ br label %bb3654.i
+bb3654.i: ; preds = %bb3641.i, %bb3628.i, %bb3618.i, %bb3601.i
+ br i1 false, label %bb3676.i, label %bb3698.i
+bb3676.i: ; preds = %bb3654.i
+ br i1 false, label %bb3729.i, label %bb3693.i
+bb3693.i: ; preds = %bb3676.i
+ br label %glgScalarFloatConversion.exit
+bb3698.i: ; preds = %bb3654.i
+ br i1 false, label %bb3703.i, label %bb3716.i
+bb3703.i: ; preds = %bb3698.i
+ br label %glgScalarFloatConversion.exit
+bb3716.i: ; preds = %bb3698.i
+ br label %glgScalarFloatConversion.exit
+bb3729.i: ; preds = %bb3676.i
+ br label %glgScalarFloatConversion.exit
+bb3749.i: ; preds = %bb3485.i, %bb3485.i
+ br label %glgScalarFloatConversion.exit
+bb3794.i: ; preds = %bb3485.i
+ br label %glgScalarFloatConversion.exit
+bb3941.i: ; preds = %bb3485.i
+ br label %glgScalarFloatConversion.exit
+bb4077.i: ; preds = %bb3485.i
+ br i1 false, label %bb4083.i, label %bb4111.i
+bb4083.i: ; preds = %bb4077.i
+ br label %glgScalarFloatConversion.exit
+bb4111.i: ; preds = %bb4077.i
+ br i1 false, label %bb4117.i, label %bb4145.i
+bb4117.i: ; preds = %bb4111.i
+ br label %glgScalarFloatConversion.exit
+bb4145.i: ; preds = %bb4111.i
+ br label %glgScalarFloatConversion.exit
+bb4173.i: ; preds = %bb2883.i, %bb2872.i
+ %f_red.0.reg2mem.4.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=2]
+ %f_green.0.reg2mem.2.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=1]
+ %f_blue.0.reg2mem.2.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=1]
+ %f_alpha.1.reg2mem.1.i = phi i32 [ 0, %bb2872.i ], [ %f_alpha.1.i, %bb2883.i ] ; <i32> [#uses=1]
+ switch i32 %dt4080.0.i, label %bb4950.i [
+ i32 7, label %bb4178.i
+ i32 6, label %bb4204.i
+ i32 1, label %bb4517.i202
+ i32 3, label %bb4517.i202
+ i32 5, label %bb4575.i
+ i32 4, label %bb4769.i
+ ]
+bb4178.i: ; preds = %bb4173.i
+ br label %glgScalarFloatConversion.exit
+bb4204.i: ; preds = %bb4173.i
+ %tmp4210.i = and i32 0, 32768 ; <i32> [#uses=4]
+ %tmp4212.i = and i32 %f_red.0.reg2mem.4.i, 2139095040 ; <i32> [#uses=1]
+ %tmp4214.i = and i32 %f_red.0.reg2mem.4.i, 8388607 ; <i32> [#uses=1]
+ br i1 false, label %bb4219.i, label %bb4241.i
+bb4219.i: ; preds = %bb4204.i
+ br i1 false, label %bb4272.i, label %bb4236.i
+bb4236.i: ; preds = %bb4219.i
+ br label %bb4272.i
+bb4241.i: ; preds = %bb4204.i
+ br i1 false, label %bb4246.i, label %bb4259.i
+bb4246.i: ; preds = %bb4241.i
+ %tmp4253.i = lshr i32 %tmp4214.i, 0 ; <i32> [#uses=1]
+ %tmp4253.masked.i = and i32 %tmp4253.i, 65535 ; <i32> [#uses=1]
+ br label %bb4272.i
+bb4259.i: ; preds = %bb4241.i
+ %tmp4261.i187 = add i32 %tmp4212.i, 134217728 ; <i32> [#uses=1]
+ %tmp4262.i188 = lshr i32 %tmp4261.i187, 13 ; <i32> [#uses=1]
+ %tmp4262.masked.i = and i32 %tmp4262.i188, 64512 ; <i32> [#uses=1]
+ %tmp42665693.masked.i = or i32 %tmp4262.masked.i, %tmp4210.i ; <i32> [#uses=1]
+ br label %bb4272.i
+bb4272.i: ; preds = %bb4259.i, %bb4246.i, %bb4236.i, %bb4219.i
+ %tmp42665693.masked.pn.i = phi i32 [ %tmp42665693.masked.i, %bb4259.i ], [ %tmp4253.masked.i, %bb4246.i ], [ %tmp4210.i, %bb4236.i ], [ %tmp4210.i, %bb4219.i ] ; <i32> [#uses=1]
+ %tmp4268.pn.i = phi i32 [ 0, %bb4259.i ], [ %tmp4210.i, %bb4246.i ], [ 31744, %bb4236.i ], [ 32767, %bb4219.i ] ; <i32> [#uses=1]
+ %tmp100.0.i = or i32 %tmp4268.pn.i, %tmp42665693.masked.pn.i ; <i32> [#uses=0]
+ %tmp4289.i = and i32 %f_green.0.reg2mem.2.i, 8388607 ; <i32> [#uses=1]
+ br i1 false, label %bb4294.i, label %bb4316.i
+bb4294.i: ; preds = %bb4272.i
+ br i1 false, label %bb4347.i, label %bb4311.i
+bb4311.i: ; preds = %bb4294.i
+ br label %bb4347.i
+bb4316.i: ; preds = %bb4272.i
+ br i1 false, label %bb4321.i, label %bb4334.i
+bb4321.i: ; preds = %bb4316.i
+ br label %bb4347.i
+bb4334.i: ; preds = %bb4316.i
+ %tmp4343.i = lshr i32 %tmp4289.i, 13 ; <i32> [#uses=0]
+ br label %bb4347.i
+bb4347.i: ; preds = %bb4334.i, %bb4321.i, %bb4311.i, %bb4294.i
+ %tmp4364.i190 = and i32 %f_blue.0.reg2mem.2.i, 8388607 ; <i32> [#uses=1]
+ br i1 false, label %bb4369.i192, label %bb4391.i
+bb4369.i192: ; preds = %bb4347.i
+ br i1 false, label %bb4422.i, label %bb4386.i
+bb4386.i: ; preds = %bb4369.i192
+ br label %bb4422.i
+bb4391.i: ; preds = %bb4347.i
+ br i1 false, label %bb4396.i, label %bb4409.i
+bb4396.i: ; preds = %bb4391.i
+ br label %bb4422.i
+bb4409.i: ; preds = %bb4391.i
+ %tmp4418.i = lshr i32 %tmp4364.i190, 13 ; <i32> [#uses=0]
+ br label %bb4422.i
+bb4422.i: ; preds = %bb4409.i, %bb4396.i, %bb4386.i, %bb4369.i192
+ %tmp4439.i194 = and i32 %f_alpha.1.reg2mem.1.i, 8388607 ; <i32> [#uses=1]
+ br i1 false, label %bb4444.i, label %bb4466.i
+bb4444.i: ; preds = %bb4422.i
+ br i1 false, label %bb4497.i, label %bb4461.i
+bb4461.i: ; preds = %bb4444.i
+ br label %glgScalarFloatConversion.exit
+bb4466.i: ; preds = %bb4422.i
+ br i1 false, label %bb4471.i, label %bb4484.i
+bb4471.i: ; preds = %bb4466.i
+ br label %glgScalarFloatConversion.exit
+bb4484.i: ; preds = %bb4466.i
+ %tmp4493.i = lshr i32 %tmp4439.i194, 13 ; <i32> [#uses=0]
+ br label %glgScalarFloatConversion.exit
+bb4497.i: ; preds = %bb4444.i
+ br label %glgScalarFloatConversion.exit
+bb4517.i202: ; preds = %bb4173.i, %bb4173.i
+ br label %glgScalarFloatConversion.exit
+bb4575.i: ; preds = %bb4173.i
+ br label %glgScalarFloatConversion.exit
+bb4769.i: ; preds = %bb4173.i
+ br label %glgScalarFloatConversion.exit
+bb4950.i: ; preds = %bb4173.i
+ br i1 false, label %bb4956.i, label %bb4993.i
+bb4956.i: ; preds = %bb4950.i
+ br label %glgScalarFloatConversion.exit
+bb4993.i: ; preds = %bb4950.i
+ br i1 false, label %bb4999.i, label %bb5036.i
+bb4999.i: ; preds = %bb4993.i
+ br label %glgScalarFloatConversion.exit
+bb5036.i: ; preds = %bb4993.i
+ br label %glgScalarFloatConversion.exit
+UnifiedReturnBlock.i235: ; preds = %bb2883.i, %bb2872.i
+ br label %glgScalarFloatConversion.exit
+glgScalarFloatConversion.exit: ; preds = %UnifiedReturnBlock.i235, %bb5036.i, %bb4999.i, %bb4956.i, %bb4769.i, %bb4575.i, %bb4517.i202, %bb4497.i, %bb4484.i, %bb4471.i, %bb4461.i, %bb4178.i, %bb4145.i, %bb4117.i, %bb4083.i, %bb3941.i, %bb3794.i, %bb3749.i, %bb3729.i, %bb3716.i, %bb3703.i, %bb3693.i, %bb3490.i, %bb3475.i, %bb3429.i, %bb3376.i, %bb3357.i, %bb3337.i, %bb3324.i, %bb3311.i, %bb3301.i, %bb3258.i, %bb3011.i, %bb2972.i
+ br label %bb18851.i
+bb16697.i: ; preds = %loadColor_BGRA_UI8888R.exit
+ br i1 false, label %bb17749.i, label %bb16700.i
+bb16700.i: ; preds = %bb16697.i
+ switch i32 0, label %bb16829.i [
+ i32 4, label %bb16705.i
+ i32 8, label %bb16743.i
+ i32 11, label %bb16795.i
+ ]
+bb16705.i: ; preds = %bb16700.i
+ switch i32 %df4081.0.i, label %bb17183.i [
+ i32 1, label %bb16710.i
+ i32 2, label %bb16721.i
+ i32 3, label %bb16732.i
+ ]
+bb16710.i: ; preds = %bb16705.i
+ br label %bb17195.i
+bb16721.i: ; preds = %bb16705.i
+ br label %bb17195.i
+bb16732.i: ; preds = %bb16705.i
+ br label %bb17195.i
+bb16743.i: ; preds = %bb16700.i
+ switch i32 0, label %bb16759.i [
+ i32 4, label %bb16755.i
+ i32 11, label %bb16755.i
+ ]
+bb16755.i: ; preds = %bb16743.i, %bb16743.i
+ br label %bb17195.i
+bb16759.i: ; preds = %bb16743.i
+ switch i32 %df4081.0.i, label %bb17183.i [
+ i32 1, label %bb16764.i
+ i32 2, label %bb16775.i
+ i32 3, label %bb16786.i
+ ]
+bb16764.i: ; preds = %bb16759.i
+ br label %bb17195.i
+bb16775.i: ; preds = %bb16759.i
+ br label %bb17195.i
+bb16786.i: ; preds = %bb16759.i
+ br label %bb17195.i
+bb16795.i: ; preds = %bb16700.i
+ switch i32 0, label %bb17183.i [
+ i32 4, label %bb16807.i
+ i32 8, label %bb16807.i
+ i32 3, label %bb16823.i
+ ]
+bb16807.i: ; preds = %bb16795.i, %bb16795.i
+ br label %bb17195.i
+bb16823.i: ; preds = %bb16795.i
+ br label %bb17195.i
+bb16829.i: ; preds = %bb16700.i
+ switch i32 %sf4083.0.i, label %bb17183.i [
+ i32 10, label %bb16834.i
+ i32 0, label %bb16892.i
+ i32 1, label %bb16953.i
+ i32 2, label %bb17037.i
+ i32 3, label %bb17121.i
+ ]
+bb16834.i: ; preds = %bb16829.i
+ switch i32 0, label %bb16878.i [
+ i32 4, label %bb16839.i
+ i32 8, label %bb16858.i
+ i32 11, label %bb16874.i
+ ]
+bb16839.i: ; preds = %bb16834.i
+ br label %bb17195.i
+bb16858.i: ; preds = %bb16834.i
+ br label %bb17195.i
+bb16874.i: ; preds = %bb16834.i
+ br label %bb17195.i
+bb16878.i: ; preds = %bb16834.i
+ br i1 false, label %bb16883.i, label %bb17183.i
+bb16883.i: ; preds = %bb16878.i
+ br label %bb17195.i
+bb16892.i: ; preds = %bb16829.i
+ switch i32 0, label %bb16930.i [
+ i32 4, label %bb16897.i
+ i32 8, label %bb16913.i
+ i32 11, label %bb16926.i
+ ]
+bb16897.i: ; preds = %bb16892.i
+ br label %bb17195.i
+bb16913.i: ; preds = %bb16892.i
+ br label %bb17195.i
+bb16926.i: ; preds = %bb16892.i
+ br label %bb17195.i
+bb16930.i: ; preds = %bb16892.i
+ br i1 false, label %bb16936.i, label %bb16939.i
+bb16936.i: ; preds = %bb16930.i
+ br label %bb17195.i
+bb16939.i: ; preds = %bb16930.i
+ br i1 false, label %bb16944.i, label %bb17183.i
+bb16944.i: ; preds = %bb16939.i
+ br label %bb17195.i
+bb16953.i: ; preds = %bb16829.i
+ switch i32 0, label %bb17003.i [
+ i32 4, label %bb16958.i
+ i32 8, label %bb16979.i
+ i32 11, label %bb16997.i
+ ]
+bb16958.i: ; preds = %bb16953.i
+ br label %bb17195.i
+bb16979.i: ; preds = %bb16953.i
+ br label %bb17195.i
+bb16997.i: ; preds = %bb16953.i
+ br label %bb17195.i
+bb17003.i: ; preds = %bb16953.i
+ switch i32 %df4081.0.i, label %bb17183.i [
+ i32 0, label %bb17020.i
+ i32 2, label %bb17020.i
+ i32 10, label %bb17020.i
+ i32 3, label %bb17028.i
+ ]
+bb17020.i: ; preds = %bb17003.i, %bb17003.i, %bb17003.i
+ br label %bb17195.i
+bb17028.i: ; preds = %bb17003.i
+ br label %bb17195.i
+bb17037.i: ; preds = %bb16829.i
+ switch i32 0, label %bb17087.i [
+ i32 4, label %bb17042.i
+ i32 8, label %bb17063.i
+ i32 11, label %bb17081.i
+ ]
+bb17042.i: ; preds = %bb17037.i
+ br label %bb17195.i
+bb17063.i: ; preds = %bb17037.i
+ br label %bb17195.i
+bb17081.i: ; preds = %bb17037.i
+ br label %bb17195.i
+bb17087.i: ; preds = %bb17037.i
+ switch i32 %df4081.0.i, label %bb17183.i [
+ i32 0, label %bb17104.i
+ i32 1, label %bb17104.i
+ i32 10, label %bb17104.i
+ i32 3, label %bb17112.i
+ ]
+bb17104.i: ; preds = %bb17087.i, %bb17087.i, %bb17087.i
+ br label %bb17195.i
+bb17112.i: ; preds = %bb17087.i
+ br label %bb17195.i
+bb17121.i: ; preds = %bb16829.i
+ switch i32 0, label %bb17183.i [
+ i32 4, label %bb17126.i
+ i32 8, label %bb17149.i
+ i32 11, label %bb17167.i
+ i32 10, label %bb17180.i
+ ]
+bb17126.i: ; preds = %bb17121.i
+ br label %bb17195.i
+bb17149.i: ; preds = %bb17121.i
+ br label %bb17195.i
+bb17167.i: ; preds = %bb17121.i
+ br label %bb17195.i
+bb17180.i: ; preds = %bb17121.i
+ br label %bb17183.i
+bb17183.i: ; preds = %bb17180.i, %bb17121.i, %bb17087.i, %bb17003.i, %bb16939.i, %bb16878.i, %bb16829.i, %bb16795.i, %bb16759.i, %bb16705.i
+ br label %bb17195.i
+bb17195.i: ; preds = %bb17183.i, %bb17167.i, %bb17149.i, %bb17126.i, %bb17112.i, %bb17104.i, %bb17081.i, %bb17063.i, %bb17042.i, %bb17028.i, %bb17020.i, %bb16997.i, %bb16979.i, %bb16958.i, %bb16944.i, %bb16936.i, %bb16926.i, %bb16913.i, %bb16897.i, %bb16883.i, %bb16874.i, %bb16858.i, %bb16839.i, %bb16823.i, %bb16807.i, %bb16786.i, %bb16775.i, %bb16764.i, %bb16755.i, %bb16732.i, %bb16721.i, %bb16710.i
+ br i1 false, label %bb18845.i, label %bb17225.i
+bb17225.i: ; preds = %bb17195.i
+ switch i32 %dt4080.0.i, label %bb17677.i [
+ i32 4, label %bb17227.i
+ i32 8, label %bb17259.i
+ i32 9, label %bb17309.i
+ i32 10, label %bb17359.i
+ i32 11, label %bb17359.i
+ i32 14, label %bb17409.i
+ i32 15, label %bb17474.i
+ i32 18, label %bb17539.i
+ i32 19, label %bb17604.i
+ i32 0, label %bb17680.i
+ i32 1, label %bb17672.i
+ i32 2, label %bb17673.i
+ i32 3, label %bb17674.i
+ i32 5, label %bb17675.i
+ i32 12, label %bb17676.i
+ i32 13, label %bb17676.i
+ i32 16, label %bb17680.i
+ i32 17, label %bb17680.i
+ ]
+bb17227.i: ; preds = %bb17225.i
+ br i1 false, label %bb18845.i, label %bb17230.i
+bb17230.i: ; preds = %bb17227.i
+ br label %bb18851.i
+bb17259.i: ; preds = %bb17225.i
+ br i1 false, label %bb17284.i, label %bb17262.i
+bb17262.i: ; preds = %bb17259.i
+ br label %bb17284.i
+bb17284.i: ; preds = %bb17262.i, %bb17259.i
+ br label %bb18851.i
+bb17309.i: ; preds = %bb17225.i
+ br i1 false, label %bb17334.i, label %bb17312.i
+bb17312.i: ; preds = %bb17309.i
+ br label %bb17334.i
+bb17334.i: ; preds = %bb17312.i, %bb17309.i
+ br label %bb18851.i
+bb17359.i: ; preds = %bb17225.i, %bb17225.i
+ br i1 false, label %bb17384.i, label %bb17362.i
+bb17362.i: ; preds = %bb17359.i
+ br label %bb17384.i
+bb17384.i: ; preds = %bb17362.i, %bb17359.i
+ br label %bb18851.i
+bb17409.i: ; preds = %bb17225.i
+ br i1 false, label %bb17441.i, label %bb17412.i
+bb17412.i: ; preds = %bb17409.i
+ br label %bb17441.i
+bb17441.i: ; preds = %bb17412.i, %bb17409.i
+ br label %bb18851.i
+bb17474.i: ; preds = %bb17225.i
+ br i1 false, label %bb17506.i, label %bb17477.i
+bb17477.i: ; preds = %bb17474.i
+ br label %bb17506.i
+bb17506.i: ; preds = %bb17477.i, %bb17474.i
+ br label %bb18851.i
+bb17539.i: ; preds = %bb17225.i
+ br i1 false, label %bb17571.i, label %bb17542.i
+bb17542.i: ; preds = %bb17539.i
+ br label %bb17571.i
+bb17571.i: ; preds = %bb17542.i, %bb17539.i
+ br label %bb18851.i
+bb17604.i: ; preds = %bb17225.i
+ br i1 false, label %bb17636.i, label %bb17607.i
+bb17607.i: ; preds = %bb17604.i
+ br label %bb17636.i
+bb17636.i: ; preds = %bb17607.i, %bb17604.i
+ br label %bb18851.i
+bb17672.i: ; preds = %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17673.i: ; preds = %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17674.i: ; preds = %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17675.i: ; preds = %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17676.i: ; preds = %bb17225.i, %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17677.i: ; preds = %bb17225.i
+ unreachable
+bb17680.i: ; preds = %bb17225.i, %bb17225.i, %bb17225.i
+ br i1 false, label %bb17716.i, label %bb17683.i
+bb17683.i: ; preds = %bb17680.i, %bb17676.i, %bb17675.i, %bb17674.i, %bb17673.i, %bb17672.i
+ br label %bb17716.i
+bb17716.i: ; preds = %bb17683.i, %bb17680.i, %bb17676.i, %bb17675.i, %bb17674.i, %bb17673.i, %bb17672.i
+ br label %bb18851.i
+bb17749.i: ; preds = %bb16697.i
+ br i1 false, label %bb17757.i, label %bb17903.i
+bb17757.i: ; preds = %bb17749.i
+ switch i32 0, label %bb17903.i [
+ i32 0, label %bb17759.i
+ i32 1, label %bb17853.i
+ i32 2, label %bb17853.i
+ ]
+bb17759.i: ; preds = %bb17757.i
+ br i1 false, label %bb17764.i, label %bb17772.i
+bb17764.i: ; preds = %bb17759.i
+ br label %bb18032.i
+bb17772.i: ; preds = %bb17759.i
+ switch i32 %sf4083.0.i, label %bb17798.i [
+ i32 1, label %bb17777.i
+ i32 2, label %bb17790.i
+ ]
+bb17777.i: ; preds = %bb17772.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17818.i
+ i32 8, label %bb17818.i
+ i32 11, label %bb17845.i
+ ]
+bb17790.i: ; preds = %bb17772.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17818.i
+ i32 8, label %bb17818.i
+ i32 11, label %bb17845.i
+ ]
+bb17798.i: ; preds = %bb17772.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17818.i
+ i32 8, label %bb17818.i
+ i32 11, label %bb17845.i
+ ]
+bb17818.i: ; preds = %bb17798.i, %bb17798.i, %bb17790.i, %bb17790.i, %bb17777.i, %bb17777.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17845.i
+ i32 11, label %bb17845.i
+ i32 8, label %bb17946.i
+ ]
+bb17845.i: ; preds = %bb17818.i, %bb17818.i, %bb17798.i, %bb17790.i, %bb17777.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17908.i
+ i32 8, label %bb17946.i
+ i32 11, label %bb17998.i
+ ]
+bb17853.i: ; preds = %bb17757.i, %bb17757.i
+ br i1 false, label %bb17890.i, label %bb17903.i
+bb17890.i: ; preds = %bb17853.i
+ br label %bb17903.i
+bb17903.i: ; preds = %bb17890.i, %bb17853.i, %bb17757.i, %bb17749.i
+ switch i32 0, label %bb18032.i [
+ i32 4, label %bb17908.i
+ i32 8, label %bb17946.i
+ i32 11, label %bb17998.i
+ ]
+bb17908.i: ; preds = %bb17903.i, %bb17845.i
+ switch i32 %df4081.0.i, label %bb18386.i [
+ i32 1, label %bb17913.i
+ i32 2, label %bb17924.i
+ i32 3, label %bb17935.i
+ ]
+bb17913.i: ; preds = %bb17908.i
+ br label %bb18398.i
+bb17924.i: ; preds = %bb17908.i
+ br label %bb18398.i
+bb17935.i: ; preds = %bb17908.i
+ br label %bb18398.i
+bb17946.i: ; preds = %bb17903.i, %bb17845.i, %bb17818.i
+ switch i32 0, label %bb17962.i [
+ i32 4, label %bb17958.i
+ i32 11, label %bb17958.i
+ ]
+bb17958.i: ; preds = %bb17946.i, %bb17946.i
+ br label %bb18398.i
+bb17962.i: ; preds = %bb17946.i
+ switch i32 %df4081.0.i, label %bb18386.i [
+ i32 1, label %bb17967.i
+ i32 2, label %bb17978.i
+ i32 3, label %bb17989.i
+ ]
+bb17967.i: ; preds = %bb17962.i
+ br label %bb18398.i
+bb17978.i: ; preds = %bb17962.i
+ br label %bb18398.i
+bb17989.i: ; preds = %bb17962.i
+ br label %bb18398.i
+bb17998.i: ; preds = %bb17903.i, %bb17845.i
+ switch i32 0, label %bb18386.i [
+ i32 4, label %bb18010.i
+ i32 8, label %bb18010.i
+ i32 3, label %bb18026.i
+ ]
+bb18010.i: ; preds = %bb17998.i, %bb17998.i
+ br label %bb18398.i
+bb18026.i: ; preds = %bb17998.i
+ br label %bb18398.i
+bb18032.i: ; preds = %bb17903.i, %bb17845.i, %bb17818.i, %bb17798.i, %bb17790.i, %bb17777.i, %bb17764.i
+ switch i32 %sf4083.0.i, label %bb18386.i [
+ i32 10, label %bb18037.i
+ i32 0, label %bb18095.i
+ i32 1, label %bb18156.i
+ i32 2, label %bb18240.i
+ i32 3, label %bb18324.i
+ ]
+bb18037.i: ; preds = %bb18032.i
+ switch i32 0, label %bb18081.i [
+ i32 4, label %bb18042.i
+ i32 8, label %bb18061.i
+ i32 11, label %bb18077.i
+ ]
+bb18042.i: ; preds = %bb18037.i
+ br label %bb18398.i
+bb18061.i: ; preds = %bb18037.i
+ br label %bb18398.i
+bb18077.i: ; preds = %bb18037.i
+ br label %bb18398.i
+bb18081.i: ; preds = %bb18037.i
+ br i1 false, label %bb18086.i, label %bb18386.i
+bb18086.i: ; preds = %bb18081.i
+ br label %bb18398.i
+bb18095.i: ; preds = %bb18032.i
+ switch i32 0, label %bb18133.i [
+ i32 4, label %bb18100.i
+ i32 8, label %bb18116.i
+ i32 11, label %bb18129.i
+ ]
+bb18100.i: ; preds = %bb18095.i
+ br label %bb18398.i
+bb18116.i: ; preds = %bb18095.i
+ br label %bb18398.i
+bb18129.i: ; preds = %bb18095.i
+ br label %bb18398.i
+bb18133.i: ; preds = %bb18095.i
+ br i1 false, label %bb18139.i, label %bb18142.i
+bb18139.i: ; preds = %bb18133.i
+ br label %bb18398.i
+bb18142.i: ; preds = %bb18133.i
+ br i1 false, label %bb18147.i, label %bb18386.i
+bb18147.i: ; preds = %bb18142.i
+ br label %bb18398.i
+bb18156.i: ; preds = %bb18032.i
+ switch i32 0, label %bb18206.i [
+ i32 4, label %bb18161.i
+ i32 8, label %bb18182.i
+ i32 11, label %bb18200.i
+ ]
+bb18161.i: ; preds = %bb18156.i
+ br label %bb18398.i
+bb18182.i: ; preds = %bb18156.i
+ br label %bb18398.i
+bb18200.i: ; preds = %bb18156.i
+ br label %bb18398.i
+bb18206.i: ; preds = %bb18156.i
+ switch i32 %df4081.0.i, label %bb18386.i [
+ i32 0, label %bb18223.i
+ i32 2, label %bb18223.i
+ i32 10, label %bb18223.i
+ i32 3, label %bb18231.i
+ ]
+bb18223.i: ; preds = %bb18206.i, %bb18206.i, %bb18206.i
+ br label %bb18398.i
+bb18231.i: ; preds = %bb18206.i
+ br label %bb18398.i
+bb18240.i: ; preds = %bb18032.i
+ switch i32 0, label %bb18290.i [
+ i32 4, label %bb18245.i
+ i32 8, label %bb18266.i
+ i32 11, label %bb18284.i
+ ]
+bb18245.i: ; preds = %bb18240.i
+ br label %bb18398.i
+bb18266.i: ; preds = %bb18240.i
+ br label %bb18398.i
+bb18284.i: ; preds = %bb18240.i
+ br label %bb18398.i
+bb18290.i: ; preds = %bb18240.i
+ switch i32 %df4081.0.i, label %bb18386.i [
+ i32 0, label %bb18307.i
+ i32 1, label %bb18307.i
+ i32 10, label %bb18307.i
+ i32 3, label %bb18315.i
+ ]
+bb18307.i: ; preds = %bb18290.i, %bb18290.i, %bb18290.i
+ br label %bb18398.i
+bb18315.i: ; preds = %bb18290.i
+ br label %bb18398.i
+bb18324.i: ; preds = %bb18032.i
+ switch i32 0, label %bb18386.i [
+ i32 4, label %bb18329.i
+ i32 8, label %bb18352.i
+ i32 11, label %bb18370.i
+ i32 10, label %bb18383.i
+ ]
+bb18329.i: ; preds = %bb18324.i
+ br label %bb18398.i
+bb18352.i: ; preds = %bb18324.i
+ br label %bb18398.i
+bb18370.i: ; preds = %bb18324.i
+ br label %bb18398.i
+bb18383.i: ; preds = %bb18324.i
+ br label %bb18386.i
+bb18386.i: ; preds = %bb18383.i, %bb18324.i, %bb18290.i, %bb18206.i, %bb18142.i, %bb18081.i, %bb18032.i, %bb17998.i, %bb17962.i, %bb17908.i
+ br label %bb18398.i
+bb18398.i: ; preds = %bb18386.i, %bb18370.i, %bb18352.i, %bb18329.i, %bb18315.i, %bb18307.i, %bb18284.i, %bb18266.i, %bb18245.i, %bb18231.i, %bb18223.i, %bb18200.i, %bb18182.i, %bb18161.i, %bb18147.i, %bb18139.i, %bb18129.i, %bb18116.i, %bb18100.i, %bb18086.i, %bb18077.i, %bb18061.i, %bb18042.i, %bb18026.i, %bb18010.i, %bb17989.i, %bb17978.i, %bb17967.i, %bb17958.i, %bb17935.i, %bb17924.i, %bb17913.i
+ br i1 false, label %bb18589.i, label %bb18431.i
+bb18431.i: ; preds = %bb18398.i
+ switch i32 0, label %bb18589.i [
+ i32 0, label %bb18433.i
+ i32 1, label %bb18487.i
+ i32 2, label %bb18487.i
+ ]
+bb18433.i: ; preds = %bb18431.i
+ switch i32 0, label %bb18589.i [
+ i32 4, label %bb18452.i
+ i32 8, label %bb18452.i
+ i32 11, label %bb18479.i
+ ]
+bb18452.i: ; preds = %bb18433.i, %bb18433.i
+ switch i32 0, label %bb18589.i [
+ i32 4, label %bb18479.i
+ i32 11, label %bb18479.i
+ ]
+bb18479.i: ; preds = %bb18452.i, %bb18452.i, %bb18433.i
+ br i1 false, label %bb18845.i, label %bb18592.i
+bb18487.i: ; preds = %bb18431.i, %bb18431.i
+ br i1 false, label %bb18492.i, label %bb18521.i
+bb18492.i: ; preds = %bb18487.i
+ br i1 false, label %bb18508.i, label %bb18529.i
+bb18508.i: ; preds = %bb18492.i
+ switch i32 0, label %bb18589.i [
+ i32 4, label %bb18541.i
+ i32 8, label %bb18541.i
+ ]
+bb18521.i: ; preds = %bb18487.i
+ br label %bb18529.i
+bb18529.i: ; preds = %bb18521.i, %bb18492.i
+ switch i32 0, label %bb18589.i [
+ i32 4, label %bb18541.i
+ i32 8, label %bb18541.i
+ ]
+bb18541.i: ; preds = %bb18529.i, %bb18529.i, %bb18508.i, %bb18508.i
+ br i1 false, label %bb18560.i, label %bb18589.i
+bb18560.i: ; preds = %bb18541.i
+ br i1 false, label %bb18576.i, label %bb18589.i
+bb18576.i: ; preds = %bb18560.i
+ br label %bb18589.i
+bb18589.i: ; preds = %bb18576.i, %bb18560.i, %bb18541.i, %bb18529.i, %bb18508.i, %bb18452.i, %bb18433.i, %bb18431.i, %bb18398.i
+ br i1 false, label %bb18845.i, label %bb18592.i
+bb18592.i: ; preds = %bb18589.i, %bb18479.i
+ switch i32 %dt4080.0.i, label %bb18809.i [
+ i32 4, label %bb18845.i
+ i32 8, label %bb18594.i
+ i32 9, label %bb18619.i
+ i32 10, label %bb18644.i
+ i32 11, label %bb18644.i
+ i32 14, label %bb18669.i
+ i32 15, label %bb18702.i
+ i32 18, label %bb18735.i
+ i32 19, label %bb18768.i
+ i32 0, label %bb18812.i
+ i32 1, label %bb18804.i
+ i32 2, label %bb18805.i
+ i32 3, label %bb18806.i
+ i32 5, label %bb18807.i
+ i32 12, label %bb18808.i
+ i32 13, label %bb18808.i
+ i32 16, label %bb18812.i
+ i32 17, label %bb18812.i
+ ]
+bb18594.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18619.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18644.i: ; preds = %bb18592.i, %bb18592.i
+ br label %bb18851.i
+bb18669.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18702.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18735.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18768.i: ; preds = %bb18592.i
+ br label %bb18851.i
+bb18804.i: ; preds = %bb18592.i
+ br label %bb18812.i
+bb18805.i: ; preds = %bb18592.i
+ br label %bb18812.i
+bb18806.i: ; preds = %bb18592.i
+ br label %bb18812.i
+bb18807.i: ; preds = %bb18592.i
+ br label %bb18812.i
+bb18808.i: ; preds = %bb18592.i, %bb18592.i
+ br label %bb18812.i
+bb18809.i: ; preds = %bb18592.i
+ unreachable
+bb18812.i: ; preds = %bb18808.i, %bb18807.i, %bb18806.i, %bb18805.i, %bb18804.i, %bb18592.i, %bb18592.i, %bb18592.i
+ br label %bb18845.i
+bb18845.i: ; preds = %bb18812.i, %bb18592.i, %bb18589.i, %bb18479.i, %bb17227.i, %bb17195.i
+ br label %bb18851.i
+bb18851.i: ; preds = %bb18845.i, %bb18768.i, %bb18735.i, %bb18702.i, %bb18669.i, %bb18644.i, %bb18619.i, %bb18594.i, %bb17716.i, %bb17636.i, %bb17571.i, %bb17506.i, %bb17441.i, %bb17384.i, %bb17334.i, %bb17284.i, %bb17230.i, %glgScalarFloatConversion.exit
+ br label %storeColor_RGB_UI.exit
+storeColor_RGB_UI.exit: ; preds = %bb18851.i
+ br i1 false, label %bb19786.i, label %bb16650.i
+bb19786.i: ; preds = %storeColor_RGB_UI.exit
+ br label %bb19808.i
+bb19808.i: ; preds = %bb19786.i
+ br i1 false, label %bb19818.i, label %bb5276.i
+bb19818.i: ; preds = %bb19808.i
+ br i1 false, label %bb19840.i, label %bb19821.i
+bb19821.i: ; preds = %bb19818.i
+ br label %bb19840.i
+bb19840.i: ; preds = %bb19821.i, %bb19818.i
+ br i1 false, label %UnifiedReturnBlock.i, label %bb19843.i
+bb19843.i: ; preds = %bb19840.i
+ br label %t.exit
+UnifiedReturnBlock.i: ; preds = %bb19840.i, %bb4501.i
+ br label %t.exit
+t.exit: ; preds = %UnifiedReturnBlock.i, %bb19843.i, %bb4517.i, %bb4354.i
+ ret void
+}
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
new file mode 100644
index 0000000..71aa603
--- /dev/null
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+ %struct.BiContextType = type { i16, i8, i32 }
+ %struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+ %struct.DataPartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment }
+ %struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* }
+ %struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+ %struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.Macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+ %struct.Macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+ %struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float }
+ %struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.DataPartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+ %struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+@images = external global %struct.ImageParameters ; <%struct.ImageParameters*> [#uses=2]
+
+declare i8* @calloc(i32, i32)
+
+define fastcc void @init_global_buffers() nounwind {
+entry:
+ %tmp50.i.i = mul i32 0, 0 ; <i32> [#uses=2]
+ br i1 false, label %init_orig_buffers.exit, label %cond_true.i29
+
+cond_true.i29: ; preds = %entry
+ %tmp17.i = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1]
+ %tmp20.i27 = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1]
+ %tmp8.i.i = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
+ br label %bb.i8.us.i
+
+bb.i8.us.i: ; preds = %get_mem2Dpel.exit.i.us.i, %cond_true.i29
+ %j.04.i.us.i = phi i32 [ %indvar.next39.i, %get_mem2Dpel.exit.i.us.i ], [ 0, %cond_true.i29 ] ; <i32> [#uses=2]
+ %tmp13.i.us.i = getelementptr i16*** null, i32 %j.04.i.us.i ; <i16***> [#uses=0]
+ %tmp15.i.i.us.i = tail call i8* @calloc( i32 0, i32 2 ) ; <i8*> [#uses=0]
+ store i16* null, i16** null, align 4
+ br label %bb.i.i.us.i
+
+get_mem2Dpel.exit.i.us.i: ; preds = %bb.i.i.us.i
+ %indvar.next39.i = add i32 %j.04.i.us.i, 1 ; <i32> [#uses=2]
+ %exitcond40.i = icmp eq i32 %indvar.next39.i, 2 ; <i1> [#uses=1]
+ br i1 %exitcond40.i, label %get_mem3Dpel.exit.split.i, label %bb.i8.us.i
+
+bb.i.i.us.i: ; preds = %bb.i.i.us.i, %bb.i8.us.i
+ %exitcond.i = icmp eq i32 0, %tmp8.i.i ; <i1> [#uses=1]
+ br i1 %exitcond.i, label %get_mem2Dpel.exit.i.us.i, label %bb.i.i.us.i
+
+get_mem3Dpel.exit.split.i: ; preds = %get_mem2Dpel.exit.i.us.i
+ %tmp30.i.i = shl i32 %tmp17.i, 2 ; <i32> [#uses=1]
+ %tmp31.i.i = mul i32 %tmp30.i.i, %tmp20.i27 ; <i32> [#uses=1]
+ %tmp23.i31 = add i32 %tmp31.i.i, %tmp50.i.i ; <i32> [#uses=1]
+ br label %init_orig_buffers.exit
+
+init_orig_buffers.exit: ; preds = %get_mem3Dpel.exit.split.i, %entry
+ %memory_size.0.i = phi i32 [ %tmp23.i31, %get_mem3Dpel.exit.split.i ], [ %tmp50.i.i, %entry ] ; <i32> [#uses=1]
+ %tmp41 = add i32 0, %memory_size.0.i ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
new file mode 100644
index 0000000..aa61d86
--- /dev/null
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+ %struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** }
+@decoders = external global %struct.Decoders ; <%struct.Decoders*> [#uses=1]
+
+declare i8* @calloc(i32, i32)
+
+declare fastcc i32 @get_mem2Dint(i32***, i32, i32)
+
+define fastcc void @init_global_buffers() nounwind {
+entry:
+ %tmp151 = tail call fastcc i32 @get_mem2Dint( i32*** getelementptr (%struct.Decoders* @decoders, i32 0, i32 0), i32 16, i32 16 ) ; <i32> [#uses=1]
+ %tmp158 = tail call i8* @calloc( i32 0, i32 4 ) ; <i8*> [#uses=0]
+ br i1 false, label %cond_true166, label %bb190.preheader
+
+bb190.preheader: ; preds = %entry
+ %memory_size.3555 = add i32 0, %tmp151 ; <i32> [#uses=0]
+ unreachable
+
+cond_true166: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
new file mode 100644
index 0000000..4cb768e
--- /dev/null
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm
+
+define float @f(float %a, float %b) nounwind {
+ %tmp = fdiv float %a, %b
+ ret float %tmp
+}
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
new file mode 100644
index 0000000..83fde07
--- /dev/null
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm
+; PR2589
+
+define void @main({ i32 }*) {
+entry:
+ %sret1 = alloca { i32 } ; <{ i32 }*> [#uses=1]
+ load { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
new file mode 100644
index 0000000..adb0112
--- /dev/null
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+@__gcov_var = common global %struct.__gcov_var zeroinitializer ; <%struct.__gcov_var*> [#uses=1]
+
+define i32 @__gcov_close() nounwind {
+entry:
+ load i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1]
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
new file mode 100644
index 0000000..5f9d9ae
--- /dev/null
+++ b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+@"\01LC1" = external constant [288 x i8] ; <[288 x i8]*> [#uses=1]
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %entry
+ %i.01.i = phi i32 [ 0, %entry ], [ %indvar.next52, %bb.i ] ; <i32> [#uses=1]
+ %indvar.next52 = add i32 %i.01.i, 1 ; <i32> [#uses=2]
+ %exitcond53 = icmp eq i32 %indvar.next52, 15 ; <i1> [#uses=1]
+ br i1 %exitcond53, label %bb.i33.loopexit, label %bb.i
+
+bb.i33.loopexit: ; preds = %bb.i
+ %0 = malloc [347 x i8] ; <[347 x i8]*> [#uses=2]
+ %.sub = getelementptr [347 x i8]* %0, i32 0, i32 0 ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32( i8* %.sub, i8* getelementptr ([288 x i8]* @"\01LC1", i32 0, i32 0), i32 287, i32 1 ) nounwind
+ br label %bb.i28
+
+bb.i28: ; preds = %bb.i28, %bb.i33.loopexit
+ br i1 false, label %repeat_fasta.exit, label %bb.i28
+
+repeat_fasta.exit: ; preds = %bb.i28
+ free [347 x i8]* %0
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
new file mode 100644
index 0000000..d3bc3e1
--- /dev/null
+++ b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define void @gcov_exit() nounwind {
+entry:
+ br i1 false, label %bb24, label %bb33.thread
+
+bb24: ; preds = %entry
+ br label %bb39
+
+bb33.thread: ; preds = %entry
+ %0 = alloca i8, i32 0 ; <i8*> [#uses=1]
+ br label %bb39
+
+bb39: ; preds = %bb33.thread, %bb24
+ %.reg2mem.0 = phi i8* [ %0, %bb33.thread ], [ null, %bb24 ] ; <i8*> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
new file mode 100644
index 0000000..601a516
--- /dev/null
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+
+define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
+entry:
+ %0 = trunc i64 %u to i32 ; <i32> [#uses=1]
+ %asmtmp = tail call { i32, i32, i32, i32, i32 } asm "@ Inlined umul_ppmm\0A\09mov\09$2, $5, lsr #16\0A\09mov\09$0, $6, lsr #16\0A\09bic\09$3, $5, $2, lsl #16\0A\09bic\09$4, $6, $0, lsl #16\0A\09mul\09$1, $3, $4\0A\09mul\09$4, $2, $4\0A\09mul\09$3, $0, $3\0A\09mul\09$0, $2, $0\0A\09adds\09$3, $4, $3\0A\09addcs\09$0, $0, #65536\0A\09adds\09$1, $1, $3, lsl #16\0A\09adc\09$0, $0, $3, lsr #16", "=&r,=r,=&r,=&r,=r,r,r,~{cc}"(i32 %0, i32 0) nounwind ; <{ i32, i32, i32, i32, i32 }> [#uses=1]
+ %asmresult1 = extractvalue { i32, i32, i32, i32, i32 } %asmtmp, 1 ; <i32> [#uses=1]
+ %asmresult116 = zext i32 %asmresult1 to i64 ; <i64> [#uses=1]
+ %asmresult116.ins = or i64 0, %asmresult116 ; <i64> [#uses=1]
+ %1 = lshr i64 %v, 32 ; <i64> [#uses=1]
+ %2 = mul i64 %1, %u ; <i64> [#uses=1]
+ %3 = add i64 %2, 0 ; <i64> [#uses=1]
+ %4 = shl i64 %3, 32 ; <i64> [#uses=1]
+ %5 = add i64 %asmresult116.ins, %4 ; <i64> [#uses=1]
+ ret i64 %5
+}
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
new file mode 100644
index 0000000..4c0c59c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+
+target triple = "arm-apple-darwin9"
+ %struct.FILE_POS = type { i8, i8, i16, i32 }
+ %struct.FIRST_UNION = type { %struct.FILE_POS }
+ %struct.FOURTH_UNION = type { %struct.STYLE }
+ %struct.GAP = type { i8, i8, i16 }
+ %struct.LIST = type { %struct.rec*, %struct.rec* }
+ %struct.SECOND_UNION = type { { i16, i8, i8 } }
+ %struct.STYLE = type { { %struct.GAP }, { %struct.GAP }, i16, i16, i32 }
+ %struct.THIRD_UNION = type { { [2 x i32], [2 x i32] } }
+ %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+ %struct.rec = type { %struct.head_type }
+@no_file_pos = external global %struct.FILE_POS ; <%struct.FILE_POS*> [#uses=1]
+@"\01LC13423" = external constant [23 x i8] ; <[23 x i8]*> [#uses=1]
+@"\01LC18972" = external constant [13 x i8] ; <[13 x i8]*> [#uses=1]
+
+define fastcc void @FlushGalley(%struct.rec* %hd) nounwind {
+entry:
+ br label %RESUME
+
+RESUME: ; preds = %bb520.preheader, %entry
+ br label %bb396
+
+bb122: ; preds = %bb396
+ switch i32 0, label %bb394 [
+ i32 1, label %bb131
+ i32 2, label %bb244
+ i32 4, label %bb244
+ i32 5, label %bb244
+ i32 6, label %bb244
+ i32 7, label %bb244
+ i32 11, label %bb244
+ i32 12, label %bb244
+ i32 15, label %bb244
+ i32 17, label %bb244
+ i32 18, label %bb244
+ i32 19, label %bb244
+ i32 20, label %bb396
+ i32 21, label %bb396
+ i32 22, label %bb396
+ i32 23, label %bb396
+ i32 24, label %bb244
+ i32 25, label %bb244
+ i32 26, label %bb244
+ i32 27, label %bb244
+ i32 28, label %bb244
+ i32 29, label %bb244
+ i32 30, label %bb244
+ i32 31, label %bb244
+ i32 32, label %bb244
+ i32 33, label %bb244
+ i32 34, label %bb244
+ i32 35, label %bb244
+ i32 36, label %bb244
+ i32 37, label %bb244
+ i32 38, label %bb244
+ i32 39, label %bb244
+ i32 40, label %bb244
+ i32 41, label %bb244
+ i32 42, label %bb244
+ i32 43, label %bb244
+ i32 44, label %bb244
+ i32 45, label %bb244
+ i32 46, label %bb244
+ i32 50, label %bb244
+ i32 51, label %bb244
+ i32 94, label %bb244
+ i32 95, label %bb244
+ i32 96, label %bb244
+ i32 97, label %bb244
+ i32 98, label %bb244
+ i32 99, label %bb244
+ ]
+
+bb131: ; preds = %bb122
+ br label %bb396
+
+bb244: ; preds = %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122
+ %0 = icmp eq %struct.rec* %stop_link.3, null ; <i1> [#uses=1]
+ br i1 %0, label %bb435, label %bb433
+
+bb394: ; preds = %bb122
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 3, i8* getelementptr ([23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind
+ br label %bb396
+
+bb396: ; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME
+ %stop_link.3 = phi %struct.rec* [ null, %RESUME ], [ %stop_link.3, %bb394 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %link.1, %bb131 ] ; <%struct.rec*> [#uses=7]
+ %headers_seen.1 = phi i32 [ 0, %RESUME ], [ %headers_seen.1, %bb394 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ %headers_seen.1, %bb131 ] ; <i32> [#uses=2]
+ %link.1 = load %struct.rec** null ; <%struct.rec*> [#uses=2]
+ %1 = icmp eq %struct.rec* %link.1, %hd ; <i1> [#uses=1]
+ br i1 %1, label %bb398, label %bb122
+
+bb398: ; preds = %bb396
+ unreachable
+
+bb433: ; preds = %bb244
+ call fastcc void @Promote(%struct.rec* %hd, %struct.rec* %stop_link.3, %struct.rec* null, i32 1) nounwind
+ br label %bb435
+
+bb435: ; preds = %bb433, %bb244
+ br i1 false, label %bb491, label %bb499
+
+bb491: ; preds = %bb435
+ br label %bb499
+
+bb499: ; preds = %bb499, %bb491, %bb435
+ %2 = icmp eq %struct.rec* null, null ; <i1> [#uses=1]
+ br i1 %2, label %bb520.preheader, label %bb499
+
+bb520.preheader: ; preds = %bb499
+ br label %RESUME
+}
+
+declare fastcc void @Promote(%struct.rec*, %struct.rec*, %struct.rec* nocapture, i32) nounwind
+
+declare void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
new file mode 100644
index 0000000..a48f003
--- /dev/null
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s
+; PR3610
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
+target triple = "arm-elf"
+
+define i32 @main(i8*) nounwind {
+entry:
+ %ap = alloca i8* ; <i8**> [#uses=2]
+ store i8* %0, i8** %ap
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ store i32 0, i32* %retval
+ %tmp = alloca float ; <float*> [#uses=1]
+ %1 = va_arg i8** %ap, float ; <float> [#uses=1]
+ store float %1, float* %tmp
+ br label %return
+
+return: ; preds = %entry
+ %2 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %2
+}
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
new file mode 100644
index 0000000..bc5e602
--- /dev/null
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -0,0 +1,229 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+
+target triple = "arm-apple-darwin9"
+@a = external global double ; <double*> [#uses=1]
+@N = external global double ; <double*> [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define fastcc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+bb.thread:
+ br label %bb52
+
+bb32: ; preds = %bb52
+ %0 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %1 = add i32 %j.1, 1 ; <i32> [#uses=1]
+ br label %bb52
+
+bb52: ; preds = %bb53, %bb32, %bb.thread
+ %i.3494 = phi i32 [ 0, %bb.thread ], [ %3, %bb53 ], [ %i.3494, %bb32 ] ; <i32> [#uses=2]
+ %k.4 = phi double [ %0, %bb32 ], [ 0.000000e+00, %bb53 ], [ 0.000000e+00, %bb.thread ] ; <double> [#uses=2]
+ %j.1 = phi i32 [ %1, %bb32 ], [ 0, %bb53 ], [ 0, %bb.thread ] ; <i32> [#uses=2]
+ %2 = icmp sgt i32 %j.1, 99 ; <i1> [#uses=1]
+ br i1 %2, label %bb53, label %bb32
+
+bb53: ; preds = %bb52
+ %3 = add i32 %i.3494, 1 ; <i32> [#uses=2]
+ %phitmp = icmp sgt i32 %3, 999999 ; <i1> [#uses=1]
+ br i1 %phitmp, label %bb55, label %bb52
+
+bb55: ; preds = %bb53
+ %4 = load double* @a, align 4 ; <double> [#uses=10]
+ %5 = fadd double %4, 0.000000e+00 ; <double> [#uses=16]
+ %6 = fcmp ogt double %k.4, 0.000000e+00 ; <i1> [#uses=1]
+ %.pn404 = fmul double %4, %4 ; <double> [#uses=4]
+ %.pn402 = fmul double %5, %5 ; <double> [#uses=5]
+ %.pn165.in = load double* @N ; <double> [#uses=5]
+ %.pn198 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
+ %.pn185 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn147 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn141 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
+ %.pn142 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1]
+ %.pn136 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn132 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1]
+ %.pn123 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn124 = fdiv double 0.000000e+00, %.pn198 ; <double> [#uses=1]
+ %.pn120 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %.pn117 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
+ %.pn118 = fdiv double %.pn185, %5 ; <double> [#uses=1]
+ %.pn88 = fdiv double %.pn147, %5 ; <double> [#uses=1]
+ %.pn81 = fsub double %.pn141, %.pn142 ; <double> [#uses=1]
+ %.pn77 = fsub double 0.000000e+00, %.pn136 ; <double> [#uses=1]
+ %.pn75 = fsub double 0.000000e+00, %.pn132 ; <double> [#uses=1]
+ %.pn69 = fsub double %.pn123, %.pn124 ; <double> [#uses=1]
+ %.pn67 = fsub double 0.000000e+00, %.pn120 ; <double> [#uses=1]
+ %.pn56 = fsub double %.pn117, %.pn118 ; <double> [#uses=1]
+ %.pn42 = fsub double 0.000000e+00, %.pn88 ; <double> [#uses=1]
+ %.pn60 = fmul double %.pn81, 0.000000e+00 ; <double> [#uses=1]
+ %.pn57 = fadd double %.pn77, 0.000000e+00 ; <double> [#uses=1]
+ %.pn58 = fmul double %.pn75, %.pn165.in ; <double> [#uses=1]
+ %.pn32 = fadd double %.pn69, 0.000000e+00 ; <double> [#uses=1]
+ %.pn33 = fmul double %.pn67, %.pn165.in ; <double> [#uses=1]
+ %.pn17 = fsub double 0.000000e+00, %.pn60 ; <double> [#uses=1]
+ %.pn9 = fadd double %.pn57, %.pn58 ; <double> [#uses=1]
+ %.pn30 = fmul double 0.000000e+00, %.pn56 ; <double> [#uses=1]
+ %.pn24 = fmul double 0.000000e+00, %.pn42 ; <double> [#uses=1]
+ %.pn1 = fadd double %.pn32, %.pn33 ; <double> [#uses=1]
+ %.pn28 = fsub double %.pn30, 0.000000e+00 ; <double> [#uses=1]
+ %.pn26 = fadd double %.pn28, 0.000000e+00 ; <double> [#uses=1]
+ %.pn22 = fsub double %.pn26, 0.000000e+00 ; <double> [#uses=1]
+ %.pn20 = fsub double %.pn24, 0.000000e+00 ; <double> [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1]
+ %.pn16 = fadd double %.pn20, 0.000000e+00 ; <double> [#uses=1]
+ %.pn14 = fsub double %.pn18, 0.000000e+00 ; <double> [#uses=1]
+ %.pn12 = fsub double %.pn16, %.pn17 ; <double> [#uses=1]
+ %.pn10 = fadd double %.pn14, 0.000000e+00 ; <double> [#uses=1]
+ %.pn8 = fadd double %.pn12, 0.000000e+00 ; <double> [#uses=1]
+ %.pn6 = fsub double %.pn10, 0.000000e+00 ; <double> [#uses=1]
+ %.pn4 = fsub double %.pn8, %.pn9 ; <double> [#uses=1]
+ %.pn2 = fadd double %.pn6, 0.000000e+00 ; <double> [#uses=1]
+ %.pn = fadd double %.pn4, 0.000000e+00 ; <double> [#uses=1]
+ %N1.0 = fsub double %.pn2, 0.000000e+00 ; <double> [#uses=2]
+ %D1.0 = fsub double %.pn, %.pn1 ; <double> [#uses=2]
+ br i1 %6, label %bb62, label %bb64
+
+bb62: ; preds = %bb55
+ %7 = fmul double 0.000000e+00, %4 ; <double> [#uses=1]
+ %8 = fsub double -0.000000e+00, %7 ; <double> [#uses=3]
+ %9 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
+ %10 = fsub double -0.000000e+00, %9 ; <double> [#uses=3]
+ %11 = fmul double %.pn404, %4 ; <double> [#uses=5]
+ %12 = fmul double %.pn402, %5 ; <double> [#uses=5]
+ %13 = fmul double 0.000000e+00, -2.000000e+00 ; <double> [#uses=1]
+ %14 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1]
+ %15 = fsub double 0.000000e+00, %14 ; <double> [#uses=1]
+ %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1]
+ %17 = fadd double %13, %16 ; <double> [#uses=1]
+ %18 = fmul double %.pn165.in, -2.000000e+00 ; <double> [#uses=5]
+ %19 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %20 = fadd double %17, %19 ; <double> [#uses=1]
+ %21 = fmul double 0.000000e+00, %20 ; <double> [#uses=1]
+ %22 = fadd double 0.000000e+00, %21 ; <double> [#uses=1]
+ %23 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1]
+ %24 = fsub double 0.000000e+00, %23 ; <double> [#uses=0]
+ %25 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %26 = fadd double 0.000000e+00, %25 ; <double> [#uses=1]
+ %27 = fmul double 0.000000e+00, %26 ; <double> [#uses=1]
+ %28 = fsub double %22, %27 ; <double> [#uses=1]
+ %29 = fmul double %11, %4 ; <double> [#uses=1]
+ %30 = fmul double %12, %5 ; <double> [#uses=3]
+ %31 = fmul double %.pn165.in, -4.000000e+00 ; <double> [#uses=1]
+ %32 = fmul double %.pn165.in, 0x3FF5555555555555 ; <double> [#uses=1]
+ %33 = fmul double %32, 0.000000e+00 ; <double> [#uses=2]
+ %34 = fadd double %28, 0.000000e+00 ; <double> [#uses=1]
+ %35 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %36 = fdiv double %35, %11 ; <double> [#uses=1]
+ %37 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1]
+ %38 = fsub double %36, %37 ; <double> [#uses=1]
+ %39 = fmul double 0.000000e+00, %38 ; <double> [#uses=1]
+ %40 = fadd double 0.000000e+00, %39 ; <double> [#uses=1]
+ %41 = fadd double %40, 0.000000e+00 ; <double> [#uses=1]
+ %42 = fadd double %41, 0.000000e+00 ; <double> [#uses=1]
+ %43 = fmul double %42, 0.000000e+00 ; <double> [#uses=1]
+ %44 = fsub double %34, %43 ; <double> [#uses=1]
+ %45 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
+ %46 = fsub double -0.000000e+00, %45 ; <double> [#uses=2]
+ %47 = fdiv double %46, 0.000000e+00 ; <double> [#uses=1]
+ %48 = fmul double %30, %5 ; <double> [#uses=1]
+ %49 = fdiv double 0.000000e+00, %48 ; <double> [#uses=1]
+ %50 = fsub double %47, %49 ; <double> [#uses=1]
+ %51 = fmul double %50, -4.000000e+00 ; <double> [#uses=1]
+ %52 = fadd double %51, 0.000000e+00 ; <double> [#uses=1]
+ %53 = fdiv double %46, %11 ; <double> [#uses=1]
+ %54 = fsub double %53, 0.000000e+00 ; <double> [#uses=1]
+ %55 = fmul double %31, %54 ; <double> [#uses=1]
+ %56 = fadd double %52, %55 ; <double> [#uses=1]
+ %57 = fadd double %56, 0.000000e+00 ; <double> [#uses=1]
+ %58 = fadd double %44, %57 ; <double> [#uses=1]
+ %59 = fsub double %58, 0.000000e+00 ; <double> [#uses=1]
+ %60 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
+ %61 = fsub double -0.000000e+00, %60 ; <double> [#uses=1]
+ %62 = fdiv double 0.000000e+00, -6.000000e+00 ; <double> [#uses=1]
+ %63 = fdiv double %61, %5 ; <double> [#uses=1]
+ %64 = fsub double 0.000000e+00, %63 ; <double> [#uses=1]
+ %65 = fmul double %62, %64 ; <double> [#uses=1]
+ %66 = fsub double 0.000000e+00, %65 ; <double> [#uses=1]
+ %67 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=2]
+ %68 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
+ %69 = fsub double -0.000000e+00, %68 ; <double> [#uses=2]
+ %70 = fdiv double %67, %.pn404 ; <double> [#uses=1]
+ %71 = fdiv double %69, %.pn402 ; <double> [#uses=1]
+ %72 = fsub double %70, %71 ; <double> [#uses=1]
+ %73 = fmul double %72, -5.000000e-01 ; <double> [#uses=1]
+ %74 = fdiv double %67, %4 ; <double> [#uses=1]
+ %75 = fdiv double %69, %5 ; <double> [#uses=1]
+ %76 = fsub double %74, %75 ; <double> [#uses=1]
+ %77 = fmul double %76, 0.000000e+00 ; <double> [#uses=1]
+ %78 = fadd double %73, %77 ; <double> [#uses=1]
+ %79 = fmul double 0.000000e+00, %78 ; <double> [#uses=1]
+ %80 = fadd double %66, %79 ; <double> [#uses=1]
+ %81 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1]
+ %82 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1]
+ %83 = fsub double %81, %82 ; <double> [#uses=1]
+ %84 = fmul double %83, -5.000000e-01 ; <double> [#uses=1]
+ %85 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1]
+ %86 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1]
+ %87 = fsub double %85, %86 ; <double> [#uses=1]
+ %88 = fmul double %87, 0.000000e+00 ; <double> [#uses=1]
+ %89 = fadd double %84, %88 ; <double> [#uses=1]
+ %90 = fmul double 0.000000e+00, %89 ; <double> [#uses=1]
+ %91 = fsub double %80, %90 ; <double> [#uses=1]
+ %92 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
+ %93 = fsub double -0.000000e+00, %92 ; <double> [#uses=1]
+ %94 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
+ %95 = fsub double -0.000000e+00, %94 ; <double> [#uses=3]
+ %96 = fdiv double %95, %.pn402 ; <double> [#uses=1]
+ %97 = fsub double 0.000000e+00, %96 ; <double> [#uses=1]
+ %98 = fmul double 0.000000e+00, %97 ; <double> [#uses=1]
+ %99 = fdiv double %93, %11 ; <double> [#uses=1]
+ %100 = fdiv double %95, %12 ; <double> [#uses=1]
+ %101 = fsub double %99, %100 ; <double> [#uses=1]
+ %102 = fsub double %98, %101 ; <double> [#uses=1]
+ %103 = fdiv double %95, %5 ; <double> [#uses=1]
+ %104 = fsub double 0.000000e+00, %103 ; <double> [#uses=1]
+ %105 = fmul double %18, %104 ; <double> [#uses=1]
+ %106 = fadd double %102, %105 ; <double> [#uses=1]
+ %107 = fmul double %106, %k.4 ; <double> [#uses=1]
+ %108 = fadd double %91, %107 ; <double> [#uses=1]
+ %109 = fsub double %108, 0.000000e+00 ; <double> [#uses=1]
+ %110 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1]
+ %111 = fsub double -0.000000e+00, %110 ; <double> [#uses=2]
+ %112 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1]
+ %113 = fsub double -0.000000e+00, %112 ; <double> [#uses=2]
+ %114 = fdiv double %111, %11 ; <double> [#uses=1]
+ %115 = fdiv double %113, %12 ; <double> [#uses=1]
+ %116 = fsub double %114, %115 ; <double> [#uses=1]
+ %117 = fmul double 0.000000e+00, %116 ; <double> [#uses=1]
+ %118 = fdiv double %111, %29 ; <double> [#uses=1]
+ %119 = fdiv double %113, %30 ; <double> [#uses=1]
+ %120 = fsub double %118, %119 ; <double> [#uses=1]
+ %121 = fsub double %117, %120 ; <double> [#uses=1]
+ %122 = fmul double %18, 0.000000e+00 ; <double> [#uses=1]
+ %123 = fadd double %121, %122 ; <double> [#uses=1]
+ %124 = fmul double %33, 0.000000e+00 ; <double> [#uses=1]
+ %125 = fadd double %123, %124 ; <double> [#uses=1]
+ %126 = fadd double %109, %125 ; <double> [#uses=1]
+ %127 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
+ %128 = fsub double -0.000000e+00, %127 ; <double> [#uses=2]
+ %129 = fdiv double %128, %30 ; <double> [#uses=1]
+ %130 = fsub double 0.000000e+00, %129 ; <double> [#uses=1]
+ %131 = fsub double 0.000000e+00, %130 ; <double> [#uses=1]
+ %132 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1]
+ %133 = fsub double %132, 0.000000e+00 ; <double> [#uses=1]
+ %134 = fmul double %18, %133 ; <double> [#uses=1]
+ %135 = fadd double %131, %134 ; <double> [#uses=1]
+ %136 = fdiv double %128, %5 ; <double> [#uses=1]
+ %137 = fsub double 0.000000e+00, %136 ; <double> [#uses=1]
+ %138 = fmul double %33, %137 ; <double> [#uses=1]
+ %139 = fadd double %135, %138 ; <double> [#uses=1]
+ %140 = fsub double %126, %139 ; <double> [#uses=1]
+ %141 = fadd double %N1.0, %59 ; <double> [#uses=1]
+ %142 = fadd double %D1.0, %140 ; <double> [#uses=1]
+ br label %bb64
+
+bb64: ; preds = %bb62, %bb55
+ %N1.0.pn = phi double [ %141, %bb62 ], [ %N1.0, %bb55 ] ; <double> [#uses=1]
+ %D1.0.pn = phi double [ %142, %bb62 ], [ %D1.0, %bb55 ] ; <double> [#uses=1]
+ %x.1 = fdiv double %N1.0.pn, %D1.0.pn ; <double> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
new file mode 100644
index 0000000..0ec17ae
--- /dev/null
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
+; rdar://6653182
+
+ %struct.ggBRDF = type { i32 (...)** }
+ %struct.ggPoint2 = type { [2 x double] }
+ %struct.ggPoint3 = type { [3 x double] }
+ %struct.ggSpectrum = type { [8 x float] }
+ %struct.ggSphere = type { %struct.ggPoint3, double }
+ %struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
+ %struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
+ %struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
+
+declare double @sin(double) nounwind readonly
+
+declare double @acos(double) nounwind readonly
+
+define i32 @_ZNK34mrDiffuseSolidAngleSphereLuminaire18selectVisiblePointERK8ggPoint3RK9ggVector3RK8ggPoint2dRS0_Rd(%struct.mrDiffuseCosineSphereLuminaire* nocapture %this, %struct.ggPoint3* nocapture %x, %struct.ggPoint3* nocapture %unnamed_arg, %struct.ggPoint2* nocapture %uv, double %unnamed_arg2, %struct.ggPoint3* nocapture %on_light, double* nocapture %invProb) nounwind {
+entry:
+ %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind ; <double> [#uses=4]
+ %1 = fcmp ult double 0.000000e+00, %0 ; <i1> [#uses=1]
+ br i1 %1, label %bb3, label %bb7
+
+bb3: ; preds = %entry
+ %2 = fdiv double 1.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=2]
+ %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
+ %5 = fdiv double 1.000000e+00, %4 ; <double> [#uses=2]
+ %6 = fmul double %3, %5 ; <double> [#uses=2]
+ %7 = fmul double 0.000000e+00, %5 ; <double> [#uses=2]
+ %8 = fmul double %3, %7 ; <double> [#uses=1]
+ %9 = fsub double %8, 0.000000e+00 ; <double> [#uses=1]
+ %10 = fmul double 0.000000e+00, %6 ; <double> [#uses=1]
+ %11 = fsub double 0.000000e+00, %10 ; <double> [#uses=1]
+ %12 = fsub double -0.000000e+00, %11 ; <double> [#uses=1]
+ %13 = fmul double %0, %0 ; <double> [#uses=2]
+ %14 = fsub double %13, 0.000000e+00 ; <double> [#uses=1]
+ %15 = call double @llvm.sqrt.f64(double %14) ; <double> [#uses=1]
+ %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1]
+ %17 = fdiv double %16, %0 ; <double> [#uses=1]
+ %18 = fadd double 0.000000e+00, %17 ; <double> [#uses=1]
+ %19 = call double @acos(double %18) nounwind readonly ; <double> [#uses=1]
+ %20 = load double* null, align 4 ; <double> [#uses=1]
+ %21 = fmul double %20, 0x401921FB54442D18 ; <double> [#uses=1]
+ %22 = call double @sin(double %19) nounwind readonly ; <double> [#uses=2]
+ %23 = fmul double %22, 0.000000e+00 ; <double> [#uses=2]
+ %24 = fmul double %6, %23 ; <double> [#uses=1]
+ %25 = fmul double %7, %23 ; <double> [#uses=1]
+ %26 = call double @sin(double %21) nounwind readonly ; <double> [#uses=1]
+ %27 = fmul double %22, %26 ; <double> [#uses=2]
+ %28 = fmul double %9, %27 ; <double> [#uses=1]
+ %29 = fmul double %27, %12 ; <double> [#uses=1]
+ %30 = fadd double %24, %28 ; <double> [#uses=1]
+ %31 = fadd double 0.000000e+00, %29 ; <double> [#uses=1]
+ %32 = fadd double %25, 0.000000e+00 ; <double> [#uses=1]
+ %33 = fadd double %30, 0.000000e+00 ; <double> [#uses=1]
+ %34 = fadd double %31, 0.000000e+00 ; <double> [#uses=1]
+ %35 = fadd double %32, 0.000000e+00 ; <double> [#uses=1]
+ %36 = bitcast %struct.ggPoint3* %x to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
+ store double %33, double* null, align 8
+ br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
+
+bb5.i.i.i: ; preds = %bb3
+ unreachable
+
+_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
+ %37 = fsub double %13, 0.000000e+00 ; <double> [#uses=0]
+ %38 = fsub double -0.000000e+00, %34 ; <double> [#uses=0]
+ %39 = fsub double -0.000000e+00, %35 ; <double> [#uses=0]
+ ret i32 1
+
+bb7: ; preds = %entry
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
new file mode 100644
index 0000000..a1ce384
--- /dev/null
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm
+
+ %struct.hit_t = type { %struct.v_t, double }
+ %struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
+ %struct.v_t = type { double, double, double }
+
+define fastcc %struct.node_t* @_ZL6createP6node_tii3v_tS1_d(%struct.node_t* %n, i32 %lvl, i32 %dist, i64 %c.0.0, i64 %c.0.1, i64 %c.0.2, i64 %d.0.0, i64 %d.0.1, i64 %d.0.2, double %r) nounwind {
+entry:
+ %0 = getelementptr %struct.node_t* %n, i32 0, i32 1 ; <%struct.hit_t*> [#uses=1]
+ %1 = bitcast %struct.hit_t* %0 to i256* ; <i256*> [#uses=1]
+ store i256 0, i256* %1, align 4
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
new file mode 100644
index 0000000..3526722
--- /dev/null
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm | grep {swi 107}
+
+define i32 @_swilseek(i32) nounwind {
+entry:
+ %ptr = alloca i32 ; <i32*> [#uses=2]
+ store i32 %0, i32* %ptr
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ store i32 0, i32* %retval
+ %res = alloca i32 ; <i32*> [#uses=0]
+ %fh = alloca i32 ; <i32*> [#uses=1]
+ %1 = load i32* %fh ; <i32> [#uses=1]
+ %2 = load i32* %ptr ; <i32> [#uses=1]
+ %3 = call i32 asm "mov r0, $2; mov r1, $3; swi ${1:a}; mov $0, r0", "=r,i,r,r,~{r0},~{r1}"(i32 107, i32 %1, i32 %2) nounwind ; <i32> [#uses=1]
+ store i32 %3, i32* %retval
+ br label %return
+
+return: ; preds = %entry
+ %4 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %4
+}
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
new file mode 100644
index 0000000..f6b3d2c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm
+; PR3795
+
+define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
+entry:
+ %d = alloca { i32, { double, double }* } ; <{ i32, { double, double }* }*> [#uses=2]
+ %x = alloca i32 ; <i32*> [#uses=2]
+ %b = alloca { double, double } ; <{ double, double }*> [#uses=1]
+ store { i32, { double, double }* } %d_arg, { i32, { double, double }* }* %d
+ store i32 %x_arg, i32* %x
+ %tmp = load i32* %x ; <i32> [#uses=1]
+ %tmp1 = getelementptr { i32, { double, double }* }* %d, i32 0, i32 1 ; <{ double, double }**> [#uses=1]
+ %.ptr = load { double, double }** %tmp1 ; <{ double, double }*> [#uses=1]
+ %tmp2 = getelementptr { double, double }* %.ptr, i32 %tmp ; <{ double, double }*> [#uses=1]
+ %tmp3 = load { double, double }* %tmp2 ; <{ double, double }> [#uses=1]
+ store { double, double } %tmp3, { double, double }* %b
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
new file mode 100644
index 0000000..99907fc
--- /dev/null
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %rem_r = frem double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
+ %1 = call i32 (i8*, ...)* @printf(i8* null, double %rem_r) ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
new file mode 100644
index 0000000..05d2f26
--- /dev/null
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm
+
+define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
+entry:
+ %input2 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=2]
+ %shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; <<4 x float>> [#uses=1]
+ %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1]
+ %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1]
+ store <4 x float> %add2, <4 x float>* null, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
new file mode 100644
index 0000000..deb092b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm
+; PR3954
+
+define void @foo(...) nounwind {
+entry:
+ %rr = alloca i32 ; <i32*> [#uses=2]
+ %0 = load i32* %rr ; <i32> [#uses=1]
+ %1 = call i32 asm "nop", "=r,0"(i32 %0) nounwind ; <i32> [#uses=1]
+ store i32 %1, i32* %rr
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
new file mode 100644
index 0000000..670d204
--- /dev/null
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; PR4166
+
+ %"byte[]" = type { i32, i8* }
+ %tango.time.Time.Time = type { i64 }
+
+define fastcc void @t() {
+entry:
+ %tmp28 = call fastcc i1 null(i32* null, %"byte[]" undef, %"byte[]" undef, %tango.time.Time.Time* byval null) ; <i1> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
new file mode 100644
index 0000000..75610ff
--- /dev/null
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; PR4100
[email protected] = external constant [30 x i8] ; <[30 x i8]*> [#uses=1]
+
+define i16 @fn16(i16 %arg0.0, <2 x i16> %arg1, i16 %arg2.0) nounwind {
+entry:
+ store <2 x i16> %arg1, <2 x i16>* null
+ %0 = call i32 (i8*, ...)* @printf(i8* getelementptr ([30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0]
+ ret i16 0
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
new file mode 100644
index 0000000..7046fcc
--- /dev/null
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm
+ %struct.List = type { %struct.List*, i32 }
+@Node5 = external constant %struct.List ; <%struct.List*> [#uses=1]
+@"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb3, %entry
+ %CurL.02 = phi %struct.List* [ @Node5, %entry ], [ %2, %bb3 ] ; <%struct.List*> [#uses=1]
+ %PrevL.01 = phi %struct.List* [ null, %entry ], [ %CurL.02, %bb3 ] ; <%struct.List*> [#uses=1]
+ %0 = icmp eq %struct.List* %PrevL.01, null ; <i1> [#uses=1]
+ br i1 %0, label %bb3, label %bb1
+
+bb1: ; preds = %bb
+ br label %bb3
+
+bb3: ; preds = %bb1, %bb
+ %iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ] ; <i32> [#uses=1]
+ %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
+ %2 = load %struct.List** null, align 4 ; <%struct.List*> [#uses=2]
+ %phitmp = icmp eq %struct.List* %2, null ; <i1> [#uses=1]
+ br i1 %phitmp, label %bb5, label %bb
+
+bb5: ; preds = %bb3
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
new file mode 100644
index 0000000..1e2707f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=thumb | FileCheck %s
+; PR4091
+
+define void @foo(i32 %i, i32* %p) nounwind {
+;CHECK: swp r2, r0, [r1]
+ %asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
new file mode 100644
index 0000000..403e3f65
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
+
+@"\01LC" = external constant [15 x i8] ; <[15 x i8]*> [#uses=1]
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+ br label %bb.i1.i
+
+bb.i1.i: ; preds = %Cos.exit.i.i, %entry
+ br label %bb.i.i.i
+
+bb.i.i.i: ; preds = %bb.i.i.i, %bb.i1.i
+ br i1 undef, label %Cos.exit.i.i, label %bb.i.i.i
+
+Cos.exit.i.i: ; preds = %bb.i.i.i
+ br i1 undef, label %bb2.i.i, label %bb.i1.i
+
+bb2.i.i: ; preds = %Cos.exit.i.i
+ br label %bb3.i.i
+
+bb3.i.i: ; preds = %bb5.i.i, %bb2.i.i
+ br label %bb4.i.i
+
+bb4.i.i: ; preds = %bb4.i.i, %bb3.i.i
+ br i1 undef, label %bb5.i.i, label %bb4.i.i
+
+bb5.i.i: ; preds = %bb4.i.i
+ br i1 undef, label %bb.i, label %bb3.i.i
+
+bb.i: ; preds = %bb.i, %bb5.i.i
+ br i1 undef, label %bb1.outer2.i.i.outer, label %bb.i
+
+bb1.outer2.i.i.outer: ; preds = %Fft.exit.i, %bb5.i12.i, %bb.i
+ br label %bb1.outer2.i.i
+
+bb1.outer2.i.i: ; preds = %bb2.i9.i, %bb1.outer2.i.i.outer
+ br label %bb1.i.i
+
+bb1.i.i: ; preds = %bb1.i.i, %bb1.outer2.i.i
+ br i1 undef, label %bb2.i9.i, label %bb1.i.i
+
+bb2.i9.i: ; preds = %bb1.i.i
+ br i1 undef, label %bb4.i11.i, label %bb1.outer2.i.i
+
+bb4.i11.i: ; preds = %bb4.i11.i, %bb2.i9.i
+ br i1 undef, label %bb5.i12.i, label %bb4.i11.i
+
+bb5.i12.i: ; preds = %bb4.i11.i
+ br i1 undef, label %bb7.i.i, label %bb1.outer2.i.i.outer
+
+bb7.i.i: ; preds = %bb7.i.i, %bb5.i12.i
+ br i1 undef, label %Fft.exit.i, label %bb7.i.i
+
+Fft.exit.i: ; preds = %bb7.i.i
+ br i1 undef, label %bb5.i, label %bb1.outer2.i.i.outer
+
+bb5.i: ; preds = %Fft.exit.i
+ %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
new file mode 100644
index 0000000..98e0023
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -0,0 +1,263 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6
+
+ %struct.anon = type { i16, i16 }
+ %struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
+ %struct.cab_file = type { i32, i16, i64, i8*, i32, i32, i32, %struct.cab_folder*, %struct.cab_file*, %struct.cab_archive*, %struct.cab_state* }
+ %struct.cab_folder = type { i16, i16, %struct.cab_archive*, i64, %struct.cab_folder* }
+ %struct.cab_state = type { i8*, i8*, [38912 x i8], i16, i16, i8*, i16 }
+ %struct.qtm_model = type { i32, i32, %struct.anon* }
+ %struct.qtm_stream = type { i32, i32, i8, i8*, i32, i32, i32, i16, i16, i16, i8, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i8, [42 x i32], [42 x i8], [27 x i8], [27 x i8], %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [25 x %struct.anon], [37 x %struct.anon], [43 x %struct.anon], [28 x %struct.anon], [8 x %struct.anon], %struct.cab_file*, i32 (%struct.cab_file*, i8*, i32)* }
+
+declare fastcc i32 @qtm_read_input(%struct.qtm_stream* nocapture) nounwind
+
+define fastcc i32 @qtm_decompress(%struct.qtm_stream* %qtm, i64 %out_bytes) nounwind {
+entry:
+ br i1 undef, label %bb245, label %bb3
+
+bb3: ; preds = %entry
+ br i1 undef, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ ret i32 undef
+
+bb5: ; preds = %bb3
+ br i1 undef, label %bb245, label %bb14
+
+bb14: ; preds = %bb5
+ br label %bb238
+
+bb28: ; preds = %bb215
+ br label %bb31
+
+bb29: ; preds = %bb31
+ br i1 undef, label %bb31, label %bb32
+
+bb31: ; preds = %bb29, %bb28
+ br i1 undef, label %bb29, label %bb32
+
+bb32: ; preds = %bb31, %bb29
+ br label %bb33
+
+bb33: ; preds = %bb33, %bb32
+ br i1 undef, label %bb34, label %bb33
+
+bb34: ; preds = %bb33
+ br i1 undef, label %bb35, label %bb36
+
+bb35: ; preds = %bb34
+ br label %bb36
+
+bb36: ; preds = %bb46, %bb35, %bb34
+ br i1 undef, label %bb40, label %bb37
+
+bb37: ; preds = %bb36
+ br i1 undef, label %bb77, label %bb60
+
+bb40: ; preds = %bb36
+ br i1 undef, label %bb46, label %bb41
+
+bb41: ; preds = %bb40
+ br i1 undef, label %bb45, label %bb42
+
+bb42: ; preds = %bb41
+ ret i32 undef
+
+bb45: ; preds = %bb41
+ br label %bb46
+
+bb46: ; preds = %bb45, %bb40
+ br label %bb36
+
+bb60: ; preds = %bb60, %bb37
+ br label %bb60
+
+bb77: ; preds = %bb37
+ switch i32 undef, label %bb197 [
+ i32 5, label %bb108
+ i32 6, label %bb138
+ ]
+
+bb108: ; preds = %bb77
+ br label %bb111
+
+bb109: ; preds = %bb111
+ br i1 undef, label %bb111, label %bb112
+
+bb111: ; preds = %bb109, %bb108
+ br i1 undef, label %bb109, label %bb112
+
+bb112: ; preds = %bb111, %bb109
+ br label %bb113
+
+bb113: ; preds = %bb113, %bb112
+ br i1 undef, label %bb114, label %bb113
+
+bb114: ; preds = %bb113
+ br i1 undef, label %bb115, label %bb116
+
+bb115: ; preds = %bb114
+ br label %bb116
+
+bb116: ; preds = %bb115, %bb114
+ br i1 undef, label %bb120, label %bb117
+
+bb117: ; preds = %bb116
+ br label %bb136
+
+bb120: ; preds = %bb116
+ ret i32 undef
+
+bb128: ; preds = %bb136
+ br i1 undef, label %bb134, label %bb129
+
+bb129: ; preds = %bb128
+ br i1 undef, label %bb133, label %bb130
+
+bb130: ; preds = %bb129
+ br i1 undef, label %bb132, label %bb131
+
+bb131: ; preds = %bb130
+ ret i32 undef
+
+bb132: ; preds = %bb130
+ br label %bb133
+
+bb133: ; preds = %bb132, %bb129
+ br label %bb134
+
+bb134: ; preds = %bb133, %bb128
+ br label %bb136
+
+bb136: ; preds = %bb134, %bb117
+ br i1 undef, label %bb198, label %bb128
+
+bb138: ; preds = %bb77
+ %0 = trunc i32 undef to i16 ; <i16> [#uses=1]
+ br label %bb141
+
+bb139: ; preds = %bb141
+ %scevgep441442881 = load i16* undef ; <i16> [#uses=1]
+ %1 = icmp ugt i16 %scevgep441442881, %0 ; <i1> [#uses=1]
+ br i1 %1, label %bb141, label %bb142
+
+bb141: ; preds = %bb139, %bb138
+ br i1 undef, label %bb139, label %bb142
+
+bb142: ; preds = %bb141, %bb139
+ br label %bb143
+
+bb143: ; preds = %bb143, %bb142
+ br i1 undef, label %bb144, label %bb143
+
+bb144: ; preds = %bb143
+ br i1 undef, label %bb145, label %bb146
+
+bb145: ; preds = %bb144
+ unreachable
+
+bb146: ; preds = %bb156, %bb144
+ br i1 undef, label %bb150, label %bb147
+
+bb147: ; preds = %bb146
+ br i1 undef, label %bb157, label %bb148
+
+bb148: ; preds = %bb147
+ br i1 undef, label %bb149, label %bb157
+
+bb149: ; preds = %bb148
+ br label %bb150
+
+bb150: ; preds = %bb149, %bb146
+ br i1 undef, label %bb156, label %bb152
+
+bb152: ; preds = %bb150
+ unreachable
+
+bb156: ; preds = %bb150
+ br label %bb146
+
+bb157: ; preds = %bb148, %bb147
+ br i1 undef, label %bb167, label %bb160
+
+bb160: ; preds = %bb157
+ ret i32 undef
+
+bb167: ; preds = %bb157
+ br label %bb170
+
+bb168: ; preds = %bb170
+ br i1 undef, label %bb170, label %bb171
+
+bb170: ; preds = %bb168, %bb167
+ br i1 undef, label %bb168, label %bb171
+
+bb171: ; preds = %bb170, %bb168
+ br label %bb172
+
+bb172: ; preds = %bb172, %bb171
+ br i1 undef, label %bb173, label %bb172
+
+bb173: ; preds = %bb172
+ br i1 undef, label %bb174, label %bb175
+
+bb174: ; preds = %bb173
+ unreachable
+
+bb175: ; preds = %bb179, %bb173
+ br i1 undef, label %bb179, label %bb176
+
+bb176: ; preds = %bb175
+ br i1 undef, label %bb186, label %bb177
+
+bb177: ; preds = %bb176
+ br i1 undef, label %bb178, label %bb186
+
+bb178: ; preds = %bb177
+ br label %bb179
+
+bb179: ; preds = %bb178, %bb175
+ br label %bb175
+
+bb186: ; preds = %bb177, %bb176
+ br label %bb195
+
+bb187: ; preds = %bb195
+ br i1 undef, label %bb193, label %bb189
+
+bb189: ; preds = %bb187
+ %2 = tail call fastcc i32 @qtm_read_input(%struct.qtm_stream* %qtm) nounwind ; <i32> [#uses=0]
+ ret i32 undef
+
+bb193: ; preds = %bb187
+ br label %bb195
+
+bb195: ; preds = %bb193, %bb186
+ br i1 undef, label %bb198, label %bb187
+
+bb197: ; preds = %bb77
+ ret i32 -124
+
+bb198: ; preds = %bb195, %bb136
+ br i1 undef, label %bb211.preheader, label %bb214
+
+bb211.preheader: ; preds = %bb198
+ br label %bb211
+
+bb211: ; preds = %bb211, %bb211.preheader
+ br i1 undef, label %bb214, label %bb211
+
+bb214: ; preds = %bb211, %bb198
+ br label %bb215
+
+bb215: ; preds = %bb238, %bb214
+ br i1 undef, label %bb28, label %bb216
+
+bb216: ; preds = %bb215
+ br label %bb238
+
+bb238: ; preds = %bb216, %bb14
+ br label %bb215
+
+bb245: ; preds = %bb5, %entry
+ ret i32 undef
+}
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
new file mode 100644
index 0000000..27888d7
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin
+
+ type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
+ type { i32, %struct.D_Reduction** } ; type %1
+ type { i32, %struct.D_RightEpsilonHint* } ; type %2
+ type { i32, %struct.D_ErrorRecoveryHint* } ; type %3
+ type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] } ; type %4
+ %struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
+ %struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
+ %struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
+ %struct.D_Pass = type { i8*, i32, i32, i32 }
+ %struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
+ %struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
+ %struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
+ %struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
+ %struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
+ %struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
+ %struct.D_SymHash = type { i32, i32, %0 }
+ %struct.D_Symbol = type { i32, i8*, i32 }
+ %struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
+ %struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
+ %struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
+ %struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
+ %struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
+ %struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
+ %struct.Shift = type { %struct.SNode*, %struct.Shift* }
+ %struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
+ %struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
+ %struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
+ %struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
+ %struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
+ %struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+
+define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {
+entry:
+ store i8* undef, i8** undef, align 4
+ %0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6 ; <%struct.d_loc_t*> [#uses=1]
+ %1 = bitcast %struct.d_loc_t* %0 to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
+ br label %bb10
+
+bb10: ; preds = %bb30, %bb29, %bb26, %entry
+ br i1 undef, label %bb18, label %bb20
+
+bb18: ; preds = %bb10
+ br i1 undef, label %bb20, label %bb19
+
+bb19: ; preds = %bb18
+ br label %bb20
+
+bb20: ; preds = %bb19, %bb18, %bb10
+ br i1 undef, label %bb21, label %bb22
+
+bb21: ; preds = %bb20
+ unreachable
+
+bb22: ; preds = %bb20
+ br i1 undef, label %bb24, label %bb26
+
+bb24: ; preds = %bb22
+ unreachable
+
+bb26: ; preds = %bb22
+ br i1 undef, label %bb10, label %bb29
+
+bb29: ; preds = %bb26
+ br i1 undef, label %bb10, label %bb30
+
+bb30: ; preds = %bb29
+ br i1 undef, label %bb31, label %bb10
+
+bb31: ; preds = %bb30
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
new file mode 100644
index 0000000..a0f903b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -0,0 +1,344 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin
+
+ %struct.term = type { i32, i32, i32 }
+
+declare fastcc i8* @memory_Malloc(i32) nounwind
+
+define fastcc %struct.term* @t1() nounwind {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret %struct.term* undef
+
+bb1: ; preds = %entry
+ %0 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0]
+ %1 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0]
+ ret %struct.term* undef
+}
+
+
+define i32 @t2(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+ br label %bb6.i8
+
+bb6.i8: ; preds = %memory_CalculateRealBlockSize1374.exit.i, %entry
+ br i1 undef, label %memory_CalculateRealBlockSize1374.exit.i, label %bb.i.i9
+
+bb.i.i9: ; preds = %bb6.i8
+ br label %memory_CalculateRealBlockSize1374.exit.i
+
+memory_CalculateRealBlockSize1374.exit.i: ; preds = %bb.i.i9, %bb6.i8
+ %0 = phi i32 [ undef, %bb.i.i9 ], [ undef, %bb6.i8 ] ; <i32> [#uses=2]
+ store i32 %0, i32* undef, align 4
+ %1 = urem i32 8184, %0 ; <i32> [#uses=1]
+ %2 = sub i32 8188, %1 ; <i32> [#uses=1]
+ store i32 %2, i32* undef, align 4
+ br i1 undef, label %memory_Init.exit, label %bb6.i8
+
+memory_Init.exit: ; preds = %memory_CalculateRealBlockSize1374.exit.i
+ br label %bb.i.i
+
+bb.i.i: ; preds = %bb.i.i, %memory_Init.exit
+ br i1 undef, label %symbol_Init.exit, label %bb.i.i
+
+symbol_Init.exit: ; preds = %bb.i.i
+ br label %bb.i.i67
+
+bb.i.i67: ; preds = %bb.i.i67, %symbol_Init.exit
+ br i1 undef, label %symbol_CreatePrecedence3522.exit, label %bb.i.i67
+
+symbol_CreatePrecedence3522.exit: ; preds = %bb.i.i67
+ br label %bb.i.i8.i
+
+bb.i.i8.i: ; preds = %bb.i.i8.i, %symbol_CreatePrecedence3522.exit
+ br i1 undef, label %cont_Create.exit9.i, label %bb.i.i8.i
+
+cont_Create.exit9.i: ; preds = %bb.i.i8.i
+ br label %bb.i.i.i72
+
+bb.i.i.i72: ; preds = %bb.i.i.i72, %cont_Create.exit9.i
+ br i1 undef, label %cont_Init.exit, label %bb.i.i.i72
+
+cont_Init.exit: ; preds = %bb.i.i.i72
+ br label %bb.i103
+
+bb.i103: ; preds = %bb.i103, %cont_Init.exit
+ br i1 undef, label %subs_Init.exit, label %bb.i103
+
+subs_Init.exit: ; preds = %bb.i103
+ br i1 undef, label %bb1.i.i.i80, label %cc_Init.exit
+
+bb1.i.i.i80: ; preds = %subs_Init.exit
+ unreachable
+
+cc_Init.exit: ; preds = %subs_Init.exit
+ br label %bb.i.i375
+
+bb.i.i375: ; preds = %bb.i.i375, %cc_Init.exit
+ br i1 undef, label %bb.i439, label %bb.i.i375
+
+bb.i439: ; preds = %bb.i439, %bb.i.i375
+ br i1 undef, label %opts_DeclareSPASSFlagsAsOptions.exit, label %bb.i439
+
+opts_DeclareSPASSFlagsAsOptions.exit: ; preds = %bb.i439
+ br i1 undef, label %opts_TranslateShortOptDeclarations.exit.i, label %bb.i.i82
+
+bb.i.i82: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit
+ unreachable
+
+opts_TranslateShortOptDeclarations.exit.i: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit
+ br i1 undef, label %list_Length.exit.i.thread.i, label %bb.i.i4.i
+
+list_Length.exit.i.thread.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i
+ br i1 undef, label %bb18.i.i.i, label %bb26.i.i.i
+
+bb.i.i4.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i
+ unreachable
+
+bb18.i.i.i: ; preds = %list_Length.exit.i.thread.i
+ unreachable
+
+bb26.i.i.i: ; preds = %list_Length.exit.i.thread.i
+ br i1 undef, label %bb27.i142, label %opts_GetOptLongOnly.exit.thread97.i
+
+opts_GetOptLongOnly.exit.thread97.i: ; preds = %bb26.i.i.i
+ br label %bb27.i142
+
+bb27.i142: ; preds = %opts_GetOptLongOnly.exit.thread97.i, %bb26.i.i.i
+ br label %bb1.i3.i
+
+bb1.i3.i: ; preds = %bb1.i3.i, %bb27.i142
+ br i1 undef, label %opts_FreeLongOptsArray.exit.i, label %bb1.i3.i
+
+opts_FreeLongOptsArray.exit.i: ; preds = %bb1.i3.i
+ br label %bb.i443
+
+bb.i443: ; preds = %bb.i443, %opts_FreeLongOptsArray.exit.i
+ br i1 undef, label %flag_InitStoreByDefaults3542.exit, label %bb.i443
+
+flag_InitStoreByDefaults3542.exit: ; preds = %bb.i443
+ br i1 undef, label %bb6.i449, label %bb.i503
+
+bb6.i449: ; preds = %flag_InitStoreByDefaults3542.exit
+ unreachable
+
+bb.i503: ; preds = %bb.i503, %flag_InitStoreByDefaults3542.exit
+ br i1 undef, label %flag_CleanStore3464.exit, label %bb.i503
+
+flag_CleanStore3464.exit: ; preds = %bb.i503
+ br i1 undef, label %bb1.i81.i.preheader, label %bb.i173
+
+bb.i173: ; preds = %flag_CleanStore3464.exit
+ unreachable
+
+bb1.i81.i.preheader: ; preds = %flag_CleanStore3464.exit
+ br i1 undef, label %bb1.i64.i.preheader, label %bb5.i179
+
+bb5.i179: ; preds = %bb1.i81.i.preheader
+ unreachable
+
+bb1.i64.i.preheader: ; preds = %bb1.i81.i.preheader
+ br i1 undef, label %dfg_DeleteProofList.exit.i, label %bb.i9.i
+
+bb.i9.i: ; preds = %bb1.i64.i.preheader
+ unreachable
+
+dfg_DeleteProofList.exit.i: ; preds = %bb1.i64.i.preheader
+ br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i
+
+bb.i.i62.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i
+ br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i
+
+term_DeleteTermList621.exit.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i
+ br i1 undef, label %dfg_DFGParser.exit, label %bb.i.i211
+
+bb.i.i211: ; preds = %term_DeleteTermList621.exit.i
+ unreachable
+
+dfg_DFGParser.exit: ; preds = %term_DeleteTermList621.exit.i
+ br label %bb.i513
+
+bb.i513: ; preds = %bb2.i516, %dfg_DFGParser.exit
+ br i1 undef, label %bb2.i516, label %bb1.i514
+
+bb1.i514: ; preds = %bb.i513
+ unreachable
+
+bb2.i516: ; preds = %bb.i513
+ br i1 undef, label %bb.i509, label %bb.i513
+
+bb.i509: ; preds = %bb.i509, %bb2.i516
+ br i1 undef, label %symbol_TransferPrecedence3468.exit511, label %bb.i509
+
+symbol_TransferPrecedence3468.exit511: ; preds = %bb.i509
+ br i1 undef, label %bb20, label %bb21
+
+bb20: ; preds = %symbol_TransferPrecedence3468.exit511
+ unreachable
+
+bb21: ; preds = %symbol_TransferPrecedence3468.exit511
+ br i1 undef, label %cnf_Init.exit, label %bb.i498
+
+bb.i498: ; preds = %bb21
+ unreachable
+
+cnf_Init.exit: ; preds = %bb21
+ br i1 undef, label %bb23, label %bb22
+
+bb22: ; preds = %cnf_Init.exit
+ br i1 undef, label %bb2.i.i496, label %bb.i.i494
+
+bb.i.i494: ; preds = %bb22
+ unreachable
+
+bb2.i.i496: ; preds = %bb22
+ unreachable
+
+bb23: ; preds = %cnf_Init.exit
+ br i1 undef, label %bb28, label %bb24
+
+bb24: ; preds = %bb23
+ unreachable
+
+bb28: ; preds = %bb23
+ br i1 undef, label %bb31, label %bb29
+
+bb29: ; preds = %bb28
+ unreachable
+
+bb31: ; preds = %bb28
+ br i1 undef, label %bb34, label %bb32
+
+bb32: ; preds = %bb31
+ unreachable
+
+bb34: ; preds = %bb31
+ br i1 undef, label %bb83, label %bb66
+
+bb66: ; preds = %bb34
+ unreachable
+
+bb83: ; preds = %bb34
+ br i1 undef, label %bb2.i1668, label %bb.i1667
+
+bb.i1667: ; preds = %bb83
+ unreachable
+
+bb2.i1668: ; preds = %bb83
+ br i1 undef, label %bb5.i205, label %bb3.i204
+
+bb3.i204: ; preds = %bb2.i1668
+ unreachable
+
+bb5.i205: ; preds = %bb2.i1668
+ br i1 undef, label %bb.i206.i, label %ana_AnalyzeSortStructure.exit.i
+
+bb.i206.i: ; preds = %bb5.i205
+ br i1 undef, label %bb1.i207.i, label %ana_AnalyzeSortStructure.exit.i
+
+bb1.i207.i: ; preds = %bb.i206.i
+ br i1 undef, label %bb25.i1801.thread, label %bb.i1688
+
+bb.i1688: ; preds = %bb1.i207.i
+ unreachable
+
+bb25.i1801.thread: ; preds = %bb1.i207.i
+ unreachable
+
+ana_AnalyzeSortStructure.exit.i: ; preds = %bb.i206.i, %bb5.i205
+ br i1 undef, label %bb7.i207, label %bb.i1806
+
+bb.i1806: ; preds = %ana_AnalyzeSortStructure.exit.i
+ br i1 undef, label %bb2.i.i.i1811, label %bb.i.i.i1809
+
+bb.i.i.i1809: ; preds = %bb.i1806
+ unreachable
+
+bb2.i.i.i1811: ; preds = %bb.i1806
+ unreachable
+
+bb7.i207: ; preds = %ana_AnalyzeSortStructure.exit.i
+ br i1 undef, label %bb9.i, label %bb8.i
+
+bb8.i: ; preds = %bb7.i207
+ unreachable
+
+bb9.i: ; preds = %bb7.i207
+ br i1 undef, label %bb23.i, label %bb26.i
+
+bb23.i: ; preds = %bb9.i
+ br i1 undef, label %bb25.i, label %bb24.i
+
+bb24.i: ; preds = %bb23.i
+ br i1 undef, label %sort_SortTheoryIsTrivial.exit.i, label %bb.i2093
+
+bb.i2093: ; preds = %bb.i2093, %bb24.i
+ br label %bb.i2093
+
+sort_SortTheoryIsTrivial.exit.i: ; preds = %bb24.i
+ br i1 undef, label %bb3.i2141, label %bb4.i2143
+
+bb3.i2141: ; preds = %sort_SortTheoryIsTrivial.exit.i
+ unreachable
+
+bb4.i2143: ; preds = %sort_SortTheoryIsTrivial.exit.i
+ br i1 undef, label %bb8.i2178, label %bb5.i2144
+
+bb5.i2144: ; preds = %bb4.i2143
+ br i1 undef, label %bb7.i2177, label %bb1.i28.i
+
+bb1.i28.i: ; preds = %bb5.i2144
+ br i1 undef, label %bb4.i43.i, label %bb2.i.i2153
+
+bb2.i.i2153: ; preds = %bb1.i28.i
+ br i1 undef, label %bb4.i.i33.i, label %bb.i.i30.i
+
+bb.i.i30.i: ; preds = %bb2.i.i2153
+ unreachable
+
+bb4.i.i33.i: ; preds = %bb2.i.i2153
+ br i1 undef, label %bb9.i.i36.i, label %bb5.i.i34.i
+
+bb5.i.i34.i: ; preds = %bb4.i.i33.i
+ unreachable
+
+bb9.i.i36.i: ; preds = %bb4.i.i33.i
+ br i1 undef, label %bb14.i.i.i2163, label %bb10.i.i37.i
+
+bb10.i.i37.i: ; preds = %bb9.i.i36.i
+ unreachable
+
+bb14.i.i.i2163: ; preds = %bb9.i.i36.i
+ br i1 undef, label %sort_LinkPrint.exit.i.i, label %bb15.i.i.i2164
+
+bb15.i.i.i2164: ; preds = %bb14.i.i.i2163
+ unreachable
+
+sort_LinkPrint.exit.i.i: ; preds = %bb14.i.i.i2163
+ unreachable
+
+bb4.i43.i: ; preds = %bb1.i28.i
+ unreachable
+
+bb7.i2177: ; preds = %bb5.i2144
+ unreachable
+
+bb8.i2178: ; preds = %bb4.i2143
+ br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185.preheader
+
+bb.i5.i2185.preheader: ; preds = %bb8.i2178
+ br label %bb.i5.i2185
+
+bb.i5.i2185: ; preds = %bb.i5.i2185, %bb.i5.i2185.preheader
+ br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185
+
+sort_ApproxStaticSortTheory.exit: ; preds = %bb.i5.i2185, %bb8.i2178
+ br label %bb25.i
+
+bb25.i: ; preds = %sort_ApproxStaticSortTheory.exit, %bb23.i
+ unreachable
+
+bb26.i: ; preds = %bb9.i
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
new file mode 100644
index 0000000..b56b684
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; PR4419
+
+define float @__ieee754_acosf(float %x) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ ret float undef
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+ ret float undef
+
+bb6: ; preds = %bb4
+ br i1 undef, label %bb11, label %bb12
+
+bb11: ; preds = %bb6
+ %0 = tail call float @__ieee754_sqrtf(float undef) nounwind ; <float> [#uses=1]
+ %1 = fmul float %0, -2.000000e+00 ; <float> [#uses=1]
+ %2 = fadd float %1, 0x400921FB40000000 ; <float> [#uses=1]
+ ret float %2
+
+bb12: ; preds = %bb6
+ ret float undef
+}
+
+declare float @__ieee754_sqrtf(float)
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
new file mode 100644
index 0000000..e068be7
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin
+
+ %struct.rtunion = type { i64 }
+ %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
+
+define arm_apcscc void @simplify_unary_real(i8* nocapture %p) nounwind {
+entry:
+ %tmp121 = load i64* null, align 4 ; <i64> [#uses=1]
+ %0 = getelementptr %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0 ; <i64*> [#uses=1]
+ %tmp122 = load i64* %0, align 4 ; <i64> [#uses=1]
+ %1 = zext i64 undef to i192 ; <i192> [#uses=2]
+ %2 = zext i64 %tmp121 to i192 ; <i192> [#uses=1]
+ %3 = shl i192 %2, 64 ; <i192> [#uses=2]
+ %4 = zext i64 %tmp122 to i192 ; <i192> [#uses=1]
+ %5 = shl i192 %4, 128 ; <i192> [#uses=1]
+ %6 = or i192 %3, %1 ; <i192> [#uses=1]
+ %7 = or i192 %6, %5 ; <i192> [#uses=2]
+ switch i32 undef, label %bb82 [
+ i32 77, label %bb38
+ i32 129, label %bb21
+ i32 130, label %bb20
+ ]
+
+bb20: ; preds = %entry
+ ret void
+
+bb21: ; preds = %entry
+ br i1 undef, label %bb82, label %bb29
+
+bb29: ; preds = %bb21
+ %tmp18.i = and i192 %3, 1208907372870555465154560 ; <i192> [#uses=1]
+ %mask.i = or i192 %tmp18.i, %1 ; <i192> [#uses=1]
+ %mask41.i = or i192 %mask.i, 0 ; <i192> [#uses=1]
+ br label %bb82
+
+bb38: ; preds = %entry
+ br label %bb82
+
+bb82: ; preds = %bb38, %bb29, %bb21, %entry
+ %d.0 = phi i192 [ %mask41.i, %bb29 ], [ undef, %bb38 ], [ %7, %entry ], [ %7, %bb21 ] ; <i192> [#uses=1]
+ %tmp51 = trunc i192 %d.0 to i64 ; <i64> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
new file mode 100644
index 0000000..17efe00
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@nn = external global i32 ; <i32*> [#uses=1]
+@al_len = external global i32 ; <i32*> [#uses=2]
+@no_mat = external global i32 ; <i32*> [#uses=2]
+@no_mis = external global i32 ; <i32*> [#uses=2]
+@"\01LC12" = external constant [29 x i8], align 1 ; <[29 x i8]*> [#uses=1]
+@"\01LC16" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1]
+@"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ %0 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %1 = add i32 %0, 1 ; <i32> [#uses=2]
+ store i32 %1, i32* undef, align 4
+ %2 = load i32* undef, align 4 ; <i32> [#uses=1]
+ store i32 %2, i32* @nn, align 4
+ store i32 0, i32* @al_len, align 4
+ store i32 0, i32* @no_mat, align 4
+ store i32 0, i32* @no_mis, align 4
+ %3 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1]
+ tail call arm_apcscc void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind
+ %4 = sitofp i32 undef to double ; <double> [#uses=1]
+ %5 = fdiv double %4, 1.000000e+01 ; <double> [#uses=1]
+ %6 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0]
+ %7 = load i32* @al_len, align 4 ; <i32> [#uses=1]
+ %8 = load i32* @no_mat, align 4 ; <i32> [#uses=1]
+ %9 = load i32* @no_mis, align 4 ; <i32> [#uses=1]
+ %10 = sub i32 %7, %8 ; <i32> [#uses=1]
+ %11 = sub i32 %10, %9 ; <i32> [#uses=1]
+ %12 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0]
+ %13 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0]
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ br i1 undef, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ br i1 undef, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ br i1 undef, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ br i1 undef, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ br i1 undef, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ br i1 undef, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
new file mode 100644
index 0000000..f520be3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@no_mat = external global i32 ; <i32*> [#uses=1]
+@no_mis = external global i32 ; <i32*> [#uses=2]
+@"\01LC11" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1]
+@"\01LC15" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1]
+@"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ %0 = load i32* undef, align 4 ; <i32> [#uses=3]
+ %1 = add i32 %0, 1 ; <i32> [#uses=2]
+ store i32 %1, i32* undef, align 4
+ %2 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %3 = sub i32 %2, %0 ; <i32> [#uses=1]
+ store i32 0, i32* @no_mat, align 4
+ store i32 0, i32* @no_mis, align 4
+ %4 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1]
+ tail call arm_apcscc void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind
+ %5 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0]
+ %6 = load i32* @no_mis, align 4 ; <i32> [#uses=1]
+ %7 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0]
+ %8 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0]
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ br i1 undef, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ br i1 undef, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ br i1 undef, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ br i1 undef, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ br i1 undef, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ %indvar11 = phi i32 [ 0, %bb8 ], [ %tmp13, %bb11 ] ; <i32> [#uses=2]
+ %tmp13 = add i32 %indvar11, 1 ; <i32> [#uses=2]
+ %count.0 = sub i32 undef, %indvar11 ; <i32> [#uses=0]
+ br i1 undef, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
new file mode 100644
index 0000000..eee6ff9
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@JJ = external global i32* ; <i32**> [#uses=1]
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ %0 = load i32** @JJ, align 4 ; <i32*> [#uses=1]
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ %cflag.0.i = phi i16 [ 0, %bb228.i ], [ 0, %bb74.i ], [ 1, %bb138.i ] ; <i16> [#uses=1]
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ %.not297 = icmp ne i16 %cflag.0.i, 0 ; <i1> [#uses=1]
+ %or.cond298 = and i1 undef, %.not297 ; <i1> [#uses=1]
+ br i1 %or.cond298, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1]
+ %cflag.418.i = phi i16 [ 0, %bb.nph.i98 ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=1]
+ %pj.317.i = phi i32 [ undef, %bb.nph.i98 ], [ %8, %bb218.i ] ; <i32> [#uses=1]
+ %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %7, %bb218.i ] ; <i32> [#uses=1]
+ %fj.515.i = phi i32 [ undef, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=3]
+ %ci.910.i = phi i32 [ undef, %bb.nph.i98 ], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2]
+ %i.121.i = sub i32 undef, undef ; <i32> [#uses=3]
+ %tmp105.i = sub i32 undef, undef ; <i32> [#uses=1]
+ %1 = sub i32 %c.1020.i, undef ; <i32> [#uses=0]
+ br i1 undef, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ %2 = icmp slt i32 %fj.515.i, undef ; <i1> [#uses=1]
+ %3 = and i1 %2, undef ; <i1> [#uses=1]
+ br i1 %3, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ %fi.5.i = phi i32 [ undef, %bb167.i ], [ %ci.910.i, %bb158.i ], [ undef, %bb160.i ], [ %ci.910.i, %bb161.i ], [ undef, %bb163.i ] ; <i32> [#uses=1]
+ %fj.4.i = phi i32 [ undef, %bb167.i ], [ undef, %bb158.i ], [ %fj.515.i, %bb160.i ], [ undef, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
+ %scevgep88.i = getelementptr i32* null, i32 %i.121.i ; <i32*> [#uses=3]
+ %4 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=2]
+ %scevgep89.i = getelementptr i32* %0, i32 %i.121.i ; <i32*> [#uses=3]
+ %5 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ %ci.10.i = select i1 undef, i32 %pi.316.i, i32 %i.121.i ; <i32> [#uses=0]
+ %cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0]
+ %6 = icmp slt i32 undef, 0 ; <i1> [#uses=3]
+ %ci.12.i = select i1 %6, i32 %fi.5.i, i32 %4 ; <i32> [#uses=2]
+ %cj.11.i100 = select i1 %6, i32 %fj.4.i, i32 %5 ; <i32> [#uses=1]
+ %c.14.i = select i1 %6, i32 0, i32 undef ; <i32> [#uses=2]
+ store i32 %c.14.i, i32* undef, align 4
+ %7 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ %8 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ store i32 %ci.12.i, i32* %scevgep88.i, align 4
+ store i32 %cj.11.i100, i32* %scevgep89.i, align 4
+ store i32 %4, i32* undef, align 4
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ %cflag.3.i = phi i16 [ %cflag.418.i, %bb168.i ], [ 1, %bb211.i ] ; <i16> [#uses=2]
+ %9 = icmp slt i32 %tmp105.i, undef ; <i1> [#uses=1]
+ br i1 %9, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ %cflag.4.lcssa.i = phi i16 [ 0, %bb153.i ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=0]
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ br i1 undef, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ br i1 undef, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
new file mode 100644
index 0000000..93c92b1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@r = external global i32 ; <i32*> [#uses=1]
+@qr = external global i32 ; <i32*> [#uses=1]
+@II = external global i32* ; <i32**> [#uses=1]
+@no_mis = external global i32 ; <i32*> [#uses=1]
+@name1 = external global i8* ; <i8**> [#uses=1]
+
+declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ %0 = load i8** @name1, align 4 ; <i8*> [#uses=0]
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ store i32 0, i32* @no_mis, align 4
+ %1 = getelementptr i8* %A, i32 0 ; <i8*> [#uses=1]
+ %2 = getelementptr i8* %B, i32 0 ; <i8*> [#uses=1]
+ tail call arm_apcscc void @diff(i8* %1, i8* %2, i32 undef, i32 undef, i32 undef, i32 undef) nounwind
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ %3 = load i32** @II, align 4 ; <i32*> [#uses=1]
+ %4 = load i32* @r, align 4 ; <i32> [#uses=1]
+ %5 = load i32* @qr, align 4 ; <i32> [#uses=1]
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ br i1 undef, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ %6 = add i32 undef, -1 ; <i32> [#uses=3]
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1]
+ %f.419.i = phi i32 [ undef, %bb.nph.i98 ], [ %f.5.i, %bb218.i ] ; <i32> [#uses=1]
+ %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %10, %bb218.i ] ; <i32> [#uses=1]
+ %fj.515.i = phi i32 [ %6, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=2]
+ %fi.614.i = phi i32 [ undef, %bb.nph.i98 ], [ %fi.5.i, %bb218.i ] ; <i32> [#uses=3]
+ %cj.811.i = phi i32 [ %6, %bb.nph.i98 ], [ %cj.11.i100, %bb218.i ] ; <i32> [#uses=3]
+ %ci.910.i = phi i32 [ undef, %bb.nph.i98 ], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2]
+ %7 = sub i32 %f.419.i, %4 ; <i32> [#uses=5]
+ %8 = sub i32 %c.1020.i, %5 ; <i32> [#uses=2]
+ %9 = icmp slt i32 %7, %8 ; <i1> [#uses=1]
+ br i1 %9, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ br i1 undef, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ %fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2]
+ %fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
+ %f.5.i = phi i32 [ %7, %bb167.i ], [ %8, %bb158.i ], [ %7, %bb160.i ], [ %7, %bb161.i ], [ %7, %bb163.i ] ; <i32> [#uses=2]
+ %scevgep88.i = getelementptr i32* %3, i32 undef ; <i32*> [#uses=1]
+ %ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0]
+ %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=1]
+ %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=1]
+ %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1]
+ %10 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ br i1 undef, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ %11 = getelementptr i32* null, i32 %6 ; <i32*> [#uses=1]
+ store i32 undef, i32* %11, align 4
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ br i1 undef, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ br i1 undef, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
new file mode 100644
index 0000000..277283d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@XX = external global i32* ; <i32**> [#uses=1]
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ %0 = load i32** @XX, align 4 ; <i32*> [#uses=0]
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ br i1 undef, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ %1 = sub i32 undef, undef ; <i32> [#uses=4]
+ %2 = sub i32 undef, undef ; <i32> [#uses=1]
+ br i1 undef, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ br i1 undef, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ %f.5.i = phi i32 [ %1, %bb167.i ], [ %2, %bb158.i ], [ %1, %bb160.i ], [ %1, %bb161.i ], [ %1, %bb163.i ] ; <i32> [#uses=1]
+ %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1]
+ store i32 %c.14.i, i32* undef, align 4
+ store i32 undef, i32* null, align 4
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ br i1 undef, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ br i1 undef, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ br i1 undef, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
new file mode 100644
index 0000000..5c0e5fa
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -0,0 +1,130 @@
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
+
+@qr = external global i32 ; <i32*> [#uses=1]
+@II = external global i32* ; <i32**> [#uses=1]
+@JJ = external global i32* ; <i32**> [#uses=1]
+
+define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind {
+entry:
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br label %bb
+
+bb5: ; preds = %entry
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb6, %bb5
+ br i1 undef, label %bb8, label %bb6
+
+bb8: ; preds = %bb6, %bb5
+ br label %bb15
+
+bb9: ; preds = %bb15
+ br i1 undef, label %bb10, label %bb11
+
+bb10: ; preds = %bb9
+ unreachable
+
+bb11: ; preds = %bb9
+ br i1 undef, label %bb15, label %bb12
+
+bb12: ; preds = %bb11
+ %0 = load i32** @II, align 4 ; <i32*> [#uses=1]
+ %1 = load i32** @JJ, align 4 ; <i32*> [#uses=1]
+ %2 = load i32* @qr, align 4 ; <i32> [#uses=1]
+ br label %bb228.i
+
+bb74.i: ; preds = %bb228.i
+ br i1 undef, label %bb138.i, label %bb145.i
+
+bb138.i: ; preds = %bb74.i
+ br label %bb145.i
+
+bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i
+ %cflag.0.i = phi i16 [ %cflag.1.i, %bb228.i ], [ %cflag.1.i, %bb74.i ], [ 1, %bb138.i ] ; <i16> [#uses=2]
+ br i1 undef, label %bb146.i, label %bb151.i
+
+bb146.i: ; preds = %bb145.i
+ br i1 undef, label %bb228.i, label %bb151.i
+
+bb151.i: ; preds = %bb146.i, %bb145.i
+ %.not297 = icmp ne i16 %cflag.0.i, 0 ; <i1> [#uses=1]
+ %or.cond298 = and i1 undef, %.not297 ; <i1> [#uses=1]
+ br i1 %or.cond298, label %bb153.i, label %bb228.i
+
+bb153.i: ; preds = %bb151.i
+ br i1 undef, label %bb220.i, label %bb.nph.i98
+
+bb.nph.i98: ; preds = %bb153.i
+ br label %bb158.i
+
+bb158.i: ; preds = %bb218.i, %bb.nph.i98
+ %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1]
+ %f.419.i = phi i32 [ undef, %bb.nph.i98 ], [ %f.5.i, %bb218.i ] ; <i32> [#uses=1]
+ %cflag.418.i = phi i16 [ 0, %bb.nph.i98 ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=1]
+ %pj.317.i = phi i32 [ undef, %bb.nph.i98 ], [ %7, %bb218.i ] ; <i32> [#uses=1]
+ %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %6, %bb218.i ] ; <i32> [#uses=1]
+ %fj.515.i = phi i32 [ undef, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=2]
+ %fi.614.i = phi i32 [ undef, %bb.nph.i98 ], [ %fi.5.i, %bb218.i ] ; <i32> [#uses=3]
+ %cj.811.i = phi i32 [ undef, %bb.nph.i98 ], [ %cj.11.i100, %bb218.i ] ; <i32> [#uses=3]
+ %ci.910.i = phi i32 [ undef, %bb.nph.i98 ], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2]
+ %3 = sub i32 %f.419.i, 0 ; <i32> [#uses=5]
+ %4 = sub i32 %c.1020.i, %2 ; <i32> [#uses=2]
+ %5 = icmp slt i32 %3, %4 ; <i1> [#uses=1]
+ br i1 %5, label %bb168.i, label %bb160.i
+
+bb160.i: ; preds = %bb158.i
+ br i1 undef, label %bb161.i, label %bb168.i
+
+bb161.i: ; preds = %bb160.i
+ br i1 undef, label %bb168.i, label %bb163.i
+
+bb163.i: ; preds = %bb161.i
+ br i1 undef, label %bb167.i, label %bb168.i
+
+bb167.i: ; preds = %bb163.i
+ br label %bb168.i
+
+bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
+ %fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2]
+ %fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
+ %f.5.i = phi i32 [ %3, %bb167.i ], [ %4, %bb158.i ], [ %3, %bb160.i ], [ %3, %bb161.i ], [ %3, %bb163.i ] ; <i32> [#uses=2]
+ %scevgep88.i = getelementptr i32* %0, i32 undef ; <i32*> [#uses=2]
+ %scevgep89.i = getelementptr i32* %1, i32 undef ; <i32*> [#uses=2]
+ %ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0]
+ %cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0]
+ %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=2]
+ %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=2]
+ %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1]
+ %6 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ %7 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ store i32 %ci.12.i, i32* %scevgep88.i, align 4
+ store i32 %cj.11.i100, i32* %scevgep89.i, align 4
+ br i1 undef, label %bb211.i, label %bb218.i
+
+bb211.i: ; preds = %bb168.i
+ br label %bb218.i
+
+bb218.i: ; preds = %bb211.i, %bb168.i
+ %cflag.3.i = phi i16 [ %cflag.418.i, %bb168.i ], [ 1, %bb211.i ] ; <i16> [#uses=2]
+ %8 = icmp slt i32 undef, undef ; <i1> [#uses=1]
+ br i1 %8, label %bb220.i, label %bb158.i
+
+bb220.i: ; preds = %bb218.i, %bb153.i
+ %cflag.4.lcssa.i = phi i16 [ 0, %bb153.i ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=2]
+ br i1 undef, label %bb221.i, label %bb228.i
+
+bb221.i: ; preds = %bb220.i
+ br label %bb228.i
+
+bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12
+ %cflag.1.i = phi i16 [ 0, %bb146.i ], [ %cflag.0.i, %bb151.i ], [ %cflag.4.lcssa.i, %bb220.i ], [ 1, %bb12 ], [ %cflag.4.lcssa.i, %bb221.i ] ; <i16> [#uses=2]
+ br i1 false, label %bb74.i, label %bb145.i
+
+bb15: ; preds = %bb11, %bb8
+ br i1 false, label %return, label %bb9
+
+return: ; preds = %bb15
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
new file mode 100644
index 0000000..e1e94b6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define void @test(i8* %x) nounwind {
+entry:
+ call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
new file mode 100644
index 0000000..2b7ccd8
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13
+
+ %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+ %struct.VEC2 = type { double, double, double }
+ %struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+ %struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+@avail_edge = internal global %struct.edge_rec* null ; <%struct.edge_rec**> [#uses=6]
+@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1]
[email protected] = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+entry:
+ %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %0 = icmp eq %struct.VERTEX* %tree, null ; <i1> [#uses=1]
+ br i1 %0, label %bb8, label %bb
+
+bb: ; preds = %entry
+ %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
+ %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1]
+ br i1 %3, label %bb7, label %bb1.i
+
+bb1.i: ; preds = %bb1.i, %bb
+ %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3]
+ %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1]
+ br i1 %6, label %get_low.exit, label %bb1.i
+
+get_low.exit: ; preds = %bb1.i
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
+ %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
+ %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
+ %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
+ %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.i, %get_low.exit
+ %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2]
+ %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3]
+ %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %21 = load double* %20, align 4 ; <double> [#uses=3]
+ %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %23 = load double* %22, align 4 ; <double> [#uses=3]
+ br label %bb2.i
+
+bb1.i1: ; preds = %bb2.i
+ %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 ; <i32> [#uses=2]
+ %25 = add i32 %24, 48 ; <i32> [#uses=1]
+ %26 = and i32 %25, 63 ; <i32> [#uses=1]
+ %27 = and i32 %24, -64 ; <i32> [#uses=1]
+ %28 = or i32 %26, %27 ; <i32> [#uses=1]
+ %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2]
+ %33 = add i32 %32, 16 ; <i32> [#uses=1]
+ %34 = and i32 %33, 63 ; <i32> [#uses=1]
+ %35 = and i32 %32, -64 ; <i32> [#uses=1]
+ %36 = or i32 %34, %35 ; <i32> [#uses=2]
+ %37 = inttoptr i32 %36 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ br label %bb2.i
+
+bb2.i: ; preds = %bb1.i1, %bb.i
+ %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] ; <i32> [#uses=1]
+ %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %39 = load double* %38, align 4 ; <double> [#uses=3]
+ %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %41 = load double* %40, align 4 ; <double> [#uses=3]
+ %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %43 = load double* %42, align 4 ; <double> [#uses=1]
+ %44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %45 = load double* %44, align 4 ; <double> [#uses=1]
+ %46 = fsub double %39, %21 ; <double> [#uses=1]
+ %47 = fsub double %45, %23 ; <double> [#uses=1]
+ %48 = fmul double %46, %47 ; <double> [#uses=1]
+ %49 = fsub double %43, %21 ; <double> [#uses=1]
+ %50 = fsub double %41, %23 ; <double> [#uses=1]
+ %51 = fmul double %49, %50 ; <double> [#uses=1]
+ %52 = fsub double %48, %51 ; <double> [#uses=1]
+ %53 = fcmp ogt double %52, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %53, label %bb1.i1, label %bb3.i
+
+bb3.i: ; preds = %bb2.i
+ %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; <i32> [#uses=1]
+ %55 = xor i32 %54, 32 ; <i32> [#uses=3]
+ %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %60 = load double* %59, align 4 ; <double> [#uses=1]
+ %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %62 = load double* %61, align 4 ; <double> [#uses=1]
+ %63 = fsub double %60, %39 ; <double> [#uses=1]
+ %64 = fsub double %23, %41 ; <double> [#uses=1]
+ %65 = fmul double %63, %64 ; <double> [#uses=1]
+ %66 = fsub double %21, %39 ; <double> [#uses=1]
+ %67 = fsub double %62, %41 ; <double> [#uses=1]
+ %68 = fmul double %66, %67 ; <double> [#uses=1]
+ %69 = fsub double %65, %68 ; <double> [#uses=1]
+ %70 = fcmp ogt double %69, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %70, label %bb4.i, label %bb5.i
+
+bb4.i: ; preds = %bb3.i
+ %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb.i
+
+bb5.i: ; preds = %bb3.i
+ %73 = add i32 %55, 48 ; <i32> [#uses=1]
+ %74 = and i32 %73, 63 ; <i32> [#uses=1]
+ %75 = and i32 %55, -64 ; <i32> [#uses=1]
+ %76 = or i32 %74, %75 ; <i32> [#uses=1]
+ %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2]
+ %81 = add i32 %80, 16 ; <i32> [#uses=1]
+ %82 = and i32 %81, 63 ; <i32> [#uses=1]
+ %83 = and i32 %80, -64 ; <i32> [#uses=1]
+ %84 = or i32 %82, %83 ; <i32> [#uses=1]
+ %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %88 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6]
+ %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
+ %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
+ store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
+ %91 = ptrtoint %struct.edge_rec* %88 to i32 ; <i32> [#uses=5]
+ %92 = add i32 %91, 16 ; <i32> [#uses=2]
+ %93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %94 = add i32 %91, 48 ; <i32> [#uses=1]
+ %95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+ %97 = add i32 %91, 32 ; <i32> [#uses=1]
+ %98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
+ %100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
+ %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
+ %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2]
+ %104 = add i32 %103, 16 ; <i32> [#uses=1]
+ %105 = and i32 %104, 63 ; <i32> [#uses=1]
+ %106 = and i32 %103, -64 ; <i32> [#uses=1]
+ %107 = or i32 %105, %106 ; <i32> [#uses=1]
+ %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2]
+ %112 = add i32 %111, 16 ; <i32> [#uses=1]
+ %113 = and i32 %112, 63 ; <i32> [#uses=1]
+ %114 = and i32 %111, -64 ; <i32> [#uses=1]
+ %115 = or i32 %113, %114 ; <i32> [#uses=1]
+ %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
+ store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
+ %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
+ store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+ %123 = xor i32 %91, 32 ; <i32> [#uses=1]
+ %124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2]
+ %128 = add i32 %127, 16 ; <i32> [#uses=1]
+ %129 = and i32 %128, 63 ; <i32> [#uses=1]
+ %130 = and i32 %127, -64 ; <i32> [#uses=1]
+ %131 = or i32 %129, %130 ; <i32> [#uses=1]
+ %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2]
+ %136 = add i32 %135, 16 ; <i32> [#uses=1]
+ %137 = and i32 %136, 63 ; <i32> [#uses=1]
+ %138 = and i32 %135, -64 ; <i32> [#uses=1]
+ %139 = or i32 %137, %138 ; <i32> [#uses=1]
+ %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
+ store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
+ %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
+ store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
+ store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+ %147 = and i32 %92, 63 ; <i32> [#uses=1]
+ %148 = and i32 %91, -64 ; <i32> [#uses=1]
+ %149 = or i32 %147, %148 ; <i32> [#uses=1]
+ %150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2]
+ %154 = add i32 %153, 16 ; <i32> [#uses=1]
+ %155 = and i32 %154, 63 ; <i32> [#uses=1]
+ %156 = and i32 %153, -64 ; <i32> [#uses=1]
+ %157 = or i32 %155, %156 ; <i32> [#uses=1]
+ %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1]
+ %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3]
+ %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1]
+ %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3]
+ br label %bb9.i
+
+bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
+ %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2]
+ %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3]
+ %173 = xor i32 %172, 32 ; <i32> [#uses=1]
+ %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1]
+ %178 = xor i32 %177, 32 ; <i32> [#uses=1]
+ %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %183 = load double* %182, align 4 ; <double> [#uses=2]
+ %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %185 = load double* %184, align 4 ; <double> [#uses=2]
+ %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %187 = load double* %186, align 4 ; <double> [#uses=1]
+ %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %189 = load double* %188, align 4 ; <double> [#uses=1]
+ %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %191 = load double* %190, align 4 ; <double> [#uses=2]
+ %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %193 = load double* %192, align 4 ; <double> [#uses=2]
+ %194 = fsub double %183, %191 ; <double> [#uses=1]
+ %195 = fsub double %189, %193 ; <double> [#uses=1]
+ %196 = fmul double %194, %195 ; <double> [#uses=1]
+ %197 = fsub double %187, %191 ; <double> [#uses=1]
+ %198 = fsub double %185, %193 ; <double> [#uses=1]
+ %199 = fmul double %197, %198 ; <double> [#uses=1]
+ %200 = fsub double %196, %199 ; <double> [#uses=1]
+ %201 = fcmp ogt double %200, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %201, label %bb10.i, label %bb13.i
+
+bb10.i: ; preds = %bb9.i
+ %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb12.i
+
+bb11.i: ; preds = %bb12.i
+ %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 ; <i32> [#uses=3]
+ %204 = add i32 %203, 16 ; <i32> [#uses=1]
+ %205 = and i32 %204, 63 ; <i32> [#uses=1]
+ %206 = and i32 %203, -64 ; <i32> [#uses=3]
+ %207 = or i32 %205, %206 ; <i32> [#uses=1]
+ %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2]
+ %212 = add i32 %211, 16 ; <i32> [#uses=1]
+ %213 = and i32 %212, 63 ; <i32> [#uses=1]
+ %214 = and i32 %211, -64 ; <i32> [#uses=1]
+ %215 = or i32 %213, %214 ; <i32> [#uses=1]
+ %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2]
+ %220 = add i32 %219, 16 ; <i32> [#uses=1]
+ %221 = and i32 %220, 63 ; <i32> [#uses=1]
+ %222 = and i32 %219, -64 ; <i32> [#uses=1]
+ %223 = or i32 %221, %222 ; <i32> [#uses=1]
+ %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2]
+ %228 = add i32 %227, 16 ; <i32> [#uses=1]
+ %229 = and i32 %228, 63 ; <i32> [#uses=1]
+ %230 = and i32 %227, -64 ; <i32> [#uses=1]
+ %231 = or i32 %229, %230 ; <i32> [#uses=1]
+ %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
+ store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
+ %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
+ store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+ %239 = xor i32 %203, 32 ; <i32> [#uses=2]
+ %240 = add i32 %239, 16 ; <i32> [#uses=1]
+ %241 = and i32 %240, 63 ; <i32> [#uses=1]
+ %242 = or i32 %241, %206 ; <i32> [#uses=1]
+ %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2]
+ %247 = add i32 %246, 16 ; <i32> [#uses=1]
+ %248 = and i32 %247, 63 ; <i32> [#uses=1]
+ %249 = and i32 %246, -64 ; <i32> [#uses=1]
+ %250 = or i32 %248, %249 ; <i32> [#uses=1]
+ %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2]
+ %256 = add i32 %255, 16 ; <i32> [#uses=1]
+ %257 = and i32 %256, 63 ; <i32> [#uses=1]
+ %258 = and i32 %255, -64 ; <i32> [#uses=1]
+ %259 = or i32 %257, %258 ; <i32> [#uses=1]
+ %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2]
+ %264 = add i32 %263, 16 ; <i32> [#uses=1]
+ %265 = and i32 %264, 63 ; <i32> [#uses=1]
+ %266 = and i32 %263, -64 ; <i32> [#uses=1]
+ %267 = or i32 %265, %266 ; <i32> [#uses=1]
+ %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
+ store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
+ %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
+ store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
+ %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
+ %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %.pre.i = load double* %182, align 4 ; <double> [#uses=1]
+ %.pre22.i = load double* %184, align 4 ; <double> [#uses=1]
+ br label %bb12.i
+
+bb12.i: ; preds = %bb11.i, %bb10.i
+ %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] ; <%struct.edge_rec*> [#uses=2]
+ %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] ; <double> [#uses=3]
+ %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] ; <double> [#uses=3]
+ %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %281 = load double* %202, align 4 ; <double> [#uses=3]
+ %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %283 = load double* %282, align 4 ; <double> [#uses=1]
+ %284 = fsub double %283, %280 ; <double> [#uses=2]
+ %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %286 = load double* %285, align 4 ; <double> [#uses=1]
+ %287 = fsub double %286, %279 ; <double> [#uses=2]
+ %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %289 = load double* %288, align 4 ; <double> [#uses=1]
+ %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %291 = load double* %290, align 4 ; <double> [#uses=1]
+ %292 = fsub double %291, %280 ; <double> [#uses=2]
+ %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %294 = load double* %293, align 4 ; <double> [#uses=1]
+ %295 = fsub double %294, %279 ; <double> [#uses=2]
+ %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %297 = load double* %296, align 4 ; <double> [#uses=1]
+ %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %299 = load double* %298, align 4 ; <double> [#uses=1]
+ %300 = fsub double %299, %280 ; <double> [#uses=2]
+ %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %302 = load double* %301, align 4 ; <double> [#uses=1]
+ %303 = fsub double %302, %279 ; <double> [#uses=2]
+ %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %305 = load double* %304, align 4 ; <double> [#uses=1]
+ %306 = fsub double %289, %281 ; <double> [#uses=1]
+ %307 = fmul double %292, %303 ; <double> [#uses=1]
+ %308 = fmul double %295, %300 ; <double> [#uses=1]
+ %309 = fsub double %307, %308 ; <double> [#uses=1]
+ %310 = fmul double %306, %309 ; <double> [#uses=1]
+ %311 = fsub double %297, %281 ; <double> [#uses=1]
+ %312 = fmul double %300, %287 ; <double> [#uses=1]
+ %313 = fmul double %303, %284 ; <double> [#uses=1]
+ %314 = fsub double %312, %313 ; <double> [#uses=1]
+ %315 = fmul double %311, %314 ; <double> [#uses=1]
+ %316 = fadd double %315, %310 ; <double> [#uses=1]
+ %317 = fsub double %305, %281 ; <double> [#uses=1]
+ %318 = fmul double %284, %295 ; <double> [#uses=1]
+ %319 = fmul double %287, %292 ; <double> [#uses=1]
+ %320 = fsub double %318, %319 ; <double> [#uses=1]
+ %321 = fmul double %317, %320 ; <double> [#uses=1]
+ %322 = fadd double %321, %316 ; <double> [#uses=1]
+ %323 = fcmp ogt double %322, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %323, label %bb11.i, label %bb13.loopexit.i
+
+bb13.loopexit.i: ; preds = %bb12.i
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
+ %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb13.i
+
+bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
+ %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] ; <%struct.VERTEX*> [#uses=4]
+ %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] ; <%struct.VERTEX*> [#uses=3]
+ %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] ; <%struct.edge_rec*> [#uses=3]
+ %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 ; <i32> [#uses=2]
+ %327 = add i32 %326, 16 ; <i32> [#uses=1]
+ %328 = and i32 %327, 63 ; <i32> [#uses=1]
+ %329 = and i32 %326, -64 ; <i32> [#uses=1]
+ %330 = or i32 %328, %329 ; <i32> [#uses=1]
+ %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2]
+ %335 = add i32 %334, 16 ; <i32> [#uses=1]
+ %336 = and i32 %335, 63 ; <i32> [#uses=1]
+ %337 = and i32 %334, -64 ; <i32> [#uses=1]
+ %338 = or i32 %336, %337 ; <i32> [#uses=3]
+ %339 = xor i32 %338, 32 ; <i32> [#uses=1]
+ %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %344 = load double* %343, align 4 ; <double> [#uses=1]
+ %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %346 = load double* %345, align 4 ; <double> [#uses=1]
+ %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %348 = load double* %347, align 4 ; <double> [#uses=1]
+ %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %350 = load double* %349, align 4 ; <double> [#uses=1]
+ %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %352 = load double* %351, align 4 ; <double> [#uses=3]
+ %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %354 = load double* %353, align 4 ; <double> [#uses=3]
+ %355 = fsub double %344, %352 ; <double> [#uses=1]
+ %356 = fsub double %350, %354 ; <double> [#uses=1]
+ %357 = fmul double %355, %356 ; <double> [#uses=1]
+ %358 = fsub double %348, %352 ; <double> [#uses=1]
+ %359 = fsub double %346, %354 ; <double> [#uses=1]
+ %360 = fmul double %358, %359 ; <double> [#uses=1]
+ %361 = fsub double %357, %360 ; <double> [#uses=1]
+ %362 = fcmp ogt double %361, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %362, label %bb14.i, label %bb17.i
+
+bb14.i: ; preds = %bb13.i
+ %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb16.i
+
+bb15.i: ; preds = %bb16.i
+ %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 ; <i32> [#uses=3]
+ %365 = add i32 %364, 16 ; <i32> [#uses=1]
+ %366 = and i32 %365, 63 ; <i32> [#uses=1]
+ %367 = and i32 %364, -64 ; <i32> [#uses=3]
+ %368 = or i32 %366, %367 ; <i32> [#uses=1]
+ %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2]
+ %373 = add i32 %372, 16 ; <i32> [#uses=1]
+ %374 = and i32 %373, 63 ; <i32> [#uses=1]
+ %375 = and i32 %372, -64 ; <i32> [#uses=1]
+ %376 = or i32 %374, %375 ; <i32> [#uses=1]
+ %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2]
+ %381 = add i32 %380, 16 ; <i32> [#uses=1]
+ %382 = and i32 %381, 63 ; <i32> [#uses=1]
+ %383 = and i32 %380, -64 ; <i32> [#uses=1]
+ %384 = or i32 %382, %383 ; <i32> [#uses=1]
+ %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2]
+ %389 = add i32 %388, 16 ; <i32> [#uses=1]
+ %390 = and i32 %389, 63 ; <i32> [#uses=1]
+ %391 = and i32 %388, -64 ; <i32> [#uses=1]
+ %392 = or i32 %390, %391 ; <i32> [#uses=1]
+ %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
+ store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
+ %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
+ store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+ %400 = xor i32 %364, 32 ; <i32> [#uses=2]
+ %401 = add i32 %400, 16 ; <i32> [#uses=1]
+ %402 = and i32 %401, 63 ; <i32> [#uses=1]
+ %403 = or i32 %402, %367 ; <i32> [#uses=1]
+ %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2]
+ %408 = add i32 %407, 16 ; <i32> [#uses=1]
+ %409 = and i32 %408, 63 ; <i32> [#uses=1]
+ %410 = and i32 %407, -64 ; <i32> [#uses=1]
+ %411 = or i32 %409, %410 ; <i32> [#uses=1]
+ %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2]
+ %417 = add i32 %416, 16 ; <i32> [#uses=1]
+ %418 = and i32 %417, 63 ; <i32> [#uses=1]
+ %419 = and i32 %416, -64 ; <i32> [#uses=1]
+ %420 = or i32 %418, %419 ; <i32> [#uses=1]
+ %421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2]
+ %425 = add i32 %424, 16 ; <i32> [#uses=1]
+ %426 = and i32 %425, 63 ; <i32> [#uses=1]
+ %427 = and i32 %424, -64 ; <i32> [#uses=1]
+ %428 = or i32 %426, %427 ; <i32> [#uses=1]
+ %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
+ store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
+ %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
+ store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
+ %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+ %438 = add i32 %t.1.in.i, 16 ; <i32> [#uses=1]
+ %439 = and i32 %438, 63 ; <i32> [#uses=1]
+ %440 = and i32 %t.1.in.i, -64 ; <i32> [#uses=1]
+ %441 = or i32 %439, %440 ; <i32> [#uses=1]
+ %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2]
+ %446 = add i32 %445, 16 ; <i32> [#uses=1]
+ %447 = and i32 %446, 63 ; <i32> [#uses=1]
+ %448 = and i32 %445, -64 ; <i32> [#uses=1]
+ %449 = or i32 %447, %448 ; <i32> [#uses=2]
+ %.pre25.i = load double* %351, align 4 ; <double> [#uses=1]
+ %.pre26.i = load double* %353, align 4 ; <double> [#uses=1]
+ br label %bb16.i
+
+bb16.i: ; preds = %bb15.i, %bb14.i
+ %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] ; <%struct.edge_rec*> [#uses=2]
+ %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] ; <double> [#uses=3]
+ %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] ; <double> [#uses=3]
+ %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=3]
+ %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=1]
+ %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 ; <i32> [#uses=1]
+ %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn.in.i = xor i32 %.pn.in.in.i, 32 ; <i32> [#uses=1]
+ %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %452 = load double* %363, align 4 ; <double> [#uses=3]
+ %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %454 = load double* %453, align 4 ; <double> [#uses=1]
+ %455 = fsub double %454, %451 ; <double> [#uses=2]
+ %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %457 = load double* %456, align 4 ; <double> [#uses=1]
+ %458 = fsub double %457, %450 ; <double> [#uses=2]
+ %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %460 = load double* %459, align 4 ; <double> [#uses=1]
+ %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %462 = load double* %461, align 4 ; <double> [#uses=1]
+ %463 = fsub double %462, %451 ; <double> [#uses=2]
+ %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %465 = load double* %464, align 4 ; <double> [#uses=1]
+ %466 = fsub double %465, %450 ; <double> [#uses=2]
+ %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %468 = load double* %467, align 4 ; <double> [#uses=1]
+ %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %470 = load double* %469, align 4 ; <double> [#uses=1]
+ %471 = fsub double %470, %451 ; <double> [#uses=2]
+ %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %473 = load double* %472, align 4 ; <double> [#uses=1]
+ %474 = fsub double %473, %450 ; <double> [#uses=2]
+ %475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %476 = load double* %475, align 4 ; <double> [#uses=1]
+ %477 = fsub double %460, %452 ; <double> [#uses=1]
+ %478 = fmul double %463, %474 ; <double> [#uses=1]
+ %479 = fmul double %466, %471 ; <double> [#uses=1]
+ %480 = fsub double %478, %479 ; <double> [#uses=1]
+ %481 = fmul double %477, %480 ; <double> [#uses=1]
+ %482 = fsub double %468, %452 ; <double> [#uses=1]
+ %483 = fmul double %471, %458 ; <double> [#uses=1]
+ %484 = fmul double %474, %455 ; <double> [#uses=1]
+ %485 = fsub double %483, %484 ; <double> [#uses=1]
+ %486 = fmul double %482, %485 ; <double> [#uses=1]
+ %487 = fadd double %486, %481 ; <double> [#uses=1]
+ %488 = fsub double %476, %452 ; <double> [#uses=1]
+ %489 = fmul double %455, %466 ; <double> [#uses=1]
+ %490 = fmul double %458, %463 ; <double> [#uses=1]
+ %491 = fsub double %489, %490 ; <double> [#uses=1]
+ %492 = fmul double %488, %491 ; <double> [#uses=1]
+ %493 = fadd double %492, %487 ; <double> [#uses=1]
+ %494 = fcmp ogt double %493, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %494, label %bb15.i, label %bb17.loopexit.i
+
+bb17.loopexit.i: ; preds = %bb16.i
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
+ %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb17.i
+
+bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
+ %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] ; <%struct.edge_rec*> [#uses=3]
+ %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; <i32> [#uses=1]
+ %498 = xor i32 %497, 32 ; <i32> [#uses=1]
+ %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %503 = load double* %502, align 4 ; <double> [#uses=1]
+ %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %505 = load double* %504, align 4 ; <double> [#uses=1]
+ %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %507 = load double* %506, align 4 ; <double> [#uses=2]
+ %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %509 = load double* %508, align 4 ; <double> [#uses=2]
+ %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %511 = load double* %510, align 4 ; <double> [#uses=3]
+ %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %513 = load double* %512, align 4 ; <double> [#uses=3]
+ %514 = fsub double %503, %511 ; <double> [#uses=2]
+ %515 = fsub double %509, %513 ; <double> [#uses=1]
+ %516 = fmul double %514, %515 ; <double> [#uses=1]
+ %517 = fsub double %507, %511 ; <double> [#uses=1]
+ %518 = fsub double %505, %513 ; <double> [#uses=2]
+ %519 = fmul double %517, %518 ; <double> [#uses=1]
+ %520 = fsub double %516, %519 ; <double> [#uses=1]
+ %521 = fcmp ogt double %520, 0.000000e+00 ; <i1> [#uses=2]
+ %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; <i32> [#uses=3]
+ %523 = xor i32 %522, 32 ; <i32> [#uses=1]
+ %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %528 = load double* %527, align 4 ; <double> [#uses=4]
+ %529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %530 = load double* %529, align 4 ; <double> [#uses=4]
+ %531 = fsub double %530, %513 ; <double> [#uses=1]
+ %532 = fmul double %514, %531 ; <double> [#uses=1]
+ %533 = fsub double %528, %511 ; <double> [#uses=1]
+ %534 = fmul double %533, %518 ; <double> [#uses=1]
+ %535 = fsub double %532, %534 ; <double> [#uses=1]
+ %536 = fcmp ogt double %535, 0.000000e+00 ; <i1> [#uses=2]
+ %537 = or i1 %536, %521 ; <i1> [#uses=1]
+ br i1 %537, label %bb21.i, label %do_merge.exit
+
+bb21.i: ; preds = %bb17.i
+ %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3]
+ br i1 %521, label %bb22.i, label %bb24.i
+
+bb22.i: ; preds = %bb21.i
+ br i1 %536, label %bb23.i, label %bb25.i
+
+bb23.i: ; preds = %bb22.i
+ %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %543 = load double* %542, align 4 ; <double> [#uses=3]
+ %544 = fsub double %507, %528 ; <double> [#uses=2]
+ %545 = fsub double %509, %530 ; <double> [#uses=2]
+ %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %547 = load double* %546, align 4 ; <double> [#uses=1]
+ %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %549 = load double* %548, align 4 ; <double> [#uses=1]
+ %550 = fsub double %549, %528 ; <double> [#uses=2]
+ %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %552 = load double* %551, align 4 ; <double> [#uses=1]
+ %553 = fsub double %552, %530 ; <double> [#uses=2]
+ %554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %555 = load double* %554, align 4 ; <double> [#uses=1]
+ %556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %557 = load double* %556, align 4 ; <double> [#uses=1]
+ %558 = fsub double %557, %528 ; <double> [#uses=2]
+ %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %560 = load double* %559, align 4 ; <double> [#uses=1]
+ %561 = fsub double %560, %530 ; <double> [#uses=2]
+ %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %563 = load double* %562, align 4 ; <double> [#uses=1]
+ %564 = fsub double %547, %543 ; <double> [#uses=1]
+ %565 = fmul double %550, %561 ; <double> [#uses=1]
+ %566 = fmul double %553, %558 ; <double> [#uses=1]
+ %567 = fsub double %565, %566 ; <double> [#uses=1]
+ %568 = fmul double %564, %567 ; <double> [#uses=1]
+ %569 = fsub double %555, %543 ; <double> [#uses=1]
+ %570 = fmul double %558, %545 ; <double> [#uses=1]
+ %571 = fmul double %561, %544 ; <double> [#uses=1]
+ %572 = fsub double %570, %571 ; <double> [#uses=1]
+ %573 = fmul double %569, %572 ; <double> [#uses=1]
+ %574 = fadd double %573, %568 ; <double> [#uses=1]
+ %575 = fsub double %563, %543 ; <double> [#uses=1]
+ %576 = fmul double %544, %553 ; <double> [#uses=1]
+ %577 = fmul double %545, %550 ; <double> [#uses=1]
+ %578 = fsub double %576, %577 ; <double> [#uses=1]
+ %579 = fmul double %575, %578 ; <double> [#uses=1]
+ %580 = fadd double %579, %574 ; <double> [#uses=1]
+ %581 = fcmp ogt double %580, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %581, label %bb24.i, label %bb25.i
+
+bb24.i: ; preds = %bb23.i, %bb21.i
+ %582 = add i32 %522, 48 ; <i32> [#uses=1]
+ %583 = and i32 %582, 63 ; <i32> [#uses=1]
+ %584 = and i32 %522, -64 ; <i32> [#uses=1]
+ %585 = or i32 %583, %584 ; <i32> [#uses=1]
+ %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2]
+ %590 = add i32 %589, 16 ; <i32> [#uses=1]
+ %591 = and i32 %590, 63 ; <i32> [#uses=1]
+ %592 = and i32 %589, -64 ; <i32> [#uses=1]
+ %593 = or i32 %591, %592 ; <i32> [#uses=1]
+ %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %595 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
+ %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
+ %598 = ptrtoint %struct.edge_rec* %595 to i32 ; <i32> [#uses=5]
+ %599 = add i32 %598, 16 ; <i32> [#uses=1]
+ %600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %601 = add i32 %598, 48 ; <i32> [#uses=1]
+ %602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
+ %604 = add i32 %598, 32 ; <i32> [#uses=1]
+ %605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
+ %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
+ %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
+ %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2]
+ %611 = add i32 %610, 16 ; <i32> [#uses=1]
+ %612 = and i32 %611, 63 ; <i32> [#uses=1]
+ %613 = and i32 %610, -64 ; <i32> [#uses=1]
+ %614 = or i32 %612, %613 ; <i32> [#uses=1]
+ %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2]
+ %619 = add i32 %618, 16 ; <i32> [#uses=1]
+ %620 = and i32 %619, 63 ; <i32> [#uses=1]
+ %621 = and i32 %618, -64 ; <i32> [#uses=1]
+ %622 = or i32 %620, %621 ; <i32> [#uses=1]
+ %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
+ store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
+ %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
+ store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
+ %630 = xor i32 %598, 32 ; <i32> [#uses=2]
+ %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2]
+ %635 = add i32 %634, 16 ; <i32> [#uses=1]
+ %636 = and i32 %635, 63 ; <i32> [#uses=1]
+ %637 = and i32 %634, -64 ; <i32> [#uses=1]
+ %638 = or i32 %636, %637 ; <i32> [#uses=1]
+ %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2]
+ %643 = add i32 %642, 16 ; <i32> [#uses=1]
+ %644 = and i32 %643, 63 ; <i32> [#uses=1]
+ %645 = and i32 %642, -64 ; <i32> [#uses=1]
+ %646 = or i32 %644, %645 ; <i32> [#uses=1]
+ %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
+ store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
+ %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
+ store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
+ %654 = add i32 %630, 48 ; <i32> [#uses=1]
+ %655 = and i32 %654, 63 ; <i32> [#uses=1]
+ %656 = and i32 %598, -64 ; <i32> [#uses=1]
+ %657 = or i32 %655, %656 ; <i32> [#uses=1]
+ %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2]
+ %662 = add i32 %661, 16 ; <i32> [#uses=1]
+ %663 = and i32 %662, 63 ; <i32> [#uses=1]
+ %664 = and i32 %661, -64 ; <i32> [#uses=1]
+ %665 = or i32 %663, %664 ; <i32> [#uses=1]
+ %666 = inttoptr i32 %665 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+bb25.i: ; preds = %bb23.i, %bb22.i
+ %667 = add i32 %172, 16 ; <i32> [#uses=1]
+ %668 = and i32 %667, 63 ; <i32> [#uses=1]
+ %669 = and i32 %172, -64 ; <i32> [#uses=1]
+ %670 = or i32 %668, %669 ; <i32> [#uses=1]
+ %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2]
+ %675 = add i32 %674, 16 ; <i32> [#uses=1]
+ %676 = and i32 %675, 63 ; <i32> [#uses=1]
+ %677 = and i32 %674, -64 ; <i32> [#uses=1]
+ %678 = or i32 %676, %677 ; <i32> [#uses=1]
+ %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %680 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5]
+ store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
+ %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
+ %683 = ptrtoint %struct.edge_rec* %680 to i32 ; <i32> [#uses=4]
+ %684 = add i32 %683, 16 ; <i32> [#uses=1]
+ %685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %686 = add i32 %683, 48 ; <i32> [#uses=1]
+ %687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
+ %689 = add i32 %683, 32 ; <i32> [#uses=1]
+ %690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
+ %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
+ %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
+ %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2]
+ %696 = add i32 %695, 16 ; <i32> [#uses=1]
+ %697 = and i32 %696, 63 ; <i32> [#uses=1]
+ %698 = and i32 %695, -64 ; <i32> [#uses=1]
+ %699 = or i32 %697, %698 ; <i32> [#uses=1]
+ %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2]
+ %704 = add i32 %703, 16 ; <i32> [#uses=1]
+ %705 = and i32 %704, 63 ; <i32> [#uses=1]
+ %706 = and i32 %703, -64 ; <i32> [#uses=1]
+ %707 = or i32 %705, %706 ; <i32> [#uses=1]
+ %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
+ store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
+ %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
+ store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
+ %715 = xor i32 %683, 32 ; <i32> [#uses=1]
+ %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2]
+ %720 = add i32 %719, 16 ; <i32> [#uses=1]
+ %721 = and i32 %720, 63 ; <i32> [#uses=1]
+ %722 = and i32 %719, -64 ; <i32> [#uses=1]
+ %723 = or i32 %721, %722 ; <i32> [#uses=1]
+ %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2]
+ %728 = add i32 %727, 16 ; <i32> [#uses=1]
+ %729 = and i32 %728, 63 ; <i32> [#uses=1]
+ %730 = and i32 %727, -64 ; <i32> [#uses=1]
+ %731 = or i32 %729, %730 ; <i32> [#uses=1]
+ %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
+ store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
+ %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
+ store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
+ %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb9.i
+
+do_merge.exit: ; preds = %bb17.i
+ %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %742, label %bb5.loopexit, label %bb2
+
+bb2: ; preds = %bb2, %do_merge.exit
+ %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] ; <%struct.edge_rec*> [#uses=1]
+ %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; <i32> [#uses=1]
+ %744 = xor i32 %743, 32 ; <i32> [#uses=1]
+ %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1]
+ br i1 %750, label %bb5.loopexit, label %bb2
+
+bb4: ; preds = %bb5.loopexit, %bb4
+ %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1]
+ %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1]
+ %754 = xor i32 %753, 32 ; <i32> [#uses=1]
+ %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1]
+ br i1 %758, label %bb6, label %bb4
+
+bb5.loopexit: ; preds = %bb2, %do_merge.exit
+ %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1]
+ %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1]
+ br i1 %761, label %bb6, label %bb4
+
+bb6: ; preds = %bb5.loopexit, %bb4
+ %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] ; <%struct.edge_rec*> [#uses=1]
+ %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 ; <i32> [#uses=1]
+ %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb7: ; preds = %bb
+ %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1]
+ %765 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
+ %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
+ %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ br i1 %764, label %bb10, label %bb11
+
+bb8: ; preds = %entry
+ %768 = call arm_apcscc i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ call arm_apcscc void @exit(i32 -1) noreturn nounwind
+ unreachable
+
+bb10: ; preds = %bb7
+ store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4
+ %769 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=5]
+ %770 = add i32 %769, 16 ; <i32> [#uses=1]
+ %771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %772 = add i32 %769, 48 ; <i32> [#uses=1]
+ %773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
+ %775 = add i32 %769, 32 ; <i32> [#uses=1]
+ %776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
+ %778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
+ %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
+ %780 = xor i32 %769, 32 ; <i32> [#uses=1]
+ br label %bb15
+
+bb11: ; preds = %bb7
+ store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4
+ %781 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=6]
+ %782 = add i32 %781, 16 ; <i32> [#uses=1]
+ %783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %784 = add i32 %781, 48 ; <i32> [#uses=1]
+ %785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
+ %787 = add i32 %781, 32 ; <i32> [#uses=1]
+ %788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
+ %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
+ %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
+ %792 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
+ %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
+ %795 = ptrtoint %struct.edge_rec* %792 to i32 ; <i32> [#uses=5]
+ %796 = add i32 %795, 16 ; <i32> [#uses=1]
+ %797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %798 = add i32 %795, 48 ; <i32> [#uses=2]
+ %799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
+ %801 = add i32 %795, 32 ; <i32> [#uses=1]
+ %802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
+ %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
+ %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
+ %806 = xor i32 %781, 32 ; <i32> [#uses=1]
+ %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2]
+ %811 = add i32 %810, 16 ; <i32> [#uses=1]
+ %812 = and i32 %811, 63 ; <i32> [#uses=1]
+ %813 = and i32 %810, -64 ; <i32> [#uses=1]
+ %814 = or i32 %812, %813 ; <i32> [#uses=1]
+ %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %816 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2]
+ %818 = add i32 %817, 16 ; <i32> [#uses=1]
+ %819 = and i32 %818, 63 ; <i32> [#uses=1]
+ %820 = and i32 %817, -64 ; <i32> [#uses=1]
+ %821 = or i32 %819, %820 ; <i32> [#uses=1]
+ %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
+ store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
+ %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
+ store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
+ %829 = xor i32 %795, 32 ; <i32> [#uses=3]
+ %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %833 = and i32 %798, 63 ; <i32> [#uses=1]
+ %834 = and i32 %795, -64 ; <i32> [#uses=1]
+ %835 = or i32 %833, %834 ; <i32> [#uses=1]
+ %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2]
+ %840 = add i32 %839, 16 ; <i32> [#uses=1]
+ %841 = and i32 %840, 63 ; <i32> [#uses=1]
+ %842 = and i32 %839, -64 ; <i32> [#uses=1]
+ %843 = or i32 %841, %842 ; <i32> [#uses=1]
+ %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %846 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
+ %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7]
+ store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
+ %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
+ %849 = ptrtoint %struct.edge_rec* %846 to i32 ; <i32> [#uses=6]
+ %850 = add i32 %849, 16 ; <i32> [#uses=2]
+ %851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %852 = add i32 %849, 48 ; <i32> [#uses=1]
+ %853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
+ %855 = add i32 %849, 32 ; <i32> [#uses=1]
+ %856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
+ %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
+ %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
+ %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2]
+ %862 = add i32 %861, 16 ; <i32> [#uses=1]
+ %863 = and i32 %862, 63 ; <i32> [#uses=1]
+ %864 = and i32 %861, -64 ; <i32> [#uses=1]
+ %865 = or i32 %863, %864 ; <i32> [#uses=1]
+ %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2]
+ %870 = add i32 %869, 16 ; <i32> [#uses=1]
+ %871 = and i32 %870, 63 ; <i32> [#uses=1]
+ %872 = and i32 %869, -64 ; <i32> [#uses=1]
+ %873 = or i32 %871, %872 ; <i32> [#uses=1]
+ %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
+ store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
+ %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
+ store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
+ %881 = xor i32 %849, 32 ; <i32> [#uses=3]
+ %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6]
+ %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2]
+ %886 = add i32 %885, 16 ; <i32> [#uses=1]
+ %887 = and i32 %886, 63 ; <i32> [#uses=1]
+ %888 = and i32 %885, -64 ; <i32> [#uses=1]
+ %889 = or i32 %887, %888 ; <i32> [#uses=1]
+ %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2]
+ %893 = add i32 %892, 16 ; <i32> [#uses=1]
+ %894 = and i32 %893, 63 ; <i32> [#uses=1]
+ %895 = and i32 %892, -64 ; <i32> [#uses=1]
+ %896 = or i32 %894, %895 ; <i32> [#uses=1]
+ %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
+ store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
+ %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
+ store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
+ %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %905 = load double* %904, align 4 ; <double> [#uses=2]
+ %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %907 = load double* %906, align 4 ; <double> [#uses=2]
+ %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %909 = load double* %908, align 4 ; <double> [#uses=3]
+ %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %911 = load double* %910, align 4 ; <double> [#uses=3]
+ %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %913 = load double* %912, align 4 ; <double> [#uses=3]
+ %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %915 = load double* %914, align 4 ; <double> [#uses=3]
+ %916 = fsub double %905, %913 ; <double> [#uses=1]
+ %917 = fsub double %911, %915 ; <double> [#uses=1]
+ %918 = fmul double %916, %917 ; <double> [#uses=1]
+ %919 = fsub double %909, %913 ; <double> [#uses=1]
+ %920 = fsub double %907, %915 ; <double> [#uses=1]
+ %921 = fmul double %919, %920 ; <double> [#uses=1]
+ %922 = fsub double %918, %921 ; <double> [#uses=1]
+ %923 = fcmp ogt double %922, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %923, label %bb15, label %bb13
+
+bb13: ; preds = %bb11
+ %924 = fsub double %905, %909 ; <double> [#uses=1]
+ %925 = fsub double %915, %911 ; <double> [#uses=1]
+ %926 = fmul double %924, %925 ; <double> [#uses=1]
+ %927 = fsub double %913, %909 ; <double> [#uses=1]
+ %928 = fsub double %907, %911 ; <double> [#uses=1]
+ %929 = fmul double %927, %928 ; <double> [#uses=1]
+ %930 = fsub double %926, %929 ; <double> [#uses=1]
+ %931 = fcmp ogt double %930, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %931, label %bb15, label %bb14
+
+bb14: ; preds = %bb13
+ %932 = and i32 %850, 63 ; <i32> [#uses=1]
+ %933 = and i32 %849, -64 ; <i32> [#uses=3]
+ %934 = or i32 %932, %933 ; <i32> [#uses=1]
+ %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2]
+ %939 = add i32 %938, 16 ; <i32> [#uses=1]
+ %940 = and i32 %939, 63 ; <i32> [#uses=1]
+ %941 = and i32 %938, -64 ; <i32> [#uses=1]
+ %942 = or i32 %940, %941 ; <i32> [#uses=1]
+ %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2]
+ %946 = add i32 %945, 16 ; <i32> [#uses=1]
+ %947 = and i32 %946, 63 ; <i32> [#uses=1]
+ %948 = and i32 %945, -64 ; <i32> [#uses=1]
+ %949 = or i32 %947, %948 ; <i32> [#uses=1]
+ %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2]
+ %954 = add i32 %953, 16 ; <i32> [#uses=1]
+ %955 = and i32 %954, 63 ; <i32> [#uses=1]
+ %956 = and i32 %953, -64 ; <i32> [#uses=1]
+ %957 = or i32 %955, %956 ; <i32> [#uses=1]
+ %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
+ store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
+ %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
+ store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+ %965 = add i32 %881, 16 ; <i32> [#uses=1]
+ %966 = and i32 %965, 63 ; <i32> [#uses=1]
+ %967 = or i32 %966, %933 ; <i32> [#uses=1]
+ %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2]
+ %972 = add i32 %971, 16 ; <i32> [#uses=1]
+ %973 = and i32 %972, 63 ; <i32> [#uses=1]
+ %974 = and i32 %971, -64 ; <i32> [#uses=1]
+ %975 = or i32 %973, %974 ; <i32> [#uses=1]
+ %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2]
+ %979 = add i32 %978, 16 ; <i32> [#uses=1]
+ %980 = and i32 %979, 63 ; <i32> [#uses=1]
+ %981 = and i32 %978, -64 ; <i32> [#uses=1]
+ %982 = or i32 %980, %981 ; <i32> [#uses=1]
+ %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2]
+ %987 = add i32 %986, 16 ; <i32> [#uses=1]
+ %988 = and i32 %987, 63 ; <i32> [#uses=1]
+ %989 = and i32 %986, -64 ; <i32> [#uses=1]
+ %990 = or i32 %988, %989 ; <i32> [#uses=1]
+ %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
+ store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
+ %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
+ store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
+ %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
+ store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+ br label %bb15
+
+bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6
+ %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] ; <i32> [#uses=1]
+ %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] ; <i32> [#uses=1]
+ %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* ; <i64*> [#uses=1]
+ %1001 = zext i32 %retval.0.0 to i64 ; <i64> [#uses=1]
+ %1002 = zext i32 %retval.1.0 to i64 ; <i64> [#uses=1]
+ %1003 = shl i64 %1002, 32 ; <i64> [#uses=1]
+ %1004 = or i64 %1003, %1001 ; <i64> [#uses=1]
+ store i64 %1004, i64* %agg.result162, align 4
+ ret void
+}
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc void @exit(i32) noreturn nounwind
+
+declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
new file mode 100644
index 0000000..b4b989b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ store i16 %parts, i16* undef, align 4
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ br i1 undef, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ %0 = load i16* undef, align 4 ; <i16> [#uses=1]
+ %1 = icmp eq i16 %0, 0 ; <i1> [#uses=1]
+ %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1]
+ %2 = tail call arm_apcscc i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0]
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
new file mode 100644
index 0000000..24f4990
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=arm
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ %0 = getelementptr i8* null, i32 10 ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1]
+ %2 = load i16* %1, align 2 ; <i16> [#uses=1]
+ %3 = add i16 %2, 1 ; <i16> [#uses=1]
+ %4 = zext i16 %3 to i32 ; <i32> [#uses=1]
+ %5 = mul i32 %4, 3 ; <i32> [#uses=1]
+ %6 = add i32 %5, -1 ; <i32> [#uses=1]
+ %7 = icmp eq i32 %6, undef ; <i1> [#uses=1]
+ br i1 %7, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
new file mode 100644
index 0000000..e1d19d1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3
+
+@a = external global double ; <double*> [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb28, label %bb
+
+bb28: ; preds = %bb
+ %0 = load double* @a, align 4 ; <double> [#uses=2]
+ %1 = fadd double %0, undef ; <double> [#uses=2]
+ br i1 undef, label %bb59, label %bb60
+
+bb59: ; preds = %bb28
+ %2 = fsub double -0.000000e+00, undef ; <double> [#uses=2]
+ br label %bb61
+
+bb60: ; preds = %bb28
+ %3 = tail call double @llvm.exp.f64(double undef) nounwind ; <double> [#uses=1]
+ %4 = fsub double -0.000000e+00, %3 ; <double> [#uses=2]
+ %5 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ %6 = fsub double -0.000000e+00, undef ; <double> [#uses=1]
+ br label %bb61
+
+bb61: ; preds = %bb60, %bb59
+ %.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ] ; <double> [#uses=1]
+ %.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1]
+ %.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1]
+ %.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ] ; <double> [#uses=1]
+ %.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ] ; <double> [#uses=0]
+ %.pn390 = fdiv double %.pn452, undef ; <double> [#uses=0]
+ %.pn145 = fdiv double %.pn238, %1 ; <double> [#uses=0]
+ %.pn138 = fdiv double %.pn230, undef ; <double> [#uses=1]
+ %.pn139 = fdiv double %.pn228, undef ; <double> [#uses=1]
+ %.pn134 = fdiv double %.pn224, %0 ; <double> [#uses=1]
+ %.pn135 = fdiv double %.pn222, %1 ; <double> [#uses=1]
+ %.pn133 = fdiv double %.pn218, undef ; <double> [#uses=0]
+ %.pn128 = fdiv double %.pn214, undef ; <double> [#uses=1]
+ %.pn129 = fdiv double %.pn212, %.pn213 ; <double> [#uses=1]
+ %.pn126 = fdiv double %.pn210, undef ; <double> [#uses=0]
+ %.pn54.in = fmul double undef, %.pn201 ; <double> [#uses=1]
+ %.pn42.in = fmul double undef, undef ; <double> [#uses=1]
+ %.pn76 = fsub double %.pn138, %.pn139 ; <double> [#uses=1]
+ %.pn74 = fsub double %.pn134, %.pn135 ; <double> [#uses=1]
+ %.pn70 = fsub double %.pn128, %.pn129 ; <double> [#uses=1]
+ %.pn54 = fdiv double %.pn54.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn64 = fmul double undef, 0x3FE5555555555555 ; <double> [#uses=1]
+ %.pn65 = fmul double undef, undef ; <double> [#uses=1]
+ %.pn50 = fmul double undef, %.pn111 ; <double> [#uses=0]
+ %.pn42 = fdiv double %.pn42.in, 6.000000e+00 ; <double> [#uses=1]
+ %.pn40 = fmul double undef, %.pn85 ; <double> [#uses=0]
+ %.pn56 = fadd double %.pn76, undef ; <double> [#uses=1]
+ %.pn57 = fmul double %.pn74, undef ; <double> [#uses=1]
+ %.pn36 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn37 = fmul double %.pn70, undef ; <double> [#uses=1]
+ %.pn33 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn29 = fsub double %.pn64, %.pn65 ; <double> [#uses=1]
+ %.pn21 = fadd double undef, undef ; <double> [#uses=1]
+ %.pn27 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1]
+ %.pn11 = fadd double %.pn56, %.pn57 ; <double> [#uses=1]
+ %.pn32 = fmul double %.pn54, undef ; <double> [#uses=1]
+ %.pn26 = fmul double %.pn42, undef ; <double> [#uses=1]
+ %.pn15 = fmul double 0.000000e+00, %.pn39 ; <double> [#uses=1]
+ %.pn7 = fadd double %.pn36, %.pn37 ; <double> [#uses=1]
+ %.pn30 = fsub double %.pn32, %.pn33 ; <double> [#uses=1]
+ %.pn28 = fadd double %.pn30, 0.000000e+00 ; <double> [#uses=1]
+ %.pn24 = fsub double %.pn28, %.pn29 ; <double> [#uses=1]
+ %.pn22 = fsub double %.pn26, %.pn27 ; <double> [#uses=1]
+ %.pn20 = fadd double %.pn24, undef ; <double> [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1]
+ %.pn16 = fsub double %.pn20, %.pn21 ; <double> [#uses=1]
+ %.pn14 = fsub double %.pn18, undef ; <double> [#uses=1]
+ %.pn12 = fadd double %.pn16, undef ; <double> [#uses=1]
+ %.pn10 = fadd double %.pn14, %.pn15 ; <double> [#uses=1]
+ %.pn8 = fsub double %.pn12, undef ; <double> [#uses=1]
+ %.pn6 = fsub double %.pn10, %.pn11 ; <double> [#uses=1]
+ %.pn4 = fadd double %.pn8, undef ; <double> [#uses=1]
+ %.pn2 = fadd double %.pn6, %.pn7 ; <double> [#uses=1]
+ %N1.0 = fsub double %.pn4, undef ; <double> [#uses=1]
+ %D1.0 = fsub double %.pn2, undef ; <double> [#uses=2]
+ br i1 undef, label %bb62, label %bb64
+
+bb62: ; preds = %bb61
+ %7 = fadd double %D1.0, undef ; <double> [#uses=1]
+ br label %bb64
+
+bb64: ; preds = %bb62, %bb61
+ %.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ] ; <double> [#uses=1]
+ %.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ] ; <double> [#uses=1]
+ %x.1 = fdiv double %.pn, %.pn1 ; <double> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
new file mode 100644
index 0000000..2d4e58d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; PR4657
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+entry:
+ %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %f_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <4 x i32> %v, <4 x i32>* %v_addr
+ store i32 %f, i32* %f_addr
+ %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1]
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1]
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1]
+ %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %5, <4 x i32>* %0, align 16
+ %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %6, <4 x i32>* %retval, align 16
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %retval1
+}
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
new file mode 100644
index 0000000..65ffed2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-elf"
+
+define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmtmp.i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit
+
+bb6.i: ; preds = %bb5.i
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i
+
+bb7.i: ; preds = %bb6.i
+ unreachable
+
+fault_in_pages_writeable.exit: ; preds = %bb6.i, %bb5.i, %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %1 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
new file mode 100644
index 0000000..9e5372a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1]
+ br label %fault_in_pages_writeable.exit
+
+fault_in_pages_writeable.exit: ; preds = %bb5.i, %entry
+ %0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ] ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %2 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
new file mode 100644
index 0000000..18d68f7
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=arm
+; PR4528
+
+; Inline asm is allowed to contain operands "=&r", "0".
+
+%struct.device_dma_parameters = type { i32, i32 }
+%struct.iovec = type { i8*, i32 }
+
+define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize {
+entry:
+ br label %bb8
+
+bb: ; preds = %bb8
+ br i1 undef, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+ %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind; <%struct.device_dma_parameters> [#uses=1]
+ %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0; <i32> [#uses=1]
+ %0 = icmp eq i32 %asmresult, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb7, label %bb4
+
+bb4: ; preds = %bb2
+ br i1 undef, label %bb10, label %bb9
+
+bb7: ; preds = %bb2
+ %1 = add i32 %2, 1 ; <i32> [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb7, %entry
+ %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3]
+ %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
+ %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1]
+ %4 = icmp ult i32 %2, %3 ; <i1> [#uses=1]
+ br i1 %4, label %bb, label %bb9
+
+bb9: ; preds = %bb8, %bb4
+ store i32 undef, i32* %count, align 4
+ ret i32 0
+
+bb10: ; preds = %bb4, %bb
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
new file mode 100644
index 0000000..a46482c
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm
+; PR4716
+
+define arm_aapcscc void @_start() nounwind naked {
+entry:
+ tail call arm_aapcscc void @exit(i32 undef) noreturn nounwind
+ unreachable
+}
+
+declare arm_aapcscc void @exit(i32) noreturn nounwind
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
new file mode 100644
index 0000000..84915c4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
+@g = common global %struct.tree* null
+
+define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
+entry:
+ %t.idx51.val.i = load double* null ; <double> [#uses=1]
+ br i1 undef, label %bb4.i, label %bb.i
+
+bb.i: ; preds = %entry
+ unreachable
+
+bb4.i: ; preds = %entry
+ %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
+ %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
+ %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
+ %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
+ %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
+ %1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
+ %2 = fmul double %1, %1 ; <double> [#uses=1]
+ %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
+ %4 = fmul double %3, %3 ; <double> [#uses=1]
+ %5 = fadd double %2, %4 ; <double> [#uses=1]
+ %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
+ br i1 undef, label %bb7.i4, label %bb6.i
+
+bb6.i: ; preds = %bb4.i
+ br label %bb7.i4
+
+bb7.i4: ; preds = %bb6.i, %bb4.i
+ %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
+ unreachable
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
new file mode 100644
index 0000000..a21ffc3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+%struct.icstruct = type { [3 x i32], i16 }
+%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
+entry:
+ br i1 %a, label %bb3, label %bb1
+
+bb1: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 %a, label %bb7, label %bb5
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb7: ; preds = %bb3
+ br i1 %a, label %bb11, label %bb9
+
+bb9: ; preds = %bb7
+ %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb7
+ %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %1
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
new file mode 100644
index 0000000..e3d8ea60
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+%struct.Patient = type { i32, i32, i32, %struct.Village* }
+%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
+entry:
+ br i1 %p, label %bb8, label %bb1
+
+bb1: ; preds = %entry
+ %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
+ %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
+ %.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
+ store i32 %.c, i32* null
+ %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %1
+ %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %2
+ ret %struct.Village* %0
+
+bb8: ; preds = %entry
+ ret %struct.Village* null
+}
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
new file mode 100644
index 0000000..9123377
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
[email protected] = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0]
[email protected] = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1]
[email protected] = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %0 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call arm_apcscc i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000..0fad533
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
new file mode 100644
index 0000000..c6ef256
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) {
+entry:
+ %0 = lshr <4 x i32> zeroinitializer, <i32 31, i32 31, i32 31, i32 31> ; <<4 x i32>> [#uses=1]
+ %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; <<2 x i32>> [#uses=1]
+ %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1]
+ %3 = extractelement <2 x i32> %2, i32 0 ; <i32> [#uses=1]
+ %not..i = icmp eq i32 %3, undef ; <i1> [#uses=1]
+ br i1 %not..i, label %return, label %bb221
+
+bb221: ; preds = %bb221, %entry
+ br label %bb221
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
new file mode 100644
index 0000000..bc5bfe9
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+define arm_apcscc void @aaaa(%quuz* %this, i8* %block) {
+entry:
+ br i1 undef, label %bb.nph269, label %bb201
+
+bb.nph269: ; preds = %entry
+ br label %bb12
+
+bb12: ; preds = %bb194, %bb.nph269
+ %0 = fmul <4 x float> undef, undef ; <<4 x float>> [#uses=1]
+ %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %3 = fadd <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
+ br i1 undef, label %bb194, label %bb186
+
+bb186: ; preds = %bb12
+ br label %bb194
+
+bb194: ; preds = %bb186, %bb12
+ %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0]
+ %indvar.next294 = add i32 undef, 1 ; <i32> [#uses=0]
+ br label %bb12
+
+bb201: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
new file mode 100644
index 0000000..d5178b4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() nounwind {
+entry:
+ %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1]
+ %tmp28 = extractelement <2 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fcmp une float %tmp28, 4.900000e+01 ; <i1> [#uses=1]
+ br i1 %1, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ br i1 undef, label %bb8, label %bb9
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7
+ ret void
+}
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
new file mode 100644
index 0000000..266fce6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @aaa() nounwind {
+entry:
+ %0 = fmul <4 x float> undef, <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02, float 0x3EB0C6F7A0000000> ; <<4 x float>> [#uses=1]
+ %tmp31 = extractelement <4 x float> %0, i32 0 ; <float> [#uses=1]
+ %1 = fpext float %tmp31 to double ; <double> [#uses=1]
+ %2 = fsub double 1.000000e+00, %1 ; <double> [#uses=1]
+ %3 = fdiv double %2, 1.000000e+00 ; <double> [#uses=1]
+ %4 = tail call double @fabs(double %3) nounwind readnone ; <double> [#uses=1]
+ %5 = fcmp ogt double %4, 1.000000e-05 ; <i1> [#uses=1]
+ br i1 %5, label %bb, label %bb7
+
+bb: ; preds = %entry
+ unreachable
+
+bb7: ; preds = %entry
+ unreachable
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
new file mode 100644
index 0000000..b6cf880
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
+
+%struct.A = type { i32* }
+
+define arm_apcscc void @"\01-[MyFunction Name:]"() {
+entry:
+ %save_filt.1 = alloca i32 ; <i32*> [#uses=2]
+ %save_eptr.0 = alloca i8* ; <i8**> [#uses=2]
+ %a = alloca %struct.A ; <%struct.A*> [#uses=3]
+ %eh_exception = alloca i8* ; <i8**> [#uses=5]
+ %eh_selector = alloca i32 ; <i32*> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call arm_apcscc void @_ZN1AC1Ev(%struct.A* %a)
+ invoke arm_apcscc void @_Z3barv()
+ to label %invcont unwind label %lpad
+
+invcont: ; preds = %entry
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ br label %return
+
+bb: ; preds = %ppad
+ %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ store i32 %eh_select, i32* %save_filt.1, align 4
+ %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ store i8* %eh_value, i8** %save_eptr.0, align 4
+ call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ %0 = load i8** %save_eptr.0, align 4 ; <i8*> [#uses=1]
+ store i8* %0, i8** %eh_exception, align 4
+ %1 = load i32* %save_filt.1, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* %eh_selector, align 4
+ br label %Unwind
+
+return: ; preds = %invcont
+ ret void
+
+lpad: ; preds = %entry
+ %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr1 = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+ store i32 %eh_select2, i32* %eh_selector
+ br label %ppad
+
+ppad: ; preds = %lpad
+ br label %bb
+
+Unwind: ; preds = %bb
+ %eh_ptr3 = load i8** %eh_exception ; <i8*> [#uses=1]
+ call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+ unreachable
+}
+
+define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = call arm_apcscc i8* @_Znwm(i32 4) ; <i8*> [#uses=1]
+ %1 = bitcast i8* %0 to i32* ; <i32*> [#uses=1]
+ %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+ store i32* %1, i32** %3, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare arm_apcscc i8* @_Znwm(i32)
+
+define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind {
+entry:
+ %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store %struct.A* %this, %struct.A** %this_addr
+ %0 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
+ %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
+ %2 = load i32** %1, align 4 ; <i32*> [#uses=1]
+ %3 = bitcast i32* %2 to i8* ; <i8*> [#uses=1]
+ call arm_apcscc void @_ZdlPv(i8* %3) nounwind
+ br label %bb
+
+bb: ; preds = %entry
+ br label %return
+
+return: ; preds = %bb
+ ret void
+}
+;CHECK: L_LSDA_1:
+
+declare arm_apcscc void @_ZdlPv(i8*) nounwind
+
+declare arm_apcscc void @_Z3barv()
+
+declare i8* @llvm.eh.exception() nounwind
+
+declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+
+declare arm_apcscc i32 @__gxx_personality_sj0(...)
+
+declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
new file mode 100644
index 0000000..e1e60e6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4843
+define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
+;CHECK: v2regbug:
+;CHECK: vzip.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
+ ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
new file mode 100644
index 0000000..bf91fe0
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0 ; <i64*> [#uses=4]
[email protected] = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
[email protected] = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
[email protected] = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
[email protected] = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
[email protected] = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
[email protected] = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
[email protected] = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
[email protected] = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0 ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
[email protected] = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null ; <i8**> [#uses=9]
+@hits = internal global i64 0 ; <i64*> [#uses=8]
+@posed = internal global i64 0 ; <i64*> [#uses=7]
+@ht = internal global i32* null ; <i32**> [#uses=5]
[email protected] = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
[email protected] = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
[email protected] = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
[email protected] = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @getchar() nounwind
+
+define internal arm_apcscc i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+ %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+ %1 = shl i32 %0, 7 ; <i32> [#uses=1]
+ %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+ %3 = or i32 %1, %2 ; <i32> [#uses=1]
+ %4 = shl i32 %3, 7 ; <i32> [#uses=1]
+ %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+ %6 = or i32 %4, %5 ; <i32> [#uses=3]
+ %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+ %8 = shl i32 %7, 7 ; <i32> [#uses=1]
+ %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+ %10 = or i32 %8, %9 ; <i32> [#uses=1]
+ %11 = shl i32 %10, 7 ; <i32> [#uses=1]
+ %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+ %13 = or i32 %11, %12 ; <i32> [#uses=3]
+ %14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2]
+ %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1]
+ %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1]
+ %.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1]
+ %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+ %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1]
+ %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1]
+ %.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1]
+ %.pn1.i = zext i32 %.pn1.in.i to i64 ; <i64> [#uses=1]
+ %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; <i64> [#uses=2]
+ %15 = lshr i64 %iftmp.22.0.i, 17 ; <i64> [#uses=1]
+ %16 = trunc i64 %15 to i32 ; <i32> [#uses=2]
+ %17 = urem i64 %iftmp.22.0.i, 1050011 ; <i64> [#uses=1]
+ %18 = trunc i64 %17 to i32 ; <i32> [#uses=1]
+ %19 = urem i32 %16, 179 ; <i32> [#uses=1]
+ %20 = or i32 %19, 131072 ; <i32> [#uses=1]
+ %21 = load i32** @ht, align 4 ; <i32*> [#uses=1]
+ br label %bb5
+
+bb: ; preds = %bb5
+ %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+ %23 = load i32* %22, align 4 ; <i32> [#uses=1]
+ %24 = icmp eq i32 %23, %16 ; <i1> [#uses=1]
+ br i1 %24, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ %25 = load i8** @he, align 4 ; <i8*> [#uses=1]
+ %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1]
+ %27 = load i8* %26, align 1 ; <i8> [#uses=1]
+ %28 = sext i8 %27 to i32 ; <i32> [#uses=1]
+ ret i32 %28
+
+bb2: ; preds = %bb
+ %29 = add nsw i32 %20, %x.0 ; <i32> [#uses=3]
+ %30 = add i32 %29, -1050011 ; <i32> [#uses=1]
+ %31 = icmp sgt i32 %29, 1050010 ; <i1> [#uses=1]
+ %. = select i1 %31, i32 %30, i32 %29 ; <i32> [#uses=1]
+ %32 = add i32 %33, 1 ; <i32> [#uses=1]
+ br label %bb5
+
+bb5: ; preds = %bb2, %entry
+ %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; <i32> [#uses=2]
+ %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; <i32> [#uses=3]
+ %34 = icmp sgt i32 %33, 7 ; <i1> [#uses=1]
+ br i1 %34, label %bb7, label %bb
+
+bb7: ; preds = %bb5
+ ret i32 -128
+}
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
new file mode 100644
index 0000000..f654a16
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mattr=+neon < %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() {
+entry:
+ %0 = insertelement <4 x i32> undef, i32 -1, i32 3
+ store <4 x i32> %0, <4 x i32>* undef, align 16
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
new file mode 100644
index 0000000..3909c6a5
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; pr4939
+
+define void @test(double* %x, double* %y) nounwind {
+ %1 = load double* %x, align 4
+ %2 = load double* %y, align 4
+ %3 = fsub double -0.000000e+00, %1
+ %4 = fcmp ugt double %2, %3
+ br i1 %4, label %bb1, label %bb2
+
+bb1:
+;CHECK: vstrhi.64
+ store double %1, double* %y, align 4
+ br label %bb2
+
+bb2:
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll
new file mode 100644
index 0000000..10653b5
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; Radar 7213850
+
+define i32 @test(i8* %d, i32 %x, i32 %y) nounwind {
+ %1 = ptrtoint i8* %d to i32
+;CHECK: sub
+ %2 = sub i32 %x, %1
+ %3 = add nsw i32 %2, %y
+ store i8 0, i8* %d, align 1
+ ret i32 %3
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
new file mode 100644
index 0000000..13adb24
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mattr=+neon < %s
+; PR4965
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.fr = type { [6 x %struct.pl] }
+%struct.obb = type { %"struct.m4", %"struct.p3" }
+%struct.pl = type { %"struct.p3" }
+%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" }
+%"struct.p3" = type { <4 x float> }
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
+entry:
+ %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
+ %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1]
+ %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb33
+
+bb: ; preds = %bb33
+ %2 = fmul <4 x float> %val.i.i, undef ; <<4 x float>> [#uses=1]
+ %3 = fmul <4 x float> %val2.i.i, undef ; <<4 x float>> [#uses=1]
+ %4 = fadd <4 x float> %3, %2 ; <<4 x float>> [#uses=1]
+ %5 = fmul <4 x float> %val4.i.i, undef ; <<4 x float>> [#uses=1]
+ %6 = fadd <4 x float> %5, %4 ; <<4 x float>> [#uses=1]
+ %7 = bitcast <4 x float> %6 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %8 = and <4 x i32> %7, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
+ %9 = or <4 x i32> %8, undef ; <<4 x i32>> [#uses=1]
+ %10 = bitcast <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %12 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> undef, %12 ; <<4 x float>> [#uses=1]
+ %14 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=1]
+ %15 = fadd <4 x float> %14, %13 ; <<4 x float>> [#uses=1]
+ %16 = fadd <4 x float> undef, %15 ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %1, %16 ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> zeroinitializer, %17 ; <<4 x float>> [#uses=1]
+ %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2]
+ %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2]
+ %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2]
+ %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %25 = fadd <4 x float> %24, zeroinitializer ; <<4 x float>> [#uses=1]
+ %tmp46 = extractelement <4 x float> %25, i32 0 ; <float> [#uses=1]
+ %26 = fcmp olt float %tmp46, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %26, label %bb41, label %bb33
+
+bb33: ; preds = %bb, %entry
+ br i1 undef, label %bb34, label %bb
+
+bb34: ; preds = %bb33
+ ret i8 undef
+
+bb41: ; preds = %bb
+ ret i8 1
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
new file mode 100644
index 0000000..758b59a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9
+
+define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = ptrtoint i8* %pBuffer to i32
+
+ %lsr.iv2641 = inttoptr i32 %1 to float*
+ %tmp29 = add i32 %1, 4
+ %tmp2930 = inttoptr i32 %tmp29 to float*
+ %tmp31 = add i32 %1, 8
+ %tmp3132 = inttoptr i32 %tmp31 to float*
+ %tmp33 = add i32 %1, 12
+ %tmp3334 = inttoptr i32 %tmp33 to float*
+ %tmp35 = add i32 %1, 16
+ %tmp3536 = inttoptr i32 %tmp35 to float*
+ %tmp37 = add i32 %1, 20
+ %tmp3738 = inttoptr i32 %tmp37 to float*
+ %tmp39 = add i32 %1, 24
+ %tmp3940 = inttoptr i32 %tmp39 to float*
+ %2 = load float* %lsr.iv2641, align 4
+ %3 = load float* %tmp2930, align 4
+ %4 = load float* %tmp3132, align 4
+ %5 = load float* %tmp3334, align 4
+ %6 = load float* %tmp3536, align 4
+ %7 = load float* %tmp3738, align 4
+ %8 = load float* %tmp3940, align 4
+ %9 = insertelement <4 x float> undef, float %6, i32 0
+ %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
+ %11 = insertelement <4 x float> %10, float %7, i32 1
+ %12 = insertelement <4 x float> %11, float %8, i32 2
+ %13 = insertelement <4 x float> undef, float %2, i32 0
+ %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer
+ %15 = insertelement <4 x float> %14, float %3, i32 1
+ %16 = insertelement <4 x float> %15, float %4, i32 2
+ %17 = insertelement <4 x float> %16, float %5, i32 3
+ %18 = fsub <4 x float> zeroinitializer, %12
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer
+ %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+
+ ret <4 x float> %21
+}
diff --git a/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
new file mode 100644
index 0000000..980f8ce
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9
+
+; PR4986
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.preheader
+
+bb.preheader: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb, %bb.preheader
+ %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %4 = fmul <4 x float> undef, %3 ; <<4 x float>> [#uses=1]
+ %5 = extractelement <4 x float> %4, i32 3 ; <float> [#uses=1]
+ store float %5, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+ %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1]
+ %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ ret <4 x float> %6
+}
diff --git a/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
new file mode 100644
index 0000000..aace475
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { <4 x float> }
+%foo = type { %bar, %bar, %bar, %bar }
+
+declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind
+
+define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
new file mode 100644
index 0000000..30931a2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { %foo, %foo }
+%foo = type { <4 x float> }
+
+declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly
+
+declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind
+
+define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc float @aaa(%foo* undef) nounwind ; <float> [#uses=0]
+ unreachable
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000..2ff479b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%struct.1 = type { %struct.4, %struct.4 }
+%struct.4 = type { <4 x float> }
+
+define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind {
+entry:
+ %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
+ %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
+ %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
+ ret %struct.1* %this
+}
+
+declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
new file mode 100644
index 0000000..5476d5f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4926
+
+define arm_apcscc void @test_vget_lanep16() nounwind {
+entry:
+ %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_poly16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+; CHECK: vldr.64
+ %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_poly16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
new file mode 100644
index 0000000..ea2693a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; PR5055
+
+module asm ".globl\09__aeabi_f2lz"
+module asm ".set\09__aeabi_f2lz, __fixsfdi"
+module asm ""
+
+define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind {
+entry:
+ %0 = fcmp olt float %a, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb1
+
+bb: ; preds = %entry
+ %1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
+ %2 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %1) nounwind ; <i64> [#uses=1]
+ %3 = sub i64 0, %2 ; <i64> [#uses=1]
+ ret i64 %3
+
+bb1: ; preds = %entry
+ %4 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %a) nounwind ; <i64> [#uses=1]
+ ret i64 %4
+}
+
+declare arm_aapcs_vfpcc i64 @__fixunssfdi(float)
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
new file mode 100644
index 0000000..53bd668
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s
+; PR4687
+
+%0 = type { double, double }
+
+define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
+; CHECK: foo:
+; CHECK: bl __adddf3
+; CHECK-NOT: strd
+; CHECK: mov
+ %x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %x77 = fadd double %y.0, 0.000000e+00 ; <double> [#uses=1]
+ %tmpr = fadd double %x.0, %x76 ; <double> [#uses=1]
+ %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+ store double %tmpr, double* %agg.result.0, align 8
+ %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+ store double %x77, double* %agg.result.1, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
new file mode 100644
index 0000000..465368b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 -enable-unsafe-fp-math < %s
+; PR5367
+
+define arm_aapcs_vfpcc void @_Z27Benchmark_SceDualQuaternionPvm(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = load float* null, align 4 ; <float> [#uses=1]
+ %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1]
+ %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2]
+ %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
+ %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=4]
+ %6 = fsub <4 x float> zeroinitializer, %3 ; <<4 x float>> [#uses=1]
+ %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=2]
+ %8 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %9 = shufflevector <2 x float> %8, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=2]
+ %10 = fmul <4 x float> %7, %9 ; <<4 x float>> [#uses=1]
+ %11 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %12 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=2]
+ %13 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %14 = fmul <4 x float> %11, %13 ; <<4 x float>> [#uses=1]
+ %15 = fadd <4 x float> %10, %14 ; <<4 x float>> [#uses=1]
+ %16 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %15, zeroinitializer ; <<4 x float>> [#uses=1]
+ %18 = shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %19 = fmul <4 x float> %7, %16 ; <<4 x float>> [#uses=1]
+ %20 = fadd <4 x float> %19, zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %23 = fmul <4 x float> %22, %9 ; <<4 x float>> [#uses=1]
+ %24 = fadd <4 x float> %20, %23 ; <<4 x float>> [#uses=1]
+ %25 = shufflevector <4 x float> %18, <4 x float> %24, <4 x i32> <i32 0, i32 1, i32 6, i32 undef> ; <<4 x float>> [#uses=1]
+ %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1]
+ %27 = fmul <4 x float> %26, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %5 ; <<4 x float>> [#uses=1]
+ %29 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
+ %30 = fmul <4 x float> zeroinitializer, %29 ; <<4 x float>> [#uses=1]
+ %31 = fmul <4 x float> %30, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %32 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %33 = shufflevector <4 x float> %28, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %34 = shufflevector <2 x float> %33, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %35 = fmul <4 x float> %32, %34 ; <<4 x float>> [#uses=1]
+ %36 = fadd <4 x float> %35, zeroinitializer ; <<4 x float>> [#uses=1]
+ %37 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+ %38 = shufflevector <4 x float> %37, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %39 = fmul <4 x float> zeroinitializer, %38 ; <<4 x float>> [#uses=1]
+ %40 = fadd <4 x float> %36, %39 ; <<4 x float>> [#uses=1]
+ %41 = fadd <4 x float> %40, zeroinitializer ; <<4 x float>> [#uses=1]
+ %42 = shufflevector <4 x float> undef, <4 x float> %41, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1]
+ %43 = fmul <4 x float> %42, %31 ; <<4 x float>> [#uses=1]
+ store float undef, float* undef, align 4
+ store float 0.000000e+00, float* null, align 4
+ %44 = extractelement <4 x float> %43, i32 1 ; <float> [#uses=1]
+ store float %44, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll b/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
new file mode 100644
index 0000000..0f021d2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=cortex-a8 -mattr=+neon < %s | grep vneg
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%aaa = type { %fff, %fff }
+%bbb = type { [6 x %ddd] }
+%ccc = type { %eee, %fff }
+%ddd = type { %fff }
+%eee = type { %fff, %fff, %fff, %fff }
+%fff = type { %struct.vec_float4 }
+%struct.vec_float4 = type { <4 x float> }
+
+define linkonce_odr arm_aapcs_vfpcc void @foo(%eee* noalias sret %agg.result, i64 %tfrm.0.0, i64 %tfrm.0.1, i64 %tfrm.0.2, i64 %tfrm.0.3, i64 %tfrm.0.4, i64 %tfrm.0.5, i64 %tfrm.0.6, i64 %tfrm.0.7) nounwind noinline {
+entry:
+ %tmp104 = zext i64 %tfrm.0.2 to i512 ; <i512> [#uses=1]
+ %tmp105 = shl i512 %tmp104, 128 ; <i512> [#uses=1]
+ %tmp118 = zext i64 %tfrm.0.3 to i512 ; <i512> [#uses=1]
+ %tmp119 = shl i512 %tmp118, 192 ; <i512> [#uses=1]
+ %ins121 = or i512 %tmp119, %tmp105 ; <i512> [#uses=1]
+ %tmp99 = zext i64 %tfrm.0.4 to i512 ; <i512> [#uses=1]
+ %tmp100 = shl i512 %tmp99, 256 ; <i512> [#uses=1]
+ %tmp123 = zext i64 %tfrm.0.5 to i512 ; <i512> [#uses=1]
+ %tmp124 = shl i512 %tmp123, 320 ; <i512> [#uses=1]
+ %tmp96 = zext i64 %tfrm.0.6 to i512 ; <i512> [#uses=1]
+ %tmp97 = shl i512 %tmp96, 384 ; <i512> [#uses=1]
+ %tmp128 = zext i64 %tfrm.0.7 to i512 ; <i512> [#uses=1]
+ %tmp129 = shl i512 %tmp128, 448 ; <i512> [#uses=1]
+ %mask.masked = or i512 %tmp124, %tmp100 ; <i512> [#uses=1]
+ %ins131 = or i512 %tmp129, %tmp97 ; <i512> [#uses=1]
+ %tmp109132 = zext i64 %tfrm.0.0 to i128 ; <i128> [#uses=1]
+ %tmp113134 = zext i64 %tfrm.0.1 to i128 ; <i128> [#uses=1]
+ %tmp114133 = shl i128 %tmp113134, 64 ; <i128> [#uses=1]
+ %tmp94 = or i128 %tmp114133, %tmp109132 ; <i128> [#uses=1]
+ %tmp95 = bitcast i128 %tmp94 to <4 x float> ; <<4 x float>> [#uses=0]
+ %tmp82 = lshr i512 %ins121, 128 ; <i512> [#uses=1]
+ %tmp83 = trunc i512 %tmp82 to i128 ; <i128> [#uses=1]
+ %tmp84 = bitcast i128 %tmp83 to <4 x float> ; <<4 x float>> [#uses=0]
+ %tmp86 = lshr i512 %mask.masked, 256 ; <i512> [#uses=1]
+ %tmp87 = trunc i512 %tmp86 to i128 ; <i128> [#uses=1]
+ %tmp88 = bitcast i128 %tmp87 to <4 x float> ; <<4 x float>> [#uses=0]
+ %tmp90 = lshr i512 %ins131, 384 ; <i512> [#uses=1]
+ %tmp91 = trunc i512 %tmp90 to i128 ; <i128> [#uses=1]
+ %tmp92 = bitcast i128 %tmp91 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp92 ; <<4 x float>> [#uses=1]
+ %tmp28 = getelementptr inbounds %eee* %agg.result, i32 0, i32 3, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
+ store <4 x float> %tmp, <4 x float>* %tmp28, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
new file mode 100644
index 0000000..a4e7685
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+
[email protected] = private constant [1 x i8] zeroinitializer, align 1
+
+define arm_aapcscc void @g() {
+entry:
+;CHECK: [sp, #+8]
+;CHECK: [sp, #+12]
+;CHECK: [sp]
+ tail call arm_aapcscc void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
+ ret void
+}
+
+declare arm_aapcscc void @f(i8*, ...)
diff --git a/test/CodeGen/ARM/2009-10-30.ll b/test/CodeGen/ARM/2009-10-30.ll
new file mode 100644
index 0000000..90a5bd2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-30.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; This test checks that the address of the varg arguments is correctly
+; computed when there are 5 or more regular arguments.
+
+define void @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) {
+entry:
+;CHECK: sub sp, sp, #4
+;CHECK: add r{{[0-9]+}}, sp, #8
+;CHECK: str r{{[0-9]+}}, [sp], #+4
+;CHECK: bx lr
+ %ap = alloca i8*, align 4
+ %ap1 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap1)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
new file mode 100644
index 0000000..62f3786
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%foo = type { <4 x float> }
+
+define arm_aapcs_vfpcc void @bar(%foo* noalias sret %agg.result, <4 x float> %quat.0) nounwind {
+entry:
+ %quat_addr = alloca %foo, align 16 ; <%foo*> [#uses=2]
+ %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
+ store <4 x float> %quat.0, <4 x float>* %0
+ %1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
+;CHECK: vmov.f32
+;CHECK: vmov.f32
+ %2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2]
+ %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+ %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
+ %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
+ %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
+;CHECK: vmov
+ %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
+ %9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1]
+ %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %8 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> %11, %1 ; <<4 x float>> [#uses=1]
+ %13 = call arm_aapcs_vfpcc %foo* @baz(%foo* %agg.result, <4 x float> %12) nounwind ; <%foo*> [#uses=0]
+ ret void
+}
+
+declare arm_aapcs_vfpcc %foo* @baz(%foo*, <4 x float>) nounwind
+
+declare arm_aapcs_vfpcc <4 x float> @quux(%foo* nocapture) nounwind readonly
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
new file mode 100644
index 0000000..f2288c3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.32
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+ br i1 undef, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %1 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
+ %2 = insertelement <4 x float> %1, float undef, i32 3 ; <<4 x float>> [#uses=1]
+ %3 = fmul <4 x float> undef, %2 ; <<4 x float>> [#uses=1]
+ %4 = extractelement <4 x float> %3, i32 1 ; <float> [#uses=1]
+ store float %4, float* undef, align 4
+ br i1 undef, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
new file mode 100644
index 0000000..7aae3ac
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; PR5423
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+define arm_aapcs_vfpcc void @foo() {
+entry:
+ %0 = load float* null, align 4 ; <float> [#uses=2]
+ %1 = fmul float %0, undef ; <float> [#uses=2]
+ %2 = fmul float 0.000000e+00, %1 ; <float> [#uses=2]
+ %3 = fmul float %0, %1 ; <float> [#uses=1]
+ %4 = fadd float 0.000000e+00, %3 ; <float> [#uses=1]
+ %5 = fsub float 1.000000e+00, %4 ; <float> [#uses=1]
+; CHECK: foo:
+; CHECK: vmov.f32 s{{[0-9]+}}, #1.000000e+00
+ %6 = fsub float 1.000000e+00, undef ; <float> [#uses=2]
+ %7 = fsub float %2, undef ; <float> [#uses=1]
+ %8 = fsub float 0.000000e+00, undef ; <float> [#uses=3]
+ %9 = fadd float %2, undef ; <float> [#uses=3]
+ %10 = load float* undef, align 8 ; <float> [#uses=3]
+ %11 = fmul float %8, %10 ; <float> [#uses=1]
+ %12 = fadd float undef, %11 ; <float> [#uses=2]
+ %13 = fmul float undef, undef ; <float> [#uses=1]
+ %14 = fmul float %6, 0.000000e+00 ; <float> [#uses=1]
+ %15 = fadd float %13, %14 ; <float> [#uses=1]
+ %16 = fmul float %9, %10 ; <float> [#uses=1]
+ %17 = fadd float %15, %16 ; <float> [#uses=2]
+ %18 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %19 = fadd float %18, 0.000000e+00 ; <float> [#uses=1]
+ %20 = fmul float undef, %10 ; <float> [#uses=1]
+ %21 = fadd float %19, %20 ; <float> [#uses=1]
+ %22 = load float* undef, align 8 ; <float> [#uses=1]
+ %23 = fmul float %5, %22 ; <float> [#uses=1]
+ %24 = fadd float %23, undef ; <float> [#uses=1]
+ %25 = load float* undef, align 8 ; <float> [#uses=2]
+ %26 = fmul float %8, %25 ; <float> [#uses=1]
+ %27 = fadd float %24, %26 ; <float> [#uses=1]
+ %28 = fmul float %9, %25 ; <float> [#uses=1]
+ %29 = fadd float undef, %28 ; <float> [#uses=1]
+ %30 = fmul float %8, undef ; <float> [#uses=1]
+ %31 = fadd float undef, %30 ; <float> [#uses=1]
+ %32 = fmul float %6, undef ; <float> [#uses=1]
+ %33 = fadd float undef, %32 ; <float> [#uses=1]
+ %34 = fmul float %9, undef ; <float> [#uses=1]
+ %35 = fadd float %33, %34 ; <float> [#uses=1]
+ %36 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %37 = fmul float %7, undef ; <float> [#uses=1]
+ %38 = fadd float %36, %37 ; <float> [#uses=1]
+ %39 = fmul float undef, undef ; <float> [#uses=1]
+ %40 = fadd float %38, %39 ; <float> [#uses=1]
+ store float %12, float* undef, align 8
+ store float %17, float* undef, align 4
+ store float %21, float* undef, align 8
+ store float %27, float* undef, align 8
+ store float %29, float* undef, align 4
+ store float %31, float* undef, align 8
+ store float %40, float* undef, align 8
+ store float %12, float* null, align 8
+ %41 = fmul float %17, undef ; <float> [#uses=1]
+ %42 = fadd float %41, undef ; <float> [#uses=1]
+ %43 = fmul float %35, undef ; <float> [#uses=1]
+ %44 = fadd float %42, %43 ; <float> [#uses=1]
+ store float %44, float* null, align 4
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
new file mode 100644
index 0000000..efc4be1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5410
+
+%0 = type { float, float, float, float }
+%pln = type { %vec, float }
+%vec = type { [4 x float] }
+
+define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
+entry:
+ br i1 undef, label %bb81, label %bb48
+
+bb48: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
+ ret float 0.000000e+00
+
+bb81: ; preds = %entry
+ ret float 0.000000e+00
+}
+
+declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
new file mode 100644
index 0000000..6cce02d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5411
+
+%bar = type { %quad, float, float, [3 x %quux*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quuz, %quad, float, [64 x %quux], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { %quad, %quad }
+%quuz = type { [4 x %quux*], [4 x float], i32 }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quux* %a, %quux* %b, %quux* %c, i8 zeroext %forced) {
+entry:
+ br i1 undef, label %bb85, label %bb
+
+bb: ; preds = %entry
+ %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
+ %1 = load float* undef, align 4 ; <float> [#uses=1]
+ %2 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
+ %3 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %4 = load float* %0, align 4 ; <float> [#uses=3]
+ %5 = fmul float %4, %2 ; <float> [#uses=1]
+ %6 = fsub float %3, %5 ; <float> [#uses=1]
+ %7 = fmul float %4, undef ; <float> [#uses=1]
+ %8 = fsub float %7, undef ; <float> [#uses=1]
+ %9 = fmul float undef, %2 ; <float> [#uses=1]
+ %10 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %11 = fsub float %9, %10 ; <float> [#uses=1]
+ %12 = fmul float undef, %6 ; <float> [#uses=1]
+ %13 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
+ %14 = fadd float %12, %13 ; <float> [#uses=1]
+ %15 = fmul float %1, %11 ; <float> [#uses=1]
+ %16 = fadd float %14, %15 ; <float> [#uses=1]
+ %17 = select i1 undef, float undef, float %16 ; <float> [#uses=1]
+ %18 = fdiv float %17, 0.000000e+00 ; <float> [#uses=1]
+ store float %18, float* undef, align 4
+ %19 = fmul float %4, undef ; <float> [#uses=1]
+ store float %19, float* %0, align 4
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
new file mode 100644
index 0000000..3ff6631
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+ br i1 undef, label %bb85, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i: ; preds = %bb
+ br label %bb3.i
+
+bb3.i: ; preds = %bb2.i, %bb
+ %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
+ %1 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %3 = load float* %2, align 4 ; <float> [#uses=1]
+ %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %5 = fsub float %3, undef ; <float> [#uses=2]
+ %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
+ %7 = load float* %6, align 4 ; <float> [#uses=1]
+ %8 = fsub float %7, undef ; <float> [#uses=1]
+ %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
+ %10 = load float* %9, align 4 ; <float> [#uses=1]
+ %11 = fsub float %10, undef ; <float> [#uses=2]
+ %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %13 = load float* %12, align 4 ; <float> [#uses=1]
+ %14 = fsub float %13, undef ; <float> [#uses=1]
+ %15 = load float* undef, align 4 ; <float> [#uses=1]
+ %16 = fsub float %15, undef ; <float> [#uses=1]
+ %17 = fmul float %5, %16 ; <float> [#uses=1]
+ %18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5]
+ %19 = fmul float %8, %11 ; <float> [#uses=1]
+ %20 = fsub float %19, undef ; <float> [#uses=3]
+ %21 = fmul float %1, %14 ; <float> [#uses=1]
+ %22 = fmul float %5, %11 ; <float> [#uses=1]
+ %23 = fsub float %21, %22 ; <float> [#uses=2]
+ store float %18, float* undef
+ %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
+ store float %20, float* %24
+ store float %23, float* undef
+ %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
+ %26 = fmul float %18, %18 ; <float> [#uses=1]
+ %27 = fadd float %26, undef ; <float> [#uses=1]
+ %28 = fadd float %27, undef ; <float> [#uses=1]
+ %29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1]
+ %30 = load float* null, align 4 ; <float> [#uses=2]
+ %31 = load float* %4, align 4 ; <float> [#uses=2]
+ %32 = load float* %2, align 4 ; <float> [#uses=2]
+ %33 = load float* null, align 4 ; <float> [#uses=3]
+ %34 = load float* %6, align 4 ; <float> [#uses=2]
+ %35 = fsub float %33, %34 ; <float> [#uses=2]
+ %36 = fmul float %20, %35 ; <float> [#uses=1]
+ %37 = fsub float %36, undef ; <float> [#uses=1]
+ %38 = fmul float %23, 0.000000e+00 ; <float> [#uses=1]
+ %39 = fmul float %18, %35 ; <float> [#uses=1]
+ %40 = fsub float %38, %39 ; <float> [#uses=1]
+ %41 = fmul float %18, 0.000000e+00 ; <float> [#uses=1]
+ %42 = fmul float %20, 0.000000e+00 ; <float> [#uses=1]
+ %43 = fsub float %41, %42 ; <float> [#uses=1]
+ %44 = fmul float 0.000000e+00, %37 ; <float> [#uses=1]
+ %45 = fmul float %31, %40 ; <float> [#uses=1]
+ %46 = fadd float %44, %45 ; <float> [#uses=1]
+ %47 = fmul float %33, %43 ; <float> [#uses=1]
+ %48 = fadd float %46, %47 ; <float> [#uses=2]
+ %49 = load float* %9, align 4 ; <float> [#uses=2]
+ %50 = fsub float %30, %49 ; <float> [#uses=1]
+ %51 = load float* %12, align 4 ; <float> [#uses=3]
+ %52 = fsub float %32, %51 ; <float> [#uses=2]
+ %53 = load float* undef, align 4 ; <float> [#uses=2]
+ %54 = load float* %24, align 4 ; <float> [#uses=2]
+ %55 = fmul float %54, undef ; <float> [#uses=1]
+ %56 = fmul float undef, %52 ; <float> [#uses=1]
+ %57 = fsub float %55, %56 ; <float> [#uses=1]
+ %58 = fmul float undef, %52 ; <float> [#uses=1]
+ %59 = fmul float %54, %50 ; <float> [#uses=1]
+ %60 = fsub float %58, %59 ; <float> [#uses=1]
+ %61 = fmul float %30, %57 ; <float> [#uses=1]
+ %62 = fmul float %32, 0.000000e+00 ; <float> [#uses=1]
+ %63 = fadd float %61, %62 ; <float> [#uses=1]
+ %64 = fmul float %34, %60 ; <float> [#uses=1]
+ %65 = fadd float %63, %64 ; <float> [#uses=2]
+ %66 = fcmp olt float %48, %65 ; <i1> [#uses=1]
+ %67 = fsub float %49, 0.000000e+00 ; <float> [#uses=1]
+ %68 = fsub float %51, %31 ; <float> [#uses=1]
+ %69 = fsub float %53, %33 ; <float> [#uses=1]
+ %70 = fmul float undef, %67 ; <float> [#uses=1]
+ %71 = load float* undef, align 4 ; <float> [#uses=2]
+ %72 = fmul float %71, %69 ; <float> [#uses=1]
+ %73 = fsub float %70, %72 ; <float> [#uses=1]
+ %74 = fmul float %71, %68 ; <float> [#uses=1]
+ %75 = fsub float %74, 0.000000e+00 ; <float> [#uses=1]
+ %76 = fmul float %51, %73 ; <float> [#uses=1]
+ %77 = fadd float undef, %76 ; <float> [#uses=1]
+ %78 = fmul float %53, %75 ; <float> [#uses=1]
+ %79 = fadd float %77, %78 ; <float> [#uses=1]
+ %80 = select i1 %66, float %48, float %65 ; <float> [#uses=1]
+ %81 = select i1 undef, float %80, float %79 ; <float> [#uses=1]
+ %iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1]
+ %82 = fdiv float %81, %iftmp.164.0 ; <float> [#uses=1]
+ %iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1]
+ store float %iftmp.165.0, float* undef, align 4
+ br i1 false, label %bb4.i97, label %ccc.exit98
+
+bb4.i97: ; preds = %bb3.i
+ br label %ccc.exit98
+
+ccc.exit98: ; preds = %bb4.i97, %bb3.i
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
new file mode 100644
index 0000000..832ff4f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5412
+; rdar://7384107
+
+%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
+%baz = type { %bar*, i32 }
+%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
+%quad = type { [4 x float] }
+%quux = type { [4 x %quuz*], [4 x float], i32 }
+%quuz = type { %quad, %quad }
+
+define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
+entry:
+ %0 = load %bar** undef, align 4 ; <%bar*> [#uses=2]
+ br i1 false, label %bb85, label %bb
+
+bb: ; preds = %entry
+ br i1 undef, label %bb3.i, label %bb2.i
+
+bb2.i: ; preds = %bb
+ br label %bb3.i
+
+bb3.i: ; preds = %bb2.i, %bb
+ %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %2 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
+ %5 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
+ %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %8 = fsub float undef, undef ; <float> [#uses=1]
+ %9 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
+ %10 = fmul float %5, 0.000000e+00 ; <float> [#uses=1]
+ %11 = fsub float %9, %10 ; <float> [#uses=3]
+ %12 = fmul float %2, 0.000000e+00 ; <float> [#uses=1]
+ %13 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
+ %14 = fsub float %12, %13 ; <float> [#uses=2]
+ store float %14, float* undef
+ %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
+ store float 0.000000e+00, float* %15
+ %16 = fmul float %11, %11 ; <float> [#uses=1]
+ %17 = fadd float %16, 0.000000e+00 ; <float> [#uses=1]
+ %18 = fadd float %17, undef ; <float> [#uses=1]
+ %19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2]
+ %20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1]
+ %21 = load float* %1, align 4 ; <float> [#uses=2]
+ %22 = load float* %3, align 4 ; <float> [#uses=2]
+ %23 = load float* undef, align 4 ; <float> [#uses=2]
+ %24 = load float* %4, align 4 ; <float> [#uses=2]
+ %25 = fsub float %23, %24 ; <float> [#uses=2]
+ %26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1]
+ %27 = fsub float %26, undef ; <float> [#uses=1]
+ %28 = fmul float %14, 0.000000e+00 ; <float> [#uses=1]
+ %29 = fmul float %11, %25 ; <float> [#uses=1]
+ %30 = fsub float %28, %29 ; <float> [#uses=1]
+ %31 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
+ %32 = fmul float %21, %27 ; <float> [#uses=1]
+ %33 = fmul float undef, %30 ; <float> [#uses=1]
+ %34 = fadd float %32, %33 ; <float> [#uses=1]
+ %35 = fmul float %23, %31 ; <float> [#uses=1]
+ %36 = fadd float %34, %35 ; <float> [#uses=1]
+ %37 = load float* %6, align 4 ; <float> [#uses=2]
+ %38 = load float* %7, align 4 ; <float> [#uses=2]
+ %39 = fsub float %22, %38 ; <float> [#uses=2]
+ %40 = load float* undef, align 4 ; <float> [#uses=1]
+ %41 = load float* null, align 4 ; <float> [#uses=2]
+ %42 = fmul float %41, undef ; <float> [#uses=1]
+ %43 = fmul float undef, %39 ; <float> [#uses=1]
+ %44 = fsub float %42, %43 ; <float> [#uses=1]
+ %45 = fmul float undef, %39 ; <float> [#uses=1]
+ %46 = fmul float %41, 0.000000e+00 ; <float> [#uses=1]
+ %47 = fsub float %45, %46 ; <float> [#uses=1]
+ %48 = fmul float 0.000000e+00, %44 ; <float> [#uses=1]
+ %49 = fmul float %22, undef ; <float> [#uses=1]
+ %50 = fadd float %48, %49 ; <float> [#uses=1]
+ %51 = fmul float %24, %47 ; <float> [#uses=1]
+ %52 = fadd float %50, %51 ; <float> [#uses=1]
+ %53 = fsub float %37, %21 ; <float> [#uses=2]
+ %54 = fmul float undef, undef ; <float> [#uses=1]
+ %55 = fmul float undef, undef ; <float> [#uses=1]
+ %56 = fsub float %54, %55 ; <float> [#uses=1]
+ %57 = fmul float undef, %53 ; <float> [#uses=1]
+ %58 = load float* undef, align 4 ; <float> [#uses=2]
+ %59 = fmul float %58, undef ; <float> [#uses=1]
+ %60 = fsub float %57, %59 ; <float> [#uses=1]
+ %61 = fmul float %58, undef ; <float> [#uses=1]
+ %62 = fmul float undef, %53 ; <float> [#uses=1]
+ %63 = fsub float %61, %62 ; <float> [#uses=1]
+ %64 = fmul float %37, %56 ; <float> [#uses=1]
+ %65 = fmul float %38, %60 ; <float> [#uses=1]
+ %66 = fadd float %64, %65 ; <float> [#uses=1]
+ %67 = fmul float %40, %63 ; <float> [#uses=1]
+ %68 = fadd float %66, %67 ; <float> [#uses=1]
+ %69 = select i1 undef, float %36, float %52 ; <float> [#uses=1]
+ %70 = select i1 undef, float %69, float %68 ; <float> [#uses=1]
+ %iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1]
+ %71 = fdiv float %70, %iftmp.164.0 ; <float> [#uses=1]
+ store float %71, float* null, align 4
+ %72 = icmp eq %bar* null, %0 ; <i1> [#uses=1]
+ br i1 %72, label %bb4.i97, label %ccc.exit98
+
+bb4.i97: ; preds = %bb3.i
+ %73 = load %bar** undef, align 4 ; <%bar*> [#uses=0]
+ br label %ccc.exit98
+
+ccc.exit98: ; preds = %bb4.i97, %bb3.i
+ ret %bar* null
+
+bb85: ; preds = %entry
+ ret %bar* null
+}
+
+declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll
new file mode 100644
index 0000000..efe74cf
--- /dev/null
+++ b/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
+; PR5614
+
+%"als" = type { i32 (...)** }
+%"av" = type { %"als" }
+%"c" = type { %"lsm", %"Vec3", %"av"*, float, i8, float, %"lsm", i8, %"Vec3", %"Vec3", %"Vec3", float, float, float, %"Vec3", %"Vec3" }
+%"lsm" = type { %"als", %"Vec3", %"Vec3", %"Vec3", %"Vec3" }
+%"Vec3" = type { float, float, float }
+
+define arm_aapcs_vfpcc void @foo(%"c"* %this, %"Vec3"* nocapture %adjustment) {
+entry:
+ switch i32 undef, label %return [
+ i32 1, label %bb
+ i32 2, label %bb72
+ i32 3, label %bb31
+ i32 4, label %bb79
+ i32 5, label %bb104
+ ]
+
+bb: ; preds = %entry
+ ret void
+
+bb31: ; preds = %entry
+ %0 = call arm_aapcs_vfpcc %"Vec3" undef(%"lsm"* undef) ; <%"Vec3"> [#uses=1]
+ %mrv_gr69 = extractvalue %"Vec3" %0, 1 ; <float> [#uses=1]
+ %1 = fsub float %mrv_gr69, undef ; <float> [#uses=1]
+ store float %1, float* undef, align 4
+ ret void
+
+bb72: ; preds = %entry
+ ret void
+
+bb79: ; preds = %entry
+ ret void
+
+bb104: ; preds = %entry
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
new file mode 100644
index 0000000..a737591
--- /dev/null
+++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "armv7-apple-darwin10"
+
+%struct.int16x8_t = type { <8 x i16> }
+%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
+
+define arm_apcscc void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind {
+entry:
+;CHECK: vtrn.16
+ %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ %1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
+ store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16
+ %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
+ store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
new file mode 100644
index 0000000..9ccff07
--- /dev/null
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
+
+define i32 @t1(i32 %a) {
+ %b = mul i32 %a, 9
+ %c = inttoptr i32 %b to i32*
+ %d = load i32* %c
+ ret i32 %d
+}
+
+define i32 @t2(i32 %a) {
+ %b = mul i32 %a, -7
+ %c = inttoptr i32 %b to i32*
+ %d = load i32* %c
+ ret i32 %d
+}
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
new file mode 100644
index 0000000..31c5007
--- /dev/null
+++ b/test/CodeGen/ARM/aliases.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
+; RUN: grep { = } %t | count 5
+; RUN: grep globl %t | count 4
+; RUN: grep weak %t | count 1
+
+@bar = external global i32
+@foo1 = alias i32* @bar
+@foo2 = alias i32* @bar
+
+%FunTy = type i32()
+
+declare i32 @foo_f()
+@bar_f = alias weak %FunTy* @foo_f
+
+@bar_i = alias internal i32* @bar
+
+@A = alias bitcast (i32* @bar to i64*)
+
+define i32 @test() {
+entry:
+ %tmp = load i32* @foo1
+ %tmp1 = load i32* @foo2
+ %tmp0 = load i32* @bar_i
+ %tmp2 = call i32 @foo_f()
+ %tmp3 = add i32 %tmp, %tmp2
+ %tmp4 = call %FunTy* @bar_f()
+ %tmp5 = add i32 %tmp3, %tmp4
+ %tmp6 = add i32 %tmp1, %tmp5
+ %tmp7 = add i32 %tmp6, %tmp0
+ ret i32 %tmp7
+}
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
new file mode 100644
index 0000000..d4d01288
--- /dev/null
+++ b/test/CodeGen/ARM/align.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+
+@a = global i1 true
+; no alignment
+
+@b = global i8 1
+; no alignment
+
+@c = global i16 2
+;ELF: .align 1
+;ELF: c:
+;DARWIN: .align 1
+;DARWIN: _c:
+
+@d = global i32 3
+;ELF: .align 2
+;ELF: d:
+;DARWIN: .align 2
+;DARWIN: _d:
+
+@e = global i64 4
+;ELF: .align 3
+;ELF: e
+;DARWIN: .align 2
+;DARWIN: _e:
+
+@f = global float 5.0
+;ELF: .align 2
+;ELF: f:
+;DARWIN: .align 2
+;DARWIN: _f:
+
+@g = global double 6.0
+;ELF: .align 3
+;ELF: g:
+;DARWIN: .align 2
+;DARWIN: _g:
+
+@bar = common global [75 x i8] zeroinitializer, align 128
+;ELF: .comm bar,75,128
+;DARWIN: .comm _bar,75,7
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
new file mode 100644
index 0000000..82a8c98
--- /dev/null
+++ b/test/CodeGen/ARM/alloca.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s
+
+define void @f(i32 %a) {
+entry:
+; CHECK: mov r11, sp
+ %tmp = alloca i8, i32 %a ; <i8*> [#uses=1]
+ call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 )
+ ret void
+; CHECK: mov sp, r11
+}
+
+declare void @g(i8*, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll
new file mode 100644
index 0000000..116a32f
--- /dev/null
+++ b/test/CodeGen/ARM/argaddr.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm
+
+define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %a_addr = alloca i32 ; <i32*> [#uses=2]
+ %b_addr = alloca i32 ; <i32*> [#uses=2]
+ %c_addr = alloca i32 ; <i32*> [#uses=2]
+ %d_addr = alloca i32 ; <i32*> [#uses=2]
+ %e_addr = alloca i32 ; <i32*> [#uses=2]
+ store i32 %a, i32* %a_addr
+ store i32 %b, i32* %b_addr
+ store i32 %c, i32* %c_addr
+ store i32 %d, i32* %d_addr
+ store i32 %e, i32* %e_addr
+ call void @g( i32* %a_addr, i32* %b_addr, i32* %c_addr, i32* %d_addr, i32* %e_addr )
+ ret void
+}
+
+declare void @g(i32*, i32*, i32*, i32*, i32*)
diff --git a/test/CodeGen/ARM/arguments-nosplit-double.ll b/test/CodeGen/ARM/arguments-nosplit-double.ll
new file mode 100644
index 0000000..770e41d
--- /dev/null
+++ b/test/CodeGen/ARM/arguments-nosplit-double.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
+; PR4059
+
+define i32 @f(i64 %z, i32 %a, double %b) {
+ %tmp = call i32 @g(double %b)
+ ret i32 %tmp
+}
+
+declare i32 @g(double)
diff --git a/test/CodeGen/ARM/arguments-nosplit-i64.ll b/test/CodeGen/ARM/arguments-nosplit-i64.ll
new file mode 100644
index 0000000..815edfd
--- /dev/null
+++ b/test/CodeGen/ARM/arguments-nosplit-i64.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
+; PR4058
+
+define i32 @f(i64 %z, i32 %a, i64 %b) {
+ %tmp = call i32 @g(i64 %b)
+ ret i32 %tmp
+}
+
+declare i32 @g(i64)
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
new file mode 100644
index 0000000..cc71839
--- /dev/null
+++ b/test/CodeGen/ARM/arguments.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+
+define i32 @f(i32 %a, i64 %b) {
+; ELF: mov r0, r2
+; DARWIN: mov r0, r1
+ %tmp = call i32 @g(i64 %b)
+ ret i32 %tmp
+}
+
+declare i32 @g(i64)
diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll
new file mode 100644
index 0000000..a515ad7
--- /dev/null
+++ b/test/CodeGen/ARM/arguments2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define i32 @f(i32 %a, i128 %b) {
+ %tmp = call i32 @g(i128 %b)
+ ret i32 %tmp
+}
+
+declare i32 @g(i128)
diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll
new file mode 100644
index 0000000..58f64c6
--- /dev/null
+++ b/test/CodeGen/ARM/arguments3.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define i64 @f(i32 %a, i128 %b) {
+ %tmp = call i64 @g(i128 %b)
+ ret i64 %tmp
+}
+
+declare i64 @g(i128)
diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll
new file mode 100644
index 0000000..f5f4207
--- /dev/null
+++ b/test/CodeGen/ARM/arguments4.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define float @f(i32 %a, i128 %b) {
+ %tmp = call float @g(i128 %b)
+ ret float %tmp
+}
+
+declare float @g(i128)
diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll
new file mode 100644
index 0000000..388a8eb
--- /dev/null
+++ b/test/CodeGen/ARM/arguments5.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define double @f(i32 %a, i128 %b) {
+ %tmp = call double @g(i128 %b)
+ ret double %tmp
+}
+
+declare double @g(i128)
diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll
new file mode 100644
index 0000000..3f757fe
--- /dev/null
+++ b/test/CodeGen/ARM/arguments6.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define i128 @f(i32 %a, i128 %b) {
+ %tmp = call i128 @g(i128 %b)
+ ret i128 %tmp
+}
+
+declare i128 @g(i128)
diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll
new file mode 100644
index 0000000..038e417
--- /dev/null
+++ b/test/CodeGen/ARM/arguments7.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) {
+ %tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b)
+ ret double %tmp
+}
+
+declare double @g(double)
diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll
new file mode 100644
index 0000000..6999a4d
--- /dev/null
+++ b/test/CodeGen/ARM/arguments8.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
+
+define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) {
+ %tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b)
+ ret i64 %tmp
+}
+
+declare i64 @g(i64)
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
new file mode 100644
index 0000000..062133e
--- /dev/null
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s
+
+define float @f(float %z, double %a, float %b) {
+; CHECK: vmov.f32 s0, s1
+ %tmp = call float @g(float %b)
+ ret float %tmp
+}
+
+declare float @g(float)
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
new file mode 100644
index 0000000..2e35e39
--- /dev/null
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm
+
+define void @frame_dummy() {
+entry:
+ %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
new file mode 100644
index 0000000..2739860
--- /dev/null
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | grep r7
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11
+; PR4344
+; PR4416
+
+define arm_aapcscc i8* @t() nounwind {
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
new file mode 100644
index 0000000..72ec8ef
--- /dev/null
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define void @test(i32* %P, i32 %A, i32 %i) nounwind {
+entry:
+; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
+ icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ store i32 %A, i32* %tmp2
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+ icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
+ br i1 %1, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
new file mode 100644
index 0000000..c4a44b4
--- /dev/null
+++ b/test/CodeGen/ARM/bfc.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+; 4278190095 = 0xff00000f
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: bfc
+ %tmp = and i32 %a, 4278190095
+ ret i32 %tmp
+}
+
+; 4286578688 = 0xff800000
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: bfc
+ %tmp = and i32 %a, 4286578688
+ ret i32 %tmp
+}
+
+; 4095 = 0x00000fff
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: bfc
+ %tmp = and i32 %a, 4095
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
new file mode 100644
index 0000000..1dfd627
--- /dev/null
+++ b/test/CodeGen/ARM/bic.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @f1(i32 %a, i32 %b) {
+ %tmp = xor i32 %b, 4294967295
+ %tmp1 = and i32 %a, %tmp
+ ret i32 %tmp1
+}
+
+; CHECK: bic r0, r0, r1
+
+define i32 @f2(i32 %a, i32 %b) {
+ %tmp = xor i32 %b, 4294967295
+ %tmp1 = and i32 %tmp, %a
+ ret i32 %tmp1
+}
+
+; CHECK: bic r0, r0, r1
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
new file mode 100644
index 0000000..9e94efe
--- /dev/null
+++ b/test/CodeGen/ARM/bits.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm > %t
+; RUN: grep and %t | count 1
+; RUN: grep orr %t | count 1
+; RUN: grep eor %t | count 1
+; RUN: grep mov.*lsl %t | count 1
+; RUN: grep mov.*asr %t | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+entry:
+ %tmp2 = and i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+entry:
+ %tmp2 = or i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+entry:
+ %tmp2 = xor i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+entry:
+ %tmp3 = shl i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define i32 @f5(i32 %a, i32 %b) {
+entry:
+ %tmp3 = ashr i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
new file mode 100644
index 0000000..0e3e070
--- /dev/null
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | not grep bx
+
+define void @test(i32 %Ptr, i8* %L) {
+entry:
+ br label %bb1
+
+bb: ; preds = %bb1
+ %gep.upgrd.1 = zext i32 %indvar to i64 ; <i64> [#uses=1]
+ %tmp7 = getelementptr i8* %L, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp7
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ %i.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
+ %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=1]
+ %tmp2 = add i32 %i.0, %tmp ; <i32> [#uses=1]
+ %Ptr_addr.0 = sub i32 %Ptr, %tmp2 ; <i32> [#uses=0]
+ %tmp12 = icmp eq i32 %i.0, %Ptr ; <i1> [#uses=1]
+ %tmp12.not = xor i1 %tmp12, true ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp12.not, false ; <i1> [#uses=1]
+ br i1 %bothcond, label %bb, label %bb18
+
+bb18: ; preds = %bb1
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
new file mode 100644
index 0000000..3dd66ae
--- /dev/null
+++ b/test/CodeGen/ARM/call.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; CHECKV4: mov lr, pc
+; CHECKV5: blx
+; CHECKELF: PLT
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @g.upgrd.1() {
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
new file mode 100644
index 0000000..efe29d8
--- /dev/null
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: not grep {bx lr}
+
+ %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
+@r = external global [14 x i32] ; <[14 x i32]*> [#uses=4]
+@isa = external global [13 x %struct.anon] ; <[13 x %struct.anon]*> [#uses=1]
+@pgm = external global [2 x { i32, [3 x i32] }] ; <[2 x { i32, [3 x i32] }]*> [#uses=4]
+@numi = external global i32 ; <i32*> [#uses=1]
+@counter = external global [2 x i32] ; <[2 x i32]*> [#uses=1]
+
+
+define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() {
+newFuncRoot:
+ br label %bb115.i.i
+
+bb115.i.i.bb170.i.i_crit_edge.exitStub: ; preds = %bb115.i.i
+ ret void
+
+bb115.i.i.bb115.i.i_crit_edge: ; preds = %bb115.i.i
+ br label %bb115.i.i
+
+bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot
+ %i_addr.3210.0.i.i = phi i32 [ %tmp166.i.i, %bb115.i.i.bb115.i.i_crit_edge ], [ 0, %newFuncRoot ] ; <i32> [#uses=7]
+ %tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0 ; <i32*> [#uses=1]
+ %tmp125.i.i = load i32* %tmp124.i.i ; <i32> [#uses=1]
+ %tmp126.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp125.i.i ; <i32*> [#uses=1]
+ %tmp127.i.i = load i32* %tmp126.i.i ; <i32> [#uses=1]
+ %tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1 ; <i32*> [#uses=1]
+ %tmp132.i.i = load i32* %tmp131.i.i ; <i32> [#uses=1]
+ %tmp133.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp132.i.i ; <i32*> [#uses=1]
+ %tmp134.i.i = load i32* %tmp133.i.i ; <i32> [#uses=1]
+ %tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2 ; <i32*> [#uses=1]
+ %tmp139.i.i = load i32* %tmp138.i.i ; <i32> [#uses=1]
+ %tmp140.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp139.i.i ; <i32*> [#uses=1]
+ %tmp141.i.i = load i32* %tmp140.i.i ; <i32> [#uses=1]
+ %tmp143.i.i = add i32 %i_addr.3210.0.i.i, 12 ; <i32> [#uses=1]
+ %tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0 ; <i32*> [#uses=1]
+ %tmp147.i.i = load i32* %tmp146.i.i ; <i32> [#uses=1]
+ %tmp149.i.i = getelementptr [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0 ; <i32 (i32, i32, i32)**> [#uses=1]
+ %tmp150.i.i = load i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1]
+ %tmp154.i.i = tail call i32 %tmp150.i.i( i32 %tmp127.i.i, i32 %tmp134.i.i, i32 %tmp141.i.i ) ; <i32> [#uses=1]
+ %tmp155.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp143.i.i ; <i32*> [#uses=1]
+ store i32 %tmp154.i.i, i32* %tmp155.i.i
+ %tmp159.i.i = getelementptr [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i ; <i32*> [#uses=2]
+ %tmp160.i.i = load i32* %tmp159.i.i ; <i32> [#uses=1]
+ %tmp161.i.i = add i32 %tmp160.i.i, 1 ; <i32> [#uses=1]
+ store i32 %tmp161.i.i, i32* %tmp159.i.i
+ %tmp166.i.i = add i32 %i_addr.3210.0.i.i, 1 ; <i32> [#uses=2]
+ %tmp168.i.i = load i32* @numi ; <i32> [#uses=1]
+ icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1]
+ br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub
+}
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
new file mode 100644
index 0000000..a6a7ed6
--- /dev/null
+++ b/test/CodeGen/ARM/carry.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r
+; CHECK: sbc r
+entry:
+ %tmp = sub i64 %a, %b
+ ret i64 %tmp
+}
+
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK: f2:
+; CHECK: adc r
+; CHECK: subs r
+; CHECK: sbc r
+entry:
+ %tmp1 = shl i64 %a, 1
+ %tmp2 = sub i64 %tmp1, %b
+ ret i64 %tmp2
+}
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
new file mode 100644
index 0000000..d2235c9
--- /dev/null
+++ b/test/CodeGen/ARM/clz.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
+
+declare i32 @llvm.ctlz.i32(i32)
+
+define i32 @test(i32 %x) {
+ %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x ) ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
new file mode 100644
index 0000000..fac2bc5
--- /dev/null
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
+; RUN: grep vcmpe.f32
+
+define void @test3(float* %glob, i32 %X) {
+entry:
+ %tmp = load float* %glob ; <float> [#uses=1]
+ %tmp2 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
+ %tmp3 = load float* %tmp2 ; <float> [#uses=1]
+ %tmp.upgrd.1 = fcmp ogt float %tmp, %tmp3 ; <i1> [#uses=1]
+ br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
new file mode 100644
index 0000000..ce91936
--- /dev/null
+++ b/test/CodeGen/ARM/constants.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @f1() {
+; CHECK: f1
+; CHECK: mov r0, #0
+ ret i32 0
+}
+
+define i32 @f2() {
+; CHECK: f2
+; CHECK: mov r0, #255
+ ret i32 255
+}
+
+define i32 @f3() {
+; CHECK: f3
+; CHECK: mov r0{{.*}}256
+ ret i32 256
+}
+
+define i32 @f4() {
+; CHECK: f4
+; CHECK: orr{{.*}}256
+ ret i32 257
+}
+
+define i32 @f5() {
+; CHECK: f5
+; CHECK: mov r0, {{.*}}-1073741761
+ ret i32 -1073741761
+}
+
+define i32 @f6() {
+; CHECK: f6
+; CHECK: mov r0, {{.*}}1008
+ ret i32 1008
+}
+
+define void @f7(i32 %a) {
+; CHECK: f7
+; CHECK: cmp r0, #1, 16
+ %b = icmp ugt i32 %a, 65536 ; <i1> [#uses=1]
+ br i1 %b, label %r, label %r
+
+r: ; preds = %0, %0
+ ret void
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
new file mode 100644
index 0000000..0dcf9dd
--- /dev/null
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+; Without CSE of libcalls, there are two calls in the output instead of one.
+
+define i32 @u_f_nonbon(double %lambda) nounwind {
+entry:
+ %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2]
+ %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1]
+ %dielectric.0.i = select i1 %tmp6.i, double 1.000000e+00, double %tmp19.i.i ; <double> [#uses=1]
+ %tmp10.i4 = fdiv double 0x4074C2D71F36262D, %dielectric.0.i ; <double> [#uses=1]
+ br i1 false, label %bb28.i, label %bb508.i
+
+bb28.i: ; preds = %bb28.i, %entry
+ br i1 false, label %bb502.loopexit.i, label %bb28.i
+
+bb.nph53.i: ; preds = %bb502.loopexit.i
+ %tmp354.i = fsub double -0.000000e+00, %tmp10.i4 ; <double> [#uses=0]
+ br label %bb244.i
+
+bb244.i: ; preds = %bb244.i, %bb.nph53.i
+ br label %bb244.i
+
+bb502.loopexit.i: ; preds = %bb28.i
+ br i1 false, label %bb.nph53.i, label %bb508.i
+
+bb508.i: ; preds = %bb502.loopexit.i, %entry
+ ret i32 1
+}
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
new file mode 100644
index 0000000..fb94626
--- /dev/null
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .section __DATA,__mod_term_func,mod_term_funcs
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .dtors,"aw",%progbits
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .fini_array,"aw",%fini_array
+
[email protected]_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
[email protected]_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
+
+define void @__mf_init() {
+entry:
+ ret void
+}
+
+define void @__mf_fini() {
+entry:
+ ret void
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
new file mode 100644
index 0000000..1d2ced3
--- /dev/null
+++ b/test/CodeGen/ARM/ctz.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+declare i32 @llvm.cttz.i32(i32)
+
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rbit
+; CHECK: clz
+ %tmp = call i32 @llvm.cttz.i32( i32 %a )
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
new file mode 100644
index 0000000..3ff359a
--- /dev/null
+++ b/test/CodeGen/ARM/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
new file mode 100644
index 0000000..2f724e7
--- /dev/null
+++ b/test/CodeGen/ARM/div.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm > %t
+; RUN: grep __divsi3 %t
+; RUN: grep __udivsi3 %t
+; RUN: grep __modsi3 %t
+; RUN: grep __umodsi3 %t
+
+define i32 @f1(i32 %a, i32 %b) {
+entry:
+ %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+entry:
+ %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+entry:
+ %tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+entry:
+ %tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
new file mode 100644
index 0000000..92e2d13
--- /dev/null
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm
+
+ %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+ %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+
+define void @t1(%struct.state* %v) {
+ %tmp6 = load i32* null
+ %tmp8 = alloca float, i32 %tmp6
+ store i32 1, i32* null
+ br i1 false, label %bb123.preheader, label %return
+
+bb123.preheader:
+ br i1 false, label %bb43, label %return
+
+bb43:
+ call fastcc void @f1( float* %tmp8, float* null, i32 0 )
+ %tmp70 = load i32* null
+ %tmp85 = getelementptr float* %tmp8, i32 0
+ call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
+ ret void
+
+return:
+ ret void
+}
+
+declare fastcc void @f1(float*, float*, i32)
+
+declare fastcc void @f2(float*, float*, float*, i32)
+
+ %struct.comment = type { i8**, i32*, i32, i8* }
+@str215 = external global [2 x i8]
+
+define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
+ %tmp1 = call i32 @strlen( i8* %tag )
+ %tmp3 = call i32 @strlen( i8* %contents )
+ %tmp4 = add i32 %tmp1, 2
+ %tmp5 = add i32 %tmp4, %tmp3
+ %tmp6 = alloca i8, i32 %tmp5
+ %tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
+ %tmp6.len = call i32 @strlen( i8* %tmp6 )
+ %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
+ call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
+ %tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
+ call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
+ ret void
+}
+
+declare i32 @strlen(i8*)
+
+declare i8* @strcat(i8*, i8*)
+
+declare fastcc void @comment_add(%struct.comment*, i8*)
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare i8* @strcpy(i8*, i8*)
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
new file mode 100644
index 0000000..dc45ce7
--- /dev/null
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm
+@handler_installed.6144.b = external global i1 ; <i1*> [#uses=1]
+
+define void @__mf_sigusr1_respond() {
+entry:
+ %tmp8.b = load i1* @handler_installed.6144.b ; <i1> [#uses=1]
+ br i1 false, label %cond_true7, label %cond_next
+
+cond_next: ; preds = %entry
+ br i1 %tmp8.b, label %bb, label %cond_next3
+
+cond_next3: ; preds = %cond_next
+ ret void
+
+bb: ; preds = %cond_next
+ ret void
+
+cond_true7: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
new file mode 100644
index 0000000..e5b5791
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %a, float %b) {
+entry:
+ %dum = fadd float %a, %b
+ %0 = tail call float @fabsf(float %dum)
+ %dum1 = fadd float %0, %b
+ ret float %dum1
+}
+
+declare float @fabsf(float)
+
+; VFP2: test:
+; VFP2: vabs.f32 s1, s1
+
+; NFP1: test:
+; NFP1: vabs.f32 d1, d1
+; NFP0: test:
+; NFP0: vabs.f32 s1, s1
+
+; CORTEXA8: test:
+; CORTEXA8: vabs.f32 d1, d1
+; CORTEXA9: test:
+; CORTEXA9: vabs.f32 s1, s1
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
new file mode 100644
index 0000000..db18a86
--- /dev/null
+++ b/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fadd float %a, %b
+ ret float %0
+}
+
+; VFP2: test:
+; VFP2: vadd.f32 s0, s1, s0
+
+; NFP1: test:
+; NFP1: vadd.f32 d0, d1, d0
+; NFP0: test:
+; NFP0: vadd.f32 s0, s1, s0
+
+; CORTEXA8: test:
+; CORTEXA8: vadd.f32 d0, d1, d0
+; CORTEXA9: test:
+; CORTEXA9: vadd.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
new file mode 100644
index 0000000..a6d7410
--- /dev/null
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | grep bic | count 2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
+; RUN: grep vneg | count 2
+
+define float @test1(float %x, double %y) {
+ %tmp = fpext float %x to double
+ %tmp2 = tail call double @copysign( double %tmp, double %y )
+ %tmp3 = fptrunc double %tmp2 to float
+ ret float %tmp3
+}
+
+define double @test2(double %x, float %y) {
+ %tmp = fpext float %y to double
+ %tmp2 = tail call double @copysign( double %x, double %tmp )
+ ret double %tmp2
+}
+
+declare double @copysign(double, double)
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
new file mode 100644
index 0000000..a5c86bf
--- /dev/null
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fdiv float %a, %b
+ ret float %0
+}
+
+; VFP2: test:
+; VFP2: vdiv.f32 s0, s1, s0
+
+; NFP1: test:
+; NFP1: vdiv.f32 s0, s1, s0
+; NFP0: test:
+; NFP0: vdiv.f32 s0, s1, s0
+
+; CORTEXA8: test:
+; CORTEXA8: vdiv.f32 s0, s1, s0
+; CORTEXA9: test:
+; CORTEXA9: vdiv.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
new file mode 100644
index 0000000..6db2385
--- /dev/null
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64
+
+define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
+entry:
+ %x14 = bitcast double %x to i64 ; <i64> [#uses=1]
+ br i1 true, label %bb3, label %bb10
+
+bb3: ; preds = %entry
+ br i1 true, label %bb5, label %bb7
+
+bb5: ; preds = %bb3
+ %u.in.mask = and i64 %x14, -4294967296 ; <i64> [#uses=1]
+ %.ins = or i64 0, %u.in.mask ; <i64> [#uses=1]
+ %0 = bitcast i64 %.ins to double ; <double> [#uses=1]
+ %1 = fsub double %x, %0 ; <double> [#uses=1]
+ %2 = fptosi double %1 to i32 ; <i32> [#uses=1]
+ %3 = add i32 %2, 0 ; <i32> [#uses=1]
+ %4 = zext i32 %3 to i64 ; <i64> [#uses=1]
+ %5 = shl i64 %4, 32 ; <i64> [#uses=1]
+ %6 = or i64 %5, 0 ; <i64> [#uses=1]
+ ret i64 %6
+
+bb7: ; preds = %bb3
+ ret i64 0
+
+bb10: ; preds = %entry
+ ret i64 0
+}
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
new file mode 100644
index 0000000..904a587
--- /dev/null
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fadd float %acc, %0
+ ret float %1
+}
+
+; VFP2: test:
+; VFP2: vmla.f32 s2, s1, s0
+
+; NFP1: test:
+; NFP1: vmul.f32 d0, d1, d0
+; NFP0: test:
+; NFP0: vmla.f32 s2, s1, s0
+
+; CORTEXA8: test:
+; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXA9: test:
+; CORTEXA9: vmla.f32 s2, s1, s0
diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll
new file mode 100644
index 0000000..eb72faf
--- /dev/null
+++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd
+
+; naive codegen for this is:
+; _i:
+; fmdrr d0, r0, r1
+; fmrrd r0, r1, d0
+; bx lr
+
+define i64 @test(double %X) {
+ %Y = bitcast double %X to i64
+ ret i64 %Y
+}
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
new file mode 100644
index 0000000..7b9e029
--- /dev/null
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float %0, %acc
+ ret float %1
+}
+
+; VFP2: test:
+; VFP2: vnmls.f32 s2, s1, s0
+
+; NFP1: test:
+; NFP1: vnmls.f32 s2, s1, s0
+; NFP0: test:
+; NFP0: vnmls.f32 s2, s1, s0
+
+; CORTEXA8: test:
+; CORTEXA8: vnmls.f32 s2, s1, s0
+; CORTEXA9: test:
+; CORTEXA9: vnmls.f32 s2, s1, s0
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
new file mode 100644
index 0000000..d3c9c82
--- /dev/null
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fmul float %a, %b
+ ret float %0
+}
+
+; VFP2: test:
+; VFP2: vmul.f32 s0, s1, s0
+
+; NFP1: test:
+; NFP1: vmul.f32 d0, d1, d0
+; NFP0: test:
+; NFP0: vmul.f32 s0, s1, s0
+
+; CORTEXA8: test:
+; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXA9: test:
+; CORTEXA9: vmul.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
new file mode 100644
index 0000000..d6c22f1
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+
+define float @test1(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
+; VFP2: test1:
+; VFP2: vneg.f32 s1, s0
+
+; NFP1: test1:
+; NFP1: vneg.f32 d1, d0
+
+; NFP0: test1:
+; NFP0: vneg.f32 s1, s0
+
+; CORTEXA8: test1:
+; CORTEXA8: vneg.f32 d1, d0
+
+; CORTEXA9: test1:
+; CORTEXA9: vneg.f32 s1, s0
+
+define float @test2(float* %a) {
+entry:
+ %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
+ %2 = fpext float %1 to double ; <double> [#uses=1]
+ %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
+ %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
+ ret float %retval
+}
+; VFP2: test2:
+; VFP2: vneg.f32 s1, s0
+
+; NFP1: test2:
+; NFP1: vneg.f32 d1, d0
+
+; NFP0: test2:
+; NFP0: vneg.f32 s1, s0
+
+; CORTEXA8: test2:
+; CORTEXA8: vneg.f32 d1, d0
+
+; CORTEXA9: test2:
+; CORTEXA9: vneg.f32 s1, s0
+
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
new file mode 100644
index 0000000..724947e
--- /dev/null
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEONFP
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+; VFP2: vmls.f32
+; NEON: vmls.f32
+
+; NEONFP-NOT: vmls
+; NEONFP-NOT: vmov.f32
+; NEONFP: vmul.f32
+; NEONFP: vsub.f32
+; NEONFP: vmov
+
+ %0 = fmul float %a, %b
+ %1 = fsub float %acc, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
new file mode 100644
index 0000000..ad21882
--- /dev/null
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %acc, float %a, float %b) nounwind {
+; CHECK: vnmla.f32 s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
+define float @test2(float %acc, float %a, float %b) nounwind {
+; CHECK: vnmla.f32 s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ %2 = fsub float %1, %acc
+ ret float %2
+}
+
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
new file mode 100644
index 0000000..6d7bc05
--- /dev/null
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64
+
+
+define double @t1(double %a, double %b) {
+entry:
+ %tmp2 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
+ %tmp4 = fmul double %tmp2, %b ; <double> [#uses=1]
+ ret double %tmp4
+}
+
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
new file mode 100644
index 0000000..efd87d2
--- /dev/null
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fsub float -0.0, %0
+ ret float %1
+}
+
+define float @test2(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0
+entry:
+ %0 = fmul float %a, %b
+ %1 = fmul float -1.0, %0
+ ret float %1
+}
+
diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll
new file mode 100644
index 0000000..4ac10ba
--- /dev/null
+++ b/test/CodeGen/ARM/formal.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+declare void @bar(i64 %x, i64 %y)
+
+define void @foo() {
+ call void @bar(i64 2, i64 3)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
new file mode 100644
index 0000000..8fbd45b
--- /dev/null
+++ b/test/CodeGen/ARM/fp.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define float @f(i32 %a) {
+;CHECK: f:
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.s32
+;CHECK-NEXT: vmov
+entry:
+ %tmp = sitofp i32 %a to float ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @g(i32 %a) {
+;CHECK: g:
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.s32
+;CHECK-NEXT: vmov
+entry:
+ %tmp = sitofp i32 %a to double ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define double @uint_to_double(i32 %a) {
+;CHECK: uint_to_double:
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f64.u32
+;CHECK-NEXT: vmov
+entry:
+ %tmp = uitofp i32 %a to double ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @uint_to_float(i32 %a) {
+;CHECK: uint_to_float:
+;CHECK: vmov
+;CHECK-NEXT: vcvt.f32.u32
+;CHECK-NEXT: vmov
+entry:
+ %tmp = uitofp i32 %a to float ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @h(double* %v) {
+;CHECK: h:
+;CHECK: vldr.64
+;CHECK-NEXT: vmov
+entry:
+ %tmp = load double* %v ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @h2() {
+;CHECK: h2:
+;CHECK: 1065353216
+entry:
+ ret float 1.000000e+00
+}
+
+define double @f2(double %a) {
+;CHECK: f2:
+;CHECK-NOT: vmov
+ ret double %a
+}
+
+define void @f3() {
+;CHECK: f3:
+;CHECK-NOT: vmov
+;CHECK: f4
+entry:
+ %tmp = call double @f5( ) ; <double> [#uses=1]
+ call void @f4( double %tmp )
+ ret void
+}
+
+declare void @f4(double)
+
+declare double @f5()
+
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
new file mode 100644
index 0000000..2adac78
--- /dev/null
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
+
+define i32 @test1(float %a, float %b) {
+; VFP2: test1:
+; VFP2: vcvt.s32.f32 s0, s0
+; NEON: test1:
+; NEON: vcvt.s32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @test2(float %a, float %b) {
+; VFP2: test2:
+; VFP2: vcvt.u32.f32 s0, s0
+; NEON: test2:
+; NEON: vcvt.u32.f32 d0, d0
+entry:
+ %0 = fadd float %a, %b
+ %1 = fptoui float %0 to i32
+ ret i32 %1
+}
+
+define float @test3(i32 %a, i32 %b) {
+; VFP2: test3:
+; VFP2: vcvt.f32.u32 s0, s0
+; NEON: test3:
+; NEON: vcvt.f32.u32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = uitofp i32 %0 to float
+ ret float %1
+}
+
+define float @test4(i32 %a, i32 %b) {
+; VFP2: test4:
+; VFP2: vcvt.f32.s32 s0, s0
+; NEON: test4:
+; NEON: vcvt.f32.s32 d0, d0
+entry:
+ %0 = add i32 %a, %b
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
new file mode 100644
index 0000000..ce6d6b2
--- /dev/null
+++ b/test/CodeGen/ARM/fparith.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define float @f1(float %a, float %b) {
+;CHECK: f1:
+;CHECK: vadd.f32
+entry:
+ %tmp = fadd float %a, %b ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @f2(double %a, double %b) {
+;CHECK: f2:
+;CHECK: vadd.f64
+entry:
+ %tmp = fadd double %a, %b ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @f3(float %a, float %b) {
+;CHECK: f3:
+;CHECK: vmul.f32
+entry:
+ %tmp = fmul float %a, %b ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @f4(double %a, double %b) {
+;CHECK: f4:
+;CHECK: vmul.f64
+entry:
+ %tmp = fmul double %a, %b ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @f5(float %a, float %b) {
+;CHECK: f5:
+;CHECK: vsub.f32
+entry:
+ %tmp = fsub float %a, %b ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @f6(double %a, double %b) {
+;CHECK: f6:
+;CHECK: vsub.f64
+entry:
+ %tmp = fsub double %a, %b ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @f7(float %a) {
+;CHECK: f7:
+;CHECK: eor
+entry:
+ %tmp1 = fsub float -0.000000e+00, %a ; <float> [#uses=1]
+ ret float %tmp1
+}
+
+define double @f8(double %a) {
+;CHECK: f8:
+;CHECK: vneg.f64
+entry:
+ %tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1]
+ ret double %tmp1
+}
+
+define float @f9(float %a, float %b) {
+;CHECK: f9:
+;CHECK: vdiv.f32
+entry:
+ %tmp1 = fdiv float %a, %b ; <float> [#uses=1]
+ ret float %tmp1
+}
+
+define double @f10(double %a, double %b) {
+;CHECK: f10:
+;CHECK: vdiv.f64
+entry:
+ %tmp1 = fdiv double %a, %b ; <double> [#uses=1]
+ ret double %tmp1
+}
+
+define float @f11(float %a) {
+;CHECK: f11:
+;CHECK: bic
+entry:
+ %tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1]
+ ret float %tmp1
+}
+
+declare float @fabsf(float)
+
+define double @f12(double %a) {
+;CHECK: f12:
+;CHECK: vabs.f64
+entry:
+ %tmp1 = call double @fabs( double %a ) ; <double> [#uses=1]
+ ret double %tmp1
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
new file mode 100644
index 0000000..260ec49
--- /dev/null
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define i32 @f1(float %a) {
+;CHECK: f1:
+;CHECK: vcmpe.f32
+;CHECK: movmi
+entry:
+ %tmp = fcmp olt float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp1 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f2(float %a) {
+;CHECK: f2:
+;CHECK: vcmpe.f32
+;CHECK: moveq
+entry:
+ %tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp2 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @f3(float %a) {
+;CHECK: f3:
+;CHECK: vcmpe.f32
+;CHECK: movgt
+entry:
+ %tmp = fcmp ogt float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp3 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define i32 @f4(float %a) {
+;CHECK: f4:
+;CHECK: vcmpe.f32
+;CHECK: movge
+entry:
+ %tmp = fcmp oge float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp4 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @f5(float %a) {
+;CHECK: f5:
+;CHECK: vcmpe.f32
+;CHECK: movls
+entry:
+ %tmp = fcmp ole float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp5 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @f6(float %a) {
+;CHECK: f6:
+;CHECK: vcmpe.f32
+;CHECK: movne
+entry:
+ %tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp6 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @g1(double %a) {
+;CHECK: g1:
+;CHECK: vcmpe.f64
+;CHECK: movmi
+entry:
+ %tmp = fcmp olt double %a, 1.000000e+00 ; <i1> [#uses=1]
+ %tmp7 = zext i1 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp7
+}
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
new file mode 100644
index 0000000..67f70e9e
--- /dev/null
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm | grep moveq
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
+
+define i32 @f7(float %a, float %b) {
+entry:
+ %tmp = fcmp ueq float %a,%b
+ %retval = select i1 %tmp, i32 666, i32 42
+ ret i32 %retval
+}
+
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
new file mode 100644
index 0000000..710994d
--- /dev/null
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s
+
+define arm_apcscc float @t1(float %x) nounwind readnone optsize {
+entry:
+; CHECK: t1:
+; CHECK: vmov.f32 s1, #4.000000e+00
+ %0 = fadd float %x, 4.000000e+00
+ ret float %0
+}
+
+define arm_apcscc double @t2(double %x) nounwind readnone optsize {
+entry:
+; CHECK: t2:
+; CHECK: vmov.f64 d1, #3.000000e+00
+ %0 = fadd double %x, 3.000000e+00
+ ret double %0
+}
+
+define arm_apcscc double @t3(double %x) nounwind readnone optsize {
+entry:
+; CHECK: t3:
+; CHECK: vmov.f64 d1, #-1.300000e+01
+ %0 = fmul double %x, -1.300000e+01
+ ret double %0
+}
+
+define arm_apcscc float @t4(float %x) nounwind readnone optsize {
+entry:
+; CHECK: t4:
+; CHECK: vmov.f32 s1, #-2.400000e+01
+ %0 = fmul float %x, -2.400000e+01
+ ret float %0
+}
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
new file mode 100644
index 0000000..bf197a4
--- /dev/null
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define float @f1(double %x) {
+;CHECK-VFP: f1:
+;CHECK-VFP: vcvt.f32.f64
+;CHECK: f1:
+;CHECK: truncdfsf2
+entry:
+ %tmp1 = fptrunc double %x to float ; <float> [#uses=1]
+ ret float %tmp1
+}
+
+define double @f2(float %x) {
+;CHECK-VFP: f2:
+;CHECK-VFP: vcvt.f64.f32
+;CHECK: f2:
+;CHECK: extendsfdf2
+entry:
+ %tmp1 = fpext float %x to double ; <double> [#uses=1]
+ ret double %tmp1
+}
+
+define i32 @f3(float %x) {
+;CHECK-VFP: f3:
+;CHECK-VFP: vcvt.s32.f32
+;CHECK: f3:
+;CHECK: fixsfsi
+entry:
+ %tmp = fptosi float %x to i32 ; <i32> [#uses=1]
+ ret i32 %tmp
+}
+
+define i32 @f4(float %x) {
+;CHECK-VFP: f4:
+;CHECK-VFP: vcvt.u32.f32
+;CHECK: f4:
+;CHECK: fixunssfsi
+entry:
+ %tmp = fptoui float %x to i32 ; <i32> [#uses=1]
+ ret i32 %tmp
+}
+
+define i32 @f5(double %x) {
+;CHECK-VFP: f5:
+;CHECK-VFP: vcvt.s32.f64
+;CHECK: f5:
+;CHECK: fixdfsi
+entry:
+ %tmp = fptosi double %x to i32 ; <i32> [#uses=1]
+ ret i32 %tmp
+}
+
+define i32 @f6(double %x) {
+;CHECK-VFP: f6:
+;CHECK-VFP: vcvt.u32.f64
+;CHECK: f6:
+;CHECK: fixunsdfsi
+entry:
+ %tmp = fptoui double %x to i32 ; <i32> [#uses=1]
+ ret i32 %tmp
+}
+
+define float @f7(i32 %a) {
+;CHECK-VFP: f7:
+;CHECK-VFP: vcvt.f32.s32
+;CHECK: f7:
+;CHECK: floatsisf
+entry:
+ %tmp = sitofp i32 %a to float ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @f8(i32 %a) {
+;CHECK-VFP: f8:
+;CHECK-VFP: vcvt.f64.s32
+;CHECK: f8:
+;CHECK: floatsidf
+entry:
+ %tmp = sitofp i32 %a to double ; <double> [#uses=1]
+ ret double %tmp
+}
+
+define float @f9(i32 %a) {
+;CHECK-VFP: f9:
+;CHECK-VFP: vcvt.f32.u32
+;CHECK: f9:
+;CHECK: floatunsisf
+entry:
+ %tmp = uitofp i32 %a to float ; <float> [#uses=1]
+ ret float %tmp
+}
+
+define double @f10(i32 %a) {
+;CHECK-VFP: f10:
+;CHECK-VFP: vcvt.f64.u32
+;CHECK: f10:
+;CHECK: floatunsidf
+entry:
+ %tmp = uitofp i32 %a to double ; <double> [#uses=1]
+ ret double %tmp
+}
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
new file mode 100644
index 0000000..c3cff18
--- /dev/null
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define float @f1(float %a) {
+; CHECK: f1:
+; CHECK: mov r0, #0
+ ret float 0.000000e+00
+}
+
+define float @f2(float* %v, float %u) {
+; CHECK: f2:
+; CHECK: vldr.32{{.*}}[
+ %tmp = load float* %v ; <float> [#uses=1]
+ %tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
+ ret float %tmp1
+}
+
+define void @f3(float %a, float %b, float* %v) {
+; CHECK: f3:
+; CHECK: vstr.32{{.*}}[
+ %tmp = fadd float %a, %b ; <float> [#uses=1]
+ store float %tmp, float* %v
+ ret void
+}
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
new file mode 100644
index 0000000..6d48792
--- /dev/null
+++ b/test/CodeGen/ARM/fpow.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm
+
+define double @t(double %x, double %y) nounwind optsize {
+entry:
+ %0 = tail call double @llvm.pow.f64( double %x, double %y ) ; <double> [#uses=1]
+ ret double %0
+}
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll
new file mode 100644
index 0000000..7f9d62a
--- /dev/null
+++ b/test/CodeGen/ARM/fpowi.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2
+; PR1287
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "arm-linux-gnueabi"
+
+define double @_ZSt3powdi(double %__x, i32 %__i) {
+entry:
+ %tmp3 = call double @llvm.powi.f64( double %__x, i32 %__i )
+ ret double %tmp3
+}
+
+declare double @llvm.powi.f64(double, i32)
+
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
new file mode 100644
index 0000000..299cb8f81
--- /dev/null
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
+
+@i = weak global i32 0 ; <i32*> [#uses=2]
+@u = weak global i32 0 ; <i32*> [#uses=2]
+
+define i32 @foo1(float *%x) {
+ %tmp1 = load float* %x
+ %tmp2 = bitcast float %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i64 @foo2(double *%x) {
+ %tmp1 = load double* %x
+ %tmp2 = bitcast double %tmp1 to i64
+ ret i64 %tmp2
+}
+
+define void @foo5(float %x) {
+ %tmp1 = fptosi float %x to i32
+ store i32 %tmp1, i32* @i
+ ret void
+}
+
+define void @foo6(float %x) {
+ %tmp1 = fptoui float %x to i32
+ store i32 %tmp1, i32* @u
+ ret void
+}
+
+define void @foo7(double %x) {
+ %tmp1 = fptosi double %x to i32
+ store i32 %tmp1, i32* @i
+ ret void
+}
+
+define void @foo8(double %x) {
+ %tmp1 = fptoui double %x to i32
+ store i32 %tmp1, i32* @u
+ ret void
+}
+
+define void @foo9(double %x) {
+ %tmp = fptoui double %x to i16
+ store i16 %tmp, i16* null
+ ret void
+}
+; CHECK: foo9:
+; CHECK: vmov r0, s0
+
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
new file mode 100644
index 0000000..ae98be3
--- /dev/null
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0
+
+define float @test(float %a, float %b) {
+entry:
+ %0 = fsub float %a, %b
+ ret float %0
+}
+
+; VFP2: vsub.f32 s0, s1, s0
+; NFP1: vsub.f32 d0, d1, d0
+; NFP0: vsub.f32 s0, s1, s0
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
new file mode 100644
index 0000000..886c0d5
--- /dev/null
+++ b/test/CodeGen/ARM/globals.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=DarwinStatic
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DarwinDynamic
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=DarwinPIC
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LinuxPIC
+
+@G = external global i32
+
+define i32 @test1() {
+ %tmp = load i32* @G
+ ret i32 %tmp
+}
+
+; DarwinStatic: _test1:
+; DarwinStatic: ldr r0, LCPI1_0
+; DarwinStatic: ldr r0, [r0]
+; DarwinStatic: bx lr
+
+; DarwinStatic: .align 2
+; DarwinStatic: LCPI1_0:
+; DarwinStatic: .long {{_G$}}
+
+
+; DarwinDynamic: _test1:
+; DarwinDynamic: ldr r0, LCPI1_0
+; DarwinDynamic: ldr r0, [r0]
+; DarwinDynamic: ldr r0, [r0]
+; DarwinDynamic: bx lr
+
+; DarwinDynamic: .align 2
+; DarwinDynamic: LCPI1_0:
+; DarwinDynamic: .long L_G$non_lazy_ptr
+
+; DarwinDynamic: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+; DarwinDynamic: .align 2
+; DarwinDynamic: L_G$non_lazy_ptr:
+; DarwinDynamic: .indirect_symbol _G
+; DarwinDynamic: .long 0
+
+
+
+; DarwinPIC: _test1:
+; DarwinPIC: ldr r0, LCPI1_0
+; DarwinPIC: LPC1_0:
+; DarwinPIC: ldr r0, [pc, +r0]
+; DarwinPIC: ldr r0, [r0]
+; DarwinPIC: bx lr
+
+; DarwinPIC: .align 2
+; DarwinPIC: LCPI1_0:
+; DarwinPIC: .long L_G$non_lazy_ptr-(LPC1_0+8)
+
+; DarwinPIC: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+; DarwinPIC: .align 2
+; DarwinPIC: L_G$non_lazy_ptr:
+; DarwinPIC: .indirect_symbol _G
+; DarwinPIC: .long 0
+
+
+
+; LinuxPIC: test1:
+; LinuxPIC: ldr r0, .LCPI1_0
+; LinuxPIC: ldr r1, .LCPI1_1
+
+; LinuxPIC: .LPC1_0:
+; LinuxPIC: add r0, pc, r0
+; LinuxPIC: ldr r0, [r1, +r0]
+; LinuxPIC: ldr r0, [r0]
+; LinuxPIC: bx lr
+
+; LinuxPIC: .align 2
+; LinuxPIC: .LCPI1_0:
+; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC1_0+8)
+; LinuxPIC: .align 2
+; LinuxPIC: .LCPI1_1:
+; LinuxPIC: .long G(GOT)
diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll
new file mode 100644
index 0000000..4abf04b
--- /dev/null
+++ b/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard
+
+define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind {
+ %tmp1 = mul <16 x i8> %A, %B
+ ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) {
+ %tmp = call <16 x i8> @g(<16 x i8> %b)
+ ret <16 x i8> %tmp
+}
+
+declare <16 x i8> @g(<16 x i8>)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
new file mode 100644
index 0000000..ccdc7bf
--- /dev/null
+++ b/test/CodeGen/ARM/hello.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
+; RUN: grep mov | count 3
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
+
+@str = internal constant [12 x i8] c"Hello World\00"
+
+define i32 @main() {
+ %tmp = call i32 @puts( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @puts(i8*)
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
new file mode 100644
index 0000000..90f5308
--- /dev/null
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+@x = weak hidden global i32 0 ; <i32*> [#uses=1]
+
+define i32 @t() nounwind readonly {
+entry:
+; CHECK: t:
+; CHECK: ldr
+; CHECK-NEXT: ldr
+ %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
new file mode 100644
index 0000000..3bd710a
--- /dev/null
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin9 | FileCheck %s
+
+@x = external hidden global i32 ; <i32*> [#uses=1]
+@y = extern_weak hidden global i32 ; <i32*> [#uses=1]
+
+define i32 @t() nounwind readonly {
+entry:
+; CHECK: LCPI1_0:
+; CHECK-NEXT: .long _x
+; CHECK: LCPI1_1:
+; CHECK-NEXT: .long _y
+
+ %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ %1 = load i32* @y, align 4 ; <i32> [#uses=1]
+ %2 = add i32 %1, %0 ; <i32> [#uses=1]
+ ret i32 %2
+}
diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll
new file mode 100644
index 0000000..3544ae8
--- /dev/null
+++ b/test/CodeGen/ARM/hidden-vis.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+
+@a = hidden global i32 0
+@b = external global i32
+
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
+
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
+ ret void
+}
+
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
+
+; DARWIN: t2:
+; DARWIN: .private_extern _a
+ ret void
+}
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
new file mode 100644
index 0000000..63808b2
--- /dev/null
+++ b/test/CodeGen/ARM/iabs.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+;; Integer absolute value, should produce something as good as: ARM:
+;; add r3, r0, r0, asr #31
+;; eor r0, r3, r0, asr #31
+;; bx lr
+
+define i32 @test(i32 %a) {
+ %tmp1neg = sub i32 0, %a
+ %b = icmp sgt i32 %a, -1
+ %abs = select i1 %b, i32 %a, i32 %tmp1neg
+ ret i32 %abs
+; CHECK: add r1, r0, r0, asr #31
+; CHECK: eor r0, r1, r0, asr #31
+; CHECK: bx lr
+}
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
new file mode 100644
index 0000000..e6aa044
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bx | count 1
+
+define i32 @t1(i32 %a, i32 %b) {
+ %tmp2 = icmp eq i32 %a, 0
+ br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+ %tmp5 = add i32 %b, 1
+ ret i32 %tmp5
+
+cond_false:
+ %tmp7 = add i32 %b, -1
+ ret i32 %tmp7
+}
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
new file mode 100644
index 0000000..ce57d73
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bxlt | count 1
+; RUN: llc < %s -march=arm | grep bxgt | count 1
+; RUN: llc < %s -march=arm | grep bxge | count 1
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+ %tmp2 = icmp sgt i32 %c, 10
+ %tmp5 = icmp slt i32 %d, 4
+ %tmp8 = or i1 %tmp5, %tmp2
+ %tmp13 = add i32 %b, %a
+ br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:
+ %tmp15 = add i32 %tmp13, %c
+ %tmp1821 = sub i32 %tmp15, %d
+ ret i32 %tmp1821
+
+UnifiedReturnBlock:
+ ret i32 %tmp13
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
+ %tmp2 = icmp sgt i32 %c, 10
+ %tmp5 = icmp slt i32 %d, 4
+ %tmp8 = and i1 %tmp5, %tmp2
+ %tmp13 = add i32 %b, %a
+ br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:
+ %tmp15 = add i32 %tmp13, %c
+ %tmp1821 = sub i32 %tmp15, %d
+ ret i32 %tmp1821
+
+UnifiedReturnBlock:
+ ret i32 %tmp13
+}
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
new file mode 100644
index 0000000..f7ebac6
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep cmpne | count 1
+; RUN: llc < %s -march=arm | grep bx | count 2
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+ switch i32 %c, label %cond_next [
+ i32 1, label %cond_true
+ i32 7, label %cond_true
+ ]
+
+cond_true:
+ %tmp12 = add i32 %a, 1
+ %tmp1518 = add i32 %tmp12, %b
+ ret i32 %tmp1518
+
+cond_next:
+ %tmp15 = add i32 %b, %a
+ ret i32 %tmp15
+}
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
new file mode 100644
index 0000000..f28c61b
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep subgt | count 1
+; RUN: llc < %s -march=arm | grep suble | count 1
+; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
+
+define i32 @t(i32 %a, i32 %b) {
+entry:
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
new file mode 100644
index 0000000..623f2cb
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define void @foo(i32 %a) {
+entry:
+ %tmp = load i32** @x ; <i32*> [#uses=1]
+ store i32 %a, i32* %tmp
+ ret void
+}
+
+define void @t1(i32 %a, i32 %b) {
+; CHECK: t1:
+; CHECK: ldmfdlt sp!, {r7, pc}
+entry:
+ %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ tail call void @foo( i32 %b )
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
new file mode 100644
index 0000000..d7fcf7d
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep cmpne | count 1
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep ldmfdhi | count 1
+
+define void @foo(i32 %X, i32 %Y) {
+entry:
+ %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
+ %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
+ %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
+ br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
new file mode 100644
index 0000000..c60ad93
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep cmpeq | count 1
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep moveq | count 1
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep ldmfdeq | count 1
+; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
+
+ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %bb, %entry
+ %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
+ %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
+ %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
+ %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1]
+ %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1]
+ %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1]
+ br i1 %bothcond2, label %return, label %bb
+
+bb: ; preds = %tailrecurse
+ %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0]
+ br label %tailrecurse
+
+return: ; preds = %tailrecurse
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
new file mode 100644
index 0000000..a7da834
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep ldmfdne | count 1
+
+ %struct.SString = type { i8*, i32, i32 }
+
+declare void @abort()
+
+define fastcc void @t(%struct.SString* %word, i8 signext %c) {
+entry:
+ %tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ tail call void @abort( )
+ unreachable
+
+cond_false: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
new file mode 100644
index 0000000..05bdc459
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm
+
+define fastcc void @t() nounwind {
+entry:
+ br i1 undef, label %bb.i.i3, label %growMapping.exit
+
+bb.i.i3: ; preds = %entry
+ unreachable
+
+growMapping.exit: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
new file mode 100644
index 0000000..febe6f5
--- /dev/null
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux
+
+define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
+{
+ %h = load <8 x float>* %f
+ %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
+ %m = bitcast <8 x float> %i to <4 x i64>
+ %z = load <4 x i64>* %y
+ %n = mul <4 x i64> %z, %m
+ %p = bitcast <4 x i64> %n to <8 x float>
+ store <8 x float> %p, <8 x float>* %g
+ ret void
+}
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
new file mode 100644
index 0000000..6f25f9d
--- /dev/null
+++ b/test/CodeGen/ARM/imm.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm | not grep CPI
+
+define i32 @test1(i32 %A) {
+ %B = add i32 %A, -268435441 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i32 @test2() {
+ ret i32 65533
+}
+
+define i32 @test3(i32 %A) {
+ %B = or i32 %A, 65533 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
new file mode 100644
index 0000000..5135d03
--- /dev/null
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -relocation-model=pic -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -relocation-model=pic -mtriple=thumb-apple-darwin | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2
+
+@nextaddr = global i8* null ; <i8**> [#uses=2]
[email protected] = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
+
+define internal arm_apcscc i32 @foo(i32 %i) nounwind {
+; ARM: foo:
+; THUMB: foo:
+; THUMB2: foo:
+entry:
+ %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %1 = icmp eq i8* %0, null ; <i1> [#uses=1]
+; indirect branch gets duplicated here
+; ARM: bx
+; THUMB: mov pc, r1
+; THUMB2: mov pc, r1
+ br i1 %1, label %bb3, label %bb2
+
+bb2: ; preds = %entry, %bb3
+ %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
+; ARM: bx
+; THUMB: mov pc, r1
+; THUMB2: mov pc, r1
+ indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
+
+bb3: ; preds = %entry
+ %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ br label %bb2
+
+L5: ; preds = %bb2
+ br label %L4
+
+L4: ; preds = %L5, %bb2
+ %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ] ; <i32> [#uses=1]
+ br label %L3
+
+L3: ; preds = %L4, %bb2
+ %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i32> [#uses=1]
+ br label %L2
+
+L2: ; preds = %L3, %bb2
+ %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+ %phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1]
+ br label %L1
+
+L1: ; preds = %L2, %bb2
+ %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
+; ARM: ldr r1, LCPI
+; ARM: add r1, pc, r1
+; ARM: str r1
+; THUMB: ldr.n r2, LCPI
+; THUMB: add r2, pc
+; THUMB: str r2
+; THUMB2: ldr.n r2, LCPI
+; THUMB2-NEXT: str r2
+ store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
+ ret i32 %res.3
+}
+; ARM: .long L_BA4__foo_L5-(LPC{{.*}}+8)
+; THUMB: .long L_BA4__foo_L5-(LPC{{.*}}+4)
+; THUMB2: .long L_BA4__foo_L5
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
new file mode 100644
index 0000000..45dfcf0
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm
+
+; Test ARM-mode "I" constraint, for any Data Processing immediate.
+define i32 @testI(i32 %x) {
+ %y = call i32 asm "add $0, $1, $2", "=r,r,I"( i32 %x, i32 65280 ) nounwind
+ ret i32 %y
+}
+
+; Test ARM-mode "J" constraint, for compatibility with unknown use in GCC.
+define void @testJ() {
+ tail call void asm sideeffect ".word $0", "J"( i32 4080 ) nounwind
+ ret void
+}
+
+; Test ARM-mode "K" constraint, for bitwise inverted Data Processing immediates.
+define void @testK() {
+ tail call void asm sideeffect ".word $0", "K"( i32 16777215 ) nounwind
+ ret void
+}
+
+; Test ARM-mode "L" constraint, for negated Data Processing immediates.
+define void @testL() {
+ tail call void asm sideeffect ".word $0", "L"( i32 -65280 ) nounwind
+ ret void
+}
+
+; Test ARM-mode "M" constraint, for value between 0 and 32.
+define i32 @testM(i32 %x) {
+ %y = call i32 asm "lsl $0, $1, $2", "=r,r,M"( i32 %x, i32 31 ) nounwind
+ ret i32 %y
+}
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
new file mode 100644
index 0000000..d522348
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define i32 @test1(i32 %tmp54) {
+ %tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; <i32> [#uses=1]
+ ret i32 %tmp56
+}
+
+define void @test2() {
+ %tmp1 = call i64 asm "ldmia $1!, {$0, ${0:H}}", "=r,=*r,1"( i32** null, i32* null ) ; <i64> [#uses=2]
+ %tmp2 = lshr i64 %tmp1, 32 ; <i64> [#uses=1]
+ %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp4 = call i32 asm "pkhbt $0, $1, $2, lsl #16", "=r,r,r"( i32 0, i32 %tmp3 ) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @test3() {
+ tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
+ ret void
+}
diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll
new file mode 100644
index 0000000..a99bccf
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define double @__ieee754_sqrt(double %x) {
+ %tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x )
+ ret double %tmp2
+}
+
+define float @__ieee754_sqrtf(float %x) {
+ %tmp2 = tail call float asm "fsqrts $0, $1", "=w,w"( float %x )
+ ret float %tmp2
+}
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
new file mode 100644
index 0000000..f062772
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; Radar 7449043
+%struct.int32x4_t = type { <4 x i32> }
+
+define arm_apcscc void @t() nounwind {
+entry:
+; CHECK: vmov.I64 q15, #0
+; CHECK: vmov.32 d30[0], r0
+; CHECK: vmov q0, q15
+ %tmp = alloca %struct.int32x4_t, align 16
+ call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind
+ ret void
+}
+
+; Radar 7457110
+%struct.int32x2_t = type { <4 x i32> }
+
+define arm_apcscc void @t2() nounwind {
+entry:
+; CHECK: vmov d30, d0
+; CHECK: vmov.32 r0, d30[0]
+ %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
new file mode 100644
index 0000000..59f0d53
--- /dev/null
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: grep mov | count 3
+
+define i32 @test(i32 %x) {
+ %tmp = trunc i32 %x to i16 ; <i16> [#uses=1]
+ %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+declare i32 @f(i32, i16)
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
new file mode 100644
index 0000000..245ed51
--- /dev/null
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @test1(i32 %X) {
+; CHECK: mov r0, r0, lsr #31
+entry:
+ icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
+ zext i1 %0 to i32 ; <i32>:1 [#uses=1]
+ ret i32 %1
+}
+
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
new file mode 100644
index 0000000..ddf0f0e
--- /dev/null
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm
+
+define void @test1() {
+ %tmp = alloca [ 64 x i32 ] , align 4
+ ret void
+}
+
+define void @test2() {
+ %tmp = alloca [ 4168 x i8 ] , align 4
+ ret void
+}
+
+define i32 @test3() {
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
new file mode 100644
index 0000000..1a016a0
--- /dev/null
+++ b/test/CodeGen/ARM/ldm.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: ldmib
+; CHECK: ldmfd sp!
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
new file mode 100644
index 0000000..011e61c
--- /dev/null
+++ b/test/CodeGen/ARM/ldr.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @f1(i32* %v) {
+; CHECK: f1:
+; CHECK: ldr r0
+entry:
+ %tmp = load i32* %v
+ ret i32 %tmp
+}
+
+define i32 @f2(i32* %v) {
+; CHECK: f2:
+; CHECK: ldr r0
+entry:
+ %tmp2 = getelementptr i32* %v, i32 1023
+ %tmp = load i32* %tmp2
+ ret i32 %tmp
+}
+
+define i32 @f3(i32* %v) {
+; CHECK: f3:
+; CHECK: mov
+; CHECK: ldr r0
+entry:
+ %tmp2 = getelementptr i32* %v, i32 1024
+ %tmp = load i32* %tmp2
+ ret i32 %tmp
+}
+
+define i32 @f4(i32 %base) {
+; CHECK: f4:
+; CHECK-NOT: mvn
+; CHECK: ldr r0
+entry:
+ %tmp1 = sub i32 %base, 128
+ %tmp2 = inttoptr i32 %tmp1 to i32*
+ %tmp3 = load i32* %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @f5(i32 %base, i32 %offset) {
+; CHECK: f5:
+; CHECK: ldr r0
+entry:
+ %tmp1 = add i32 %base, %offset
+ %tmp2 = inttoptr i32 %tmp1 to i32*
+ %tmp3 = load i32* %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @f6(i32 %base, i32 %offset) {
+; CHECK: f6:
+; CHECK: ldr r0{{.*}}lsl{{.*}}
+entry:
+ %tmp1 = shl i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = load i32* %tmp3
+ ret i32 %tmp4
+}
+
+define i32 @f7(i32 %base, i32 %offset) {
+; CHECK: f7:
+; CHECK: ldr r0{{.*}}lsr{{.*}}
+entry:
+ %tmp1 = lshr i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = load i32* %tmp3
+ ret i32 %tmp4
+}
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
new file mode 100644
index 0000000..d29eb02
--- /dev/null
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @test1(i8* %t1) nounwind {
+; CHECK: ldrb
+ %tmp.u = load i8* %t1
+ %tmp1.s = zext i8 %tmp.u to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test2(i16* %t1) nounwind {
+; CHECK: ldrh
+ %tmp.u = load i16* %t1
+ %tmp1.s = zext i16 %tmp.u to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test3(i8* %t0) nounwind {
+; CHECK: ldrsb
+ %tmp.s = load i8* %t0
+ %tmp1.s = sext i8 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test4(i16* %t0) nounwind {
+; CHECK: ldrsh
+ %tmp.s = load i16* %t0
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; CHECK: mov r0, #0
+; CHECK: ldrsh
+ %tmp.s = load i16* null
+ %tmp1.s = sext i16 %tmp.s to i32
+ ret i32 %tmp1.s
+}
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
new file mode 100644
index 0000000..a3abdb6
--- /dev/null
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm | not grep mov
+
+define i32 @f1() {
+ %buf = alloca [32 x i32], align 4
+ %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
+}
+
+define i32 @f2() {
+ %buf = alloca [32 x i8], align 4
+ %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
+ %tmp1 = load i8* %tmp
+ %tmp2 = zext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @f3() {
+ %buf = alloca [32 x i32], align 4
+ %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
+}
+
+define i32 @f4() {
+ %buf = alloca [32 x i8], align 4
+ %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
+ %tmp1 = load i8* %tmp
+ %tmp2 = zext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
new file mode 100644
index 0000000..97a48e1
--- /dev/null
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm | \
+; RUN: grep {ldr.*\\\[.*\],} | count 1
+
+define i32 @test(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
+ %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
+ %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1]
+ %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
new file mode 100644
index 0000000..7c44284
--- /dev/null
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm | \
+; RUN: grep {ldr.*\\!} | count 2
+
+define i32* @test1(i32* %X, i32* %dest) {
+ %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
+ %A = load i32* %Y ; <i32> [#uses=1]
+ store i32 %A, i32* %dest
+ ret i32* %Y
+}
+
+define i32 @test2(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = sub i32 %a, %b ; <i32> [#uses=2]
+ %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
+ %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
new file mode 100644
index 0000000..c366e2d
--- /dev/null
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
+; rdar://r6949835
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+;V6: ldrd r2, [r2]
+
+;V5: ldr r3, [r2]
+;V5: ldr r2, [r2, #+4]
+
+;EABI: ldr r3, [r2]
+;EABI: ldr r2, [r2, #+4]
+
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
new file mode 100644
index 0000000..253b0e1
--- /dev/null
+++ b/test/CodeGen/ARM/load.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=arm > %t
+; RUN: grep ldrsb %t
+; RUN: grep ldrb %t
+; RUN: grep ldrsh %t
+; RUN: grep ldrh %t
+
+
+define i32 @f1(i8* %p) {
+entry:
+ %tmp = load i8* %p ; <i8> [#uses=1]
+ %tmp1 = sext i8 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f2(i8* %p) {
+entry:
+ %tmp = load i8* %p ; <i8> [#uses=1]
+ %tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @f3(i16* %p) {
+entry:
+ %tmp = load i16* %p ; <i16> [#uses=1]
+ %tmp3 = sext i16 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define i32 @f4(i16* %p) {
+entry:
+ %tmp = load i16* %p ; <i16> [#uses=1]
+ %tmp4 = zext i16 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
new file mode 100644
index 0000000..c76a5e4
--- /dev/null
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | grep cmp | count 1
+
+
+define i1 @t1(i64 %x) {
+ %B = icmp slt i64 %x, 0
+ ret i1 %B
+}
+
+define i1 @t2(i64 %x) {
+ %tmp = icmp ult i64 %x, 4294967296
+ ret i1 %tmp
+}
+
+define i1 @t3(i32 %x) {
+ %tmp = icmp ugt i32 %x, -1
+ ret i1 %tmp
+}
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
new file mode 100644
index 0000000..16ef7cc
--- /dev/null
+++ b/test/CodeGen/ARM/long.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i64 @f1() {
+; CHECK: f1:
+entry:
+ ret i64 0
+}
+
+define i64 @f2() {
+; CHECK: f2:
+entry:
+ ret i64 1
+}
+
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: mvn{{.*}}-2147483648
+entry:
+ ret i64 2147483647
+}
+
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: -2147483648
+entry:
+ ret i64 2147483648
+}
+
+define i64 @f5() {
+; CHECK: f5:
+; CHECK: mvn
+; CHECK: mvn{{.*}}-2147483648
+entry:
+ ret i64 9223372036854775807
+}
+
+define i64 @f6(i64 %x, i64 %y) {
+; CHECK: f6:
+; CHECK: adds
+; CHECK: adc
+entry:
+ %tmp1 = add i64 %y, 1 ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
+
+define void @f7() {
+; CHECK: f7:
+entry:
+ %tmp = call i64 @f8( ) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @f8()
+
+define i64 @f9(i64 %a, i64 %b) {
+; CHECK: f9:
+; CHECK: subs r
+; CHECK: sbc
+entry:
+ %tmp = sub i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %tmp
+}
+
+define i64 @f(i32 %a, i32 %b) {
+; CHECK: f:
+; CHECK: smull
+entry:
+ %tmp = sext i32 %a to i64 ; <i64> [#uses=1]
+ %tmp1 = sext i32 %b to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ ret i64 %tmp2
+}
+
+define i64 @g(i32 %a, i32 %b) {
+; CHECK: g:
+; CHECK: umull
+entry:
+ %tmp = zext i32 %a to i64 ; <i64> [#uses=1]
+ %tmp1 = zext i32 %b to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ ret i64 %tmp2
+}
+
+define i64 @f10() {
+; CHECK: f10:
+entry:
+ %a = alloca i64, align 8 ; <i64*> [#uses=1]
+ %retval = load i64* %a ; <i64> [#uses=1]
+ ret i64 %retval
+}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
new file mode 100644
index 0000000..76332cc
--- /dev/null
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i64 @f0(i64 %A, i64 %B) {
+; CHECK: f0
+; CHECK: movs r3, r3, lsr #1
+; CHECK-NEXT: mov r2, r2, rrx
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbc r1, r1, r3
+ %tmp = bitcast i64 %A to i64
+ %tmp2 = lshr i64 %B, 1
+ %tmp3 = sub i64 %tmp, %tmp2
+ ret i64 %tmp3
+}
+
+define i32 @f1(i64 %x, i64 %y) {
+; CHECK: f1
+; CHECK: mov r0, r0, lsl r2
+ %a = shl i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) {
+; CHECK: f2
+; CHECK: mov r0, r0, lsr r2
+; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: sub r2, r2, #32
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: movge r0, r1, asr r2
+ %a = ashr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) {
+; CHECK: f3
+; CHECK: mov r0, r0, lsr r2
+; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: sub r2, r2, #32
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: movge r0, r1, lsr r2
+ %a = lshr i64 %x, %y
+ %b = trunc i64 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
new file mode 100644
index 0000000..507ec2c
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
+; This test really wants to check that the resultant "cond_true" block only
+; has a single store in it, and that cond_true55 only has code to materialize
+; the constant and do a store. We do *not* want something like this:
+;
+;LBB1_3: @cond_true
+; add r8, r0, r6
+; str r10, [r8, #+4]
+;
+target triple = "arm-apple-darwin8"
+
+define void @foo(i32* %mc, i32* %mpp, i32* %ip, i32* %dpp, i32* %tpmm, i32 %M, i32* %tpim, i32* %tpdm, i32* %bp, i32* %ms, i32 %xmb) {
+entry:
+ %tmp6584 = icmp slt i32 %M, 1 ; <i1> [#uses=1]
+ br i1 %tmp6584, label %return, label %bb
+
+bb: ; preds = %cond_next59, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %k.069.0, %cond_next59 ] ; <i32> [#uses=6]
+ %k.069.0 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %tmp3 = getelementptr i32* %mpp, i32 %indvar ; <i32*> [#uses=1]
+ %tmp4 = load i32* %tmp3 ; <i32> [#uses=1]
+ %tmp8 = getelementptr i32* %tpmm, i32 %indvar ; <i32*> [#uses=1]
+ %tmp9 = load i32* %tmp8 ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp4 ; <i32> [#uses=2]
+ %tmp13 = getelementptr i32* %mc, i32 %k.069.0 ; <i32*> [#uses=5]
+ store i32 %tmp10, i32* %tmp13
+ %tmp17 = getelementptr i32* %ip, i32 %indvar ; <i32*> [#uses=1]
+ %tmp18 = load i32* %tmp17 ; <i32> [#uses=1]
+ %tmp22 = getelementptr i32* %tpim, i32 %indvar ; <i32*> [#uses=1]
+ %tmp23 = load i32* %tmp22 ; <i32> [#uses=1]
+ %tmp24 = add i32 %tmp23, %tmp18 ; <i32> [#uses=2]
+ %tmp30 = icmp sgt i32 %tmp24, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp30, label %cond_true, label %cond_next
+
+cond_true: ; preds = %bb
+ store i32 %tmp24, i32* %tmp13
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %bb
+ %tmp39 = load i32* %tmp13 ; <i32> [#uses=1]
+ %tmp42 = getelementptr i32* %ms, i32 %k.069.0 ; <i32*> [#uses=1]
+ %tmp43 = load i32* %tmp42 ; <i32> [#uses=1]
+ %tmp44 = add i32 %tmp43, %tmp39 ; <i32> [#uses=2]
+ store i32 %tmp44, i32* %tmp13
+ %tmp52 = icmp slt i32 %tmp44, -987654321 ; <i1> [#uses=1]
+ br i1 %tmp52, label %cond_true55, label %cond_next59
+
+cond_true55: ; preds = %cond_next
+ store i32 -987654321, i32* %tmp13
+ br label %cond_next59
+
+cond_next59: ; preds = %cond_true55, %cond_next
+ %tmp61 = add i32 %indvar, 2 ; <i32> [#uses=1]
+ %tmp65 = icmp sgt i32 %tmp61, %M ; <i1> [#uses=1]
+ br i1 %tmp65, label %return, label %bb
+
+return: ; preds = %cond_next59, %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
new file mode 100644
index 0000000..8130019
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
+; Should use scaled addressing mode.
+
+define void @sintzero(i32* %a) nounwind {
+entry:
+ store i32 0, i32* %a
+ br label %cond_next
+
+cond_next: ; preds = %cond_next, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %tmp25, %cond_next ] ; <i32> [#uses=1]
+ %tmp25 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %tmp36 = getelementptr i32* %a, i32 %tmp25 ; <i32*> [#uses=1]
+ store i32 0, i32* %tmp36
+ icmp eq i32 %tmp25, -1 ; <i1>:0 [#uses=1]
+ br i1 %0, label %return, label %cond_next
+
+return: ; preds = %cond_next
+ ret void
+}
diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll
new file mode 100644
index 0000000..f46c7a5
--- /dev/null
+++ b/test/CodeGen/ARM/mem.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm | grep strb
+; RUN: llc < %s -march=arm | grep strh
+
+define void @f1() {
+entry:
+ store i8 0, i8* null
+ ret void
+}
+
+define void @f2() {
+entry:
+ store i16 0, i16* null
+ ret void
+}
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
new file mode 100644
index 0000000..ed20c32
--- /dev/null
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
+
+ %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+@src = external global %struct.x
+@dst = external global %struct.x
+
+define i32 @t() {
+entry:
+ call void @llvm.memcpy.i32( i8* getelementptr (%struct.x* @dst, i32 0, i32 0), i8* getelementptr (%struct.x* @src, i32 0, i32 0), i32 11, i32 8 )
+ ret i32 0
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
new file mode 100644
index 0000000..41d5944
--- /dev/null
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm
+
+define void @f() {
+entry:
+ call void @llvm.memmove.i32( i8* null, i8* null, i32 64, i32 0 )
+ call void @llvm.memcpy.i32( i8* null, i8* null, i32 64, i32 0 )
+ call void @llvm.memset.i32( i8* null, i8 64, i32 0, i32 0 )
+ unreachable
+}
+
+declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32)
+
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
new file mode 100644
index 0000000..a6cdba4
--- /dev/null
+++ b/test/CodeGen/ARM/mls.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %c, %tmp1
+ ret i32 %tmp2
+}
+
+; sub doesn't commute, so no mls for this one
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+ %tmp1 = mul i32 %a, %b
+ %tmp2 = sub i32 %tmp1, %c
+ ret i32 %tmp2
+}
+
+; CHECK: mls r0, r0, r1, r2
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
new file mode 100644
index 0000000..886ff3f
--- /dev/null
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+@foo = common global i32 0 ; <i32*> [#uses=1]
+
+define arm_aapcs_vfpcc i32* @bar1() nounwind readnone {
+entry:
+; CHECK: movw r0, :lower16:foo
+; CHECK-NEXT: movt r0, :upper16:foo
+ ret i32* @foo
+}
+
+define arm_aapcs_vfpcc void @bar2(i32 %baz) nounwind {
+entry:
+; CHECK: movw r1, :lower16:foo
+; CHECK-NEXT: movt r1, :upper16:foo
+ store i32 %baz, i32* @foo, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll
new file mode 100644
index 0000000..e82aca0
--- /dev/null
+++ b/test/CodeGen/ARM/movt.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+; rdar://7317664
+
+define i32 @t(i32 %X) nounwind {
+; CHECK: t:
+; CHECK: movt r0, #65535
+entry:
+ %0 = or i32 %X, -65536
+ ret i32 %0
+}
+
+define i32 @t2(i32 %X) nounwind {
+; CHECK: t2:
+; CHECK: movt r0, #65534
+entry:
+ %0 = or i32 %X, -131072
+ %1 = and i32 %0, -65537
+ ret i32 %1
+}
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
new file mode 100644
index 0000000..466a802
--- /dev/null
+++ b/test/CodeGen/ARM/mul.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm | grep mul | count 2
+; RUN: llc < %s -march=arm | grep lsl | count 2
+
+define i32 @f1(i32 %u) {
+ %tmp = mul i32 %u, %u
+ ret i32 %tmp
+}
+
+define i32 @f2(i32 %u, i32 %v) {
+ %tmp = mul i32 %u, %v
+ ret i32 %tmp
+}
+
+define i32 @f3(i32 %u) {
+ %tmp = mul i32 %u, 5
+ ret i32 %tmp
+}
+
+define i32 @f4(i32 %u) {
+ %tmp = mul i32 %u, 4
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
new file mode 100644
index 0000000..93188cd
--- /dev/null
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 9
+ ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+ %0 = mul i32 %v, 7
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
new file mode 100644
index 0000000..148f291
--- /dev/null
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep smmul | count 1
+; RUN: llc < %s -march=arm | grep umull | count 1
+
+define i32 @smulhi(i32 %x, i32 %y) {
+ %tmp = sext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = sext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.1
+}
+
+define i32 @umulhi(i32 %x, i32 %y) {
+ %tmp = zext i32 %x to i64 ; <i64> [#uses=1]
+ %tmp1 = zext i32 %y to i64 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
+ %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp3.upgrd.2
+}
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
new file mode 100644
index 0000000..571c21a
--- /dev/null
+++ b/test/CodeGen/ARM/mvn.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -march=arm | grep mvn | count 8
+
+define i32 @f1() {
+entry:
+ ret i32 -1
+}
+
+define i32 @f2(i32 %a) {
+entry:
+ %tmpnot = xor i32 %a, -1 ; <i32> [#uses=1]
+ ret i32 %tmpnot
+}
+
+define i32 @f3(i32 %a) {
+entry:
+ %tmp1 = shl i32 %a, 2 ; <i32> [#uses=1]
+ %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1]
+ ret i32 %tmp1not
+}
+
+define i32 @f4(i32 %a, i8 %b) {
+entry:
+ %shift.upgrd.1 = zext i8 %b to i32 ; <i32> [#uses=1]
+ %tmp3 = shl i32 %a, %shift.upgrd.1 ; <i32> [#uses=1]
+ %tmp3not = xor i32 %tmp3, -1 ; <i32> [#uses=1]
+ ret i32 %tmp3not
+}
+
+define i32 @f5(i32 %a) {
+entry:
+ %tmp1 = lshr i32 %a, 2 ; <i32> [#uses=1]
+ %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1]
+ ret i32 %tmp1not
+}
+
+define i32 @f6(i32 %a, i8 %b) {
+entry:
+ %shift.upgrd.2 = zext i8 %b to i32 ; <i32> [#uses=1]
+ %tmp2 = lshr i32 %a, %shift.upgrd.2 ; <i32> [#uses=1]
+ %tmp2not = xor i32 %tmp2, -1 ; <i32> [#uses=1]
+ ret i32 %tmp2not
+}
+
+define i32 @f7(i32 %a) {
+entry:
+ %tmp1 = ashr i32 %a, 2 ; <i32> [#uses=1]
+ %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1]
+ ret i32 %tmp1not
+}
+
+define i32 @f8(i32 %a, i8 %b) {
+entry:
+ %shift.upgrd.3 = zext i8 %b to i32 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %a, %shift.upgrd.3 ; <i32> [#uses=1]
+ %tmp3not = xor i32 %tmp3, -1 ; <i32> [#uses=1]
+ ret i32 %tmp3not
+}
+
+define i32 @f9() {
+entry:
+ %tmp4845 = add i32 0, 0 ; <i32> [#uses=1]
+ br label %cond_true4848
+
+cond_true4848: ; preds = %entry
+ %tmp4851 = sub i32 -3, 0 ; <i32> [#uses=1]
+ %abc = add i32 %tmp4851, %tmp4845 ; <i32> [#uses=1]
+ ret i32 %abc
+}
+
+define i1 @f10(i32 %a) {
+entry:
+ %tmp102 = icmp eq i32 -2, %a ; <i1> [#uses=1]
+ ret i1 %tmp102
+}
diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll
new file mode 100644
index 0000000..5892737
--- /dev/null
+++ b/test/CodeGen/ARM/neon_arith1.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+neon | grep vadd
+
+define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
+entry:
+ %0 = add <8 x i8> %a, %b
+ ret <8 x i8> %0
+}
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
new file mode 100644
index 0000000..c78872a
--- /dev/null
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
+
+define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
+entry:
+ %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
+ %2 = add <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1]
+ %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ store <2 x i32> %3, <2 x i32>* %r, align 8
+ ret void
+}
+
+define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
+entry:
+ %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
+ %2 = sub <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1]
+ %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %3
+}
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
new file mode 100644
index 0000000..130277b
--- /dev/null
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2
+
+define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+entry:
+ %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
+ %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
+ %2 = add <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %3, <4 x i32>* %r, align 16
+ ret void
+}
+
+define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
+entry:
+ %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
+ %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
+ %2 = sub <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %3
+}
+
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
new file mode 100644
index 0000000..1e2e7aa
--- /dev/null
+++ b/test/CodeGen/ARM/pack.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep pkhbt | count 5
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep pkhtb | count 4
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test1a(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, -65536 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+ %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
+ %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
+ %tmp46 = or i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp46
+}
+
+define i32 @test5(i32 %X, i32 %Y) {
+ %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
+ %tmp4 = lshr i32 %tmp2, 16 ; <i32> [#uses=2]
+ %tmp5 = or i32 %tmp4, %tmp17 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test5a(i32 %X, i32 %Y) {
+ %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
+ %tmp39 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp39, %tmp110 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+define i32 @test6(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
+ %tmp38 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp38, 65535 ; <i32> [#uses=1]
+ %tmp59 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp59
+}
+
+define i32 @test7(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
+ %tmp4 = and i32 %tmp3, 65535 ; <i32> [#uses=1]
+ %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll
new file mode 100644
index 0000000..606d969
--- /dev/null
+++ b/test/CodeGen/ARM/pr3502.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi
+;pr3502
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+ %struct.ArmPTD = type { i32 }
+ %struct.RegisterSave = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.SHARED_AREA = type { i32, %struct.SHARED_AREA*, %struct.SHARED_AREA*, %struct.SHARED_AREA*, %struct.ArmPTD, void (%struct.RegisterSave*)*, void (%struct.RegisterSave*)*, i32, [1024 x i8], i32, i32, i32, i32, i32, i8, i8, i16, i32, i32, i32, i32, [16 x i8], i32, i32, i32, i8, i8, i8, i32, i16, i32, i64, i32, i32, i32, i32, i32, i32, i8*, i32, [256 x i8], i32, i32, i32, [20 x i8], %struct.RegisterSave, { %struct.WorldSwitchV5 }, [4 x i32] }
+ %struct.WorldSwitchV5 = type { i32, i32, i32, i32, i32, i32, i32 }
+
+define void @SomeCall(i32 %num) nounwind {
+entry:
+ tail call void asm sideeffect "mcr p15, 0, $0, c7, c10, 4 \0A\09", "r,~{memory}"(i32 0) nounwind
+ tail call void asm sideeffect "mcr p15,0,$0,c7,c14,0", "r,~{memory}"(i32 0) nounwind
+ %0 = load %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1]
+ %1 = ptrtoint %struct.SHARED_AREA* %0 to i32 ; <i32> [#uses=1]
+ %2 = lshr i32 %1, 20 ; <i32> [#uses=1]
+ %3 = tail call i32 @SetCurrEntry(i32 %2, i32 0) nounwind ; <i32> [#uses=0]
+ tail call void @ClearStuff(i32 0) nounwind
+ ret void
+}
+
+declare i32 @SetCurrEntry(i32, i32)
+
+declare void @ClearStuff(i32)
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
new file mode 100644
index 0000000..fba56b4
--- /dev/null
+++ b/test/CodeGen/ARM/private.ll
@@ -0,0 +1,22 @@
+; Test to make sure that the 'private' is used correctly.
+;
+; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
+; RUN: grep .Lfoo: %t
+; RUN: egrep bl.*\.Lfoo %t
+; RUN: grep .Lbaz: %t
+; RUN: grep long.*\.Lbaz %t
+
+declare void @foo()
+
+define private void @foo() {
+ ret void
+}
+
+@baz = private global i32 4
+
+define i32 @bar() {
+ call void @foo()
+ %1 = load i32* @baz, align 4
+ ret i32 %1
+}
+
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
new file mode 100644
index 0000000..367f782
--- /dev/null
+++ b/test/CodeGen/ARM/remat.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization"
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
+entry:
+ br i1 undef, label %smvp.exit, label %bb.i3
+
+bb.i3: ; preds = %bb.i3, %bb134
+ br i1 undef, label %smvp.exit, label %bb.i3
+
+smvp.exit: ; preds = %bb.i3
+ %0 = fmul double %d1, 2.400000e-03 ; <double> [#uses=2]
+ br i1 undef, label %bb138.preheader, label %bb159
+
+bb138.preheader: ; preds = %smvp.exit
+ br label %bb138
+
+bb138: ; preds = %bb138, %bb138.preheader
+ br i1 undef, label %bb138, label %bb145.loopexit
+
+bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
+ %1 = fmul double %d1, -1.200000e-03 ; <double> [#uses=1]
+ %2 = fadd double %d2, %1 ; <double> [#uses=1]
+ %3 = fmul double %2, %d2 ; <double> [#uses=1]
+ %4 = fsub double 0.000000e+00, %3 ; <double> [#uses=1]
+ br i1 %14, label %phi1.exit, label %bb.i35
+
+bb.i35: ; preds = %bb142
+ %5 = call arm_apcscc double @sin(double %15) nounwind readonly ; <double> [#uses=1]
+ %6 = fmul double %5, 0x4031740AFA84AD8A ; <double> [#uses=1]
+ %7 = fsub double 1.000000e+00, undef ; <double> [#uses=1]
+ %8 = fdiv double %7, 6.000000e-01 ; <double> [#uses=1]
+ br label %phi1.exit
+
+phi1.exit: ; preds = %bb.i35, %bb142
+ %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=0]
+ %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
+ %10 = fmul double undef, %9 ; <double> [#uses=0]
+ br i1 %14, label %phi0.exit, label %bb.i
+
+bb.i: ; preds = %phi1.exit
+ unreachable
+
+phi0.exit: ; preds = %phi1.exit
+ %11 = fsub double %4, undef ; <double> [#uses=1]
+ %12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1]
+ store double %12, double* undef, align 4
+ br label %bb142
+
+bb145.loopexit: ; preds = %bb138
+ br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
+
+bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit
+ %13 = fmul double %0, 0x401921FB54442D18 ; <double> [#uses=1]
+ %14 = fcmp ugt double %0, 6.000000e-01 ; <i1> [#uses=2]
+ %15 = fdiv double %13, 6.000000e-01 ; <double> [#uses=1]
+ br label %bb142
+
+bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134
+ unreachable
+
+bb166: ; preds = %bb127
+ unreachable
+}
+
+declare arm_apcscc double @sin(double) nounwind readonly
diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll
new file mode 100644
index 0000000..5c312eb
--- /dev/null
+++ b/test/CodeGen/ARM/ret0.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=arm
+
+define i32 @test() {
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll
new file mode 100644
index 0000000..1ab947b
--- /dev/null
+++ b/test/CodeGen/ARM/ret_arg1.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=arm
+
+define i32 @test(i32 %a1) {
+ ret i32 %a1
+}
diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll
new file mode 100644
index 0000000..84477d0
--- /dev/null
+++ b/test/CodeGen/ARM/ret_arg2.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm
+
+define i32 @test(i32 %a1, i32 %a2) {
+ ret i32 %a2
+}
+
diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll
new file mode 100644
index 0000000..f7f9057
--- /dev/null
+++ b/test/CodeGen/ARM/ret_arg3.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=arm
+define i32 @test(i32 %a1, i32 %a2, i32 %a3) {
+ ret i32 %a3
+}
+
diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll
new file mode 100644
index 0000000..f7b3e4a
--- /dev/null
+++ b/test/CodeGen/ARM/ret_arg4.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=arm
+
+define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+ ret i32 %a4
+}
diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll
new file mode 100644
index 0000000..c4f9fb5e
--- /dev/null
+++ b/test/CodeGen/ARM/ret_arg5.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=arm
+
+define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+ ret i32 %a5
+}
diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll
new file mode 100644
index 0000000..2bafea6
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define float @test_f32(float %a1, float %a2) {
+ ret float %a2
+}
+
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
new file mode 100644
index 0000000..c6ce60e
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
+ ret float %a5
+}
+
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
new file mode 100644
index 0000000..386e85f
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define double @test_f64(double %a1, double %a2) {
+ ret double %a2
+}
+
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
new file mode 100644
index 0000000..bdb0a60
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2
+
+define double @test_double_arg_reg_split(i32 %a1, double %a2) {
+ ret double %a2
+}
+
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
new file mode 100644
index 0000000..4f841a3
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
+ ret double %a3
+}
+
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
new file mode 100644
index 0000000..2144317
--- /dev/null
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
+ ret double %a4
+}
+
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
new file mode 100644
index 0000000..908c34f
--- /dev/null
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
+ ret i128 %a3
+}
+
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
new file mode 100644
index 0000000..b1a1024
--- /dev/null
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define i64 @test_i64(i64 %a1, i64 %a2) {
+ ret i64 %a2
+}
+
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
new file mode 100644
index 0000000..ffc1d2f
--- /dev/null
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
+ ret i64 %a3
+}
+
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
new file mode 100644
index 0000000..956bce5
--- /dev/null
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2
+
+define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
+ ret i64 %a3
+}
+
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
new file mode 100644
index 0000000..2b7ae05
--- /dev/null
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=arm
+
+define void @test() {
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
new file mode 100644
index 0000000..1c12268
--- /dev/null
+++ b/test/CodeGen/ARM/rev.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
+; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
+
+define i32 @test1(i32 %X) {
+ %tmp1 = lshr i32 %X, 8 ; <i32> [#uses=3]
+ %X15 = bitcast i32 %X to i32 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %X15, 8 ; <i32> [#uses=2]
+ %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, -16777216 ; <i32> [#uses=1]
+ %tmp9 = and i32 %tmp1, 255 ; <i32> [#uses=1]
+ %tmp13 = and i32 %tmp4, 65280 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp5, %tmp2 ; <i32> [#uses=1]
+ %tmp10 = or i32 %tmp6, %tmp13 ; <i32> [#uses=1]
+ %tmp14 = or i32 %tmp10, %tmp9 ; <i32> [#uses=1]
+ ret i32 %tmp14
+}
+
+define i32 @test2(i32 %X) {
+ %tmp1 = lshr i32 %X, 8 ; <i32> [#uses=1]
+ %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 ; <i16> [#uses=1]
+ %tmp3 = trunc i32 %X to i16 ; <i16> [#uses=1]
+ %tmp2 = and i16 %tmp1.upgrd.1, 255 ; <i16> [#uses=1]
+ %tmp4 = shl i16 %tmp3, 8 ; <i16> [#uses=1]
+ %tmp5 = or i16 %tmp2, %tmp4 ; <i16> [#uses=1]
+ %tmp5.upgrd.2 = sext i16 %tmp5 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp5.upgrd.2
+}
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
new file mode 100644
index 0000000..6f1d87d
--- /dev/null
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a) {
+entry:
+; CHECK: f1:
+; CHECK: sbfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = ashr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a) {
+entry:
+; CHECK: f2:
+; CHECK: ubfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = lshr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a) {
+entry:
+; CHECK: f3:
+; CHECK: sbfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = ashr i32 %tmp, 29
+ ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a) {
+entry:
+; CHECK: f4:
+; CHECK: ubfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = lshr i32 %tmp, 29
+ ret i32 %tmp2
+}
+
+define i32 @f5(i32 %a) {
+entry:
+; CHECK: f5:
+; CHECK-NOT: sbfx
+; CHECK: bx
+ %tmp = shl i32 %a, 3
+ %tmp2 = ashr i32 %tmp, 1
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
new file mode 100644
index 0000000..7a566d4
--- /dev/null
+++ b/test/CodeGen/ARM/section.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=arm-linux | \
+; RUN: grep {__DTOR_END__:}
+; RUN: llc < %s -mtriple=arm-linux | \
+; RUN: grep {\\.section.\\.dtors,"aw",.progbits}
+
+@__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0]
+
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
new file mode 100644
index 0000000..07edc91
--- /dev/null
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=T2
+
+define arm_apcscc i32 @t1(i32 %c) nounwind readnone {
+entry:
+; ARM: t1:
+; ARM: mov r1, #101
+; ARM: orr r1, r1, #1, 24
+; ARM: movgt r0, #123
+
+; T2: t1:
+; T2: movw r0, #357
+; T2: movgt r0, #123
+
+ %0 = icmp sgt i32 %c, 1
+ %1 = select i1 %0, i32 123, i32 357
+ ret i32 %1
+}
+
+define arm_apcscc i32 @t2(i32 %c) nounwind readnone {
+entry:
+; ARM: t2:
+; ARM: mov r1, #101
+; ARM: orr r1, r1, #1, 24
+; ARM: movle r0, #123
+
+; T2: t2:
+; T2: movw r0, #357
+; T2: movle r0, #123
+
+ %0 = icmp sgt i32 %c, 1
+ %1 = select i1 %0, i32 357, i32 123
+ ret i32 %1
+}
+
+define arm_apcscc i32 @t3(i32 %a) nounwind readnone {
+entry:
+; ARM: t3:
+; ARM: mov r0, #0
+; ARM: moveq r0, #1
+
+; T2: t3:
+; T2: mov r0, #0
+; T2: moveq r0, #1
+ %0 = icmp eq i32 %a, 160
+ %1 = zext i1 %0 to i32
+ ret i32 %1
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
new file mode 100644
index 0000000..29c55c6
--- /dev/null
+++ b/test/CodeGen/ARM/select.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+
+define i32 @f1(i32 %a.s) {
+;CHECK: f1:
+;CHECK: moveq
+entry:
+ %tmp = icmp eq i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f2(i32 %a.s) {
+;CHECK: f2:
+;CHECK: movgt
+entry:
+ %tmp = icmp sgt i32 %a.s, 4
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f3(i32 %a.s, i32 %b.s) {
+;CHECK: f3:
+;CHECK: movlt
+entry:
+ %tmp = icmp slt i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f4(i32 %a.s, i32 %b.s) {
+;CHECK: f4:
+;CHECK: movle
+entry:
+ %tmp = icmp sle i32 %a.s, %b.s
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f5(i32 %a.u, i32 %b.u) {
+;CHECK: f5:
+;CHECK: movls
+entry:
+ %tmp = icmp ule i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define i32 @f6(i32 %a.u, i32 %b.u) {
+;CHECK: f6:
+;CHECK: movhi
+entry:
+ %tmp = icmp ugt i32 %a.u, %b.u
+ %tmp1.s = select i1 %tmp, i32 2, i32 3
+ ret i32 %tmp1.s
+}
+
+define double @f7(double %a, double %b) {
+;CHECK: f7:
+;CHECK: movlt
+;CHECK: movlt
+;CHECK-VFP: f7:
+;CHECK-VFP: vmovmi
+ %tmp = fcmp olt double %a, 1.234e+00
+ %tmp1 = select i1 %tmp, double -1.000e+00, double %b
+ ret double %tmp1
+}
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
new file mode 100644
index 0000000..7fd91ce
--- /dev/null
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm | grep mov | count 2
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+ %tmp3 = add i32 %tmp2, %b
+ ret i32 %tmp3
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %tmp1 = icmp sgt i32 %c, 10
+ %tmp2 = select i1 %tmp1, i32 0, i32 10
+ %tmp3 = sub i32 %b, %tmp2
+ ret i32 %tmp3
+}
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
new file mode 100644
index 0000000..2bbe9fd
--- /dev/null
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | grep add | grep lsl
+; RUN: llc < %s -march=arm | grep bic | grep asr
+
+
+define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
+ %shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
+ %A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
+ %B = add i32 %X, %A ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
+ %shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
+ %A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
+ %B = xor i32 %A, -1 ; <i32> [#uses=1]
+ %C = and i32 %X, %B ; <i32> [#uses=1]
+ ret i32 %C
+}
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
new file mode 100644
index 0000000..b7ab2e7
--- /dev/null
+++ b/test/CodeGen/ARM/smul.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -mattr=+v5TE
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
+; RUN: grep smulbt | count 1
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
+; RUN: grep smultt | count 1
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
+; RUN: grep smlabt | count 1
+
+@x = weak global i16 0 ; <i16*> [#uses=1]
+@y = weak global i16 0 ; <i16*> [#uses=0]
+
+define i32 @f1(i32 %y) {
+ %tmp = load i16* @x ; <i16> [#uses=1]
+ %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
+ %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+ %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
+ %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @f3(i32 %a, i16 %x, i32 %y) {
+ %tmp = sext i16 %x to i32 ; <i32> [#uses=1]
+ %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
+ %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
new file mode 100644
index 0000000..5ad7ecc
--- /dev/null
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: bic sp, sp, #15
+; CHECK: vst1.64 {{.*}}sp, :128
+; CHECK: vld1.64 {{.*}}sp, :128
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
new file mode 100644
index 0000000..1dd57dd
--- /dev/null
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep add | count 1
+
+define void @f1() {
+ %c = alloca i8, align 1
+ ret void
+}
+
+define i32 @f2() {
+ ret i32 1
+}
+
+
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
new file mode 100644
index 0000000..22a7ecb
--- /dev/null
+++ b/test/CodeGen/ARM/stm.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+
+@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
+@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+ %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
new file mode 100644
index 0000000..97916f1
--- /dev/null
+++ b/test/CodeGen/ARM/str_post.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i16 @test1(i32* %X, i16* %A) {
+; CHECK: test1:
+; CHECK: strh {{.*}}[{{.*}}], #-4
+ %Y = load i32* %X ; <i32> [#uses=1]
+ %tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1]
+ store i16 %tmp1, i16* %A
+ %tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1]
+ %tmp3 = sub i16 %tmp2, 4 ; <i16> [#uses=1]
+ ret i16 %tmp3
+}
+
+define i32 @test2(i32* %X, i32* %A) {
+; CHECK: test2:
+; CHECK: str {{.*}}[{{.*}}],
+ %Y = load i32* %X ; <i32> [#uses=1]
+ store i32 %Y, i32* %A
+ %tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1]
+ %tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
new file mode 100644
index 0000000..f8d3df2
--- /dev/null
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
new file mode 100644
index 0000000..e56e3f2
--- /dev/null
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | \
+; RUN: grep {str.*\\!} | count 2
+
+define void @test1(i32* %X, i32* %A, i32** %dest) {
+ %B = load i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
+ store i32 %B, i32* %Y
+ store i32* %Y, i32** %dest
+ ret void
+}
+
+define i16* @test2(i16* %X, i32* %A) {
+ %B = load i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
+ %tmp = trunc i32 %B to i16 ; <i16> [#uses=1]
+ store i16 %tmp, i16* %Y
+ ret i16* %Y
+}
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
new file mode 100644
index 0000000..2f1166b
--- /dev/null
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm | \
+; RUN: grep strb | count 1
+; RUN: llc < %s -march=arm | \
+; RUN: grep strh | count 1
+
+define void @test1(i32 %v, i16* %ptr) {
+ %tmp = trunc i32 %v to i16 ; <i16> [#uses=1]
+ store i16 %tmp, i16* %ptr
+ ret void
+}
+
+define void @test2(i32 %v, i8* %ptr) {
+ %tmp = trunc i32 %v to i8 ; <i8> [#uses=1]
+ store i8 %tmp, i8* %ptr
+ ret void
+}
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
new file mode 100644
index 0000000..4752f17
--- /dev/null
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep sxtb | count 2
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep sxtb | grep ror | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | \
+; RUN: grep sxtab | count 1
+
+define i32 @test0(i8 %A) {
+ %B = sext i8 %A to i32
+ ret i32 %B
+}
+
+define i8 @test1(i32 %A) signext {
+ %B = lshr i32 %A, 8
+ %C = shl i32 %A, 24
+ %D = or i32 %B, %C
+ %E = trunc i32 %D to i8
+ ret i8 %E
+}
+
+define i32 @test2(i32 %A, i32 %X) signext {
+ %B = lshr i32 %A, 8
+ %C = shl i32 %A, 24
+ %D = or i32 %B, %C
+ %E = trunc i32 %D to i8
+ %F = sext i8 %E to i32
+ %G = add i32 %F, %X
+ ret i32 %G
+}
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
new file mode 100644
index 0000000..848a4df
--- /dev/null
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+
+define i32 @f6(i32 %a) {
+; CHECK:f6
+; CHECK: movw r0, #:lower16:65537123
+; CHECK: movt r0, #:upper16:65537123
+ %tmp = add i32 0, 65537123
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
new file mode 100644
index 0000000..17c8bae
--- /dev/null
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+declare void @bar(i32)
+declare void @car(i32)
+declare void @dar(i32)
+declare void @ear(i32)
+declare void @far(i32)
+declare i1 @qux()
+
+@GHJK = global i32 0
+
+declare i8* @choose(i8*, i8*)
+
+; BranchFolding should tail-duplicate the indirect jump to avoid
+; redundant branching.
+
+; CHECK: tail_duplicate_me:
+; CHECK: qux
+; CHECK: qux
+; CHECK: ldr r{{.}}, LCPI
+; CHECK: str r
+; CHECK-NEXT: bx r
+; CHECK: ldr r{{.}}, LCPI
+; CHECK: str r
+; CHECK-NEXT: bx r
+; CHECK: ldr r{{.}}, LCPI
+; CHECK: str r
+; CHECK-NEXT: bx r
+
+define void @tail_duplicate_me() nounwind {
+entry:
+ %a = call i1 @qux()
+ %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
+ i8* blockaddress(@tail_duplicate_me, %altret))
+ br i1 %a, label %A, label %next
+next:
+ %b = call i1 @qux()
+ br i1 %b, label %B, label %C
+
+A:
+ call void @bar(i32 0)
+ store i32 0, i32* @GHJK
+ br label %M
+
+B:
+ call void @car(i32 1)
+ store i32 0, i32* @GHJK
+ br label %M
+
+C:
+ call void @dar(i32 2)
+ store i32 0, i32* @GHJK
+ br label %M
+
+M:
+ indirectbr i8* %c, [label %return, label %altret]
+
+return:
+ call void @ear(i32 1000)
+ ret void
+altret:
+ call void @far(i32 1001)
+ ret void
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
new file mode 100644
index 0000000..3143387
--- /dev/null
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: grep {__aeabi_read_tp}
+
+define i8* @test() {
+entry:
+ %tmp1 = call i8* @llvm.arm.thread.pointer( ) ; <i8*> [#uses=0]
+ ret i8* %tmp1
+}
+
+declare i8* @llvm.arm.thread.pointer()
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
new file mode 100644
index 0000000..1087094
--- /dev/null
+++ b/test/CodeGen/ARM/tls1.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: grep {i(tpoff)}
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: grep {__aeabi_read_tp}
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: -relocation-model=pic | grep {__tls_get_addr}
+
+
+@i = thread_local global i32 15 ; <i32*> [#uses=2]
+
+define i32 @f() {
+entry:
+ %tmp1 = load i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32* @g() {
+entry:
+ ret i32* @i
+}
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
new file mode 100644
index 0000000..d932f90
--- /dev/null
+++ b/test/CodeGen/ARM/tls2.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: | FileCheck %s -check-prefix=CHECK-NONPIC
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC
+
+@i = external thread_local global i32 ; <i32*> [#uses=2]
+
+define i32 @f() {
+; CHECK-NONPIC: f:
+; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: i(gottpoff)
+; CHECK-PIC: f:
+; CHECK-PIC: __tls_get_addr
+entry:
+ %tmp1 = load i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32* @g() {
+; CHECK-NONPIC: g:
+; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}]
+; CHECK-NONPIC: i(gottpoff)
+; CHECK-PIC: g:
+; CHECK-PIC: __tls_get_addr
+entry:
+ ret i32* @i
+}
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
new file mode 100644
index 0000000..df7a4ca
--- /dev/null
+++ b/test/CodeGen/ARM/tls3.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: grep {tbss}
+
+%struct.anon = type { i32, i32 }
+@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1]
+
+define i32 @main() {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
new file mode 100644
index 0000000..3033c2b
--- /dev/null
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1
+
+ %struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
+ %struct.B = type { float, float, i32, i32, i32, [0 x i8] }
+
+define i8 @f1(%struct.A* %d) {
+ %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4
+ %tmp23 = bitcast i16* %tmp2 to i32*
+ %tmp4 = load i32* %tmp23
+ %tmp512 = lshr i32 %tmp4, 24
+ %tmp56 = trunc i32 %tmp512 to i8
+ ret i8 %tmp56
+}
+
+define i32 @f2(%struct.A* %d) {
+ %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4
+ %tmp23 = bitcast i16* %tmp2 to i32*
+ %tmp4 = load i32* %tmp23
+ %tmp512 = lshr i32 %tmp4, 24
+ %tmp56 = trunc i32 %tmp512 to i8
+ %tmp57 = sext i8 %tmp56 to i32
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
new file mode 100644
index 0000000..2da08b6
--- /dev/null
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | not grep orr
+; RUN: llc < %s -march=arm | not grep mov
+
+define void @bar(i8* %P, i16* %Q) {
+entry:
+ %P1 = bitcast i8* %P to i16* ; <i16*> [#uses=1]
+ %tmp = load i16* %Q, align 1 ; <i16> [#uses=1]
+ store i16 %tmp, i16* %P1, align 1
+ ret void
+}
+
+define void @foo(i8* %P, i32* %Q) {
+entry:
+ %P1 = bitcast i8* %P to i32* ; <i32*> [#uses=1]
+ %tmp = load i32* %Q, align 1 ; <i32> [#uses=1]
+ store i32 %tmp, i32* %P1, align 1
+ ret void
+}
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
new file mode 100644
index 0000000..c83111e
--- /dev/null
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | grep tst
+; RUN: llc < %s -march=arm | grep teq
+
+define i32 @f(i32 %a) {
+entry:
+ %tmp2 = and i32 %a, 255 ; <i32> [#uses=1]
+ icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1]
+ %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1]
+ ret i32 %retval
+}
+
+define i32 @g(i32 %a) {
+entry:
+ %tmp2 = xor i32 %a, 255
+ icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1]
+ %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1]
+ ret i32 %retval
+}
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
new file mode 100644
index 0000000..32eb225
--- /dev/null
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2
+
+ %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+ %struct.__sFILEX = type opaque
+ %struct.__sbuf = type { i8*, i32 }
+@"\01LC10" = external constant [54 x i8] ; <[54 x i8]*> [#uses=1]
+
+define fastcc void @t() {
+entry:
+ %0 = load i64* null, align 4 ; <i64> [#uses=1]
+ %1 = uitofp i64 %0 to double ; <double> [#uses=1]
+ %2 = fdiv double 0.000000e+00, %1 ; <double> [#uses=1]
+ %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @fprintf(%struct.FILE*, i8*, ...)
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
new file mode 100644
index 0000000..a4494f3
--- /dev/null
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
+
+; rdar://7113725
+
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
+entry:
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+ %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1]
+ %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1]
+ %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1]
+ store i32 %tmp.i, i32* %__dest2.i, align 1
+ ret void
+}
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
new file mode 100644
index 0000000..bd28034
--- /dev/null
+++ b/test/CodeGen/ARM/unord.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm | grep movne | count 1
+; RUN: llc < %s -march=arm | grep moveq | count 1
+
+define i32 @f1(float %X, float %Y) {
+ %tmp = fcmp uno float %X, %Y
+ %retval = select i1 %tmp, i32 1, i32 -1
+ ret i32 %retval
+}
+
+define i32 @f2(float %X, float %Y) {
+ %tmp = fcmp ord float %X, %Y
+ %retval = select i1 %tmp, i32 1, i32 -1
+ ret i32 %retval
+}
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
new file mode 100644
index 0000000..6307795
--- /dev/null
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
+
+define i8 @test1(i32 %A.u) zeroext {
+ %B.u = trunc i32 %A.u to i8
+ ret i8 %B.u
+}
+
+define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
+ %C.u = trunc i32 %B.u to i8
+ %D.u = zext i8 %C.u to i32
+ %E.u = add i32 %A.u, %D.u
+ ret i32 %E.u
+}
+
+define i32 @test3(i32 %A.u) zeroext {
+ %B.u = lshr i32 %A.u, 8
+ %C.u = shl i32 %A.u, 24
+ %D.u = or i32 %B.u, %C.u
+ %E.u = trunc i32 %D.u to i16
+ %F.u = zext i16 %E.u to i32
+ ret i32 %F.u
+}
diff --git a/test/CodeGen/ARM/uxtb.ll b/test/CodeGen/ARM/uxtb.ll
new file mode 100644
index 0000000..9d6e4bd
--- /dev/null
+++ b/test/CodeGen/ARM/uxtb.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin | \
+; RUN: grep uxt | count 10
+
+define i32 @test1(i32 %x) {
+ %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @test2(i32 %x) {
+ %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @test3(i32 %x) {
+ %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @test4(i32 %x) {
+ %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
+ %tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test5(i32 %x) {
+ %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+define i32 @test6(i32 %x) {
+ %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test7(i32 %x) {
+ %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test8(i32 %x) {
+ %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
+ %tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test9(i32 %x) {
+ %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1]
+ %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
+ %tmp6 = or i32 %tmp5, %tmp1 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test10(i32 %p0) {
+ %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
+ %tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]
+ %tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp4, 458759 ; <i32> [#uses=1]
+ %tmp7 = or i32 %tmp5, %tmp2 ; <i32> [#uses=1]
+ ret i32 %tmp7
+}
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
new file mode 100644
index 0000000..e2dca46
--- /dev/null
+++ b/test/CodeGen/ARM/vaba.ll
@@ -0,0 +1,205 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabas8:
+;CHECK: vaba.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabas16:
+;CHECK: vaba.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabas32:
+;CHECK: vaba.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i32> %tmp4
+}
+
+define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabau8:
+;CHECK: vaba.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabau16:
+;CHECK: vaba.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabau32:
+;CHECK: vaba.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQs8:
+;CHECK: vaba.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = load <16 x i8>* %C
+ %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQs16:
+;CHECK: vaba.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQs32:
+;CHECK: vaba.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQu8:
+;CHECK: vaba.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = load <16 x i8>* %C
+ %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQu16:
+;CHECK: vaba.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQu32:
+;CHECK: vaba.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+declare <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
new file mode 100644
index 0000000..2b45393
--- /dev/null
+++ b/test/CodeGen/ARM/vabd.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabds8:
+;CHECK: vabd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabds16:
+;CHECK: vabd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabds32:
+;CHECK: vabd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdu8:
+;CHECK: vabd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdu16:
+;CHECK: vabd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdu32:
+;CHECK: vabd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vabdf32:
+;CHECK: vabd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQs8:
+;CHECK: vabd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQs16:
+;CHECK: vabd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQs32:
+;CHECK: vabd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQu8:
+;CHECK: vabd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQu16:
+;CHECK: vabd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQu32:
+;CHECK: vabd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vabdQf32:
+;CHECK: vabd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
new file mode 100644
index 0000000..18ba61f
--- /dev/null
+++ b/test/CodeGen/ARM/vabs.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
+;CHECK: vabss8:
+;CHECK: vabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
+;CHECK: vabss16:
+;CHECK: vabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
+;CHECK: vabss32:
+;CHECK: vabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
+;CHECK: vabsf32:
+;CHECK: vabs.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vabsQs8:
+;CHECK: vabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vabsQs16:
+;CHECK: vabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vabsQs32:
+;CHECK: vabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
+;CHECK: vabsQf32:
+;CHECK: vabs.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
new file mode 100644
index 0000000..9fa5307
--- /dev/null
+++ b/test/CodeGen/ARM/vadd.ll
@@ -0,0 +1,277 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddi8:
+;CHECK: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = add <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddi16:
+;CHECK: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = add <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddi32:
+;CHECK: vadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = add <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vaddi64:
+;CHECK: vadd.i64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = add <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vaddf32:
+;CHECK: vadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = add <2 x float> %tmp1, %tmp2
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vaddQi8:
+;CHECK: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = add <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddQi16:
+;CHECK: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = add <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddQi32:
+;CHECK: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = add <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddQi64:
+;CHECK: vadd.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = add <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vaddQf32:
+;CHECK: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = add <4 x float> %tmp1, %tmp2
+ ret <4 x float> %tmp3
+}
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
new file mode 100644
index 0000000..5f3536c
--- /dev/null
+++ b/test/CodeGen/ARM/vargs.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm
+@str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1]
+
+define i32 @main() {
+entry:
+ %tmp = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0]
+ %tmp2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0]
+ ret i32 11
+}
+
+declare i32 @printf(i8*, ...)
+
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
new file mode 100644
index 0000000..e4ef9e3
--- /dev/null
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
+
+define i32 @f(i32 %a, ...) {
+entry:
+ %a_addr = alloca i32 ; <i32*> [#uses=1]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %tmp = alloca i32, align 4 ; <i32*> [#uses=2]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %a, i32* %a_addr
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp ; <i32> [#uses=1]
+ store i32 %tmp1, i32* %retval
+ br label %return
+
+return: ; preds = %entry
+ %retval2 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval2
+; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; OABI: add sp, sp, #12
+; OABI: add sp, sp, #12
+}
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
new file mode 100644
index 0000000..293d229
--- /dev/null
+++ b/test/CodeGen/ARM/vbits.ll
@@ -0,0 +1,507 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_andi8:
+;CHECK: vand
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_andi16:
+;CHECK: vand
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_andi32:
+;CHECK: vand
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_andi64:
+;CHECK: vand
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = and <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_andQi8:
+;CHECK: vand
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_andQi16:
+;CHECK: vand
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_andQi32:
+;CHECK: vand
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_andQi64:
+;CHECK: vand
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = and <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_bici8:
+;CHECK: vbic
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_bici16:
+;CHECK: vbic
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_bici32:
+;CHECK: vbic
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = and <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_bici64:
+;CHECK: vbic
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = and <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_bicQi8:
+;CHECK: vbic
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_bicQi16:
+;CHECK: vbic
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_bicQi32:
+;CHECK: vbic
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = and <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_bicQi64:
+;CHECK: vbic
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = and <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
+;CHECK: v_mvni8:
+;CHECK: vmvn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
+;CHECK: v_mvni16:
+;CHECK: vmvn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
+;CHECK: v_mvni32:
+;CHECK: vmvn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
+;CHECK: v_mvni64:
+;CHECK: vmvn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
+;CHECK: v_mvnQi8:
+;CHECK: vmvn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
+;CHECK: v_mvnQi16:
+;CHECK: vmvn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
+;CHECK: v_mvnQi32:
+;CHECK: vmvn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
+;CHECK: v_mvnQi64:
+;CHECK: vmvn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orri8:
+;CHECK: vorr
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = or <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orri16:
+;CHECK: vorr
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = or <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orri32:
+;CHECK: vorr
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = or <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orri64:
+;CHECK: vorr
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = or <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_orrQi8:
+;CHECK: vorr
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = or <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = or <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = or <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = or <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = or <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = or <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = or <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = or <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+ ret <4 x i32> %tmp5
+}
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
new file mode 100644
index 0000000..9f3bb4e
--- /dev/null
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vbsl
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = and <8 x i8> %tmp1, %tmp2
+ %tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp6 = and <8 x i8> %tmp5, %tmp3
+ %tmp7 = or <8 x i8> %tmp4, %tmp6
+ ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vbsl
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = and <4 x i16> %tmp1, %tmp2
+ %tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp6 = and <4 x i16> %tmp5, %tmp3
+ %tmp7 = or <4 x i16> %tmp4, %tmp6
+ ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vbsl
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = and <2 x i32> %tmp1, %tmp2
+ %tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+ %tmp6 = and <2 x i32> %tmp5, %tmp3
+ %tmp7 = or <2 x i32> %tmp4, %tmp6
+ ret <2 x i32> %tmp7
+}
+
+define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vbsl
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = load <1 x i64>* %C
+ %tmp4 = and <1 x i64> %tmp1, %tmp2
+ %tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
+ %tmp6 = and <1 x i64> %tmp5, %tmp3
+ %tmp7 = or <1 x i64> %tmp4, %tmp6
+ ret <1 x i64> %tmp7
+}
+
+define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vbsl
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = load <16 x i8>* %C
+ %tmp4 = and <16 x i8> %tmp1, %tmp2
+ %tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp6 = and <16 x i8> %tmp5, %tmp3
+ %tmp7 = or <16 x i8> %tmp4, %tmp6
+ ret <16 x i8> %tmp7
+}
+
+define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vbsl
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = and <8 x i16> %tmp1, %tmp2
+ %tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp6 = and <8 x i16> %tmp5, %tmp3
+ %tmp7 = or <8 x i16> %tmp4, %tmp6
+ ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vbsl
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = and <4 x i32> %tmp1, %tmp2
+ %tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp6 = and <4 x i32> %tmp5, %tmp3
+ %tmp7 = or <4 x i32> %tmp4, %tmp6
+ ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vbsl
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = and <2 x i64> %tmp1, %tmp2
+ %tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+ %tmp6 = and <2 x i64> %tmp5, %tmp3
+ %tmp7 = or <2 x i64> %tmp4, %tmp6
+ ret <2 x i64> %tmp7
+}
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
new file mode 100644
index 0000000..e478751
--- /dev/null
+++ b/test/CodeGen/ARM/vceq.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vceqi8:
+;CHECK: vceq.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vceqi16:
+;CHECK: vceq.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vceqi32:
+;CHECK: vceq.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vceqf32:
+;CHECK: vceq.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vceqQi8:
+;CHECK: vceq.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vceqQi16:
+;CHECK: vceq.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vceqQi32:
+;CHECK: vceq.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vceqQf32:
+;CHECK: vceq.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
new file mode 100644
index 0000000..2c16111
--- /dev/null
+++ b/test/CodeGen/ARM/vcge.ll
@@ -0,0 +1,162 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcges8:
+;CHECK: vcge.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcges16:
+;CHECK: vcge.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcges32:
+;CHECK: vcge.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgeu8:
+;CHECK: vcge.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgeu16:
+;CHECK: vcge.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgeu32:
+;CHECK: vcge.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgef32:
+;CHECK: vcge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQs8:
+;CHECK: vcge.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQs16:
+;CHECK: vcge.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQs32:
+;CHECK: vcge.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQu8:
+;CHECK: vcge.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQu16:
+;CHECK: vcge.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQu32:
+;CHECK: vcge.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgeQf32:
+;CHECK: vcge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
new file mode 100644
index 0000000..6b11ba5
--- /dev/null
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -0,0 +1,162 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgts8:
+;CHECK: vcgt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgts16:
+;CHECK: vcgt.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgts32:
+;CHECK: vcgt.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgtu8:
+;CHECK: vcgt.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgtu16:
+;CHECK: vcgt.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgtu32:
+;CHECK: vcgt.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgtf32:
+;CHECK: vcgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQs8:
+;CHECK: vcgt.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQs16:
+;CHECK: vcgt.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQs32:
+;CHECK: vcgt.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQu8:
+;CHECK: vcgt.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQu16:
+;CHECK: vcgt.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQu32:
+;CHECK: vcgt.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgtQf32:
+;CHECK: vcgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
new file mode 100644
index 0000000..450f90d
--- /dev/null
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
new file mode 100644
index 0000000..e673305
--- /dev/null
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %tmp3
+}
+
+define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %tmp3
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
new file mode 100644
index 0000000..f4cc536
--- /dev/null
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -0,0 +1,140 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
+ ret <2 x float> %tmp2
+}
+
+define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
+ ret <4 x float> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
new file mode 100644
index 0000000..c9a68ca
--- /dev/null
+++ b/test/CodeGen/ARM/vdup.ll
@@ -0,0 +1,269 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK: v_dup8:
+;CHECK: vdup.8
+ %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
+ ret <8 x i8> %tmp8
+}
+
+define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK: v_dup16:
+;CHECK: vdup.16
+ %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK: v_dup32:
+;CHECK: vdup.32
+ %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK: v_dupfloat:
+;CHECK: vdup.32
+ %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK: v_dupQ8:
+;CHECK: vdup.8
+ %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
+ ret <16 x i8> %tmp16
+}
+
+define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK: v_dupQ16:
+;CHECK: vdup.16
+ %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
+ ret <8 x i16> %tmp8
+}
+
+define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK: v_dupQ32:
+;CHECK: vdup.32
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK: v_dupQfloat:
+;CHECK: vdup.32
+ %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
+ %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
+ %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
+ ret <4 x float> %tmp4
+}
+
+; Check to make sure it works with shuffles, too.
+
+define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK: v_shuffledup8:
+;CHECK: vdup.8
+ %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK: v_shuffledup16:
+;CHECK: vdup.16
+ %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK: v_shuffledup32:
+;CHECK: vdup.32
+ %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK: v_shuffledupfloat:
+;CHECK: vdup.32
+ %tmp1 = insertelement <2 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK: v_shuffledupQ8:
+;CHECK: vdup.8
+ %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK: v_shuffledupQ16:
+;CHECK: vdup.16
+ %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK: v_shuffledupQ32:
+;CHECK: vdup.32
+ %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK: v_shuffledupQfloat:
+;CHECK: vdup.32
+ %tmp1 = insertelement <4 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupQfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %0
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
new file mode 100644
index 0000000..20d953b
--- /dev/null
+++ b/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextRd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: test_vextd16:
+;CHECK: vext
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i16> %tmp3
+}
+
+define arm_apcscc <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: test_vextq32:
+;CHECK: vext
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
new file mode 100644
index 0000000..6946d02
--- /dev/null
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; This tests fcmp operations that do not map directly to NEON instructions.
+
+; une is implemented with VCEQ/VMVN
+define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; olt is implemented with VCGT
+define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ole is implemented with VCGE
+define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; uge is implemented with VCGT/VMVN
+define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ule is implemented with VCGT/VMVN
+define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ugt is implemented with VCGE/VMVN
+define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ult is implemented with VCGE/VMVN
+define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ueq is implemented with VCGT/VCGT/VORR/VMVN
+define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; one is implemented with VCGT/VCGT/VORR
+define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; uno is implemented with VCGT/VCGE/VORR/VMVN
+define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+; ord is implemented with VCGT/VCGE/VORR
+define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
new file mode 100644
index 0000000..44a44af
--- /dev/null
+++ b/test/CodeGen/ARM/vfp.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+
+define void @test(float* %P, double* %D) {
+ %A = load float* %P ; <float> [#uses=1]
+ %B = load double* %D ; <double> [#uses=1]
+ store float %A, float* %P
+ store double %B, double* %D
+ ret void
+}
+
+declare float @fabsf(float)
+
+declare double @fabs(double)
+
+define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
+ %a = load float* %P ; <float> [#uses=1]
+;CHECK: vabs.f32
+ %b = call float @fabsf( float %a ) ; <float> [#uses=1]
+ store float %b, float* %P
+ %A = load double* %D ; <double> [#uses=1]
+;CHECK: vabs.f64
+ %B = call double @fabs( double %A ) ; <double> [#uses=1]
+ store double %B, double* %D
+ ret void
+}
+
+define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
+ %a = load float* %P ; <float> [#uses=2]
+ %b = fadd float %a, %a ; <float> [#uses=1]
+ store float %b, float* %P
+ %A = load double* %D ; <double> [#uses=2]
+ %B = fadd double %A, %A ; <double> [#uses=1]
+ store double %B, double* %D
+ ret void
+}
+
+define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
+ %a = load float* %P ; <float> [#uses=1]
+;CHECK: vcvt.f64.f32
+ %b = fpext float %a to double ; <double> [#uses=1]
+ %A = load double* %D ; <double> [#uses=1]
+;CHECK: vcvt.f32.f64
+ %B = fptrunc double %A to float ; <float> [#uses=1]
+ store double %b, double* %D
+ store float %B, float* %P
+ ret void
+}
+
+define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
+ %a1 = load float* %P1 ; <float> [#uses=1]
+ %a2 = load float* %P2 ; <float> [#uses=1]
+ %a3 = load float* %P3 ; <float> [#uses=1]
+;CHECK: vnmls.f32
+ %X = fmul float %a1, %a2 ; <float> [#uses=1]
+ %Y = fsub float %X, %a3 ; <float> [#uses=1]
+ store float %Y, float* %P1
+ ret void
+}
+
+define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
+ %a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: vcvt.s32.f32
+ %b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
+ ret i32 %b1
+}
+
+define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
+ %a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: vcvt.u32.f32
+ %b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
+ ret i32 %b1
+}
+
+define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
+ %a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: vcvt.s32.f64
+ %b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
+ ret i32 %b1
+}
+
+define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
+ %a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: vcvt.u32.f64
+ %b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
+ ret i32 %b1
+}
+
+define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: vcvt.f64.u32
+ %b1 = uitofp i32 %X to double ; <double> [#uses=1]
+ store double %b1, double* %P1
+ ret void
+}
+
+define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: vcvt.f64.u32
+ %b1 = uitofp i8 %X to double ; <double> [#uses=1]
+ store double %b1, double* %P1
+ ret void
+}
+
+define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
+entry:
+ %tmp = load float* %glob ; <float> [#uses=2]
+ %tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
+ %tmp4 = load float* %tmp3 ; <float> [#uses=2]
+ %tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
+ %tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
+ %tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
+ br i1 %tmp6, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+cond_false: ; preds = %entry
+ %tmp7 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i1 @llvm.isunordered.f32(float, float)
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
+entry:
+ %tmp = load float* %glob ; <float> [#uses=1]
+;CHECK: vcmpe.f32
+ %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
+ br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp.upgrd.4 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+cond_false: ; preds = %entry
+ %tmp1 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
new file mode 100644
index 0000000..5dd87d6
--- /dev/null
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -0,0 +1,212 @@
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 1
+ %tmp3 = sext i8 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = extractelement <4 x i16> %tmp1, i32 1
+ %tmp3 = sext i16 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 1
+ %tmp3 = zext i8 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = extractelement <4 x i16> %tmp1, i32 1
+ %tmp3 = zext i16 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+; Do a vector add to keep the extraction from being done directly from memory.
+define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = add <2 x i32> %tmp1, %tmp1
+ %tmp3 = extractelement <2 x i32> %tmp2, i32 1
+ ret i32 %tmp3
+}
+
+define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = extractelement <16 x i8> %tmp1, i32 1
+ %tmp3 = sext i8 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = extractelement <8 x i16> %tmp1, i32 1
+ %tmp3 = sext i16 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = extractelement <16 x i8> %tmp1, i32 1
+ %tmp3 = zext i8 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = extractelement <8 x i16> %tmp1, i32 1
+ %tmp3 = zext i16 %tmp2 to i32
+ ret i32 %tmp3
+}
+
+; Do a vector add to keep the extraction from being done directly from memory.
+define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = add <4 x i32> %tmp1, %tmp1
+ %tmp3 = extractelement <4 x i32> %tmp2, i32 1
+ ret i32 %tmp3
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+ ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: vmov.f32
+;CHECK: vmov.f32
+entry:
+ %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+ ret <2 x float> %0
+}
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
new file mode 100644
index 0000000..379e062
--- /dev/null
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -0,0 +1,249 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
new file mode 100644
index 0000000..0f0d027
--- /dev/null
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
new file mode 100644
index 0000000..2d8cb89
--- /dev/null
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; This tests icmp operations that do not map directly to NEON instructions.
+; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult)
+; and less-than-or-equal (le/ule) are implemented by swapping the arguments
+; to VCGT and VCGE. Test all the operand types for not-equal but only sample
+; the other operations.
+
+define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcnei8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcnei16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcnei32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcneQi8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcneQi16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcneQi32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcltQs8:
+;CHECK: vcgt.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcles16:
+;CHECK: vcge.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcltu16:
+;CHECK: vcgt.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcleQu32:
+;CHECK: vcge.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
new file mode 100644
index 0000000..f5383aa
--- /dev/null
+++ b/test/CodeGen/ARM/vld1.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+;CHECK: vld1i8:
+;CHECK: vld1.8
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
+ ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+;CHECK: vld1i16:
+;CHECK: vld1.16
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
+ ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+;CHECK: vld1i32:
+;CHECK: vld1.32
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
+ ret <2 x i32> %tmp1
+}
+
+define <2 x float> @vld1f(float* %A) nounwind {
+;CHECK: vld1f:
+;CHECK: vld1.32
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
+ ret <2 x float> %tmp1
+}
+
+define <1 x i64> @vld1i64(i64* %A) nounwind {
+;CHECK: vld1i64:
+;CHECK: vld1.64
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
+ ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+;CHECK: vld1Qi8:
+;CHECK: vld1.8
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+;CHECK: vld1Qi16:
+;CHECK: vld1.16
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
+ ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+;CHECK: vld1Qi32:
+;CHECK: vld1.32
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
+ ret <4 x i32> %tmp1
+}
+
+define <4 x float> @vld1Qf(float* %A) nounwind {
+;CHECK: vld1Qf:
+;CHECK: vld1.32
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
+ ret <4 x float> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+;CHECK: vld1Qi64:
+;CHECK: vld1.64
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
+ ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
new file mode 100644
index 0000000..23f7d2c
--- /dev/null
+++ b/test/CodeGen/ARM/vld2.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+;CHECK: vld2i8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+;CHECK: vld2i16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+;CHECK: vld2i32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld2f(float* %A) nounwind {
+;CHECK: vld2f:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld2i64(i64* %A) nounwind {
+;CHECK: vld2i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+;CHECK: vld2Qi8:
+;CHECK: vld2.8
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+;CHECK: vld2Qi16:
+;CHECK: vld2.16
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+;CHECK: vld2Qi32:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld2Qf(float* %A) nounwind {
+;CHECK: vld2Qf:
+;CHECK: vld2.32
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
new file mode 100644
index 0000000..207dc6a2
--- /dev/null
+++ b/test/CodeGen/ARM/vld3.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3i8(i8* %A) nounwind {
+;CHECK: vld3i8:
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+;CHECK: vld3i16:
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+;CHECK: vld3i32:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld3f(float* %A) nounwind {
+;CHECK: vld3f:
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+;CHECK: vld3i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+;CHECK: vld3Qi8:
+;CHECK: vld3.8
+;CHECK: vld3.8
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld3Qi16(i16* %A) nounwind {
+;CHECK: vld3Qi16:
+;CHECK: vld3.16
+;CHECK: vld3.16
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld3Qi32(i32* %A) nounwind {
+;CHECK: vld3Qi32:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld3Qf(float* %A) nounwind {
+;CHECK: vld3Qf:
+;CHECK: vld3.32
+;CHECK: vld3.32
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
new file mode 100644
index 0000000..0624f29
--- /dev/null
+++ b/test/CodeGen/ARM/vld4.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+;CHECK: vld4i8:
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+;CHECK: vld4i16:
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+;CHECK: vld4i32:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld4f(float* %A) nounwind {
+;CHECK: vld4f:
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x float> %tmp2, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+;CHECK: vld4i64:
+;CHECK: vld1.64
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A)
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+;CHECK: vld4Qi8:
+;CHECK: vld4.8
+;CHECK: vld4.8
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A)
+ %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld4Qi16(i16* %A) nounwind {
+;CHECK: vld4Qi16:
+;CHECK: vld4.16
+;CHECK: vld4.16
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A)
+ %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld4Qi32(i32* %A) nounwind {
+;CHECK: vld4Qi32:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A)
+ %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld4Qf(float* %A) nounwind {
+;CHECK: vld4Qf:
+;CHECK: vld4.32
+;CHECK: vld4.32
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A)
+ %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
+ %tmp4 = add <4 x float> %tmp2, %tmp3
+ ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
new file mode 100644
index 0000000..53881a3
--- /dev/null
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld2lanei8:
+;CHECK: vld2.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld2lanei16:
+;CHECK: vld2.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32:
+;CHECK: vld2.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld2lanef:
+;CHECK: vld2.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld2laneQi16:
+;CHECK: vld2.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld2laneQi32:
+;CHECK: vld2.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld2laneQf:
+;CHECK: vld2.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld3lanei8:
+;CHECK: vld3.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i8> %tmp3, %tmp4
+ %tmp7 = add <8 x i8> %tmp5, %tmp6
+ ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld3lanei16:
+;CHECK: vld3.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i16> %tmp3, %tmp4
+ %tmp7 = add <4 x i16> %tmp5, %tmp6
+ ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld3lanei32:
+;CHECK: vld3.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x i32> %tmp3, %tmp4
+ %tmp7 = add <2 x i32> %tmp5, %tmp6
+ ret <2 x i32> %tmp7
+}
+
+define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld3lanef:
+;CHECK: vld3.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x float> %tmp3, %tmp4
+ %tmp7 = add <2 x float> %tmp5, %tmp6
+ ret <2 x float> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld3laneQi16:
+;CHECK: vld3.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+ ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld3laneQi32:
+;CHECK: vld3.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i32> %tmp3, %tmp4
+ %tmp7 = add <4 x i32> %tmp5, %tmp6
+ ret <4 x i32> %tmp7
+}
+
+define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld3laneQf:
+;CHECK: vld3.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x float> %tmp3, %tmp4
+ %tmp7 = add <4 x float> %tmp5, %tmp6
+ ret <4 x float> %tmp7
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8:
+;CHECK: vld4.8
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+ ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld4lanei16:
+;CHECK: vld4.16
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i16> %tmp3, %tmp4
+ %tmp8 = add <4 x i16> %tmp5, %tmp6
+ %tmp9 = add <4 x i16> %tmp7, %tmp8
+ ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld4lanei32:
+;CHECK: vld4.32
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x i32> %tmp3, %tmp4
+ %tmp8 = add <2 x i32> %tmp5, %tmp6
+ %tmp9 = add <2 x i32> %tmp7, %tmp8
+ ret <2 x i32> %tmp9
+}
+
+define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld4lanef:
+;CHECK: vld4.32
+ %tmp1 = load <2 x float>* %B
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x float> %tmp3, %tmp4
+ %tmp8 = add <2 x float> %tmp5, %tmp6
+ %tmp9 = add <2 x float> %tmp7, %tmp8
+ ret <2 x float> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld4laneQi16:
+;CHECK: vld4.16
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i16> %tmp3, %tmp4
+ %tmp8 = add <8 x i16> %tmp5, %tmp6
+ %tmp9 = add <8 x i16> %tmp7, %tmp8
+ ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld4laneQi32:
+;CHECK: vld4.32
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i32> %tmp3, %tmp4
+ %tmp8 = add <4 x i32> %tmp5, %tmp6
+ %tmp9 = add <4 x i32> %tmp7, %tmp8
+ ret <4 x i32> %tmp9
+}
+
+define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld4laneQf:
+;CHECK: vld4.32
+ %tmp1 = load <4 x float>* %B
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x float> %tmp3, %tmp4
+ %tmp8 = add <4 x float> %tmp5, %tmp6
+ %tmp9 = add <4 x float> %tmp7, %tmp8
+ ret <4 x float> %tmp9
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
new file mode 100644
index 0000000..e3527c1
--- /dev/null
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -0,0 +1,293 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmins8:
+;CHECK: vmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmins16:
+;CHECK: vmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmins32:
+;CHECK: vmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vminu8:
+;CHECK: vmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vminu16:
+;CHECK: vmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vminu32:
+;CHECK: vmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vminf32:
+;CHECK: vmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQs8:
+;CHECK: vmin.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQs16:
+;CHECK: vmin.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQs32:
+;CHECK: vmin.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQu8:
+;CHECK: vmin.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQu16:
+;CHECK: vmin.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQu32:
+;CHECK: vmin.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vminQf32:
+;CHECK: vmin.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxs8:
+;CHECK: vmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxs16:
+;CHECK: vmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxs32:
+;CHECK: vmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxu8:
+;CHECK: vmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxu16:
+;CHECK: vmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxu32:
+;CHECK: vmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmaxf32:
+;CHECK: vmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQs8:
+;CHECK: vmax.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQs16:
+;CHECK: vmax.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQs32:
+;CHECK: vmax.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQu8:
+;CHECK: vmax.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQu16:
+;CHECK: vmax.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
new file mode 100644
index 0000000..8405218
--- /dev/null
+++ b/test/CodeGen/ARM/vmla.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlai8:
+;CHECK: vmla.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = mul <8 x i8> %tmp2, %tmp3
+ %tmp5 = add <8 x i8> %tmp1, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlai16:
+;CHECK: vmla.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = mul <4 x i16> %tmp2, %tmp3
+ %tmp5 = add <4 x i16> %tmp1, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlai32:
+;CHECK: vmla.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = mul <2 x i32> %tmp2, %tmp3
+ %tmp5 = add <2 x i32> %tmp1, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlaf32:
+;CHECK: vmla.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = mul <2 x float> %tmp2, %tmp3
+ %tmp5 = add <2 x float> %tmp1, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlaQi8:
+;CHECK: vmla.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = load <16 x i8>* %C
+ %tmp4 = mul <16 x i8> %tmp2, %tmp3
+ %tmp5 = add <16 x i8> %tmp1, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlaQi16:
+;CHECK: vmla.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = mul <8 x i16> %tmp2, %tmp3
+ %tmp5 = add <8 x i16> %tmp1, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlaQi32:
+;CHECK: vmla.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = mul <4 x i32> %tmp2, %tmp3
+ %tmp5 = add <4 x i32> %tmp1, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlaQf32:
+;CHECK: vmla.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = mul <4 x float> %tmp2, %tmp3
+ %tmp5 = add <4 x float> %tmp1, %tmp4
+ ret <4 x float> %tmp5
+}
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
new file mode 100644
index 0000000..c89552e
--- /dev/null
+++ b/test/CodeGen/ARM/vmls.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlsi8:
+;CHECK: vmls.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = mul <8 x i8> %tmp2, %tmp3
+ %tmp5 = sub <8 x i8> %tmp1, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsi16:
+;CHECK: vmls.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = mul <4 x i16> %tmp2, %tmp3
+ %tmp5 = sub <4 x i16> %tmp1, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsi32:
+;CHECK: vmls.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = mul <2 x i32> %tmp2, %tmp3
+ %tmp5 = sub <2 x i32> %tmp1, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlsf32:
+;CHECK: vmls.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = mul <2 x float> %tmp2, %tmp3
+ %tmp5 = sub <2 x float> %tmp1, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlsQi8:
+;CHECK: vmls.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = load <16 x i8>* %C
+ %tmp4 = mul <16 x i8> %tmp2, %tmp3
+ %tmp5 = sub <16 x i8> %tmp1, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlsQi16:
+;CHECK: vmls.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = mul <8 x i16> %tmp2, %tmp3
+ %tmp5 = sub <8 x i16> %tmp1, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlsQi32:
+;CHECK: vmls.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = mul <4 x i32> %tmp2, %tmp3
+ %tmp5 = sub <4 x i32> %tmp1, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlsQf32:
+;CHECK: vmls.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = mul <4 x float> %tmp2, %tmp3
+ %tmp5 = sub <4 x float> %tmp1, %tmp4
+ ret <4 x float> %tmp5
+}
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
new file mode 100644
index 0000000..e4368d6
--- /dev/null
+++ b/test/CodeGen/ARM/vmov.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_movi8() nounwind {
+;CHECK: v_movi8:
+;CHECK: vmov.i8
+ ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+}
+
+define <4 x i16> @v_movi16a() nounwind {
+;CHECK: v_movi16a:
+;CHECK: vmov.i16
+ ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
+}
+
+; 0x1000 = 4096
+define <4 x i16> @v_movi16b() nounwind {
+;CHECK: v_movi16b:
+;CHECK: vmov.i16
+ ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
+}
+
+define <2 x i32> @v_movi32a() nounwind {
+;CHECK: v_movi32a:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 32, i32 32 >
+}
+
+; 0x2000 = 8192
+define <2 x i32> @v_movi32b() nounwind {
+;CHECK: v_movi32b:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 8192, i32 8192 >
+}
+
+; 0x200000 = 2097152
+define <2 x i32> @v_movi32c() nounwind {
+;CHECK: v_movi32c:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 2097152, i32 2097152 >
+}
+
+; 0x20000000 = 536870912
+define <2 x i32> @v_movi32d() nounwind {
+;CHECK: v_movi32d:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 536870912, i32 536870912 >
+}
+
+; 0x20ff = 8447
+define <2 x i32> @v_movi32e() nounwind {
+;CHECK: v_movi32e:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 8447, i32 8447 >
+}
+
+; 0x20ffff = 2162687
+define <2 x i32> @v_movi32f() nounwind {
+;CHECK: v_movi32f:
+;CHECK: vmov.i32
+ ret <2 x i32> < i32 2162687, i32 2162687 >
+}
+
+; 0xff0000ff0000ffff = 18374687574888349695
+define <1 x i64> @v_movi64() nounwind {
+;CHECK: v_movi64:
+;CHECK: vmov.i64
+ ret <1 x i64> < i64 18374687574888349695 >
+}
+
+define <16 x i8> @v_movQi8() nounwind {
+;CHECK: v_movQi8:
+;CHECK: vmov.i8
+ ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+}
+
+define <8 x i16> @v_movQi16a() nounwind {
+;CHECK: v_movQi16a:
+;CHECK: vmov.i16
+ ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+}
+
+; 0x1000 = 4096
+define <8 x i16> @v_movQi16b() nounwind {
+;CHECK: v_movQi16b:
+;CHECK: vmov.i16
+ ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
+}
+
+define <4 x i32> @v_movQi32a() nounwind {
+;CHECK: v_movQi32a:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
+}
+
+; 0x2000 = 8192
+define <4 x i32> @v_movQi32b() nounwind {
+;CHECK: v_movQi32b:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
+}
+
+; 0x200000 = 2097152
+define <4 x i32> @v_movQi32c() nounwind {
+;CHECK: v_movQi32c:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
+}
+
+; 0x20000000 = 536870912
+define <4 x i32> @v_movQi32d() nounwind {
+;CHECK: v_movQi32d:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
+}
+
+; 0x20ff = 8447
+define <4 x i32> @v_movQi32e() nounwind {
+;CHECK: v_movQi32e:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
+}
+
+; 0x20ffff = 2162687
+define <4 x i32> @v_movQi32f() nounwind {
+;CHECK: v_movQi32f:
+;CHECK: vmov.i32
+ ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
+}
+
+; 0xff0000ff0000ffff = 18374687574888349695
+define <2 x i64> @v_movQi64() nounwind {
+;CHECK: v_movQi64:
+;CHECK: vmov.i64
+ ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
+}
+
+; Check for correct assembler printing for immediate values.
+%struct.int8x8_t = type { <8 x i8> }
+define arm_apcscc void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupn128:
+;CHECK: vmov.i8 d0, #0x80
+ %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
+ ret void
+}
+
+define arm_apcscc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
+entry:
+;CHECK: vdupnneg75:
+;CHECK: vmov.i8 d0, #0xB5
+ %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
+ ret void
+}
+
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
new file mode 100644
index 0000000..325da5d
--- /dev/null
+++ b/test/CodeGen/ARM/vmul.ll
@@ -0,0 +1,257 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmuli8:
+;CHECK: vmul.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = mul <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmuli16:
+;CHECK: vmul.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = mul <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmuli32:
+;CHECK: vmul.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = mul <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmulf32:
+;CHECK: vmul.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = mul <2 x float> %tmp1, %tmp2
+ ret <2 x float> %tmp3
+}
+
+define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulp8:
+;CHECK: vmul.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQi8:
+;CHECK: vmul.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = mul <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmulQi16:
+;CHECK: vmul.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = mul <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmulQi32:
+;CHECK: vmul.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = mul <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmulQf32:
+;CHECK: vmul.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = mul <4 x float> %tmp1, %tmp2
+ ret <4 x float> %tmp3
+}
+
+define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQp8:
+;CHECK: vmul.p8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+ %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
+ ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$
+ %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$
+ %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$
+ %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
new file mode 100644
index 0000000..7764e87
--- /dev/null
+++ b/test/CodeGen/ARM/vneg.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vnegs8:
+;CHECK: vneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = sub <8 x i8> zeroinitializer, %tmp1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vnegs16:
+;CHECK: vneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = sub <4 x i16> zeroinitializer, %tmp1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vnegs32:
+;CHECK: vneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = sub <2 x i32> zeroinitializer, %tmp1
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
+;CHECK: vnegf32:
+;CHECK: vneg.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vnegQs8:
+;CHECK: vneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = sub <16 x i8> zeroinitializer, %tmp1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vnegQs16:
+;CHECK: vneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = sub <8 x i16> zeroinitializer, %tmp1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vnegQs32:
+;CHECK: vneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = sub <4 x i32> zeroinitializer, %tmp1
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
+;CHECK: vnegQf32:
+;CHECK: vneg.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
new file mode 100644
index 0000000..7296e936
--- /dev/null
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadals8:
+;CHECK: vpadal.s8
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadals16:
+;CHECK: vpadal.s16
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadals32:
+;CHECK: vpadal.s32
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadalu8:
+;CHECK: vpadal.u8
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadalu16:
+;CHECK: vpadal.u16
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadalu32:
+;CHECK: vpadal.u32
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQs8:
+;CHECK: vpadal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQs16:
+;CHECK: vpadal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQs32:
+;CHECK: vpadal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQu8:
+;CHECK: vpadal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQu16:
+;CHECK: vpadal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQu32:
+;CHECK: vpadal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
new file mode 100644
index 0000000..2125573
--- /dev/null
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpaddi8:
+;CHECK: vpadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpaddi16:
+;CHECK: vpadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpaddi32:
+;CHECK: vpadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpaddf32:
+;CHECK: vpadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
new file mode 100644
index 0000000..b75bcc9
--- /dev/null
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
new file mode 100644
index 0000000..a1669b6
--- /dev/null
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqadds8:
+;CHECK: vqadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqadds16:
+;CHECK: vqadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqadds32:
+;CHECK: vqadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqadds64:
+;CHECK: vqadd.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqaddu8:
+;CHECK: vqadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqaddu16:
+;CHECK: vqadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqaddu32:
+;CHECK: vqadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqaddu64:
+;CHECK: vqadd.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQs8:
+;CHECK: vqadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQs16:
+;CHECK: vqadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQs32:
+;CHECK: vqadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQs64:
+;CHECK: vqadd.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQu8:
+;CHECK: vqadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQu16:
+;CHECK: vqadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQu32:
+;CHECK: vqadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQu64:
+;CHECK: vqadd.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
new file mode 100644
index 0000000..8dcc7f7
--- /dev/null
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
new file mode 100644
index 0000000..e4d29a3
--- /dev/null
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -0,0 +1,531 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshls8:
+;CHECK: vqshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshls16:
+;CHECK: vqshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshls32:
+;CHECK: vqshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshls64:
+;CHECK: vqshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshlu8:
+;CHECK: vqshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshlu16:
+;CHECK: vqshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshlu32:
+;CHECK: vqshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshlu64:
+;CHECK: vqshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQs8:
+;CHECK: vqshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQs16:
+;CHECK: vqshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQs32:
+;CHECK: vqshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQs64:
+;CHECK: vqshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQu8:
+;CHECK: vqshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQu16:
+;CHECK: vqshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQu32:
+;CHECK: vqshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQu64:
+;CHECK: vqshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshls_n8:
+;CHECK: vqshl.s8{{.*#7}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshls_n16:
+;CHECK: vqshl.s16{{.*#15}}
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshls_n32:
+;CHECK: vqshl.s32{{.*#31}}
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshls_n64:
+;CHECK: vqshl.s64{{.*#63}}
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlu_n8:
+;CHECK: vqshl.u8{{.*#7}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlu_n16:
+;CHECK: vqshl.u16{{.*#15}}
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlu_n32:
+;CHECK: vqshl.u32{{.*#31}}
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlu_n64:
+;CHECK: vqshl.u64{{.*#63}}
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlsu_n8:
+;CHECK: vqshlu.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlsu_n16:
+;CHECK: vqshlu.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlsu_n32:
+;CHECK: vqshlu.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlsu_n64:
+;CHECK: vqshlu.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQs_n8:
+;CHECK: vqshl.s8{{.*#7}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQs_n16:
+;CHECK: vqshl.s16{{.*#15}}
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQs_n32:
+;CHECK: vqshl.s32{{.*#31}}
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQs_n64:
+;CHECK: vqshl.s64{{.*#63}}
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQu_n8:
+;CHECK: vqshl.u8{{.*#7}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQu_n16:
+;CHECK: vqshl.u16{{.*#15}}
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQu_n32:
+;CHECK: vqshl.u32{{.*#31}}
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQu_n64:
+;CHECK: vqshl.u64{{.*#63}}
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQsu_n8:
+;CHECK: vqshlu.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQsu_n16:
+;CHECK: vqshlu.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQsu_n32:
+;CHECK: vqshlu.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQsu_n64:
+;CHECK: vqshlu.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshls8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshls16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshls32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshls64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshlu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshlu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshlu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshlu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQs8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQs16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQs32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQs64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
new file mode 100644
index 0000000..5da7943
--- /dev/null
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -0,0 +1,169 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrns8:
+;CHECK: vqshrn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrns16:
+;CHECK: vqshrn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrns32:
+;CHECK: vqshrn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrnu8:
+;CHECK: vqshrn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrnu16:
+;CHECK: vqshrn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrnu32:
+;CHECK: vqshrn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshruns8:
+;CHECK: vqshrun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshruns16:
+;CHECK: vqshrun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshruns32:
+;CHECK: vqshrun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrns8:
+;CHECK: vqrshrn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrns16:
+;CHECK: vqrshrn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrns32:
+;CHECK: vqrshrn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrnu8:
+;CHECK: vqrshrn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrnu16:
+;CHECK: vqrshrn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrnu32:
+;CHECK: vqrshrn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshruns8:
+;CHECK: vqrshrun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshruns16:
+;CHECK: vqrshrun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshruns32:
+;CHECK: vqrshrun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
new file mode 100644
index 0000000..4231fca
--- /dev/null
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubs8:
+;CHECK: vqsub.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubs16:
+;CHECK: vqsub.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubs32:
+;CHECK: vqsub.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubs64:
+;CHECK: vqsub.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubu8:
+;CHECK: vqsub.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubu16:
+;CHECK: vqsub.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubu32:
+;CHECK: vqsub.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubu64:
+;CHECK: vqsub.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQs8:
+;CHECK: vqsub.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQs16:
+;CHECK: vqsub.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQs32:
+;CHECK: vqsub.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQs64:
+;CHECK: vqsub.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQu8:
+;CHECK: vqsub.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQu16:
+;CHECK: vqsub.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQu32:
+;CHECK: vqsub.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQu64:
+;CHECK: vqsub.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
new file mode 100644
index 0000000..99989e9
--- /dev/null
+++ b/test/CodeGen/ARM/vrec.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 0000000..f0a04a4
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i8> %tmp2
+}
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
new file mode 100644
index 0000000..f3cbec7
--- /dev/null
+++ b/test/CodeGen/ARM/vshift.ll
@@ -0,0 +1,432 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shl <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shl <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shl <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = shl <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = shl <1 x i64> %tmp1, < i64 63 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shl <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shl <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shl <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = shl <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vlshru8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = lshr <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vlshru16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = lshr <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vlshru32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = lshr <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vlshru64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = lshr <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
+;CHECK: vlshri8:
+;CHECK: vshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
+;CHECK: vlshri16:
+;CHECK: vshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
+;CHECK: vlshri32:
+;CHECK: vshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
+;CHECK: vlshri64:
+;CHECK: vshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vlshrQu8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = lshr <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vlshrQu16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = lshr <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vlshrQu32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = lshr <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vlshrQu64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = lshr <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vlshrQi8:
+;CHECK: vshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vlshrQi16:
+;CHECK: vshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vlshrQi32:
+;CHECK: vshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vlshrQi64:
+;CHECK: vshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
+ ret <2 x i64> %tmp2
+}
+
+; Example that requires splitting and expanding a vector shift.
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+ %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %shr
+}
+
+define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vashrs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = ashr <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vashrs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = ashr <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vashrs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = ashr <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vashrs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = ashr <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
+;CHECK: vashri8:
+;CHECK: vshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
+;CHECK: vashri16:
+;CHECK: vshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
+;CHECK: vashri32:
+;CHECK: vshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
+;CHECK: vashri64:
+;CHECK: vshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vashrQs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = ashr <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vashrQs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = ashr <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vashrQs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = ashr <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vashrQs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = ashr <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vashrQi8:
+;CHECK: vshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vashrQi16:
+;CHECK: vshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vashrQi32:
+;CHECK: vshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vashrQi64:
+;CHECK: vshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
+ ret <2 x i64> %tmp2
+}
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
new file mode 100644
index 0000000..3a4f857
--- /dev/null
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsli8:
+;CHECK: vsli.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsli16:
+;CHECK: vsli.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsli32:
+;CHECK: vsli.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsli64:
+;CHECK: vsli.64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsliQ8:
+;CHECK: vsli.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsliQ16:
+;CHECK: vsli.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsliQ32:
+;CHECK: vsli.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsliQ64:
+;CHECK: vsli.64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsri8:
+;CHECK: vsri.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsri16:
+;CHECK: vsri.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsri32:
+;CHECK: vsri.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsri64:
+;CHECK: vsri.64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsriQ8:
+;CHECK: vsri.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsriQ16:
+;CHECK: vsri.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsriQ32:
+;CHECK: vsri.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsriQ64:
+;CHECK: vsri.64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
new file mode 100644
index 0000000..818e71b
--- /dev/null
+++ b/test/CodeGen/ARM/vshl.ll
@@ -0,0 +1,654 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshlu8:
+;CHECK: vshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshlu16:
+;CHECK: vshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshlu32:
+;CHECK: vshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshlu64:
+;CHECK: vshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQu8:
+;CHECK: vshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQu16:
+;CHECK: vshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQu32:
+;CHECK: vshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQu64:
+;CHECK: vshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+; For left shifts by immediates, the signedness is irrelevant.
+; Test a mix of both signed and unsigned intrinsics.
+
+define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
+ ret <2 x i64> %tmp2
+}
+
+; Right shift by immediate:
+
+define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vshrs8:
+;CHECK: vshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vshrs16:
+;CHECK: vshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vshrs32:
+;CHECK: vshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vshrs64:
+;CHECK: vshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
+;CHECK: vshru8:
+;CHECK: vshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
+;CHECK: vshru16:
+;CHECK: vshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
+;CHECK: vshru32:
+;CHECK: vshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
+;CHECK: vshru64:
+;CHECK: vshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQs8:
+;CHECK: vshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQs16:
+;CHECK: vshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQs32:
+;CHECK: vshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQs64:
+;CHECK: vshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQu8:
+;CHECK: vshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQu16:
+;CHECK: vshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQu32:
+;CHECK: vshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQu64:
+;CHECK: vshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
new file mode 100644
index 0000000..8e85b98
--- /dev/null
+++ b/test/CodeGen/ARM/vshll.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
+;CHECK: vshlls8:
+;CHECK: vshll.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
+;CHECK: vshlls16:
+;CHECK: vshll.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
+;CHECK: vshlls32:
+;CHECK: vshll.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
+;CHECK: vshllu8:
+;CHECK: vshll.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
+;CHECK: vshllu16:
+;CHECK: vshll.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
+;CHECK: vshllu32:
+;CHECK: vshll.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
+ ret <2 x i64> %tmp2
+}
+
+; The following tests use the maximum shift count, so the signedness is
+; irrelevant. Test both signed and unsigned versions.
+define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
+;CHECK: vshlli8:
+;CHECK: vshll.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
+;CHECK: vshlli16:
+;CHECK: vshll.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
+;CHECK: vshlli32:
+;CHECK: vshll.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
new file mode 100644
index 0000000..e2544f4
--- /dev/null
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vshrns8:
+;CHECK: vshrn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vshrns16:
+;CHECK: vshrn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vshrns32:
+;CHECK: vshrn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
new file mode 100644
index 0000000..acb672d
--- /dev/null
+++ b/test/CodeGen/ARM/vsra.ll
@@ -0,0 +1,341 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsras8:
+;CHECK: vsra.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsras16:
+;CHECK: vsra.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsras32:
+;CHECK: vsra.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 >
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsras64:
+;CHECK: vsra.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = ashr <1 x i64> %tmp2, < i64 64 >
+ %tmp4 = add <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQs8:
+;CHECK: vsra.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQs16:
+;CHECK: vsra.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQs32:
+;CHECK: vsra.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQs64:
+;CHECK: vsra.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 >
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsrau8:
+;CHECK: vsra.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsrau16:
+;CHECK: vsra.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsrau32:
+;CHECK: vsra.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 >
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsrau64:
+;CHECK: vsra.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = lshr <1 x i64> %tmp2, < i64 64 >
+ %tmp4 = add <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQu8:
+;CHECK: vsra.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQu16:
+;CHECK: vsra.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQu32:
+;CHECK: vsra.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQu64:
+;CHECK: vsra.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 >
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsras8:
+;CHECK: vrsra.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsras16:
+;CHECK: vrsra.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsras32:
+;CHECK: vrsra.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsras64:
+;CHECK: vrsra.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
+ %tmp4 = add <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsrau8:
+;CHECK: vrsra.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsrau16:
+;CHECK: vrsra.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsrau32:
+;CHECK: vrsra.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsrau64:
+;CHECK: vrsra.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
+ %tmp4 = add <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQs8:
+;CHECK: vrsra.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQs16:
+;CHECK: vrsra.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQs32:
+;CHECK: vrsra.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQs64:
+;CHECK: vrsra.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQu8:
+;CHECK: vrsra.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQu16:
+;CHECK: vrsra.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQu32:
+;CHECK: vrsra.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQu64:
+;CHECK: vrsra.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
new file mode 100644
index 0000000..602b124
--- /dev/null
+++ b/test/CodeGen/ARM/vst1.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1i8:
+;CHECK: vst1.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1i16:
+;CHECK: vst1.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1i32:
+;CHECK: vst1.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1f:
+;CHECK: vst1.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst1i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1Qi8:
+;CHECK: vst1.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1Qi16:
+;CHECK: vst1.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1Qi32:
+;CHECK: vst1.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1Qf:
+;CHECK: vst1.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+ ret void
+}
+
+define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
+;CHECK: vst1Qi64:
+;CHECK: vst1.64
+ %tmp1 = load <2 x i64>* %B
+ call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
new file mode 100644
index 0000000..17d6bee
--- /dev/null
+++ b/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2i8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2i16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2i32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2f:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst2Qi8:
+;CHECK: vst2.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2Qi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2Qi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2Qf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
new file mode 100644
index 0000000..a831a0c
--- /dev/null
+++ b/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3i8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3i16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3f:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst3i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst3Qi8:
+;CHECK: vst3.8
+;CHECK: vst3.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16:
+;CHECK: vst3.16
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3Qi32:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3Qf:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
new file mode 100644
index 0000000..d92c017
--- /dev/null
+++ b/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4i8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4i16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4i32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4f:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst4i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst4Qi8:
+;CHECK: vst4.8
+;CHECK: vst4.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4Qi16:
+;CHECK: vst4.16
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4Qi32:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
new file mode 100644
index 0000000..3bfb14f
--- /dev/null
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2lanei8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2lanei16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2lanei32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2lanef:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2laneQi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2laneQi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2laneQf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3lanei8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3lanei16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3lanei32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3lanef:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3laneQi16:
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ ret void
+}
+
+define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32:
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ ret void
+}
+
+define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3laneQf:
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+
+
+define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4lanei16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4lanei32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4lanef:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4laneQi16:
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ ret void
+}
+
+define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4laneQi32:
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4laneQf:
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
new file mode 100644
index 0000000..8f0055f
--- /dev/null
+++ b/test/CodeGen/ARM/vsub.ll
@@ -0,0 +1,277 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubi8:
+;CHECK: vsub.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sub <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubi16:
+;CHECK: vsub.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sub <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubi32:
+;CHECK: vsub.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = sub <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsubi64:
+;CHECK: vsub.i64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = sub <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vsubf32:
+;CHECK: vsub.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = sub <2 x float> %tmp1, %tmp2
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsubQi8:
+;CHECK: vsub.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = sub <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubQi16:
+;CHECK: vsub.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = sub <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubQi32:
+;CHECK: vsub.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = sub <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubQi64:
+;CHECK: vsub.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = sub <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vsubQf32:
+;CHECK: vsub.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = sub <4 x float> %tmp1, %tmp2
+ ret <4 x float> %tmp3
+}
+
+define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubhni16:
+;CHECK: vsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubhni32:
+;CHECK: vsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubhni64:
+;CHECK: vsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubls8:
+;CHECK: vsubl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubls16:
+;CHECK: vsubl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubls32:
+;CHECK: vsubl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsublu8:
+;CHECK: vsubl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsublu16:
+;CHECK: vsubl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsublu32:
+;CHECK: vsubl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubws8:
+;CHECK: vsubw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubws16:
+;CHECK: vsubw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubws32:
+;CHECK: vsubw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubwu8:
+;CHECK: vsubw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubwu16:
+;CHECK: vsubw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubwu32:
+;CHECK: vsubw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
new file mode 100644
index 0000000..9264987
--- /dev/null
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+
+define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtbl1:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
+;CHECK: vtbl2:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
+;CHECK: vtbl3:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
+;CHECK: vtbl4:
+;CHECK: vtbl.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx1:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx2:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = load <8 x i8>* %C
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx3:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = load <8 x i8>* %C
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx4:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = load <8 x i8>* %C
+ %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+ ret <8 x i8> %tmp8
+}
+
+declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
new file mode 100644
index 0000000..5122b09
--- /dev/null
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtrni16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtrni32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vtrnf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtrnQi8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtrnQi32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vtrnQf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
new file mode 100644
index 0000000..e531718
--- /dev/null
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vuzpi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vuzpQi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vuzpQi32:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vuzpQf:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
new file mode 100644
index 0000000..32f7e0d
--- /dev/null
+++ b/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vzipi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vzipQi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vzipQi32:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vzipQf:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
new file mode 100644
index 0000000..5ac4b8c
--- /dev/null
+++ b/test/CodeGen/ARM/weak.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm | grep .weak.*f
+; RUN: llc < %s -march=arm | grep .weak.*h
+
+define weak i32 @f() {
+entry:
+ unreachable
+}
+
+define void @g() {
+entry:
+ tail call void @h( )
+ ret void
+}
+
+declare extern_weak void @h()
+
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
new file mode 100644
index 0000000..cf327bb
--- /dev/null
+++ b/test/CodeGen/ARM/weak2.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | grep .weak
+
+define i32 @f(i32 %a) {
+entry:
+ %tmp2 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
+ %t.0 = select i1 %tmp2, i32 (...)* null, i32 (...)* @test_weak ; <i32 (...)*> [#uses=2]
+ %tmp5 = icmp eq i32 (...)* %t.0, null ; <i1> [#uses=1]
+ br i1 %tmp5, label %UnifiedReturnBlock, label %cond_true8
+
+cond_true8: ; preds = %entry
+ %tmp10 = tail call i32 (...)* %t.0( ) ; <i32> [#uses=1]
+ ret i32 %tmp10
+
+UnifiedReturnBlock: ; preds = %entry
+ ret i32 250
+}
+
+declare extern_weak i32 @test_weak(...)