Support for half in driver/runtime
Bug: 7342860
Add conversion functions, clamp to the C files. Add rsGetElementAt,
rsSetElementAt to ll*/allocation.ll
Change-Id: I71c93029699f9bbc79a5b04b51074e303981ae4b
diff --git a/driver/runtime/build_bc_lib_internal.mk b/driver/runtime/build_bc_lib_internal.mk
index a44461b..b024ecd 100644
--- a/driver/runtime/build_bc_lib_internal.mk
+++ b/driver/runtime/build_bc_lib_internal.mk
@@ -31,7 +31,7 @@
bc_clang := $(RS_DRIVER_CLANG_EXE)
endif
-bc_clang_cc1_cflags :=
+# Base cc1 flags for the bitcode runtime: use Clang's native half type and
+# allow half values as function arguments and return values.
+bc_clang_cc1_cflags := -fnative-half-type -fallow-half-arguments-and-returns
ifeq ($(BCC_RS_TRIPLE),armv7-none-linux-gnueabi)
# We need to pass the +long64 flag to the underlying version of Clang, since
# we are generating a library for use with Renderscript (64-bit long type,
diff --git a/driver/runtime/ll32/allocation.ll b/driver/runtime/ll32/allocation.ll
index 1ba8222..bab40c8 100644
--- a/driver/runtime/ll32/allocation.ll
+++ b/driver/runtime/ll32/allocation.ll
@@ -654,6 +654,68 @@
ret void
}
+!61 = !{!"half", !15}
+; Store a single half value into allocation %a.coerce at cell (%x, %y, %z).
+; rsOffset is passed 2 as its second argument -- presumably the element size
+; in bytes (a half occupies 2 bytes).
+define void @rsSetElementAtImpl_half([1 x i32] %a.coerce, half %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to half*                    ; view the raw cell as a half slot
+  store half %val, half* %dst, align 2, !tbaa !61
+  ret void
+}
+
+; Load and return the half value stored in allocation %a.coerce at (%x, %y, %z).
+; rsOffset is passed 2 as its second argument -- presumably the element size
+; in bytes (a half occupies 2 bytes).
+define half @rsGetElementAtImpl_half([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to half*                    ; view the raw cell as a half slot
+  %elem = load half, half* %src, align 2, !tbaa !61
+  ret half %elem
+}
+
+!62 = !{!"half2", !15}
+; Store a <2 x half> value into allocation %a.coerce at cell (%x, %y, %z).
+; rsOffset is passed 4 as its second argument -- presumably the element size
+; in bytes (two 2-byte halves).
+define void @rsSetElementAtImpl_half2([1 x i32] %a.coerce, <2 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to <2 x half>*              ; view the raw cell as a half2 slot
+  store <2 x half> %val, <2 x half>* %dst, align 4, !tbaa !62
+  ret void
+}
+
+; Load and return the <2 x half> stored in allocation %a.coerce at (%x, %y, %z).
+; rsOffset is passed 4 as its second argument -- presumably the element size
+; in bytes (two 2-byte halves).
+define <2 x half> @rsGetElementAtImpl_half2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <2 x half>*              ; view the raw cell as a half2 slot
+  %elem = load <2 x half>, <2 x half>* %src, align 4, !tbaa !62
+  ret <2 x half> %elem
+}
+
+!63 = !{!"half3", !15}
+; Store a <3 x half> value into allocation %a.coerce at cell (%x, %y, %z).
+; The value is widened to <4 x half> (fourth lane undef) and written as one
+; 8-byte vector; rsOffset is passed 8 as its second argument -- presumably the
+; element size in bytes.
+define void @rsSetElementAtImpl_half3([1 x i32] %a.coerce, <3 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %widened = shufflevector <3 x half> %val, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %dst = bitcast i8* %cell to <4 x half>*              ; the cell is written as a padded half4
+  store <4 x half> %widened, <4 x half>* %dst, align 8, !tbaa !63
+  ret void
+}
+
+; Load the half3 element of allocation %a.coerce at (%x, %y, %z) and write it
+; through the sret pointer %agg.result.
+; The element is read as a padded <4 x half> (8 bytes), matching what
+; rsSetElementAtImpl_half3 writes, so rsOffset must be given 8 -- the same
+; value the setter passes -- rather than 32; otherwise get and set address
+; different cells.
+; NOTE(review): the result is returned via sret, unlike the half/half2/half4
+; getters which return by value -- confirm this matches how callers are compiled.
+define void @rsGetElementAtImpl_half3(<3 x half>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <4 x half>*              ; the cell is read as a padded half4
+  %elem = load <4 x half>, <4 x half>* %src, align 8
+  %out = bitcast <3 x half>* %agg.result to <4 x half>* ; result slot is written as 4 lanes too
+  store <4 x half> %elem, <4 x half>* %out, align 8, !tbaa !63
+  ret void
+}
+
+!64 = !{!"half4", !15}
+; Store a <4 x half> value into allocation %a.coerce at cell (%x, %y, %z).
+; A <4 x half> is 8 bytes and rsGetElementAtImpl_half4 passes 8 to rsOffset,
+; so the setter must pass 8 as well (it previously passed 4, which made set
+; and get address different cells).
+define void @rsSetElementAtImpl_half4([1 x i32] %a.coerce, <4 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to <4 x half>*              ; view the raw cell as a half4 slot
+  store <4 x half> %val, <4 x half>* %dst, align 8, !tbaa !64
+  ret void
+}
+
+; Load and return the <4 x half> stored in allocation %a.coerce at (%x, %y, %z).
+; rsOffset is passed 8 as its second argument -- presumably the element size
+; in bytes (four 2-byte halves).
+define <4 x half> @rsGetElementAtImpl_half4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <4 x half>*              ; view the raw cell as a half4 slot
+  %elem = load <4 x half>, <4 x half>* %src, align 8, !tbaa !64
+  ret <4 x half> %elem
+}
define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
diff --git a/driver/runtime/ll64/allocation.ll b/driver/runtime/ll64/allocation.ll
index adb385c..ad18874 100644
--- a/driver/runtime/ll64/allocation.ll
+++ b/driver/runtime/ll64/allocation.ll
@@ -669,6 +669,69 @@
ret void
}
+!61 = !{!"half", !15}
+; Store a single half value into the allocation pointed to by %a.coerce at
+; cell (%x, %y, %z). rsOffset is passed 2 as its second argument -- presumably
+; the element size in bytes (a half occupies 2 bytes).
+define void @rsSetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, half %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to half*                    ; view the raw cell as a half slot
+  store half %val, half* %dst, align 2, !tbaa !61
+  ret void
+}
+
+; Load and return the half value stored at (%x, %y, %z) in the allocation
+; pointed to by %a.coerce. rsOffset is passed 2 as its second argument --
+; presumably the element size in bytes (a half occupies 2 bytes).
+define half @rsGetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to half*                    ; view the raw cell as a half slot
+  %elem = load half, half* %src, align 2, !tbaa !61
+  ret half %elem
+}
+
+!62 = !{!"half2", !15}
+; Store a <2 x half> value into the allocation pointed to by %a.coerce at
+; cell (%x, %y, %z). rsOffset is passed 4 as its second argument -- presumably
+; the element size in bytes (two 2-byte halves).
+define void @rsSetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, <2 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to <2 x half>*              ; view the raw cell as a half2 slot
+  store <2 x half> %val, <2 x half>* %dst, align 4, !tbaa !62
+  ret void
+}
+
+; Load and return the <2 x half> stored at (%x, %y, %z) in the allocation
+; pointed to by %a.coerce. rsOffset is passed 4 as its second argument --
+; presumably the element size in bytes (two 2-byte halves).
+define <2 x half> @rsGetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <2 x half>*              ; view the raw cell as a half2 slot
+  %elem = load <2 x half>, <2 x half>* %src, align 4, !tbaa !62
+  ret <2 x half> %elem
+}
+
+!63 = !{!"half3", !15}
+; Store a <3 x half> value into the allocation pointed to by %a.coerce at
+; cell (%x, %y, %z). The value is widened to <4 x half> (fourth lane undef)
+; and written as one 8-byte vector; rsOffset is passed 8 as its second
+; argument -- presumably the element size in bytes.
+define void @rsSetElementAtImpl_half3(%struct.rs_allocation* nocapture readonly %a.coerce, <3 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %widened = shufflevector <3 x half> %val, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %dst = bitcast i8* %cell to <4 x half>*              ; the cell is written as a padded half4
+  store <4 x half> %widened, <4 x half>* %dst, align 8, !tbaa !63
+  ret void
+}
+
+; Load the half3 element at (%x, %y, %z) of the allocation pointed to by
+; %a.coerce and write it through the sret pointer %agg.result.
+; The element is read as a padded <4 x half> (8 bytes), matching what
+; rsSetElementAtImpl_half3 writes, so rsOffset must be given 8 -- the same
+; value the setter passes -- rather than 32; otherwise get and set address
+; different cells.
+; NOTE(review): the result is returned via sret, unlike the half/half2/half4
+; getters which return by value -- confirm this matches how callers are compiled.
+define void @rsGetElementAtImpl_half3(<3 x half>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <4 x half>*              ; the cell is read as a padded half4
+  %elem = load <4 x half>, <4 x half>* %src, align 8
+  %out = bitcast <3 x half>* %agg.result to <4 x half>* ; result slot is written as 4 lanes too
+  store <4 x half> %elem, <4 x half>* %out, align 8, !tbaa !63
+  ret void
+}
+
+!64 = !{!"half4", !15}
+; Store a <4 x half> value into the allocation pointed to by %a.coerce at
+; cell (%x, %y, %z).
+; A <4 x half> is 8 bytes and rsGetElementAtImpl_half4 passes 8 to rsOffset,
+; so the setter must pass 8 as well (it previously passed 4, which made set
+; and get address different cells).
+define void @rsSetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, <4 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %dst = bitcast i8* %cell to <4 x half>*              ; view the raw cell as a half4 slot
+  store <4 x half> %val, <4 x half>* %dst, align 8, !tbaa !64
+  ret void
+}
+
+; Load and return the <4 x half> stored at (%x, %y, %z) in the allocation
+; pointed to by %a.coerce. rsOffset is passed 8 as its second argument --
+; presumably the element size in bytes (four 2-byte halves).
+define <4 x half> @rsGetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+  %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
+  %src = bitcast i8* %cell to <4 x half>*              ; view the raw cell as a half4 slot
+  %elem = load <4 x half>, <4 x half>* %src, align 8, !tbaa !64
+  ret <4 x half> %elem
+}
+
define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
diff --git a/driver/runtime/rs_allocation.c b/driver/runtime/rs_allocation.c
index 7755e97..8c8d1ba 100644
--- a/driver/runtime/rs_allocation.c
+++ b/driver/runtime/rs_allocation.c
@@ -272,6 +272,10 @@
ELEMENT_AT(ulong2)
ELEMENT_AT(ulong3)
ELEMENT_AT(ulong4)
+// NOTE(review): presumably these instantiate the element accessors for the
+// half types, backed by the rs{Set,Get}ElementAtImpl_half* functions in
+// ll32/ll64 allocation.ll -- confirm against the ELEMENT_AT definition.
+ELEMENT_AT(half)
+ELEMENT_AT(half2)
+ELEMENT_AT(half3)
+ELEMENT_AT(half4)
ELEMENT_AT(float)
ELEMENT_AT(float2)
ELEMENT_AT(float3)