Add AllocationCreateStrided to support lib CPU driver

 - Allow creating Allocations with an arbitrary alignment requirement,
   so that Incremental Intrinsic Support can run with different
   native GPU RS runtimes.
 - Make the compat mode CPU driver use an additional rs_compat.spec.
 - Add a compat mode only USAGE_INCREMENTAL_SUPPORT.
 - Add AllocationCreateStrided() to take an alignment requirement (power
   of 2). Only enabled when USAGE_INCREMENTAL_SUPPORT is detected.

Change-Id: I66f913c3a2474f93af5a244c0c84460a7a395e71
diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h
index 740457a..9ce97c8 100644
--- a/cpp/rsDispatch.h
+++ b/cpp/rsDispatch.h
@@ -42,6 +42,7 @@
 typedef void (*ContextDeinitToClientFnPtr) (RsContext);
 typedef RsType (*TypeCreateFnPtr) (RsContext, RsElement, uint32_t, uint32_t, uint32_t, bool, bool, uint32_t);
 typedef RsAllocation (*AllocationCreateTypedFnPtr) (RsContext, RsType, RsAllocationMipmapControl, uint32_t, uintptr_t);
+typedef RsAllocation (*AllocationCreateStridedFnPtr) (RsContext, RsType, RsAllocationMipmapControl, uint32_t, uintptr_t, size_t);
 typedef RsAllocation (*AllocationCreateFromBitmapFnPtr) (RsContext, RsType, RsAllocationMipmapControl, const void*, size_t, uint32_t);
 typedef RsAllocation (*AllocationCubeCreateFromBitmapFnPtr) (RsContext, RsType, RsAllocationMipmapControl, const void*, size_t, uint32_t);
 typedef RsNativeWindow (*AllocationGetSurfaceFnPtr) (RsContext, RsAllocation);
@@ -181,6 +182,7 @@
     AllocationIoSendFnPtr AllocationIoSend;
     AllocationIoReceiveFnPtr AllocationIoReceive;
     AllocationGetPointerFnPtr AllocationGetPointer;
+    AllocationCreateStridedFnPtr AllocationCreateStrided;
 };
 
 bool loadSymbols(void* handle, dispatchTable& dispatchTab, int device_api = 0);
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 790005d..a3203a7 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -302,16 +302,15 @@
     return uvSize;
 }
 
-
 static size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc,
-        const Type *type, uint8_t *ptr) {
+                                          const Type *type, uint8_t *ptr, size_t requiredAlignment) {
     alloc->mHal.drvState.lod[0].dimX = type->getDimX();
     alloc->mHal.drvState.lod[0].dimY = type->getDimY();
     alloc->mHal.drvState.lod[0].dimZ = type->getDimZ();
     alloc->mHal.drvState.lod[0].mallocPtr = 0;
-    // Stride needs to be 16-byte aligned too!
+    // Stride needs to be aligned to a boundary defined by requiredAlignment!
     size_t stride = alloc->mHal.drvState.lod[0].dimX * type->getElementSizeBytes();
-    alloc->mHal.drvState.lod[0].stride = rsRound(stride, 16);
+    alloc->mHal.drvState.lod[0].stride = rsRound(stride, requiredAlignment);
     alloc->mHal.drvState.lodCount = type->getLODCount();
     alloc->mHal.drvState.faceCount = type->getDimFaces();
 
@@ -335,7 +334,7 @@
             alloc->mHal.drvState.lod[lod].dimY = ty;
             alloc->mHal.drvState.lod[lod].dimZ = tz;
             alloc->mHal.drvState.lod[lod].stride =
-                    rsRound(tx * type->getElementSizeBytes(), 16);
+                    rsRound(tx * type->getElementSizeBytes(), requiredAlignment);
             offsets[lod] = o;
             o += alloc->mHal.drvState.lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u);
             if (tx > 1) tx >>= 1;
@@ -359,9 +358,14 @@
     return allocSize;
 }
 
-static uint8_t* allocAlignedMemory(size_t allocSize, bool forceZero) {
-    // We align all allocations to a 16-byte boundary.
-    uint8_t* ptr = (uint8_t *)memalign(16, allocSize);
+static size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc,
+                                          const Type *type, uint8_t *ptr) {
+    return AllocationBuildPointerTable(rsc, alloc, type, ptr, Allocation::kMinimumRSAlignment);
+}
+
+static uint8_t* allocAlignedMemory(size_t allocSize, bool forceZero, size_t requiredAlignment) {
+    // We align all allocations to a boundary defined by requiredAlignment.
+    uint8_t* ptr = (uint8_t *)memalign(requiredAlignment, allocSize);
     if (!ptr) {
         return nullptr;
     }
@@ -371,15 +375,20 @@
     return ptr;
 }
 
-bool rsdAllocationInit(const Context *rsc, Allocation *alloc, bool forceZero) {
+bool rsdAllocationInitStrided(const Context *rsc, Allocation *alloc, bool forceZero, size_t requiredAlignment) {
     DrvAllocation *drv = (DrvAllocation *)calloc(1, sizeof(DrvAllocation));
     if (!drv) {
         return false;
     }
     alloc->mHal.drv = drv;
 
+    // requiredAlignment must be a power of 2 and greater than or equal to kMinimumRSAlignment.
+    if ((requiredAlignment & (requiredAlignment-1)) != 0 || requiredAlignment < Allocation::kMinimumRSAlignment) {
+        ALOGE("requiredAlignment must be power of 2");
+        return false;
+    }
     // Calculate the object size.
-    size_t allocSize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), nullptr);
+    size_t allocSize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), nullptr, requiredAlignment);
 
     uint8_t * ptr = nullptr;
     if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_IO_OUTPUT) {
@@ -387,6 +396,20 @@
     } else if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_IO_INPUT) {
         // Allocation is allocated when the surface is created
         // in getSurface
+#ifdef RS_COMPATIBILITY_LIB
+    } else if (alloc->mHal.state.usageFlags == (RS_ALLOCATION_USAGE_INCREMENTAL_SUPPORT | RS_ALLOCATION_USAGE_SHARED)) {
+        if (alloc->mHal.state.userProvidedPtr == nullptr) {
+            ALOGE("User-backed buffer pointer cannot be null");
+            return false;
+        }
+        if (alloc->getType()->getDimLOD() || alloc->getType()->getDimFaces()) {
+            ALOGE("User-allocated buffers must not have multiple faces or LODs");
+            return false;
+        }
+
+        drv->useUserProvidedPtr = true;
+        ptr = (uint8_t*)alloc->mHal.state.userProvidedPtr;
+#endif
     } else if (alloc->mHal.state.userProvidedPtr != nullptr) {
         // user-provided allocation
         // limitations: no faces, no LOD, USAGE_SCRIPT or SCRIPT+TEXTURE only
@@ -400,13 +423,13 @@
             return false;
         }
 
-        // rows must be 16-byte aligned
+        // rows must be aligned based on requiredAlignment.
         // validate that here, otherwise fall back to not use the user-backed allocation
-        if (((alloc->getType()->getDimX() * alloc->getType()->getElement()->getSizeBytes()) % 16) != 0) {
+        if (((alloc->getType()->getDimX() * alloc->getType()->getElement()->getSizeBytes()) % requiredAlignment) != 0) {
             ALOGV("User-backed allocation failed stride requirement, falling back to separate allocation");
             drv->useUserProvidedPtr = false;
 
-            ptr = allocAlignedMemory(allocSize, forceZero);
+            ptr = allocAlignedMemory(allocSize, forceZero, requiredAlignment);
             if (!ptr) {
                 alloc->mHal.drv = nullptr;
                 free(drv);
@@ -418,7 +441,7 @@
             ptr = (uint8_t*)alloc->mHal.state.userProvidedPtr;
         }
     } else {
-        ptr = allocAlignedMemory(allocSize, forceZero);
+        ptr = allocAlignedMemory(allocSize, forceZero, requiredAlignment);
         if (!ptr) {
             alloc->mHal.drv = nullptr;
             free(drv);
@@ -426,7 +449,7 @@
         }
     }
     // Build the pointer tables
-    size_t verifySize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), ptr);
+    size_t verifySize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), ptr, requiredAlignment);
     if(allocSize != verifySize) {
         rsAssert(!"Size mismatch");
     }
@@ -476,6 +499,10 @@
     return true;
 }
 
+bool rsdAllocationInit(const Context *rsc, Allocation *alloc, bool forceZero) {
+    return rsdAllocationInitStrided(rsc, alloc, forceZero, Allocation::kMinimumRSAlignment);
+}
+
 void rsdAllocationAdapterOffset(const Context *rsc, const Allocation *alloc) {
     //ALOGE("rsdAllocationAdapterOffset");
 
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index eff5e30..e51bef4 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -84,6 +84,11 @@
 bool rsdAllocationInit(const android::renderscript::Context *rsc,
                        android::renderscript::Allocation *alloc,
                        bool forceZero);
+#ifdef RS_COMPATIBILITY_LIB
+bool rsdAllocationInitStrided(const android::renderscript::Context *rsc,
+                              android::renderscript::Allocation *alloc,
+                              bool forceZero, size_t requiredAlignment);
+#endif
 bool rsdAllocationAdapterInit(const android::renderscript::Context *rsc,
                               android::renderscript::Allocation *alloc);
 void rsdAllocationDestroy(const android::renderscript::Context *rsc,
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 3933d98..1fcfcc2 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -156,6 +156,10 @@
         fnPtr[0] = (void *)rsdAllocationAdapterOffset; break;
     case RS_HAL_ALLOCATION_GET_POINTER:
         fnPtr[0] = (void *)nullptr; break;
+#ifdef RS_COMPATIBILITY_LIB
+    case RS_HAL_ALLOCATION_INIT_STRIDED:
+        fnPtr[0] = (void *)rsdAllocationInitStrided; break;
+#endif
 
     case RS_HAL_SAMPLER_INIT:
         fnPtr[0] = (void *)rsdSamplerInit; break;
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index f024b7d..474ffda 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -59,8 +59,9 @@
     }
 }
 
-Allocation * Allocation::createAllocation(Context *rsc, const Type *type, uint32_t usages,
-                              RsAllocationMipmapControl mc, void * ptr) {
+Allocation * Allocation::createAllocationStrided(Context *rsc, const Type *type, uint32_t usages,
+                                                 RsAllocationMipmapControl mc, void * ptr,
+                                                 size_t requiredAlignment) {
     // Allocation objects must use allocator specified by the driver
     void* allocMem = rsc->mHal.funcs.allocRuntimeMem(sizeof(Allocation), 0);
 
@@ -79,6 +80,11 @@
             rsc->setError(RS_ERROR_FATAL_DRIVER, "Allocation Init called with USAGE_OEM but driver does not support it");
             return nullptr;
         }
+#ifdef RS_COMPATIBILITY_LIB
+    } else if (usages & RS_ALLOCATION_USAGE_INCREMENTAL_SUPPORT){
+        a = new (allocMem) Allocation(rsc, type, usages, mc, ptr);
+        success = rsc->mHal.funcs.allocation.initStrided(rsc, a, type->getElement()->getHasReferences(), requiredAlignment);
+#endif
     } else {
         a = new (allocMem) Allocation(rsc, type, usages, mc, ptr);
         success = rsc->mHal.funcs.allocation.init(rsc, a, type->getElement()->getHasReferences());
@@ -93,6 +99,11 @@
     return a;
 }
 
+Allocation * Allocation::createAllocation(Context *rsc, const Type *type, uint32_t usages,
+                              RsAllocationMipmapControl mc, void * ptr) {
+    return Allocation::createAllocationStrided(rsc, type, usages, mc, ptr, kMinimumRSAlignment);
+}
+
 Allocation * Allocation::createAdapter(Context *rsc, const Allocation *alloc, const Type *type) {
     // Allocation objects must use allocator specified by the driver
     void* allocMem = rsc->mHal.funcs.allocRuntimeMem(sizeof(Allocation), 0);
@@ -726,6 +737,19 @@
     return alloc;
 }
 
+RsAllocation rsi_AllocationCreateStrided(Context *rsc, RsType vtype,
+                                         RsAllocationMipmapControl mipmaps,
+                                         uint32_t usages, uintptr_t ptr,
+                                         size_t requiredAlignment) {
+    Allocation * alloc = Allocation::createAllocationStrided(rsc, static_cast<Type *>(vtype), usages, mipmaps,
+                                                             (void*)ptr, requiredAlignment);
+    if (!alloc) {
+        return nullptr;
+    }
+    alloc->incUserRef();
+    return alloc;
+}
+
 RsAllocation rsi_AllocationCreateFromBitmap(Context *rsc, RsType vtype,
                                             RsAllocationMipmapControl mipmaps,
                                             const void *data, size_t sizeBytes, uint32_t usages) {
diff --git a/rsAllocation.h b/rsAllocation.h
index 6ffe05d..a1b6c8f 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -48,6 +48,8 @@
 
 public:
     const static int MAX_LOD = 16;
+    // The minimum alignment requirement for RenderScript. Must be a power of 2 and larger than 0.
+    const static size_t kMinimumRSAlignment = 16;
 
     struct Hal {
         void * drv;
@@ -111,6 +113,9 @@
     static Allocation * createAllocation(Context *rsc, const Type *, uint32_t usages,
                                          RsAllocationMipmapControl mc = RS_ALLOCATION_MIPMAP_NONE,
                                          void *ptr = 0);
+    static Allocation * createAllocationStrided(Context *rsc, const Type *, uint32_t usages,
+                                                RsAllocationMipmapControl mc = RS_ALLOCATION_MIPMAP_NONE,
+                                                void *ptr = 0, size_t byteAligned = 16);
     static Allocation * createAdapter(Context *rsc, const Allocation *alloc, const Type *type);
 
 
diff --git a/rsDefines.h b/rsDefines.h
index 0c6f8df..6e38fdb 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -94,6 +94,7 @@
     RS_ALLOCATION_USAGE_IO_OUTPUT = 0x0040,
     RS_ALLOCATION_USAGE_SHARED = 0x0080,
 
+    RS_ALLOCATION_USAGE_INCREMENTAL_SUPPORT = 0x1000,
     RS_ALLOCATION_USAGE_OEM = 0x8000,
     RS_ALLOCATION_USAGE_ALL = 0x80FF
 };
diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp
index 7b728b6..125a6df 100644
--- a/rsDriverLoader.cpp
+++ b/rsDriverLoader.cpp
@@ -109,6 +109,9 @@
     ret &= fn(RS_HAL_ALLOCATION_UPDATE_CACHED_OBJECT, (void **)&rsc->mHal.funcs.allocation.updateCachedObject);
     ret &= fn(RS_HAL_ALLOCATION_ADAPTER_OFFSET, (void **)&rsc->mHal.funcs.allocation.adapterOffset);
     ret &= fn(RS_HAL_ALLOCATION_GET_POINTER, (void **)&rsc->mHal.funcs.allocation.getPointer);
+#ifdef RS_COMPATIBILITY_LIB
+    ret &= fn(RS_HAL_ALLOCATION_INIT_STRIDED, (void **)&rsc->mHal.funcs.allocation.initStrided);
+#endif
 
     ret &= fn(RS_HAL_SAMPLER_INIT, (void **)&rsc->mHal.funcs.sampler.init);
     ret &= fn(RS_HAL_SAMPLER_DESTROY, (void **)&rsc->mHal.funcs.sampler.destroy);
diff --git a/rs_compat.spec b/rs_compat.spec
new file mode 100644
index 0000000..5efda24
--- /dev/null
+++ b/rs_compat.spec
@@ -0,0 +1,9 @@
+AllocationCreateStrided {
+    direct
+    param RsType vtype
+    param RsAllocationMipmapControl mipmaps
+    param uint32_t usages
+    param uintptr_t ptr
+    param size_t requiredAlignment
+    ret RsAllocation
+}
diff --git a/rs_hal.h b/rs_hal.h
index c500e5a..6bc7d7a 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -279,6 +279,9 @@
         void (*getPointer)(const Context *rsc, const Allocation *alloc,
                            uint32_t lod, RsAllocationCubemapFace face,
                            uint32_t z, uint32_t array);
+#ifdef RS_COMPATIBILITY_LIB
+        bool (*initStrided)(const Context *rsc, Allocation *alloc, bool forceZero, size_t requiredAlignment);
+#endif
     } allocation;
 
     struct {
@@ -407,6 +410,9 @@
     RS_HAL_ALLOCATION_ADAPTER_OFFSET                        = 2025,
     RS_HAL_ALLOCATION_INIT_OEM                              = 2026,
     RS_HAL_ALLOCATION_GET_POINTER                           = 2027,
+#ifdef RS_COMPATIBILITY_LIB
+    RS_HAL_ALLOCATION_INIT_STRIDED                          = 2999,
+#endif
 
     RS_HAL_SAMPLER_INIT                                     = 3000,
     RS_HAL_SAMPLER_DESTROY                                  = 3001,