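Add support for 2D strided copies to/from an allocation with the C++ API.

rsdAllocationData2D and rsdAllocationRead2D take a new trailing stride
argument: the distance in bytes between the start of consecutive rows in
the caller's buffer. A stride of 0 means the rows are tightly packed
(stride = element size * width).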

Change-Id: I55cd7512f683f8d36d2b75f894931fd0657521bc
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 928f777..82d87c2 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -582,11 +582,14 @@
 
 void rsdAllocationData2D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, const void *data, size_t sizeBytes) {
+                         uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride) {
     DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
 
     uint32_t eSize = alloc->mHal.state.elementSizeBytes;
     uint32_t lineSize = eSize * w;
+    if (!stride) {
+        stride = lineSize;
+    }
 
     if (alloc->mHal.drvState.lod[0].mallocPtr) {
         const uint8_t *src = static_cast<const uint8_t *>(data);
@@ -598,7 +601,7 @@
                 alloc->decRefs(dst, w);
             }
             memcpy(dst, src, lineSize);
-            src += lineSize;
+            src += stride;
             dst += alloc->mHal.drvState.lod[lod].stride;
         }
         drv->uploadDeferred = true;
@@ -623,10 +626,13 @@
 }
 
 void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc,
-                         uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
-                         uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
+                                uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+                                uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride) {
     uint32_t eSize = alloc->mHal.state.elementSizeBytes;
     uint32_t lineSize = eSize * w;
+    if (!stride) {
+        stride = lineSize;
+    }
 
     if (alloc->mHal.drvState.lod[0].mallocPtr) {
         uint8_t *dst = static_cast<uint8_t *>(data);
@@ -634,7 +640,7 @@
 
         for (uint32_t line=yoff; line < (yoff+h); line++) {
             memcpy(dst, src, lineSize);
-            dst += lineSize;
+            dst += stride;
             src += alloc->mHal.drvState.lod[lod].stride;
         }
     } else {
@@ -642,6 +648,7 @@
     }
 }
 
+
 void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc,
                          uint32_t xoff, uint32_t yoff, uint32_t zoff,
                          uint32_t lod, RsAllocationCubemapFace face,