Add support for 2D strided copies to/from an allocation with the C++ API.
Change-Id: I55cd7512f683f8d36d2b75f894931fd0657521bc
diff --git a/cpp/Allocation.cpp b/cpp/Allocation.cpp
index 2e3597e..d6dfa94 100644
--- a/cpp/Allocation.cpp
+++ b/cpp/Allocation.cpp
@@ -221,7 +221,7 @@
const void *data) {
validate2DRange(xoff, yoff, w, h);
rsAllocation2DData(mRS->getContext(), getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace,
- w, h, data, w * h * mType->getElement()->getSizeBytes());
+ w, h, data, w * h * mType->getElement()->getSizeBytes(), w * mType->getElement()->getSizeBytes());
}
void Allocation::copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
@@ -237,9 +237,32 @@
void* data) {
validate2DRange(xoff, yoff, w, h);
rsAllocation2DRead(mRS->getContext(), getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace,
- w, h, data, w * h * mType->getElement()->getSizeBytes());
+ w, h, data, w * h * mType->getElement()->getSizeBytes(), w * mType->getElement()->getSizeBytes());
}
+// Copies a w x h region into this allocation at (xoff, yoff) from `data`, whose rows start `stride` bytes apart.
+void Allocation::copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data, size_t stride) {
+    validate2DRange(xoff, yoff, w, h);
+    const size_t packedBytes = w * h * mType->getElement()->getSizeBytes();  // payload size, excluding row padding
+    rsAllocation2DData(mRS->getContext(), getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace, w, h, data, packedBytes, stride);
+}
+
+void Allocation::copy2DStridedFrom(const void *data, size_t stride) {  // convenience overload: no explicit region
+    this->copy2DStridedFrom(0, 0, mCurrentDimX, mCurrentDimY, data, stride);  // origin (0, 0), current X/Y extent
+}
+
+// Copies a w x h region of this allocation at (xoff, yoff) out to `data`, placing rows `stride` bytes apart.
+void Allocation::copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, void *data, size_t stride) {
+    validate2DRange(xoff, yoff, w, h);
+    const size_t packedBytes = w * h * mType->getElement()->getSizeBytes();  // payload size, excluding row padding
+    rsAllocation2DRead(mRS->getContext(), getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace, w, h, data, packedBytes, stride);
+}
+
+void Allocation::copy2DStridedTo(void *data, size_t stride) {  // convenience overload: no explicit region
+    this->copy2DStridedTo(0, 0, mCurrentDimX, mCurrentDimY, data, stride);  // origin (0, 0), current X/Y extent
+}
+
+
/*
void resize(int dimX) {
if ((mType.getY() > 0)|| (mType.getZ() > 0) || mType.hasFaces() || mType.hasMipmaps()) {
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 8d3a9af..a430c35 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -238,6 +238,14 @@
void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
sp<const Allocation> data, uint32_t dataXoff, uint32_t dataYoff);
+ void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
+ const void *data, size_t stride);
+ void copy2DStridedFrom(const void *data, size_t stride);
+
+ void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
+ void *data, size_t stride);
+ void copy2DStridedTo(void *data, size_t stride);
+
void resize(int dimX);
void resize(int dimX, int dimY);
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 928f777..82d87c2 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -582,11 +582,14 @@
void rsdAllocationData2D(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, const void *data, size_t sizeBytes) {
+ uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride) {
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
uint32_t lineSize = eSize * w;
+ if (!stride) {
+ stride = lineSize;
+ }
if (alloc->mHal.drvState.lod[0].mallocPtr) {
const uint8_t *src = static_cast<const uint8_t *>(data);
@@ -598,7 +601,7 @@
alloc->decRefs(dst, w);
}
memcpy(dst, src, lineSize);
- src += lineSize;
+ src += stride;
dst += alloc->mHal.drvState.lod[lod].stride;
}
drv->uploadDeferred = true;
@@ -623,10 +626,13 @@
}
void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc,
- uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
+ uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride) {
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
uint32_t lineSize = eSize * w;
+ if (!stride) {
+ stride = lineSize;
+ }
if (alloc->mHal.drvState.lod[0].mallocPtr) {
uint8_t *dst = static_cast<uint8_t *>(data);
@@ -634,7 +640,7 @@
for (uint32_t line=yoff; line < (yoff+h); line++) {
memcpy(dst, src, lineSize);
- dst += lineSize;
+ dst += stride;
src += alloc->mHal.drvState.lod[lod].stride;
}
} else {
@@ -642,6 +648,7 @@
}
}
+
void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t zoff,
uint32_t lod, RsAllocationCubemapFace face,
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index d2ecc9a..5f916d1 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -88,7 +88,7 @@
const android::renderscript::Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h,
- const void *data, uint32_t sizeBytes);
+ const void *data, uint32_t sizeBytes, size_t stride);
void rsdAllocationData3D(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t zoff,
@@ -103,7 +103,7 @@
const android::renderscript::Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h,
- void *data, uint32_t sizeBytes);
+ void *data, uint32_t sizeBytes, size_t stride);
void rsdAllocationRead3D(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t zoff,
diff --git a/rs.spec b/rs.spec
index 679b481..5802a0c 100644
--- a/rs.spec
+++ b/rs.spec
@@ -200,6 +200,7 @@
param uint32_t w
param uint32_t h
param const void *data
+ param size_t stride
}
Allocation2DElementData {
@@ -238,6 +239,7 @@
param uint32_t w
param uint32_t h
param void *data
+ param size_t stride
}
AllocationSyncAll {
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 259c3f8..df0a79e 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -89,19 +89,28 @@
}
void Allocation::data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, const void *data, size_t sizeBytes) {
+ uint32_t w, uint32_t h, const void *data, size_t sizeBytes) {
+
const size_t eSize = mHal.state.elementSizeBytes;
const size_t lineSize = eSize * w;
- //ALOGE("data2d %p, %i %i %i %i %i %i %p %i", this, xoff, yoff, lod, face, w, h, data, sizeBytes);
-
if ((lineSize * h) != sizeBytes) {
ALOGE("Allocation size mismatch, expected %zu, got %zu", (lineSize * h), sizeBytes);
rsAssert(!"Allocation::subData called with mismatched size");
return;
}
- rsc->mHal.funcs.allocation.data2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes);
+ this->data(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, lineSize);
+}
+
+// 2D upload from client memory whose rows start `stride` bytes apart; sizeBytes is forwarded without a size check.
+void Allocation::data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+        uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride) {
+    const size_t lineSize = static_cast<size_t>(mHal.state.elementSizeBytes) * w;
+    if (!stride) {  // 0 means tightly packed rows, matching what the strided read() overload does
+        stride = lineSize;
+    }
+    rsc->mHal.funcs.allocation.data2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
sendDirty(rsc);
}
@@ -111,7 +120,7 @@
}
void Allocation::read(Context *rsc, uint32_t xoff, uint32_t lod,
- uint32_t count, void *data, size_t sizeBytes) {
+ uint32_t count, void *data, size_t sizeBytes) {
const size_t eSize = mHal.state.type->getElementSizeBytes();
if ((count * eSize) != sizeBytes) {
@@ -131,7 +140,7 @@
void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
const size_t eSize = mHal.state.elementSizeBytes;
const size_t lineSize = eSize * w;
@@ -141,7 +150,18 @@
return;
}
- rsc->mHal.funcs.allocation.read2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes);
+ read(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, lineSize);
+}
+
+// 2D read-back into client memory whose rows start `stride` bytes apart; a stride of 0 selects packed rows.
+void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+        uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride) {
+    if (stride == 0) {
+        // No stride supplied: fall back to the packed row size, i.e. w elements.
+        const size_t bytesPerElement = mHal.state.elementSizeBytes;
+        stride = bytesPerElement * w;
+    }
+    rsc->mHal.funcs.allocation.read2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
}
void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
@@ -526,9 +546,9 @@
}
void rsi_Allocation2DData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, const void *data, size_t sizeBytes) {
+ uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride) {
Allocation *a = static_cast<Allocation *>(va);
- a->data(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes);
+ a->data(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
}
void rsi_AllocationRead(Context *rsc, RsAllocation va, void *data, size_t sizeBytes) {
@@ -674,9 +694,9 @@
void rsi_Allocation2DRead(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff,
uint32_t lod, RsAllocationCubemapFace face, uint32_t w,
- uint32_t h, void *data, size_t sizeBytes) {
+ uint32_t h, void *data, size_t sizeBytes, size_t stride) {
Allocation *a = static_cast<Allocation *>(va);
- a->read(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes);
+ a->read(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes, stride);
}
}
diff --git a/rsAllocation.h b/rsAllocation.h
index de79cba..b552ca3 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -103,16 +103,20 @@
void data(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, const void *data, size_t sizeBytes);
void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, const void *data, size_t sizeBytes);
+ uint32_t w, uint32_t h, const void *data, size_t sizeBytes);
+ void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, const void *data, size_t sizeBytes, size_t stride);
void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+ uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
void read(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes);
void readUnchecked(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes);
void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, void *data, size_t sizeBytes);
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes);
+ void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes, size_t stride);
void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
- uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+ uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
void elementData(Context *rsc, uint32_t x,
const void *data, uint32_t elementOff, size_t sizeBytes);
diff --git a/rsFont.cpp b/rsFont.cpp
index 82fb90f..0d14d1b 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -469,7 +469,7 @@
mRSC->mHal.funcs.allocation.data2D(mRSC, mTextTexture.get(), 0, 0, 0,
RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X, mCacheWidth, mCacheHeight,
- mCacheBuffer, mCacheWidth*mCacheHeight);
+ mCacheBuffer, mCacheWidth*mCacheHeight, mCacheWidth);
mFontShaderF->bindTexture(mRSC, 0, mTextTexture.get());
diff --git a/rs_hal.h b/rs_hal.h
index 877fd96..4f562d5 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -157,7 +157,7 @@
void (*data2D)(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod,
RsAllocationCubemapFace face, uint32_t w, uint32_t h,
- const void *data, size_t sizeBytes);
+ const void *data, size_t sizeBytes, size_t stride);
void (*data3D)(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t zoff,
uint32_t lod, RsAllocationCubemapFace face,
@@ -169,7 +169,7 @@
void (*read2D)(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod,
RsAllocationCubemapFace face, uint32_t w, uint32_t h,
- void *data, size_t sizeBytes);
+ void *data, size_t sizeBytes, size_t stride);
void (*read3D)(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t zoff,
uint32_t lod, RsAllocationCubemapFace face,
diff --git a/tests/cppstrided/Android.mk b/tests/cppstrided/Android.mk
new file mode 100644
index 0000000..febfd38
--- /dev/null
+++ b/tests/cppstrided/Android.mk
@@ -0,0 +1,34 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+# Builds the rstest-cppstrided executable from the RenderScript kernel (multiply.rs) and its C++ host program (compute.cpp).
+LOCAL_SRC_FILES:= \
+ multiply.rs \
+ compute.cpp
+
+LOCAL_SHARED_LIBRARIES := \
+ libRS \
+ libRScpp \
+ libz \
+ libcutils \
+ libutils \
+ libEGL \
+ libGLESv1_CM \
+ libGLESv2 \
+ libui \
+ libbcc \
+ libbcinfo \
+ libgui
+
+LOCAL_MODULE:= rstest-cppstrided
+# Tagged "tests" -- presumably so the binary is only built for test configurations; confirm against the build system.
+LOCAL_MODULE_TAGS := tests
+# Intermediates directory of the libRS static library; appended to the include path below.
+intermediates := $(call intermediates-dir-for,STATIC_LIBRARIES,libRS,TARGET,)
+
+LOCAL_C_INCLUDES += frameworks/rs/cpp
+LOCAL_C_INCLUDES += frameworks/rs
+LOCAL_C_INCLUDES += $(intermediates)
+
+
+include $(BUILD_EXECUTABLE)
+
diff --git a/tests/cppstrided/compute.cpp b/tests/cppstrided/compute.cpp
new file mode 100644
index 0000000..b705f77
--- /dev/null
+++ b/tests/cppstrided/compute.cpp
@@ -0,0 +1,74 @@
+
+#include "RenderScript.h"
+
+#include "ScriptC_multiply.h"
+
+using namespace android;
+using namespace RSC;
+
+// Round-trips a 1024x1024 U32 image through copy2DStridedFrom()/copy2DStridedTo() using a padded host
+// buffer; argv[1] optionally sets the row stride in elements. Exits with 1 on any failure.
+int main(int argc, char** argv)
+{
+    const uint32_t numElems = 1024;
+    uint32_t stride = 1025;  // row pitch of the host buffer, in elements
+    if (argc >= 2) {
+        int tempStride = atoi(argv[1]);
+        if (tempStride < 1024) {
+            printf("stride must be greater than or equal to 1024\n");
+            return 1;
+        }
+        stride = (uint32_t) tempStride;
+    }
+    // Reject strides whose padded buffer size cannot be represented; the old 32-bit product could wrap.
+    if (stride > ((size_t) -1) / ((size_t) numElems * sizeof(uint32_t))) {
+        printf("stride too large\n");
+        return 1;
+    }
+    const size_t strideBytes = (size_t) stride * sizeof(uint32_t);
+
+    sp<RS> rs = new RS();
+    if (!rs->init()) {  // result used to be ignored; assumes false signals failure -- confirm against RS::init
+        printf("RS init failed\n");
+        return 1;
+    }
+    sp<const Element> e = Element::U32(rs);
+    Type::Builder tb(rs, e);
+    tb.setX(numElems);
+    tb.setY(numElems);
+    sp<const Type> t = tb.create();
+    sp<Allocation> ain = Allocation::createTyped(rs, t);
+    sp<Allocation> aout = Allocation::createTyped(rs, t);
+    sp<ScriptC_multiply> sc = new ScriptC_multiply(rs, NULL, 0);
+    uint32_t* buf = (uint32_t*) malloc(strideBytes * numElems);
+    if (!buf) {
+        printf("malloc failed\n");
+        return 1;
+    }
+    for (uint32_t i = 0; i < numElems; i++) {
+        uint32_t* row = buf + (size_t) stride * i;  // only the first numElems entries of each row are used
+        for (uint32_t ct = 0; ct < numElems; ct++) {
+            row[ct] = ct + (i * numElems);
+        }
+    }
+    ain->copy2DStridedFrom(buf, strideBytes);
+    sc->forEach_multiply(ain, aout);
+    aout->copy2DStridedTo(buf, strideBytes);
+    for (uint32_t i = 0; i < numElems; i++) {
+        const uint32_t* row = buf + (size_t) stride * i;
+        for (uint32_t ct = 0; ct < numElems; ct++) {
+            if (row[ct] != (ct + (i * numElems)) * 2) {
+                printf("Mismatch at location %u, %u: %u\n", i, ct, row[ct]);
+                free(buf);  // release the host buffer on the failure path too
+                return 1;
+            }
+        }
+    }
+    printf("Test successful with %u stride!\n", stride);
+    free(buf);
+    sc.clear();
+    t.clear();
+    e.clear();
+    ain.clear();
+    aout.clear();
+}
diff --git a/tests/cppstrided/multiply.rs b/tests/cppstrided/multiply.rs
new file mode 100644
index 0000000..d1ffefb
--- /dev/null
+++ b/tests/cppstrided/multiply.rs
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma version(1)
+#pragma rs java_package_name(unused)
+#pragma rs_fp_relaxed
+
+uint32_t __attribute__((kernel)) multiply(uint32_t in) {  // per-element kernel: returns twice its input
+    return in << 1;  // identical to in * 2 for unsigned 32-bit values (both wrap modulo 2^32)
+}
+
+