Split the RsForEachStubParamStruct in two.
This patch splits the RsForEachStubParamStruct into two smaller structs, one
used specifically by the driver and the other by the expanded kernels. Doing
so makes it clearer which data is used where. In addition, less data is
copied between memory locations during kernel invocation.
Several unused fields were also removed from the structs.
Change-Id: I7788ef754add44463b17a6b571c7cde6e73b9712
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 499f890..a0564fc 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -48,7 +48,7 @@
using namespace android::renderscript;
typedef void (*outer_foreach_t)(
- const android::renderscript::RsForEachStubParamStruct *,
+ const android::renderscript::RsExpandKernelParams *,
uint32_t x1, uint32_t x2,
uint32_t instep, uint32_t outstep);
@@ -353,17 +353,21 @@
static void wc_xy(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
+
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
+
+ // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
+ kparams.lid = idx;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
+ uint32_t yEnd = yStart + mtls->mSliceSize;
+
yEnd = rsMin(yEnd, mtls->yEnd);
+
if (yEnd <= yStart) {
return;
}
@@ -371,29 +375,39 @@
//ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
//ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
- for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) {
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * kparams.y) +
+ (mtls->fep.eStrideOut * mtls->xStart);
+
+ kparams.in = mtls->fep.ptrIn +
+ (mtls->fep.yStrideIn * kparams.y) +
+ (mtls->fep.eStrideIn * mtls->xStart);
+
+
+ fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
+ mtls->fep.eStrideOut);
}
}
}
static void wc_x(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
+
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
+
+ // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
+ kparams.lid = idx;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
+ uint32_t xEnd = xStart + mtls->mSliceSize;
+
xEnd = rsMin(xEnd, mtls->xEnd);
+
if (xEnd <= xStart) {
return;
}
@@ -401,14 +415,15 @@
//ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
//ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
- p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ kparams.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
+ kparams.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
+
+ fn(&kparams, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls) {
+ const RsScriptCall *sc, MTLaunchStruct *mtls) {
//android::StopWatch kernel_time("kernel time");
@@ -457,22 +472,34 @@
//ALOGE("launch 1");
} else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
//ALOGE("launch 3");
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
- mtls->fep.dimY * p.z + p.y;
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ for (uint32_t arrayIndex = mtls->arrayStart;
+ arrayIndex < mtls->arrayEnd; arrayIndex++) {
+
+ for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
+ kparams.z++) {
+
+ for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
+ kparams.y++) {
+
+ uint32_t offset =
+ kparams.dimY * kparams.dimZ * arrayIndex +
+ kparams.dimY * kparams.z + kparams.y;
+
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * offset) +
+ (mtls->fep.eStrideOut * mtls->xStart);
+
+ kparams.in = mtls->fep.ptrIn +
+ (mtls->fep.yStrideIn * offset) +
+ (mtls->fep.eStrideIn * mtls->xStart);
+
+ fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
+ mtls->fep.eStrideOut);
}
}
}
@@ -529,41 +556,46 @@
//ALOGE("launch 1");
} else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
// Allocate space for our input base pointers.
- p.ins = new const void*[inLen];
+ kparams.ins = new const void*[inLen];
// Allocate space for our input stride information.
- p.eStrideIns = new uint32_t[inLen];
+ kparams.eStrideIns = new uint32_t[inLen];
// Fill our stride information.
- for (int index = inLen; --index >= 0;) {
- p.eStrideIns[index] = mtls->fep.inStrides[index].eStride;
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.eStrideIns[inIndex] = mtls->fep.inStrides[inIndex].eStride;
}
//ALOGE("launch 3");
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- uint32_t offset_invariant = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0];
+ for (uint32_t arrayIndex = mtls->arrayStart;
+ arrayIndex < mtls->arrayEnd; arrayIndex++) {
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- uint32_t offset_part = offset_invariant * p.ar[0];
+ for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
+ kparams.z++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = offset_part + mtls->fep.dimY * p.z + p.y;
+ for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
+ kparams.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
+ uint32_t offset =
+ mtls->fep.dimY * mtls->fep.dimZ * arrayIndex +
+ mtls->fep.dimY * kparams.z + kparams.y;
- for (int index = inLen; --index >= 0;) {
- StridePair &strides = mtls->fep.inStrides[index];
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * offset) +
+ (mtls->fep.eStrideOut * mtls->xStart);
- p.ins[index] = mtls->fep.ptrIns[index] +
- (strides.yStride * offset) +
- (strides.eStride * mtls->xStart);
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
+
+ kparams.ins[inIndex] =
+ mtls->fep.ptrIns[inIndex] +
+ (strides.yStride * offset) +
+ (strides.eStride * mtls->xStart);
}
/*
@@ -571,14 +603,15 @@
* kernels get their stride information from a member of p
* that points to an array.
*/
- fn(&p, mtls->xStart, mtls->xEnd, 0, mtls->fep.eStrideOut);
+ fn(&kparams, mtls->xStart, mtls->xEnd, 0,
+ mtls->fep.eStrideOut);
}
}
}
// Free our arrays.
- delete[] p.ins;
- delete[] p.eStrideIns;
+ delete[] kparams.ins;
+ delete[] kparams.eStrideIns;
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index c54dca2..5d4b6cc 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -34,32 +34,18 @@
namespace android {
namespace renderscript {
-typedef struct {
+struct StridePair {
uint32_t eStride;
uint32_t yStride;
-} StridePair;
+};
-typedef struct {
- const void *in;
- void *out;
+struct RsExpandKernelDriverInfo {
const void *usr;
uint32_t usrLen;
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t lod;
- RsAllocationCubemapFace face;
- uint32_t ar[16];
-
- const void **ins;
- uint32_t *eStrideIns;
-
- uint32_t lid;
uint32_t dimX;
uint32_t dimY;
uint32_t dimZ;
- uint32_t dimArray;
const uint8_t *ptrIn;
uint8_t *ptrOut;
@@ -71,7 +57,54 @@
const uint8_t** ptrIns;
StridePair* inStrides;
-} RsForEachStubParamStruct;
+
+ ~RsExpandKernelDriverInfo() {
+ if (ptrIns != NULL) {
+ delete[] ptrIns;
+ }
+
+ if (inStrides != NULL) {
+ delete[] inStrides;
+ }
+ }
+};
+
+struct RsExpandKernelParams {
+
+ // Used by kernels
+ const void *in;
+ void *out;
+ uint32_t y;
+ uint32_t z;
+ uint32_t lid;
+
+ const void **ins;
+ uint32_t *eStrideIns;
+
+ // Used by ScriptGroup and user kernels.
+ const void *usr;
+
+ // Used by intrinsics
+ uint32_t dimX;
+ uint32_t dimY;
+ uint32_t dimZ;
+
+ /*
+ * FIXME: This is only used by the blend intrinsic. If possible, we should
+ * modify blend to not need it.
+ */
+ uint32_t slot;
+
+ /// Copy fields needed by a kernel from a driver struct.
+ void takeFields(const RsExpandKernelDriverInfo &dstruct) {
+ this->usr = dstruct.usr;
+ this->slot = dstruct.slot;
+
+ this->dimX = dstruct.dimX;
+ this->dimY = dstruct.dimY;
+ this->dimZ = dstruct.dimZ;
+ }
+};
extern bool gArchUseSIMD;
@@ -89,7 +122,7 @@
} ScriptTLSStruct;
typedef struct {
- RsForEachStubParamStruct fep;
+ RsExpandKernelDriverInfo fep;
RsdCpuReferenceImpl *rsc;
RsdCpuScriptImpl *script;
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index a7c9487..c839c19 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> mLUT;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -58,7 +58,7 @@
int dimx, int dimy, int dimz);
-void RsdCpuScriptIntrinsic3DLUT::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 228b887..b604658 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,7 +33,7 @@
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -110,7 +110,7 @@
extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
-void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index c1ca4e2..22e1176 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -44,10 +44,10 @@
int mIradius;
ObjectBaseRef<Allocation> mAlloc;
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
void ComputeGaussianWeights();
@@ -113,7 +113,7 @@
-static void OneVU4(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
+static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x*4;
@@ -131,7 +131,7 @@
out->xyzw = blurredPixel;
}
-static void OneVU1(const RsForEachStubParamStruct *p, float *out, int32_t x, int32_t y,
+static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x;
@@ -243,7 +243,7 @@
}
}
-static void OneHU4(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
+static void OneHU4(const RsExpandKernelParams *p, uchar4 *out, int32_t x,
const float4 *ptrIn, const float* gPtr, int iradius) {
float4 blurredPixel = 0;
@@ -258,7 +258,7 @@
out->xyzw = convert_uchar4(blurredPixel);
}
-static void OneHU1(const RsForEachStubParamStruct *p, uchar *out, int32_t x,
+static void OneHU1(const RsExpandKernelParams *p, uchar *out, int32_t x,
const float *ptrIn, const float* gPtr, int iradius) {
float blurredPixel = 0;
@@ -274,7 +274,7 @@
}
-void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -345,7 +345,7 @@
}
}
-void RsdCpuScriptIntrinsicBlur::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
float buf[4 * 2048];
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index 0f3af5b..0d296ea 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -179,7 +179,7 @@
FunctionTab_t mFnTab;
#endif
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
void updateCoeffCache(float fpMul, float addMul);
@@ -769,7 +769,7 @@
}
-static void One(const RsForEachStubParamStruct *p, void *out,
+static void One(const RsExpandKernelParams *p, void *out,
const void *py, const float* coeff, const float *add,
uint32_t vsin, uint32_t vsout, bool fin, bool fout) {
@@ -870,7 +870,7 @@
//ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], ((float *)out)[3]);
}
-void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index 552a835..d5af88c 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -88,7 +88,7 @@
const void *y2, const short *coef, uint32_t count);
-static void ConvolveOneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void ConvolveOneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
const float* coeff) {
@@ -110,7 +110,7 @@
*out = o;
}
-static void ConvolveOneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void ConvolveOneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
const float* coeff) {
@@ -131,7 +131,7 @@
*out = convert_uchar2(px);
}
-static void ConvolveOneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void ConvolveOneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2,
const float* coeff) {
@@ -150,7 +150,7 @@
*out = clamp(px, 0.f, 255.f);
}
-static void ConvolveOneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2,
const float* coeff) {
@@ -161,7 +161,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void ConvolveOneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2,
const float* coeff) {
@@ -172,7 +172,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void ConvolveOneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2,
const float* coeff) {
@@ -183,7 +183,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -230,7 +230,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -275,7 +275,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -320,7 +320,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -365,7 +365,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -409,7 +409,7 @@
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index 48b5ca5..8421175 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<Allocation> alloc;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
@@ -86,7 +86,7 @@
}
-static void OneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void OneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
const float* coeff) {
@@ -129,7 +129,7 @@
*out = convert_uchar4(px);
}
-static void OneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void OneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4,
const float* coeff) {
@@ -172,7 +172,7 @@
*out = convert_uchar2(px);
}
-static void OneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void OneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4,
const float* coeff) {
@@ -215,7 +215,7 @@
*out = px;
}
-static void OneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void OneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4,
const float* coeff) {
@@ -257,7 +257,7 @@
*out = px;
}
-static void OneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void OneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4,
const float* coeff) {
@@ -299,7 +299,7 @@
*out = px;
}
-static void OneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void OneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2, const float *py3, const float *py4,
const float* coeff) {
@@ -346,7 +346,7 @@
const void *y2, const void *y3, const void *y4,
const short *coef, uint32_t count);
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -406,7 +406,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -455,7 +455,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -504,7 +504,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -553,7 +553,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -602,7 +602,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index cdfe7d1..1c430b7 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -49,29 +49,29 @@
int *mSums;
ObjectBaseRef<Allocation> mAllocOut;
- static void kernelP1U4(const RsForEachStubParamStruct *p,
+ static void kernelP1U4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U3(const RsForEachStubParamStruct *p,
+ static void kernelP1U3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U2(const RsForEachStubParamStruct *p,
+ static void kernelP1U2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U1(const RsForEachStubParamStruct *p,
+ static void kernelP1U1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L4(const RsForEachStubParamStruct *p,
+ static void kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L3(const RsForEachStubParamStruct *p,
+ static void kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L2(const RsForEachStubParamStruct *p,
+ static void kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L1(const RsForEachStubParamStruct *p,
+ static void kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
@@ -160,7 +160,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -177,7 +177,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -193,7 +193,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -208,7 +208,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -226,7 +226,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -243,7 +243,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -259,7 +259,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -274,7 +274,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index 5b2adc5..db73a83 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> lut;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -53,7 +53,7 @@
}
-void RsdCpuScriptIntrinsicLUT::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
index c31fcdf..45f85e5 100644
--- a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
+++ b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
@@ -174,7 +174,7 @@
ObjectBaseRef<Allocation> mFrameBuffer;
void doLoopFilter();
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -182,7 +182,7 @@
}
}
-void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicLoopFilter *cp = (RsdCpuScriptIntrinsicLoopFilter*)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index d18eb8f..af1127e 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -46,13 +46,13 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -179,7 +179,7 @@
return (uchar)p;
}
-void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
@@ -219,7 +219,7 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
@@ -259,7 +259,7 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 563b3e1..d9ab98c 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -46,7 +46,7 @@
protected:
ObjectBaseRef<Allocation> alloc;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -101,7 +101,7 @@
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, size_t xend);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, size_t xstart, size_t xend);
-void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index f4ca1ed..d51e9e3 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -47,7 +47,7 @@
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
typedef void (*outer_foreach_t)(
- const RsForEachStubParamStruct *,
+ const RsExpandKernelParams *,
uint32_t x1, uint32_t x2,
uint32_t instep, uint32_t outstep);
#ifdef RS_COMPATIBILITY_LIB
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 40eddf2..1e42185 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -44,66 +44,71 @@
}
-typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
+typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
-void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
+void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)p->usr;
- RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
- const void *oldUsr = p->usr;
+ const ScriptList *sl = (const ScriptList *)kparams->usr;
+ RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
- for(size_t ct=0; ct < sl->count; ct++) {
+ for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
- func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mp->usr = sl->usrPtrs[ct];
+ func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
+ mkparams->usr = sl->usrPtrs[ct];
- mp->ptrIn = NULL;
- mp->in = NULL;
- mp->ptrOut = NULL;
- mp->out = NULL;
+ mkparams->in = NULL;
+ mkparams->out = NULL;
uint32_t istep = 0;
uint32_t ostep = 0;
if (sl->ins[ct]) {
- mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ mkparams->in =
+ (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+
istep = sl->ins[ct]->mHal.state.elementSizeBytes;
- mp->in = mp->ptrIn;
+
if (sl->inExts[ct]) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ mkparams->in =
+ (const uint8_t *)mkparams->in +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
+
+ } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ mkparams->in =
+ (const uint8_t *)mkparams->in +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
}
if (sl->outs[ct]) {
- mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
- mp->out = mp->ptrOut;
+ mkparams->out =
+ (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
+
ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
+
if (sl->outExts[ct]) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
+
+ } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(p, xstart, xend, istep, ostep);
+ func(kparams, xstart, xend, istep, ostep);
}
//ALOGE("script group root");
- //ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- mp->usr = oldUsr;
+ mkparams->usr = sl;
}
@@ -245,5 +250,3 @@
}
}
}
-
-
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 78e179d..71f2dd8 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -33,7 +33,7 @@
CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg);
bool init();
- static void scriptGroupRoot(const RsForEachStubParamStruct *p,
+ static void scriptGroupRoot(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);