Pass RsExpandKernelDriverInfo instead of RsExpandKernelParams.
Which is to say: retire RsExpandKernelParams and pass RsExpandKernelDriverInfo
directly to kernel wrapper functions instead.
Requires a related change in frameworks/compile/libbcc.
Change-Id: I453f45ec18f389e88e27fcfa57ddf245d077cb98
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 2492c22..f164517 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -49,7 +49,7 @@
using namespace android::renderscript;
typedef void (*outer_foreach_t)(
- const android::renderscript::RsExpandKernelParams *,
+ const RsExpandKernelDriverInfo *,
uint32_t x1, uint32_t x2, uint32_t outstep);
@@ -348,28 +348,6 @@
RsExpandKernelDriverInfo,
outer_foreach_t);
-static void kparamSetup(RsExpandKernelParams *kparams, const RsExpandKernelDriverInfo *fep) {
- //ALOGE("kp usr %p", fep->usr);
- //ALOGE("kp slot %i", fep->slot);
- //ALOGE("kp dim %i %i %i", fep->dim.x, fep->dim.y, fep->dim.z);
- //ALOGE("kp lid %i", fep->lid);
- //ALOGE("kp in[0] stide %i ptr %p", fep->inStride[0], fep->inPtr[0]);
- //ALOGE("kp out[0] ptr %p", fep->outPtr[0]);
- //ALOGE("kp loc %i %i %i", fep->current.x, fep->current.y, fep->current.z);
-
- kparams->usr = fep->usr;
- kparams->slot = fep->slot;
- kparams->dimX = fep->dim.x;
- kparams->dimY = fep->dim.y;
- kparams->dimZ = fep->dim.z;
- kparams->lid = fep->lid;
- kparams->inEStrides = (uint32_t *)&fep->inStride[0];
- kparams->ins = (const void **)&fep->inPtr[0];
- kparams->out = fep->outPtr[0];
- kparams->y = fep->current.y;
- kparams->z = fep->current.z;
-}
-
static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
uint32_t x, uint32_t y,
uint32_t z = 0, uint32_t lod = 0,
@@ -436,9 +414,7 @@
mtls->fep.current.array[0], mtls->fep.current.array[1],
mtls->fep.current.array[2], mtls->fep.current.array[3]);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &mtls->fep);
- fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
+ fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
}
}
@@ -464,10 +440,7 @@
for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) {
FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &fep);
-
- fn(&kparams, mtls->start.x, mtls->end.x, fep.outStride[0]);
+ fn(&fep, mtls->start.x, mtls->end.x, fep.outStride[0]);
}
}
}
@@ -491,10 +464,7 @@
FepPtrSetup(mtls, &fep, xStart, 0);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &fep);
-
- fn(&kparams, xStart, xEnd, fep.outStride[0]);
+ fn(&fep, xStart, xEnd, fep.outStride[0]);
}
}
@@ -579,9 +549,7 @@
mtls->fep.current.array[0], mtls->fep.current.array[1],
mtls->fep.current.array[2], mtls->fep.current.array[3]);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &mtls->fep);
- fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
+ fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
}
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index 5f1913f..afe8ef5 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -25,9 +25,6 @@
#include "rsScriptC.h"
#include "rsCpuCoreRuntime.h"
-
-#define RS_KERNEL_INPUT_LIMIT 8
-
namespace bcc {
class BCCContext;
class RSCompilerDriver;
@@ -37,51 +34,6 @@
namespace android {
namespace renderscript {
-struct StridePair {
- uint32_t eStride;
- uint32_t yStride;
-};
-
-struct RsLaunchDimensions {
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t lod;
- uint32_t face;
- uint32_t array[4 /*make a define*/];
-};
-
-struct RsExpandKernelDriverInfo {
- // Warning: This structure is shared with the compiler
- // Any change to the fields here requires a matching compiler change
-
- const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
- uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
- uint32_t inLen;
-
- uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
- uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
- uint32_t outLen;
-
- // Dimension of the launch
- RsLaunchDimensions dim;
-
- // The walking itterator of the launch
- RsLaunchDimensions current;
-
- const void *usr;
- uint32_t usrLen;
-
-
-
- // Items below this line are not used by the compiler and can be change in the driver
- uint32_t lid;
- uint32_t slot;
-
-};
-
-typedef ::RsExpandKernelParams RsExpandKernelParams;
-
extern bool gArchUseSIMD;
typedef void (* InvokeFunc_t)(void);
diff --git a/cpu_ref/rsCpuCoreRuntime.h b/cpu_ref/rsCpuCoreRuntime.h
index 19add1b..c7841ec 100644
--- a/cpu_ref/rsCpuCoreRuntime.h
+++ b/cpu_ref/rsCpuCoreRuntime.h
@@ -20,28 +20,45 @@
#ifndef RSD_CPU_CORE_RUNTIME_H
#define RSD_CPU_CORE_RUNTIME_H
-struct RsExpandKernelParams {
+// Warning: This value is shared with the compiler
+// Any change to this value requires a matching compiler change
+#define RS_KERNEL_INPUT_LIMIT 8
- // Used by kernels
- const void **ins;
- uint32_t *inEStrides;
- void *out;
+struct RsLaunchDimensions {
+ // Warning: This structure is shared with the compiler
+ // Any change to the fields here requires a matching compiler change
+
+ uint32_t x;
uint32_t y;
uint32_t z;
- uint32_t lid;
+ uint32_t lod;
+ uint32_t face;
+ uint32_t array[4 /*make a define*/];
+};
- // Used by ScriptGroup and user kernels.
+struct RsExpandKernelDriverInfo {
+ // Warning: This structure is shared with the compiler
+ // Any change to the fields here requires a matching compiler change
+
+ const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
+ uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
+ uint32_t inLen;
+
+ uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
+ uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
+ uint32_t outLen;
+
+ // Dimension of the launch
+ struct RsLaunchDimensions dim;
+
+ // The walking iterator of the launch
+ struct RsLaunchDimensions current;
+
const void *usr;
+ uint32_t usrLen;
- // Used by intrinsics
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
-
- /*
- * FIXME: This is only used by the blend intrinsic. If possible, we should
- * modify blur to not need it.
- */
+ // Items below this line are not used by the compiler and can be changed in the driver
+ uint32_t lid;
uint32_t slot;
};
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index 86d0478..734e5e5 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> mLUT;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -58,13 +58,13 @@
int dimx, int dimy, int dimz);
-void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
+ RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr;
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->ins[0];
+ uchar4 *out = (uchar4 *)info->outPtr[0];
+ uchar4 *in = (uchar4 *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 27a02b7..16348c6 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,7 +33,7 @@
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- static void kernel(const RsExpandKernelParams *p, uint32_t xstart,
+ static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
uint32_t xend, uint32_t outstep);
};
@@ -109,24 +109,24 @@
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
-void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
+ RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
// instep/outstep can be ignored--sizeof(uchar4) known at compile time
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->ins[0];
+ uchar4 *out = (uchar4 *)info->outPtr[0];
+ uchar4 *in = (uchar4 *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
if (gArchUseSIMD) {
- if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
+ if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
return;
}
#endif
- switch (p->slot) {
+ switch (info->slot) {
case BLEND_CLEAR:
for (;x1 < x2; x1++, out++) {
*out = 0;
@@ -483,7 +483,7 @@
break;
default:
- ALOGE("Called unimplemented value %d", p->slot);
+ ALOGE("Called unimplemented value %d", info->slot);
rsAssert(false);
}
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index ffdb74b..a3ed1d1 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -44,10 +44,10 @@
int mIradius;
ObjectBaseRef<Allocation> mAlloc;
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
void ComputeGaussianWeights();
@@ -113,7 +113,7 @@
-static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_t y,
+static void OneVU4(const RsExpandKernelDriverInfo *info, float4 *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x*4;
@@ -121,7 +121,7 @@
float4 blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validY = rsMax((y + r), 0);
- validY = rsMin(validY, (int)(p->dimY - 1));
+ validY = rsMin(validY, (int)(info->dim.y- 1));
const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
float4 pf = convert_float4(pvy[0]);
blurredPixel += pf * gPtr[0];
@@ -131,7 +131,7 @@
out[0] = blurredPixel;
}
-static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
+static void OneVU1(const RsExpandKernelDriverInfo *info, float *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x;
@@ -139,7 +139,7 @@
float blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validY = rsMax((y + r), 0);
- validY = rsMin(validY, (int)(p->dimY - 1));
+ validY = rsMin(validY, (int)(info->dim.y - 1));
float pf = (float)pi[validY * iStride];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -247,13 +247,13 @@
}
}
-static void OneHU4(const RsExpandKernelParams *p, uchar4 *out, int32_t x,
+static void OneHU4(const RsExpandKernelDriverInfo *info, uchar4 *out, int32_t x,
const float4 *ptrIn, const float* gPtr, int iradius) {
float4 blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validX = rsMax((x + r), 0);
- validX = rsMin(validX, (int)(p->dimX - 1));
+ validX = rsMin(validX, (int)(info->dim.x - 1));
float4 pf = ptrIn[validX];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -262,13 +262,13 @@
out->xyzw = convert_uchar4(blurredPixel);
}
-static void OneHU1(const RsExpandKernelParams *p, uchar *out, int32_t x,
+static void OneHU1(const RsExpandKernelDriverInfo *info, uchar *out, int32_t x,
const float *ptrIn, const float* gPtr, int iradius) {
float blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validX = rsMax((x + r), 0);
- validX = rsMin(validX, (int)(p->dimX - 1));
+ validX = rsMin(validX, (int)(info->dim.x - 1));
float pf = ptrIn[validX];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -278,13 +278,13 @@
}
-void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
float4 stackbuf[2048];
float4 *buf = &stackbuf[0];
- RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
+ RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
return;
@@ -292,36 +292,37 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
if (gArchUseSIMD) {
- rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * p->y), p->dimX, p->dimY,
- stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * info->current.y),
+ info->dim.x, info->dim.y,
+ stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
return;
}
#endif
- if (p->dimX > 2048) {
- if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) {
+ if (info->dim.x > 2048) {
+ if ((info->dim.x > cp->mScratchSize[info->lid]) || !cp->mScratch[info->lid]) {
// Pad the side of the allocation by one unit to allow alignment later
- cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16);
- cp->mScratchSize[p->lid] = p->dimX;
+ cp->mScratch[info->lid] = realloc(cp->mScratch[info->lid], (info->dim.x + 1) * 16);
+ cp->mScratchSize[info->lid] = info->dim.x;
}
// realloc only aligns to 8 bytes so we manually align to 16.
- buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf);
+ buf = (float4 *) ((((intptr_t)cp->mScratch[info->lid]) + 15) & ~0xf);
}
float4 *fout = (float4 *)buf;
- int y = p->y;
- if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) {
+ int y = info->current.y;
+ if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
+ OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x);
} else {
x1 = 0;
- while(p->dimX > x1) {
- OneVU4(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
+ while(info->dim.x > x1) {
+ OneVU4(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;
}
@@ -329,7 +330,7 @@
x1 = xstart;
while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) {
- OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
@@ -344,17 +345,17 @@
}
#endif
while(x2 > x1) {
- OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
float buf[4 * 2048];
- RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
+ RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
return;
@@ -362,27 +363,27 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
if (gArchUseSIMD) {
- rsdIntrinsicBlurU1_K(out, pin + stride * p->y, p->dimX, p->dimY,
- stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
+ stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
return;
}
#endif
float *fout = (float *)buf;
- int y = p->y;
- if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) {
+ int y = info->current.y;
+ if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius -1))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
+ OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x);
} else {
x1 = 0;
- while(p->dimX > x1) {
- OneVU1(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
+ while(info->dim.x > x1) {
+ OneVU1(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;
}
@@ -391,7 +392,7 @@
x1 = xstart;
while ((x1 < x2) &&
((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) {
- OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
@@ -410,7 +411,7 @@
}
#endif
while(x2 > x1) {
- OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index e629dea..a7d576b 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -185,7 +185,7 @@
FunctionTab_t mFnTab;
#endif
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
void updateCoeffCache(float fpMul, float addMul);
@@ -776,7 +776,7 @@
}
-static void One(const RsExpandKernelParams *p, void *out,
+static void One(const RsExpandKernelDriverInfo *info, void *out,
const void *py, const float* coeff, const float *add,
uint32_t vsin, uint32_t vsout, bool fin, bool fout) {
@@ -877,15 +877,15 @@
//ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], ((float *)out)[3]);
}
-void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
+ RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)info->usr;
- uint32_t instep = p->inEStrides[0];
+ uint32_t instep = info->inStride[0];
- uchar *out = (uchar *)p->out;
- uchar *in = (uchar *)p->ins[0];
+ uchar *out = (uchar *)info->outPtr[0];
+ uchar *in = (uchar *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -894,7 +894,7 @@
bool floatIn = !!cp->mLastKey.u.inType;
bool floatOut = !!cp->mLastKey.u.outType;
- //if (!p->y) ALOGE("steps %i %i %i %i", instep, outstep, vsin, vsout);
+ //if (!info->current.y) ALOGE("steps %i %i %i %i", instep, outstep, vsin, vsout);
if(x2 > x1) {
int32_t len = x2 - x1;
@@ -929,7 +929,7 @@
}
while(x1 != x2) {
- One(p, out, in, cp->tmpFp, cp->tmpFpa, vsin, vsout, floatIn, floatOut);
+ One(info, out, in, cp->tmpFp, cp->tmpFpa, vsin, vsout, floatIn, floatOut);
out += outstep;
in += instep;
x1++;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index e3fa245..ce7be79 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -88,12 +88,12 @@
const void *y2, const short *coef, uint32_t count);
-static void ConvolveOneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
+static void ConvolveOneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float4 px = convert_float4(py0[x1]) * coeff[0] +
convert_float4(py0[x]) * coeff[1] +
@@ -110,12 +110,12 @@
*out = o;
}
-static void ConvolveOneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
+static void ConvolveOneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float2 px = convert_float2(py0[x1]) * coeff[0] +
convert_float2(py0[x]) * coeff[1] +
@@ -131,12 +131,12 @@
*out = convert_uchar2(px);
}
-static void ConvolveOneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
+static void ConvolveOneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float px = ((float)py0[x1]) * coeff[0] +
((float)py0[x]) * coeff[1] +
@@ -150,43 +150,43 @@
*out = clamp(px + 0.5f, 0.f, 255.f);
}
-static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
+static void ConvolveOneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
+static void ConvolveOneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
+static void ConvolveOneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -195,17 +195,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar4 *py0 = (const uchar4 *)(pin + stride * y2);
- const uchar4 *py1 = (const uchar4 *)(pin + stride * p->y);
+ const uchar4 *py1 = (const uchar4 *)(pin + stride * info->current.y);
const uchar4 *py2 = (const uchar4 *)(pin + stride * y1);
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU4(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU4(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -223,17 +223,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU4(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU4(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -242,17 +242,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar2 *py0 = (const uchar2 *)(pin + stride * y2);
- const uchar2 *py1 = (const uchar2 *)(pin + stride * p->y);
+ const uchar2 *py1 = (const uchar2 *)(pin + stride * info->current.y);
const uchar2 *py2 = (const uchar2 *)(pin + stride * y1);
- uchar2 *out = (uchar2 *)p->out;
+ uchar2 *out = (uchar2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU2(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU2(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -268,17 +268,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU2(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU2(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -287,17 +287,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar *py0 = (const uchar *)(pin + stride * y2);
- const uchar *py1 = (const uchar *)(pin + stride * p->y);
+ const uchar *py1 = (const uchar *)(pin + stride * info->current.y);
const uchar *py2 = (const uchar *)(pin + stride * y1);
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU1(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU1(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -313,17 +313,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU1(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU1(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -332,17 +332,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float4 *py0 = (const float4 *)(pin + stride * y2);
- const float4 *py1 = (const float4 *)(pin + stride * p->y);
+ const float4 *py1 = (const float4 *)(pin + stride * info->current.y);
const float4 *py2 = (const float4 *)(pin + stride * y1);
- float4 *out = (float4 *)p->out;
+ float4 *out = (float4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF4(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF4(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -358,17 +358,17 @@
#endif
while(x1 != x2) {
- ConvolveOneF4(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF4(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -377,17 +377,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float2 *py0 = (const float2 *)(pin + stride * y2);
- const float2 *py1 = (const float2 *)(pin + stride * p->y);
+ const float2 *py1 = (const float2 *)(pin + stride * info->current.y);
const float2 *py2 = (const float2 *)(pin + stride * y1);
- float2 *out = (float2 *)p->out;
+ float2 *out = (float2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF2(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF2(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -403,16 +403,16 @@
#endif
while(x1 != x2) {
- ConvolveOneF2(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF2(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -421,17 +421,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float *py0 = (const float *)(pin + stride * y2);
- const float *py1 = (const float *)(pin + stride * p->y);
+ const float *py1 = (const float *)(pin + stride * info->current.y);
const float *py2 = (const float *)(pin + stride * y1);
- float *out = (float *)p->out;
+ float *out = (float *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF1(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF1(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -447,7 +447,7 @@
#endif
while(x1 != x2) {
- ConvolveOneF1(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF1(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index e591e44..29dd886 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<Allocation> alloc;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
@@ -86,15 +86,15 @@
}
-static void OneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
+static void OneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float4 px = convert_float4(py0[x0]) * coeff[0] +
convert_float4(py0[x1]) * coeff[1] +
@@ -129,15 +129,15 @@
*out = convert_uchar4(px);
}
-static void OneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
+static void OneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float2 px = convert_float2(py0[x0]) * coeff[0] +
convert_float2(py0[x1]) * coeff[1] +
@@ -172,15 +172,15 @@
*out = convert_uchar2(px);
}
-static void OneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
+static void OneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float px = (float)(py0[x0]) * coeff[0] +
(float)(py0[x1]) * coeff[1] +
@@ -215,15 +215,15 @@
*out = px;
}
-static void OneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
+static void OneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float4 px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -257,15 +257,15 @@
*out = px;
}
-static void OneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
+static void OneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float2 px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -299,15 +299,15 @@
*out = px;
}
-static void OneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
+static void OneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2, const float *py3, const float *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -346,10 +346,10 @@
const void *y2, const void *y3, const void *y4,
const short *coef, uint32_t count);
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -357,11 +357,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar4 *py0 = (const uchar4 *)(pin + stride * y0);
const uchar4 *py1 = (const uchar4 *)(pin + stride * y1);
@@ -369,12 +369,12 @@
const uchar4 *py3 = (const uchar4 *)(pin + stride * y3);
const uchar4 *py4 = (const uchar4 *)(pin + stride * y4);
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -400,16 +400,16 @@
#endif
while(x1 < x2) {
- OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -417,11 +417,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar2 *py0 = (const uchar2 *)(pin + stride * y0);
const uchar2 *py1 = (const uchar2 *)(pin + stride * y1);
@@ -429,12 +429,12 @@
const uchar2 *py3 = (const uchar2 *)(pin + stride * y3);
const uchar2 *py4 = (const uchar2 *)(pin + stride * y4);
- uchar2 *out = (uchar2 *)p->out;
+ uchar2 *out = (uchar2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -449,16 +449,16 @@
#endif
while(x1 < x2) {
- OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -466,11 +466,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar *py0 = (const uchar *)(pin + stride * y0);
const uchar *py1 = (const uchar *)(pin + stride * y1);
@@ -478,12 +478,12 @@
const uchar *py3 = (const uchar *)(pin + stride * y3);
const uchar *py4 = (const uchar *)(pin + stride * y4);
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -498,16 +498,16 @@
#endif
while(x1 < x2) {
- OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -515,11 +515,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float4 *py0 = (const float4 *)(pin + stride * y0);
const float4 *py1 = (const float4 *)(pin + stride * y1);
@@ -527,12 +527,12 @@
const float4 *py3 = (const float4 *)(pin + stride * y3);
const float4 *py4 = (const float4 *)(pin + stride * y4);
- float4 *out = (float4 *)p->out;
+ float4 *out = (float4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -547,16 +547,16 @@
#endif
while(x1 < x2) {
- OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -564,11 +564,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float2 *py0 = (const float2 *)(pin + stride * y0);
const float2 *py1 = (const float2 *)(pin + stride * y1);
@@ -576,12 +576,12 @@
const float2 *py3 = (const float2 *)(pin + stride * y3);
const float2 *py4 = (const float2 *)(pin + stride * y4);
- float2 *out = (float2 *)p->out;
+ float2 *out = (float2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -596,16 +596,16 @@
#endif
while(x1 < x2) {
- OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -613,11 +613,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float *py0 = (const float *)(pin + stride * y0);
const float *py1 = (const float *)(pin + stride * y1);
@@ -625,12 +625,12 @@
const float *py3 = (const float *)(pin + stride * y3);
const float *py4 = (const float *)(pin + stride * y4);
- float *out = (float *)p->out;
+ float *out = (float *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -645,7 +645,7 @@
#endif
while(x1 < x2) {
- OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index 4779187..fd60794 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -49,29 +49,29 @@
int *mSums;
ObjectBaseRef<Allocation> mAllocOut;
- static void kernelP1U4(const RsExpandKernelParams *p,
+ static void kernelP1U4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U3(const RsExpandKernelParams *p,
+ static void kernelP1U3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U2(const RsExpandKernelParams *p,
+ static void kernelP1U2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U1(const RsExpandKernelParams *p,
+ static void kernelP1U1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L4(const RsExpandKernelParams *p,
+ static void kernelP1L4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L3(const RsExpandKernelParams *p,
+ static void kernelP1L3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L2(const RsExpandKernelParams *p,
+ static void kernelP1L2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L1(const RsExpandKernelParams *p,
+ static void kernelP1L1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
@@ -166,61 +166,61 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 4 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 4 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 4 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 4 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 2 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 2 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 1) ] ++;
sums[(in[1] << 1) + 1] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
@@ -228,69 +228,69 @@
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[in[0]] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index b08a0e5..622fe1e 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> lut;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -53,13 +53,13 @@
}
-void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
+ RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)info->usr;
- uchar *out = (uchar *)p->out;
- const uchar *in = (uchar *)p->ins[0];
+ uchar *out = (uchar *)info->outPtr[0];
+ const uchar *in = (uchar *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index 2c51b5a..5668d96 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -46,22 +46,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -255,10 +255,10 @@
return p;
}
-void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -269,7 +269,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -283,7 +283,7 @@
const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
- uchar4 *out = ((uchar4 *)p->out) + xstart;
+ uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -295,10 +295,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -309,7 +309,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -323,7 +323,7 @@
const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
- uchar2 *out = ((uchar2 *)p->out) + xstart;
+ uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -335,10 +335,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -349,7 +349,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -363,7 +363,7 @@
const uchar *yp2 = pin + stride * ys2;
const uchar *yp3 = pin + stride * ys3;
- uchar *out = ((uchar *)p->out) + xstart;
+ uchar *out = ((uchar *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -375,10 +375,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -389,7 +389,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -403,7 +403,7 @@
const float4 *yp2 = (const float4 *)(pin + stride * ys2);
const float4 *yp3 = (const float4 *)(pin + stride * ys3);
- float4 *out = ((float4 *)p->out) + xstart;
+ float4 *out = ((float4 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -415,10 +415,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -429,7 +429,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -443,7 +443,7 @@
const float2 *yp2 = (const float2 *)(pin + stride * ys2);
const float2 *yp3 = (const float2 *)(pin + stride * ys3);
- float2 *out = ((float2 *)p->out) + xstart;
+ float2 *out = ((float2 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -455,10 +455,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -469,7 +469,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -483,7 +483,7 @@
const float *yp2 = (const float *)(pin + stride * ys2);
const float *yp3 = (const float *)(pin + stride * ys3);
- float *out = ((float *)p->out) + xstart;
+ float *out = ((float *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index afd3cd3..395a158 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -46,7 +46,7 @@
protected:
ObjectBaseRef<Allocation> alloc;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -101,10 +101,10 @@
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, size_t xend);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, size_t xstart, size_t xend);
-void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
+ RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)info->usr;
if (!cp->alloc.get()) {
ALOGE("YuvToRGB executed without input, skipping");
return;
@@ -119,11 +119,11 @@
// calculate correct stride in legacy case
if (cp->alloc->mHal.drvState.lod[0].dimY == 0) {
- strideY = p->dimX;
+ strideY = info->dim.x;
}
- const uchar *Y = pinY + (p->y * strideY);
+ const uchar *Y = pinY + (info->current.y * strideY);
- uchar4 *out = (uchar4 *)p->out + xstart;
+ uchar4 *out = (uchar4 *)info->outPtr[0] + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -131,23 +131,23 @@
const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
- const uchar *u = pinU + ((p->y >> 1) * strideU);
+ const uchar *u = pinU + ((info->current.y >> 1) * strideU);
const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
- const uchar *v = pinV + ((p->y >> 1) * strideV);
+ const uchar *v = pinV + ((info->current.y >> 1) * strideV);
- //ALOGE("pinY, %p, Y, %p, p->y, %d, strideY, %d", pinY, Y, p->y, strideY);
- //ALOGE("pinU, %p, U, %p, p->y, %d, strideU, %d", pinU, u, p->y, strideU);
- //ALOGE("pinV, %p, V, %p, p->y, %d, strideV, %d", pinV, v, p->y, strideV);
+ //ALOGE("pinY, %p, Y, %p, info->current.y, %d, strideY, %d", pinY, Y, info->current.y, strideY);
+ //ALOGE("pinU, %p, U, %p, info->current.y, %d, strideU, %d", pinU, u, info->current.y, strideU);
+ //ALOGE("pinV, %p, V, %p, info->current.y, %d, strideV, %d", pinV, v, info->current.y, strideV);
//ALOGE("dimX, %d, dimY, %d", cp->alloc->mHal.drvState.lod[0].dimX, cp->alloc->mHal.drvState.lod[0].dimY);
- //ALOGE("p->dimX, %d, p->dimY, %d", p->dimX, p->dimY);
+ //ALOGE("info->dim.x, %d, info->dim.y, %d", info->dim.x, info->dim.y);
if (pinU == nullptr) {
// Legacy yuv support didn't fill in uv
v = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
- (strideY * p->dimY) +
- ((p->y >> 1) * strideY);
+ (strideY * info->dim.y) +
+ ((info->current.y >> 1) * strideY);
u = v + 1;
cstep = 2;
}
@@ -166,7 +166,7 @@
if((x2 > x1) && gArchUseSIMD) {
int32_t len = x2 - x1;
if (cstep == 1) {
- rsdIntrinsicYuv2_K(p->out, Y, u, v, x1, x2);
+ rsdIntrinsicYuv2_K(info->outPtr[0], Y, u, v, x1, x2);
x1 += len;
out += len;
} else if (cstep == 2) {
@@ -175,11 +175,11 @@
intptr_t ipv = (intptr_t)v;
if (ipu == (ipv + 1)) {
- rsdIntrinsicYuv_K(p->out, Y, v, x1, x2);
+ rsdIntrinsicYuv_K(info->outPtr[0], Y, v, x1, x2);
x1 += len;
out += len;
} else if (ipu == (ipv - 1)) {
- rsdIntrinsicYuvR_K(p->out, Y, u, x1, x2);
+ rsdIntrinsicYuvR_K(info->outPtr[0], Y, u, x1, x2);
x1 += len;
out += len;
}
@@ -188,7 +188,7 @@
#endif
if(x2 > x1) {
- // ALOGE("y %i %i %i", p->y, x1, x2);
+ // ALOGE("y %i %i %i", info->current.y, x1, x2);
while(x1 < x2) {
int cx = (x1 >> 1) * cstep;
*out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index aaaa2a2..72da141 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -43,7 +43,7 @@
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
typedef void (*outer_foreach_t)(
- const RsExpandKernelParams *,
+ const RsExpandKernelDriverInfo *,
uint32_t x1, uint32_t x2,
uint32_t outstep);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 281a715..82208db 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -42,83 +42,83 @@
}
-typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
+typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
-void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
+void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)kparams->usr;
- RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
+ const ScriptList *sl = (const ScriptList *)kinfo->usr;
+ RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
- const void **oldIns = mkparams->ins;
- uint32_t *oldStrides = mkparams->inEStrides;
-
- void *localIns[1];
- uint32_t localStride[1];
-
- mkparams->ins = (const void**)localIns;
- mkparams->inEStrides = localStride;
+ const uint32_t oldInStride = mkinfo->inStride[0];
for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mkparams->usr = sl->usrPtrs[ct];
+ mkinfo->usr = sl->usrPtrs[ct];
if (sl->ins[ct]) {
- localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ rsAssert(kinfo->inLen == 1);
- localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
+ mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+
+ mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
if (sl->inExts[ct]) {
- localIns[0] = (void*)
- ((const uint8_t *)localIns[0] +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
+ mkinfo->inPtr[0] =
+ (mkinfo->inPtr[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
- } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- localIns[0] = (void*)
- ((const uint8_t *)localIns[0] +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
+ } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
+ mkinfo->inPtr[0] =
+ (mkinfo->inPtr[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
}
} else {
- localIns[0] = nullptr;
- localStride[0] = 0;
+ rsAssert(kinfo->inLen == 0);
+
+ mkinfo->inPtr[0] = nullptr;
+ mkinfo->inStride[0] = 0;
}
uint32_t ostep;
if (sl->outs[ct]) {
- mkparams->out =
+ rsAssert(kinfo->outLen == 1);
+
+ mkinfo->outPtr[0] =
(uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
if (sl->outExts[ct]) {
- mkparams->out =
- (uint8_t *)mkparams->out +
- sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
+ mkinfo->outPtr[0] =
+ mkinfo->outPtr[0] +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
- } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- mkparams->out =
- (uint8_t *)mkparams->out +
- sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
+ } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
+ mkinfo->outPtr[0] =
+ mkinfo->outPtr[0] +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
}
} else {
- mkparams->out = nullptr;
- ostep = 0;
+ rsAssert(kinfo->outLen == 0);
+
+ mkinfo->outPtr[0] = nullptr;
+ ostep = 0;
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(kparams, xstart, xend, ostep);
+ func(kinfo, xstart, xend, ostep);
}
//ALOGE("script group root");
- mkparams->ins = oldIns;
- mkparams->inEStrides = oldStrides;
- mkparams->usr = sl;
+ mkinfo->inStride[0] = oldInStride;
+ mkinfo->usr = sl;
}
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 50ba2ac..acfe754 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -33,7 +33,7 @@
CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg);
bool init();
- static void scriptGroupRoot(const RsExpandKernelParams *p,
+ static void scriptGroupRoot(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 915fa4d..2e50ecb 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -16,7 +16,6 @@
#endif
#include "cpu_ref/rsCpuCore.h"
-#include "cpu_ref/rsCpuCoreRuntime.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
@@ -36,21 +35,21 @@
const size_t DefaultKernelArgCount = 2;
-void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
+void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
uint32_t xend, uint32_t outstep) {
- const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
- RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
- const void **oldIns = kparams->ins;
- uint32_t *oldStrides = kparams->inEStrides;
+ const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
+ RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
- std::vector<const void*> ins(DefaultKernelArgCount);
- std::vector<uint32_t> strides(DefaultKernelArgCount);
+ const size_t oldInLen = mutable_kinfo->inLen;
+
+ decltype(mutable_kinfo->inStride) oldInStride;
+ memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
for (CPUClosure* cpuClosure : closures) {
const Closure* closure = cpuClosure->mClosure;
- auto in_iter = ins.begin();
- auto stride_iter = strides.begin();
+ // There had better be enough space in mutable_kinfo
+ rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
for (size_t i = 0; i < closure->mNumArg; i++) {
const void* arg = closure->mArgs[i];
@@ -58,31 +57,30 @@
const uint32_t eStride = a->mHal.state.elementSizeBytes;
const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
eStride * xstart;
- if (kparams->dimY > 1) {
- ptr += a->mHal.drvState.lod[0].stride * kparams->y;
+ if (kinfo->dim.y > 1) {
+ ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
}
- *in_iter++ = ptr;
- *stride_iter++ = eStride;
+ mutable_kinfo->inPtr[i] = ptr;
+ mutable_kinfo->inStride[i] = eStride;
}
-
- mutable_kparams->ins = &ins[0];
- mutable_kparams->inEStrides = &strides[0];
+ mutable_kinfo->inLen = closure->mNumArg;
const Allocation* out = closure->mReturnValue;
const uint32_t ostep = out->mHal.state.elementSizeBytes;
const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
ostep * xstart;
- if (kparams->dimY > 1) {
- ptr += out->mHal.drvState.lod[0].stride * kparams->y;
+ if (kinfo->dim.y > 1) {
+ ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
}
- mutable_kparams->out = (void*)ptr;
+ rsAssert(kinfo->outLen <= 1);
+ mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
- cpuClosure->mFunc(kparams, xstart, xend, ostep);
+ cpuClosure->mFunc(kinfo, xstart, xend, ostep);
}
- mutable_kparams->ins = oldIns;
- mutable_kparams->inEStrides = oldStrides;
+ mutable_kinfo->inLen = oldInLen;
+ memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
}
} // namespace
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
index 3074cc8..1883f90 100644
--- a/cpu_ref/rsCpuScriptGroup2.h
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -4,7 +4,7 @@
#include "rsd_cpu.h"
#include "rsList.h"
-struct RsExpandKernelParams;
+struct RsExpandKernelDriverInfo;
namespace android {
namespace renderscript {
@@ -15,9 +15,7 @@
class ScriptExecutable;
class ScriptGroup2;
-typedef ::RsExpandKernelParams RsExpandKernelParams;
-
-typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t,
+typedef void (*ExpandFuncTy)(const RsExpandKernelDriverInfo*, uint32_t, uint32_t,
uint32_t);
typedef void (*InvokeFuncTy)(const void*, uint32_t);
diff --git a/driver/runtime/rs_core.c b/driver/runtime/rs_core.c
index 3fd48a1..de3364e 100644
--- a/driver/runtime/rs_core.c
+++ b/driver/runtime/rs_core.c
@@ -178,15 +178,15 @@
}
extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimX;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x;
}
extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimY;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y;
}
extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimZ;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z;
}
#define PRIM_DEBUG(T) \