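Separate the CPU driver implementation from the reference driver.

Per-LOD allocation state (mallocPtr, stride, dimX/Y/Z, faceOffset,
lodCount, faceCount) is no longer kept in DrvAllocation; it is read and
written through alloc->mHal.drvState, replacing the old mallocPtrLOD0 and
strideLOD0 fields. The script entry points in rsdBcc.cpp now forward to
RsdCpuReference::CpuScript (declared in cpu_ref/rsd_cpu.h), and the bcc
compilation, TLS and for-each thread-launch code, together with DrvScript
and the launch structs in rsdBcc.h, is removed from the driver.

Change-Id: Ifb484edda665959b81d7b1f890d108bfa20a535d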
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 8956b2e..928f777 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -16,7 +16,6 @@
#include "rsdCore.h"
-#include "rsdRuntime.h"
#include "rsdAllocation.h"
#include "rsdFrameBufferObj.h"
@@ -80,10 +79,9 @@
uint8_t *GetOffsetPtr(const android::renderscript::Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod,
RsAllocationCubemapFace face) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- uint8_t *ptr = (uint8_t *)drv->lod[lod].mallocPtr;
- ptr += face * drv->faceOffset;
- ptr += yoff * drv->lod[lod].stride;
+ uint8_t *ptr = (uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
+ ptr += face * alloc->mHal.drvState.faceOffset;
+ ptr += yoff * alloc->mHal.drvState.lod[lod].stride;
ptr += xoff * alloc->mHal.state.elementSizeBytes;
return ptr;
}
@@ -160,7 +158,7 @@
return;
}
- if (!drv->lod[0].mallocPtr) {
+ if (!alloc->mHal.drvState.lod[0].mallocPtr) {
return;
}
@@ -174,10 +172,9 @@
Upload2DTexture(rsc, alloc, isFirstUpload);
if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_SCRIPT)) {
- if (alloc->mHal.drvState.mallocPtrLOD0) {
- free(alloc->mHal.drvState.mallocPtrLOD0);
- alloc->mHal.drvState.mallocPtrLOD0 = NULL;
- drv->lod[0].mallocPtr = NULL;
+ if (alloc->mHal.drvState.lod[0].mallocPtr) {
+ free(alloc->mHal.drvState.lod[0].mallocPtr);
+ alloc->mHal.drvState.lod[0].mallocPtr = NULL;
}
}
rsdGLCheckError(rsc, "UploadToTexture");
@@ -224,54 +221,50 @@
}
RSD_CALL_GL(glBindBuffer, drv->glTarget, drv->bufferID);
RSD_CALL_GL(glBufferData, drv->glTarget, alloc->mHal.state.type->getSizeBytes(),
- alloc->mHal.drvState.mallocPtrLOD0, GL_DYNAMIC_DRAW);
+ alloc->mHal.drvState.lod[0].mallocPtr, GL_DYNAMIC_DRAW);
RSD_CALL_GL(glBindBuffer, drv->glTarget, 0);
rsdGLCheckError(rsc, "UploadToBufferObject");
}
static size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc,
const Type *type, uint8_t *ptr) {
-
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
-
- drv->lod[0].dimX = type->getDimX();
- drv->lod[0].dimY = type->getDimY();
- drv->lod[0].mallocPtr = 0;
- drv->lod[0].stride = drv->lod[0].dimX * type->getElementSizeBytes();
- drv->lodCount = type->getLODCount();
- drv->faceCount = type->getDimFaces();
+ alloc->mHal.drvState.lod[0].dimX = type->getDimX();
+ alloc->mHal.drvState.lod[0].dimY = type->getDimY();
+ alloc->mHal.drvState.lod[0].mallocPtr = 0;
+ alloc->mHal.drvState.lod[0].stride = alloc->mHal.drvState.lod[0].dimX * type->getElementSizeBytes();
+ alloc->mHal.drvState.lodCount = type->getLODCount();
+ alloc->mHal.drvState.faceCount = type->getDimFaces();
size_t offsets[Allocation::MAX_LOD];
memset(offsets, 0, sizeof(offsets));
- size_t o = drv->lod[0].stride * rsMax(drv->lod[0].dimY, 1u) * rsMax(drv->lod[0].dimZ, 1u);
- if(drv->lodCount > 1) {
- uint32_t tx = drv->lod[0].dimX;
- uint32_t ty = drv->lod[0].dimY;
- uint32_t tz = drv->lod[0].dimZ;
- for (uint32_t lod=1; lod < drv->lodCount; lod++) {
- drv->lod[lod].dimX = tx;
- drv->lod[lod].dimY = ty;
- drv->lod[lod].dimZ = tz;
- drv->lod[lod].stride = tx * type->getElementSizeBytes();
+ size_t o = alloc->mHal.drvState.lod[0].stride * rsMax(alloc->mHal.drvState.lod[0].dimY, 1u) *
+ rsMax(alloc->mHal.drvState.lod[0].dimZ, 1u);
+ if(alloc->mHal.drvState.lodCount > 1) {
+ uint32_t tx = alloc->mHal.drvState.lod[0].dimX;
+ uint32_t ty = alloc->mHal.drvState.lod[0].dimY;
+ uint32_t tz = alloc->mHal.drvState.lod[0].dimZ;
+ for (uint32_t lod=1; lod < alloc->mHal.drvState.lodCount; lod++) {
+ alloc->mHal.drvState.lod[lod].dimX = tx;
+ alloc->mHal.drvState.lod[lod].dimY = ty;
+ alloc->mHal.drvState.lod[lod].dimZ = tz;
+ alloc->mHal.drvState.lod[lod].stride = tx * type->getElementSizeBytes();
offsets[lod] = o;
- o += drv->lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u);
+ o += alloc->mHal.drvState.lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u);
if (tx > 1) tx >>= 1;
if (ty > 1) ty >>= 1;
if (tz > 1) tz >>= 1;
}
}
- drv->faceOffset = o;
+ alloc->mHal.drvState.faceOffset = o;
- drv->lod[0].mallocPtr = ptr;
- for (uint32_t lod=1; lod < drv->lodCount; lod++) {
- drv->lod[lod].mallocPtr = ptr + offsets[lod];
+ alloc->mHal.drvState.lod[0].mallocPtr = ptr;
+ for (uint32_t lod=1; lod < alloc->mHal.drvState.lodCount; lod++) {
+ alloc->mHal.drvState.lod[lod].mallocPtr = ptr + offsets[lod];
}
- alloc->mHal.drvState.strideLOD0 = drv->lod[0].stride;
- alloc->mHal.drvState.mallocPtrLOD0 = ptr;
- size_t allocSize = drv->faceOffset;
- if(drv->faceCount) {
+ size_t allocSize = alloc->mHal.drvState.faceOffset;
+ if(alloc->mHal.drvState.faceCount) {
allocSize *= 6;
}
@@ -352,9 +345,9 @@
drv->renderTargetID = 0;
}
- if (alloc->mHal.drvState.mallocPtrLOD0) {
- free(alloc->mHal.drvState.mallocPtrLOD0);
- alloc->mHal.drvState.mallocPtrLOD0 = NULL;
+ if (alloc->mHal.drvState.lod[0].mallocPtr) {
+ free(alloc->mHal.drvState.lod[0].mallocPtr);
+ alloc->mHal.drvState.lod[0].mallocPtr = NULL;
}
if (drv->readBackFBO != NULL) {
delete drv->readBackFBO;
@@ -366,9 +359,7 @@
void rsdAllocationResize(const Context *rsc, const Allocation *alloc,
const Type *newType, bool zeroNew) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
-
- void * oldPtr = drv->lod[0].mallocPtr;
+ void * oldPtr = alloc->mHal.drvState.lod[0].mallocPtr;
// Calculate the object size
size_t s = AllocationBuildPointerTable(rsc, alloc, newType, NULL);
uint8_t *ptr = (uint8_t *)realloc(oldPtr, s);
@@ -383,7 +374,7 @@
if (dimX > oldDimX) {
uint32_t stride = alloc->mHal.state.elementSizeBytes;
- memset(((uint8_t *)alloc->mHal.drvState.mallocPtrLOD0) + stride * oldDimX,
+ memset(((uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr) + stride * oldDimX,
0, stride * (dimX - oldDimX));
}
}
@@ -411,8 +402,9 @@
drv->readBackFBO->setActive(rsc);
// Do the readback
- RSD_CALL_GL(glReadPixels, 0, 0, drv->lod[0].dimX, drv->lod[0].dimY,
- drv->glFormat, drv->glType, drv->lod[0].mallocPtr);
+ RSD_CALL_GL(glReadPixels, 0, 0, alloc->mHal.drvState.lod[0].dimX,
+ alloc->mHal.drvState.lod[0].dimY,
+ drv->glFormat, drv->glType, alloc->mHal.drvState.lod[0].mallocPtr);
// Revert framebuffer to its original
lastFbo->setActive(rsc);
@@ -482,9 +474,8 @@
mapper.lock(drv->wndBuffer->handle,
GRALLOC_USAGE_SW_READ_NEVER | GRALLOC_USAGE_SW_WRITE_OFTEN,
bounds, &dst);
- drv->lod[0].mallocPtr = dst;
- alloc->mHal.drvState.mallocPtrLOD0 = dst;
- drv->lod[0].stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes;
+ alloc->mHal.drvState.lod[0].mallocPtr = dst;
+ alloc->mHal.drvState.lod[0].stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes;
return true;
}
@@ -597,7 +588,7 @@
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
uint32_t lineSize = eSize * w;
- if (drv->lod[0].mallocPtr) {
+ if (alloc->mHal.drvState.lod[0].mallocPtr) {
const uint8_t *src = static_cast<const uint8_t *>(data);
uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face);
@@ -608,7 +599,7 @@
}
memcpy(dst, src, lineSize);
src += lineSize;
- dst += drv->lod[lod].stride;
+ dst += alloc->mHal.drvState.lod[lod].stride;
}
drv->uploadDeferred = true;
} else {
@@ -626,8 +617,6 @@
void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t lod, uint32_t count,
void *data, size_t sizeBytes) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
-
const uint32_t eSize = alloc->mHal.state.type->getElementSizeBytes();
const uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
memcpy(data, ptr, count * eSize);
@@ -636,19 +625,17 @@
void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc,
uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
-
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
uint32_t lineSize = eSize * w;
- if (drv->lod[0].mallocPtr) {
+ if (alloc->mHal.drvState.lod[0].mallocPtr) {
uint8_t *dst = static_cast<uint8_t *>(data);
const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, lod, face);
for (uint32_t line=yoff; line < (yoff+h); line++) {
memcpy(dst, src, lineSize);
dst += lineSize;
- src += drv->lod[lod].stride;
+ src += alloc->mHal.drvState.lod[lod].stride;
}
} else {
ALOGE("Add code to readback from non-script memory");
@@ -664,8 +651,7 @@
void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *alloc) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- return drv->lod[0].mallocPtr;
+ return alloc->mHal.drvState.lod[0].mallocPtr;
}
void rsdAllocationUnlock1D(const android::renderscript::Context *rsc,
@@ -767,9 +753,8 @@
}
static void mip565(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- uint32_t w = drv->lod[lod + 1].dimX;
- uint32_t h = drv->lod[lod + 1].dimY;
+ uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX;
+ uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
for (uint32_t y=0; y < h; y++) {
uint16_t *oPtr = (uint16_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face);
@@ -786,9 +771,8 @@
}
static void mip8888(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- uint32_t w = drv->lod[lod + 1].dimX;
- uint32_t h = drv->lod[lod + 1].dimY;
+ uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX;
+ uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
for (uint32_t y=0; y < h; y++) {
uint32_t *oPtr = (uint32_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face);
@@ -805,9 +789,8 @@
}
static void mip8(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- uint32_t w = drv->lod[lod + 1].dimX;
- uint32_t h = drv->lod[lod + 1].dimY;
+ uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX;
+ uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY;
for (uint32_t y=0; y < h; y++) {
uint8_t *oPtr = GetOffsetPtr(alloc, 0, y, lod + 1, face);
@@ -824,8 +807,7 @@
}
void rsdAllocationGenerateMipmaps(const Context *rsc, const Allocation *alloc) {
- DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- if(!drv->lod[0].mallocPtr) {
+ if(!alloc->mHal.drvState.lod[0].mallocPtr) {
return;
}
uint32_t numFaces = alloc->getType()->getDimFaces() ? 6 : 1;
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index e6488b9..d2ecc9a 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -21,6 +21,8 @@
#include <rsRuntime.h>
#include <rsAllocation.h>
+#include "../cpu_ref/rsd_cpu.h"
+
#include <GLES/gl.h>
#include <GLES2/gl2.h>
@@ -49,19 +51,6 @@
RsdFrameBufferObj * readBackFBO;
ANativeWindow *wnd;
ANativeWindowBuffer *wndBuffer;
-
- struct LodState {
- void * mallocPtr;
- size_t stride;
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- } lod[android::renderscript::Allocation::MAX_LOD];
- size_t faceOffset;
- uint32_t lodCount;
- uint32_t faceCount;
-
-
};
GLenum rsdTypeToGLType(RsDataType t);
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index ddcaac8..436b9b2 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -14,17 +14,12 @@
* limitations under the License.
*/
+#include "../cpu_ref/rsd_cpu.h"
+
#include "rsdCore.h"
-#include <bcc/BCCContext.h>
-#include <bcc/Renderscript/RSCompilerDriver.h>
-#include <bcc/Renderscript/RSExecutable.h>
-#include <bcc/Renderscript/RSInfo.h>
-
#include "rsdBcc.h"
-#include "rsdRuntime.h"
#include "rsdAllocation.h"
-#include "rsdIntrinsics.h"
#include "rsContext.h"
#include "rsElement.h"
@@ -38,15 +33,6 @@
using namespace android::renderscript;
-static Script * setTLS(Script *sc) {
- ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
- rsAssert(tls);
- Script *old = tls->mScript;
- tls->mScript = sc;
- return old;
-}
-
-
bool rsdScriptInit(const Context *rsc,
ScriptC *script,
char const *resName,
@@ -54,358 +40,26 @@
uint8_t const *bitcode,
size_t bitcodeSize,
uint32_t flags) {
- //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
- //ALOGE("rsdScriptInit %p %p", rsc, script);
-
- pthread_mutex_lock(&rsdgInitMutex);
-
- bcc::RSExecutable *exec;
- const bcc::RSInfo *info;
- DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
- if (drv == NULL) {
- goto error;
+ RsdHal *dc = (RsdHal *)rsc->mHal.drv;
+ RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir,
+ bitcode, bitcodeSize, flags);
+ if (cs == NULL) {
+ return false;
}
- script->mHal.drv = drv;
-
- drv->mCompilerContext = NULL;
- drv->mCompilerDriver = NULL;
- drv->mExecutable = NULL;
-
- drv->mCompilerContext = new bcc::BCCContext();
- if (drv->mCompilerContext == NULL) {
- ALOGE("bcc: FAILS to create compiler context (out of memory)");
- goto error;
- }
-
- drv->mCompilerDriver = new bcc::RSCompilerDriver();
- if (drv->mCompilerDriver == NULL) {
- ALOGE("bcc: FAILS to create compiler driver (out of memory)");
- goto error;
- }
-
- script->mHal.info.isThreadable = true;
-
- drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
- drv->mCompilerDriver->setRSRuntimeLookupContext(script);
-
- exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
- cacheDir, resName,
- (const char *)bitcode, bitcodeSize,
- NULL);
-
- if (exec == NULL) {
- ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
- goto error;
- }
-
- drv->mExecutable = exec;
-
- exec->setThreadable(script->mHal.info.isThreadable);
- if (!exec->syncInfo()) {
- ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
- }
-
- drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
- drv->mRootExpand =
- reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
- drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
- drv->mFreeChildren =
- reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
-
- info = &drv->mExecutable->getInfo();
- // Copy info over to runtime
- script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
- script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
- script->mHal.info.exportedPragmaCount = info->getPragmas().size();
- script->mHal.info.exportedPragmaKeyList =
- const_cast<const char**>(exec->getPragmaKeys().array());
- script->mHal.info.exportedPragmaValueList =
- const_cast<const char**>(exec->getPragmaValues().array());
-
- if (drv->mRootExpand) {
- script->mHal.info.root = drv->mRootExpand;
- } else {
- script->mHal.info.root = drv->mRoot;
- }
-
- if (script->mHal.info.exportedVariableCount) {
- drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount];
- memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount);
- }
-
- pthread_mutex_unlock(&rsdgInitMutex);
+ script->mHal.drv = cs;
+ cs->populateScript(script);
return true;
-
-error:
-
- pthread_mutex_unlock(&rsdgInitMutex);
- if (drv) {
- delete drv->mCompilerContext;
- delete drv->mCompilerDriver;
- delete drv->mExecutable;
- delete[] drv->mBoundAllocs;
- free(drv);
- }
- script->mHal.drv = NULL;
- return false;
-
}
bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
- pthread_mutex_lock(&rsdgInitMutex);
-
- DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
- if (drv == NULL) {
- goto error;
+ RsdHal *dc = (RsdHal *)rsc->mHal.drv;
+ RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e);
+ if (cs == NULL) {
+ return false;
}
- s->mHal.drv = drv;
- drv->mIntrinsicID = iid;
- drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs);
- s->mHal.info.isThreadable = true;
-
- pthread_mutex_unlock(&rsdgInitMutex);
+ s->mHal.drv = cs;
+ cs->populateScript(s);
return true;
-
-error:
- pthread_mutex_unlock(&rsdgInitMutex);
- return false;
-}
-
-typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-
-static void wc_xy(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
- uint32_t sig = mtls->sig;
-
-#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE)
- unsigned char buf[1024 * 8];
-#endif
-
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
- uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
- yEnd = rsMin(yEnd, mtls->yEnd);
- if (yEnd <= yStart) {
- return;
- }
-
- //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
-#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE)
- if (mtls->fep.yStrideOut < sizeof(buf)) {
- p.out = buf;
- for (p.y = yStart; p.y < yEnd; p.y++) {
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- memcpy(mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y), buf, mtls->fep.yStrideOut);
- }
- } else
-#endif
- {
- for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- }
- }
- }
-}
-
-static void wc_x(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
- uint32_t sig = mtls->sig;
-
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
- uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
- xEnd = rsMin(xEnd, mtls->xEnd);
- if (xEnd <= xStart) {
- return;
- }
-
- //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
- p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- }
-}
-
-void rsdScriptInvokeForEachMtlsSetup(const Context *rsc,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- if (ain) {
- mtls->fep.dimX = ain->getType()->getDimX();
- mtls->fep.dimY = ain->getType()->getDimY();
- mtls->fep.dimZ = ain->getType()->getDimZ();
- //mtls->dimArray = ain->getType()->getDimArray();
- } else if (aout) {
- mtls->fep.dimX = aout->getType()->getDimX();
- mtls->fep.dimY = aout->getType()->getDimY();
- mtls->fep.dimZ = aout->getType()->getDimZ();
- //mtls->dimArray = aout->getType()->getDimArray();
- } else {
- rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- Context *mrsc = (Context *)rsc;
- mtls->rsc = mrsc;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 10;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
-
- if (ain) {
- DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv;
- mtls->fep.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = aindrv->lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv;
- mtls->fep.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aoutdrv->lod[0].stride;
- }
-}
-
-void rsdScriptLaunchThreads(const Context *rsc,
- bool isThreadable,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- Context *mrsc = (Context *)rsc;
- RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
-
- if ((dc->mWorkers.mCount >= 1) && isThreadable && !dc->mInForEach) {
- const size_t targetByteChunk = 16 * 1024;
- dc->mInForEach = true;
- if (mtls->fep.dimY > 1) {
- uint32_t s1 = mtls->fep.dimY / ((dc->mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
-
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.yStrideOut) {
- s2 = targetByteChunk / mtls->fep.yStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.yStrideIn;
- }
- mtls->mSliceSize = rsMin(s1, s2);
-
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
- }
-
- rsdLaunchThreads(mrsc, wc_xy, mtls);
- } else {
- uint32_t s1 = mtls->fep.dimX / ((dc->mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
-
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.eStrideOut) {
- s2 = targetByteChunk / mtls->fep.eStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.eStrideIn;
- }
- mtls->mSliceSize = rsMin(s1, s2);
-
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
- }
-
- rsdLaunchThreads(mrsc, wc_x, mtls);
- }
- dc->mInForEach = false;
-
- //ALOGE("launch 1");
- } else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
-
- //ALOGE("launch 3");
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
- mtls->fep.dimY * p.z + p.y;
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- }
- }
- }
- }
}
void rsdScriptInvokeForEach(const Context *rsc,
@@ -417,237 +71,69 @@
uint32_t usrLen,
const RsScriptCall *sc) {
- RsdHal * dc = (RsdHal *)rsc->mHal.drv;
-
- MTLaunchStruct mtls;
- rsdScriptInvokeForEachMtlsSetup(rsc, ain, aout, usr, usrLen, sc, &mtls);
- mtls.script = s;
- mtls.fep.slot = slot;
-
- DrvScript *drv = (DrvScript *)s->mHal.drv;
- if (drv->mIntrinsicID) {
- mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
- mtls.fep.usr = drv->mIntrinsicData;
- } else {
- rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
- mtls.kernel = reinterpret_cast<ForEachFunc_t>(
- drv->mExecutable->getExportForeachFuncAddrs()[slot]);
- rsAssert(mtls.kernel != NULL);
- mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
- }
-
-
- Script * oldTLS = setTLS(s);
- rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ain, aout, usr, usrLen, sc, &mtls);
- setTLS(oldTLS);
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->invokeForEach(slot, ain, aout, usr, usrLen, sc);
}
-int rsdScriptInvokeRoot(const Context *dc, Script *script) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- Script * oldTLS = setTLS(script);
- int ret = drv->mRoot();
- setTLS(oldTLS);
-
- return ret;
+int rsdScriptInvokeRoot(const Context *dc, Script *s) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ return cs->invokeRoot();
}
-void rsdScriptInvokeInit(const Context *dc, Script *script) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- if (drv->mInit) {
- drv->mInit();
- }
+void rsdScriptInvokeInit(const Context *dc, Script *s) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->invokeInit();
}
-void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- if (drv->mFreeChildren) {
- drv->mFreeChildren();
- }
+void rsdScriptInvokeFreeChildren(const Context *dc, Script *s) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->invokeFreeChildren();
}
-void rsdScriptInvokeFunction(const Context *dc, Script *script,
+void rsdScriptInvokeFunction(const Context *dc, Script *s,
uint32_t slot,
const void *params,
size_t paramLength) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
- //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
-
- Script * oldTLS = setTLS(script);
- reinterpret_cast<void (*)(const void *, uint32_t)>(
- drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
- setTLS(oldTLS);
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->invokeFunction(slot, params, paramLength);
}
-void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
+void rsdScriptSetGlobalVar(const Context *dc, const Script *s,
uint32_t slot, void *data, size_t dataLength) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
- //rsAssert(!script->mFieldIsObject[slot]);
- //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
-
- if (drv->mIntrinsicID) {
- drv->mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
- return;
- }
-
- int32_t *destPtr = reinterpret_cast<int32_t *>(
- drv->mExecutable->getExportVarAddrs()[slot]);
- if (!destPtr) {
- //ALOGV("Calling setVar on slot = %i which is null", slot);
- return;
- }
-
- memcpy(destPtr, data, dataLength);
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->setGlobalVar(slot, data, dataLength);
}
-void rsdScriptSetGlobalVarWithElemDims(
- const android::renderscript::Context *dc,
- const android::renderscript::Script *script,
- uint32_t slot, void *data, size_t dataLength,
- const android::renderscript::Element *elem,
- const size_t *dims, size_t dimLength) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- int32_t *destPtr = reinterpret_cast<int32_t *>(
- drv->mExecutable->getExportVarAddrs()[slot]);
- if (!destPtr) {
- //ALOGV("Calling setVar on slot = %i which is null", slot);
- return;
- }
-
- // We want to look at dimension in terms of integer components,
- // but dimLength is given in terms of bytes.
- dimLength /= sizeof(int);
-
- // Only a single dimension is currently supported.
- rsAssert(dimLength == 1);
- if (dimLength == 1) {
- // First do the increment loop.
- size_t stride = elem->getSizeBytes();
- char *cVal = reinterpret_cast<char *>(data);
- for (size_t i = 0; i < dims[0]; i++) {
- elem->incRefs(cVal);
- cVal += stride;
- }
-
- // Decrement loop comes after (to prevent race conditions).
- char *oldVal = reinterpret_cast<char *>(destPtr);
- for (size_t i = 0; i < dims[0]; i++) {
- elem->decRefs(oldVal);
- oldVal += stride;
- }
- }
-
- memcpy(destPtr, data, dataLength);
+void rsdScriptSetGlobalVarWithElemDims(const Context *dc, const Script *s,
+ uint32_t slot, void *data, size_t dataLength,
+ const android::renderscript::Element *elem,
+ const size_t *dims, size_t dimLength) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
}
-void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- //rsAssert(!script->mFieldIsObject[slot]);
- //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
-
- rsAssert(!drv->mIntrinsicID);
-
- int32_t *destPtr = reinterpret_cast<int32_t *>(
- drv->mExecutable->getExportVarAddrs()[slot]);
- if (!destPtr) {
- //ALOGV("Calling setVar on slot = %i which is null", slot);
- return;
- }
-
- void *ptr = NULL;
- drv->mBoundAllocs[slot] = data;
- if(data) {
- DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv;
- ptr = allocDrv->lod[0].mallocPtr;
- }
- memcpy(destPtr, &ptr, sizeof(void *));
+void rsdScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot, Allocation *data) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->setGlobalBind(slot, data);
}
-void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
- //rsAssert(script->mFieldIsObject[slot]);
- //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
-
- if (drv->mIntrinsicID) {
- drv->mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot,
- static_cast<Allocation *>(data));
- return;
- }
-
- int32_t *destPtr = reinterpret_cast<int32_t *>(
- drv->mExecutable->getExportVarAddrs()[slot]);
- if (!destPtr) {
- //ALOGV("Calling setVar on slot = %i which is null", slot);
- return;
- }
-
- rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
+void rsdScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot, ObjectBase *data) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ cs->setGlobalObj(slot, data);
}
-void rsdScriptDestroy(const Context *dc, Script *script) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
-
- if (drv == NULL) {
- return;
- }
-
- if (drv->mExecutable) {
- Vector<void *>::const_iterator var_addr_iter =
- drv->mExecutable->getExportVarAddrs().begin();
- Vector<void *>::const_iterator var_addr_end =
- drv->mExecutable->getExportVarAddrs().end();
-
- bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
- drv->mExecutable->getInfo().getObjectSlots().begin();
- bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
- drv->mExecutable->getInfo().getObjectSlots().end();
-
- while ((var_addr_iter != var_addr_end) &&
- (is_object_iter != is_object_end)) {
- // The field address can be NULL if the script-side has optimized
- // the corresponding global variable away.
- ObjectBase **obj_addr =
- reinterpret_cast<ObjectBase **>(*var_addr_iter);
- if (*is_object_iter) {
- if (*var_addr_iter != NULL) {
- rsrClearObject(dc, script, obj_addr);
- }
- }
- var_addr_iter++;
- is_object_iter++;
- }
- }
-
- delete drv->mCompilerContext;
- delete drv->mCompilerDriver;
- delete drv->mExecutable;
- delete[] drv->mBoundAllocs;
- free(drv);
- script->mHal.drv = NULL;
+void rsdScriptDestroy(const Context *dc, Script *s) {
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+ delete cs;
+ s->mHal.drv = NULL;
}
+
Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc,
const android::renderscript::Script *sc,
const void *ptr) {
- DrvScript *drv = (DrvScript *)sc->mHal.drv;
- if (!ptr) {
- return NULL;
- }
-
- for (uint32_t ct=0; ct < sc->mHal.info.exportedVariableCount; ct++) {
- Allocation *a = drv->mBoundAllocs[ct];
- if (!a) continue;
- DrvAllocation *adrv = (DrvAllocation *)a->mHal.drv;
- if (adrv->lod[0].mallocPtr == ptr) {
- return a;
- }
- }
- ALOGE("rsGetAllocation, failed to find %p", ptr);
- return NULL;
+ RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
+ return cs->getAllocationForPointer(ptr);
}
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index 4a42eb5..4c65c2a 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -20,12 +20,6 @@
#include <rs_hal.h>
#include <rsRuntime.h>
-namespace bcc {
- class BCCContext;
- class RSCompilerDriver;
- class RSExecutable;
-}
-
bool rsdScriptInit(const android::renderscript::Context *, android::renderscript::ScriptC *,
char const *resName, char const *cacheDir,
uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags);
@@ -92,92 +86,4 @@
const void *);
-typedef void (*outer_foreach_t)(
- const android::renderscript::RsForEachStubParamStruct *,
- uint32_t x1, uint32_t x2,
- uint32_t instep, uint32_t outstep);
-
-typedef struct RsdIntriniscFuncs_rec {
-
- void (*setVarObj)(const android::renderscript::Context *dc,
- const android::renderscript::Script *script,
- void * intrinsicData,
- uint32_t slot, android::renderscript::Allocation *data);
- void (*setVar)(const android::renderscript::Context *dc,
- const android::renderscript::Script *script,
- void * intrinsicData,
- uint32_t slot, void *data, size_t dataLength);
- void (*root)(const android::renderscript::RsForEachStubParamStruct *,
- uint32_t x1, uint32_t x2, uint32_t instep, uint32_t outstep);
-
- void (*destroy)(const android::renderscript::Context *dc,
- const android::renderscript::Script *script,
- void * intrinsicData);
-} RsdIntriniscFuncs_t;
-
-struct DrvScript {
- RsScriptIntrinsicID mIntrinsicID;
- int (*mRoot)();
- int (*mRootExpand)();
- void (*mInit)();
- void (*mFreeChildren)();
-
- bcc::BCCContext *mCompilerContext;
- bcc::RSCompilerDriver *mCompilerDriver;
- bcc::RSExecutable *mExecutable;
-
- android::renderscript::Allocation **mBoundAllocs;
- RsdIntriniscFuncs_t mIntrinsicFuncs;
- void * mIntrinsicData;
-};
-
-typedef struct {
- android::renderscript::RsForEachStubParamStruct fep;
- uint32_t cpuIdx;
-
-} MTThreadStuct;
-
-typedef struct {
- android::renderscript::RsForEachStubParamStruct fep;
-
- android::renderscript::Context *rsc;
- android::renderscript::Script *script;
- ForEachFunc_t kernel;
- uint32_t sig;
- const android::renderscript::Allocation * ain;
- android::renderscript::Allocation * aout;
-
- uint32_t mSliceSize;
- volatile int mSliceNum;
-
- uint32_t xStart;
- uint32_t xEnd;
- uint32_t yStart;
- uint32_t yEnd;
- uint32_t zStart;
- uint32_t zEnd;
- uint32_t arrayStart;
- uint32_t arrayEnd;
-} MTLaunchStruct;
-
-void rsdScriptLaunchThreads(const android::renderscript::Context *rsc,
- bool isThreadable,
- const android::renderscript::Allocation * ain,
- android::renderscript::Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls);
-
-void rsdScriptInvokeForEachMtlsSetup(const android::renderscript::Context *rsc,
- const android::renderscript::Allocation * ain,
- android::renderscript::Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls);
-
-
-
-
#endif
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index caa5aa7..7f4060a 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include "../cpu_ref/rsd_cpu.h"
+
#include "rsdCore.h"
#include "rsdAllocation.h"
#include "rsdBcc.h"
@@ -154,71 +156,10 @@
};
-pthread_key_t rsdgThreadTLSKey = 0;
-uint32_t rsdgThreadTLSKeyCount = 0;
-pthread_mutex_t rsdgInitMutex = PTHREAD_MUTEX_INITIALIZER;
+extern const RsdCpuReference::CpuSymbol * rsdLookupRuntimeStub(Context * pContext, char const* name);
-
-static void * HelperThreadProc(void *vrsc) {
- Context *rsc = static_cast<Context *>(vrsc);
- RsdHal *dc = (RsdHal *)rsc->mHal.drv;
-
-
- uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount);
-
- //ALOGV("RS helperThread starting %p idx=%i", rsc, idx);
-
- dc->mWorkers.mLaunchSignals[idx].init();
- dc->mWorkers.mNativeThreadId[idx] = gettid();
-
- int status = pthread_setspecific(rsdgThreadTLSKey, &dc->mTlsStruct);
- if (status) {
- ALOGE("pthread_setspecific %i", status);
- }
-
-#if 0
- typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
- cpu_set_t cpuset;
- memset(&cpuset, 0, sizeof(cpuset));
- cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
- int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
- sizeof(cpuset), &cpuset);
- ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
-#endif
-
- while (!dc->mExit) {
- dc->mWorkers.mLaunchSignals[idx].wait();
- if (dc->mWorkers.mLaunchCallback) {
- // idx +1 is used because the calling thread is always worker 0.
- dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
- }
- android_atomic_dec(&dc->mWorkers.mRunningCount);
- dc->mWorkers.mCompleteSignal.set();
- }
-
- //ALOGV("RS helperThread exited %p idx=%i", rsc, idx);
- return NULL;
-}
-
-void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) {
- RsdHal *dc = (RsdHal *)rsc->mHal.drv;
-
- dc->mWorkers.mLaunchData = data;
- dc->mWorkers.mLaunchCallback = cbk;
- android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
- for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
- dc->mWorkers.mLaunchSignals[ct].set();
- }
-
- // We use the calling thread as one of the workers so we can start without
- // the delay of the thread wakeup.
- if (dc->mWorkers.mLaunchCallback) {
- dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, 0);
- }
-
- while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
- dc->mWorkers.mCompleteSignal.wait();
- }
+static RsdCpuReference::CpuScript * LookupScript(Context *, const Script *s) {
+ return (RsdCpuReference::CpuScript *)s->mHal.drv;
}
extern "C" bool rsdHalInit(RsContext c, uint32_t version_major,
@@ -233,76 +174,23 @@
}
rsc->mHal.drv = dc;
- pthread_mutex_lock(&rsdgInitMutex);
- if (!rsdgThreadTLSKeyCount) {
- int status = pthread_key_create(&rsdgThreadTLSKey, NULL);
- if (status) {
- ALOGE("Failed to init thread tls key.");
- pthread_mutex_unlock(&rsdgInitMutex);
- return false;
- }
- }
- rsdgThreadTLSKeyCount++;
- pthread_mutex_unlock(&rsdgInitMutex);
-
- dc->mTlsStruct.mContext = rsc;
- dc->mTlsStruct.mScript = NULL;
- int status = pthread_setspecific(rsdgThreadTLSKey, &dc->mTlsStruct);
- if (status) {
- ALOGE("pthread_setspecific %i", status);
- }
-
-
- int cpu = sysconf(_SC_NPROCESSORS_ONLN);
- if(rsc->props.mDebugMaxThreads) {
- cpu = rsc->props.mDebugMaxThreads;
- }
- if (cpu < 2) {
- cpu = 0;
- }
- ALOGV("%p Launching thread(s), CPUs %i", rsc, cpu);
-
- // Subtract one from the cpu count because we also use the command thread as a worker.
- dc->mWorkers.mCount = (uint32_t)(cpu - 1);
- dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t));
- dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t));
- dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount];
- dc->mWorkers.mLaunchCallback = NULL;
-
- dc->mWorkers.mCompleteSignal.init();
-
- android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
- android_atomic_release_store(0, &dc->mWorkers.mLaunchCount);
-
- pthread_attr_t threadAttr;
- status = pthread_attr_init(&threadAttr);
- if (status) {
- ALOGE("Failed to init thread attribute.");
+ dc->mCpuRef = RsdCpuReference::create((Context *)c, version_major, version_minor,
+ &rsdLookupRuntimeStub, &LookupScript);
+ if (!dc->mCpuRef) {
+ ALOGE("RsdCpuReference::create for driver hal failed.");
+ free(dc);
return false;
}
- for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
- status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc);
- if (status) {
- dc->mWorkers.mCount = ct;
- ALOGE("Created fewer than expected number of RS threads.");
- break;
- }
- }
- while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
- usleep(100);
- }
-
- pthread_attr_destroy(&threadAttr);
return true;
}
void SetPriority(const Context *rsc, int32_t priority) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
- for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
- setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority);
- }
+
+ dc->mCpuRef->setPriority(priority);
+
if (dc->mHasGraphics) {
rsdGLSetPriority(rsc, priority);
}
@@ -310,27 +198,7 @@
void Shutdown(Context *rsc) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
-
- dc->mExit = true;
- dc->mWorkers.mLaunchData = NULL;
- dc->mWorkers.mLaunchCallback = NULL;
- android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
- for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
- dc->mWorkers.mLaunchSignals[ct].set();
- }
- void *res;
- for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
- pthread_join(dc->mWorkers.mThreadId[ct], &res);
- }
- rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0);
-
- // Global structure cleanup.
- pthread_mutex_lock(&rsdgInitMutex);
- --rsdgThreadTLSKeyCount;
- if (!rsdgThreadTLSKeyCount) {
- pthread_key_delete(rsdgThreadTLSKey);
- }
- pthread_mutex_unlock(&rsdgInitMutex);
-
+ delete dc->mCpuRef;
+ rsc->mHal.drv = NULL;
}
diff --git a/driver/rsdCore.h b/driver/rsdCore.h
index 92e7c7f..0a46460 100644
--- a/driver/rsdCore.h
+++ b/driver/rsdCore.h
@@ -19,6 +19,8 @@
#include <rs_hal.h>
+#include "../cpu_ref/rsd_cpu.h"
+
#include "rsMutex.h"
#include "rsSignal.h"
@@ -28,12 +30,6 @@
typedef void (* ForEachFunc_t)(void);
typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
-typedef struct RsdSymbolTableRec {
- const char * mName;
- void * mPtr;
- bool threadable;
-} RsdSymbolTable;
-
typedef struct ScriptTLSStructRec {
android::renderscript::Context * mContext;
android::renderscript::Script * mScript;
@@ -43,33 +39,13 @@
uint32_t version_major;
uint32_t version_minor;
bool mHasGraphics;
- bool mInForEach;
-
- struct Workers {
- volatile int mRunningCount;
- volatile int mLaunchCount;
- uint32_t mCount;
- pthread_t *mThreadId;
- pid_t *mNativeThreadId;
- android::renderscript::Signal mCompleteSignal;
-
- android::renderscript::Signal *mLaunchSignals;
- WorkerCallback_t mLaunchCallback;
- void *mLaunchData;
- };
- Workers mWorkers;
- bool mExit;
ScriptTLSStruct mTlsStruct;
+ android::renderscript::RsdCpuReference *mCpuRef;
RsdGL gl;
} RsdHal;
-extern pthread_key_t rsdgThreadTLSKey;
-extern uint32_t rsdgThreadTLSKeyCount;
-extern pthread_mutex_t rsdgInitMutex;
-
-
void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data);
#endif
diff --git a/driver/rsdIntrinsicBlend.cpp b/driver/rsdIntrinsicBlend.cpp
deleted file mode 100644
index c35c379..0000000
--- a/driver/rsdIntrinsicBlend.cpp
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- float f[4];
-};
-
-
-enum {
- BLEND_CLEAR = 0,
- BLEND_SRC = 1,
- BLEND_DST = 2,
- BLEND_SRC_OVER = 3,
- BLEND_DST_OVER = 4,
- BLEND_SRC_IN = 5,
- BLEND_DST_IN = 6,
- BLEND_SRC_OUT = 7,
- BLEND_DST_OUT = 8,
- BLEND_SRC_ATOP = 9,
- BLEND_DST_ATOP = 10,
- BLEND_XOR = 11,
-
- BLEND_NORMAL = 12,
- BLEND_AVERAGE = 13,
- BLEND_MULTIPLY = 14,
- BLEND_SCREEN = 15,
- BLEND_DARKEN = 16,
- BLEND_LIGHTEN = 17,
- BLEND_OVERLAY = 18,
- BLEND_HARDLIGHT = 19,
- BLEND_SOFTLIGHT = 20,
- BLEND_DIFFERENCE = 21,
- BLEND_NEGATION = 22,
- BLEND_EXCLUSION = 23,
- BLEND_COLOR_DODGE = 24,
- BLEND_INVERSE_COLOR_DODGE = 25,
- BLEND_SOFT_DODGE = 26,
- BLEND_COLOR_BURN = 27,
- BLEND_INVERSE_COLOR_BURN = 28,
- BLEND_SOFT_BURN = 29,
- BLEND_REFLECT = 30,
- BLEND_GLOW = 31,
- BLEND_FREEZE = 32,
- BLEND_HEAT = 33,
- BLEND_ADD = 34,
- BLEND_SUBTRACT = 35,
- BLEND_STAMP = 36,
- BLEND_RED = 37,
- BLEND_GREEN = 38,
- BLEND_BLUE = 39,
- BLEND_HUE = 40,
- BLEND_SATURATION = 41,
- BLEND_COLOR = 42,
- BLEND_LUMINOSITY = 43
-};
-
-extern "C" void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
-extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
-
-//#undef ARCH_ARM_HAVE_NEON
-
-static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- // instep/outstep can be ignored--sizeof(uchar4) known at compile time
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->in;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- switch (p->slot) {
- case BLEND_CLEAR:
- for (;x1 < x2; x1++, out++) {
- *out = 0;
- }
- break;
- case BLEND_SRC:
- for (;x1 < x2; x1++, out++, in++) {
- *out = *in;
- }
- break;
- //BLEND_DST is a NOP
- case BLEND_DST:
- break;
- case BLEND_SRC_OVER:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcOver_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- short4 out_s = convert_short4(*out);
- in_s = in_s + ((out_s * (short4)(255 - in_s.a)) >> (short4)8);
- *out = convert_uchar4(in_s);
- }
- break;
- case BLEND_DST_OVER:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstOver_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- short4 out_s = convert_short4(*out);
- in_s = out_s + ((in_s * (short4)(255 - out_s.a)) >> (short4)8);
- *out = convert_uchar4(in_s);
- }
- break;
- case BLEND_SRC_IN:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcIn_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- in_s = (in_s * out->a) >> (short4)8;
- *out = convert_uchar4(in_s);
- }
- break;
- case BLEND_DST_IN:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstIn_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 out_s = convert_short4(*out);
- out_s = (out_s * in->a) >> (short4)8;
- *out = convert_uchar4(out_s);
- }
- break;
- case BLEND_SRC_OUT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcOut_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- in_s = (in_s * (short4)(255 - out->a)) >> (short4)8;
- *out = convert_uchar4(in_s);
- }
- break;
- case BLEND_DST_OUT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstOut_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 out_s = convert_short4(*out);
- out_s = (out_s * (short4)(255 - in->a)) >> (short4)8;
- *out = convert_uchar4(out_s);
- }
- break;
- case BLEND_SRC_ATOP:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcAtop_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- short4 out_s = convert_short4(*out);
- out_s.rgb = ((in_s.rgb * out_s.a) +
- (out_s.rgb * ((short3)255 - (short3)in_s.a))) >> (short3)8;
- *out = convert_uchar4(out_s);
- }
- break;
- case BLEND_DST_ATOP:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstAtop_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- short4 in_s = convert_short4(*in);
- short4 out_s = convert_short4(*out);
- out_s.rgb = ((out_s.rgb * in_s.a) +
- (in_s.rgb * ((short3)255 - (short3)out_s.a))) >> (short3)8;
- *out = convert_uchar4(out_s);
- }
- break;
- case BLEND_XOR:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendXor_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- *out = *in ^ *out;
- }
- break;
- case BLEND_NORMAL:
- ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
- rsAssert(false);
- break;
- case BLEND_AVERAGE:
- ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
- rsAssert(false);
- break;
- case BLEND_MULTIPLY:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendMultiply_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
- >> (short4)8);
- }
- break;
- case BLEND_SCREEN:
- ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
- rsAssert(false);
- break;
- case BLEND_DARKEN:
- ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
- rsAssert(false);
- break;
- case BLEND_LIGHTEN:
- ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
- rsAssert(false);
- break;
- case BLEND_OVERLAY:
- ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
- rsAssert(false);
- break;
- case BLEND_HARDLIGHT:
- ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
- rsAssert(false);
- break;
- case BLEND_SOFTLIGHT:
- ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
- rsAssert(false);
- break;
- case BLEND_DIFFERENCE:
- ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
- rsAssert(false);
- break;
- case BLEND_NEGATION:
- ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
- rsAssert(false);
- break;
- case BLEND_EXCLUSION:
- ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
- rsAssert(false);
- break;
- case BLEND_COLOR_DODGE:
- ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
- rsAssert(false);
- break;
- case BLEND_INVERSE_COLOR_DODGE:
- ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
- rsAssert(false);
- break;
- case BLEND_SOFT_DODGE:
- ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
- rsAssert(false);
- break;
- case BLEND_COLOR_BURN:
- ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
- rsAssert(false);
- break;
- case BLEND_INVERSE_COLOR_BURN:
- ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
- rsAssert(false);
- break;
- case BLEND_SOFT_BURN:
- ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
- rsAssert(false);
- break;
- case BLEND_REFLECT:
- ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
- rsAssert(false);
- break;
- case BLEND_GLOW:
- ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
- rsAssert(false);
- break;
- case BLEND_FREEZE:
- ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
- rsAssert(false);
- break;
- case BLEND_HEAT:
- ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
- rsAssert(false);
- break;
- case BLEND_ADD:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendAdd_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- uint32_t iR = in->r, iG = in->g, iB = in->b, iA = in->a,
- oR = out->r, oG = out->g, oB = out->b, oA = out->a;
- out->r = (oR + iR) > 255 ? 255 : oR + iR;
- out->g = (oG + iG) > 255 ? 255 : oG + iG;
- out->b = (oB + iB) > 255 ? 255 : oB + iB;
- out->a = (oA + iA) > 255 ? 255 : oA + iA;
- }
- break;
- case BLEND_SUBTRACT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSub_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
- }
-#endif
- for (;x1 < x2; x1++, out++, in++) {
- int32_t iR = in->r, iG = in->g, iB = in->b, iA = in->a,
- oR = out->r, oG = out->g, oB = out->b, oA = out->a;
- out->r = (oR - iR) < 0 ? 0 : oR - iR;
- out->g = (oG - iG) < 0 ? 0 : oG - iG;
- out->b = (oB - iB) < 0 ? 0 : oB - iB;
- out->a = (oA - iA) < 0 ? 0 : oA - iA;
- }
- break;
- case BLEND_STAMP:
- ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
- rsAssert(false);
- break;
- case BLEND_RED:
- ALOGE("Called unimplemented blend intrinsic BLEND_RED");
- rsAssert(false);
- break;
- case BLEND_GREEN:
- ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
- rsAssert(false);
- break;
- case BLEND_BLUE:
- ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
- rsAssert(false);
- break;
- case BLEND_HUE:
- ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
- rsAssert(false);
- break;
- case BLEND_SATURATION:
- ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
- rsAssert(false);
- break;
- case BLEND_COLOR:
- ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
- rsAssert(false);
- break;
- case BLEND_LUMINOSITY:
- ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
- rsAssert(false);
- break;
-
- default:
- ALOGE("Called unimplemented value %d", p->slot);
- rsAssert(false);
-
- }
-}
-
-void * rsdIntrinsic_InitBlend(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 0;
- funcs->root = ColorMatrix_uchar4;
-
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicBlur.cpp b/driver/rsdIntrinsicBlur.cpp
deleted file mode 100644
index b67e8d5..0000000
--- a/driver/rsdIntrinsicBlur.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- float fp[104];
- short ip[104];
- float radius;
- int iradius;
- ObjectBaseRef<Allocation> alloc;
-};
-
-static void ComputeGaussianWeights(ConvolveParams *cp) {
- // Compute gaussian weights for the blur
- // e is the euler's number
- float e = 2.718281828459045f;
- float pi = 3.1415926535897932f;
- // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 )
- // x is of the form [-radius .. 0 .. radius]
- // and sigma varies with radius.
- // Based on some experimental radius values and sigma's
- // we approximately fit sigma = f(radius) as
- // sigma = radius * 0.4 + 0.6
- // The larger the radius gets, the more our gaussian blur
- // will resemble a box blur since with large sigma
- // the gaussian curve begins to lose its shape
- float sigma = 0.4f * cp->radius + 0.6f;
-
- // Now compute the coefficients. We will store some redundant values to save
- // some math during the blur calculations precompute some values
- float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma);
- float coeff2 = - 1.0f / (2.0f * sigma * sigma);
-
- float normalizeFactor = 0.0f;
- float floatR = 0.0f;
- int r;
- cp->iradius = (float)ceil(cp->radius) + 0.5f;
- for (r = -cp->iradius; r <= cp->iradius; r ++) {
- floatR = (float)r;
- cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2);
- normalizeFactor += cp->fp[r + cp->iradius];
- }
-
- //Now we need to normalize the weights because all our coefficients need to add up to one
- normalizeFactor = 1.0f / normalizeFactor;
- for (r = -cp->iradius; r <= cp->iradius; r ++) {
- cp->fp[r + cp->iradius] *= normalizeFactor;
- cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768);
- }
-}
-
-static void Blur_Bind(const Context *dc, const Script *script,
- void * intrinsicData, uint32_t slot, Allocation *data) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
- rsAssert(slot == 1);
- cp->alloc.set(data);
-}
-
-static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData,
- uint32_t slot, void *data, size_t dataLength) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
- rsAssert(slot == 0);
-
- cp->radius = ((const float *)data)[0];
- ComputeGaussianWeights(cp);
-}
-
-
-
-static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
- const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
-
- const uchar *pi = ptrIn + x*4;
-
- float4 blurredPixel = 0;
- for (int r = -iradius; r <= iradius; r ++) {
- int validY = rsMax((y + r), 0);
- validY = rsMin(validY, (int)(p->dimY - 1));
- const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
- float4 pf = convert_float4(pvy[0]);
- blurredPixel += pf * gPtr[0];
- gPtr++;
- }
-
- out->xyzw = blurredPixel;
-}
-
-extern "C" void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2);
-extern "C" void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2);
-
-static void OneVF(float4 *out,
- const uchar *ptrIn, int iStride, const float* gPtr, int ct,
- int x1, int x2) {
-
-#if defined(ARCH_ARM_HAVE_NEON)
- {
- int t = (x2 - x1);
- t &= ~1;
- if(t) {
- rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t);
- }
- x1 += t;
- }
-#endif
-
- while(x2 > x1) {
- const uchar *pi = ptrIn;
- float4 blurredPixel = 0;
- const float* gp = gPtr;
-
- for (int r = 0; r < ct; r++) {
- float4 pf = convert_float4(((const uchar4 *)pi)[0]);
- blurredPixel += pf * gp[0];
- pi += iStride;
- gp++;
- }
- out->xyzw = blurredPixel;
- x1++;
- out++;
- }
-}
-
-static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
- const float4 *ptrIn, const float* gPtr, int iradius) {
-
- float4 blurredPixel = 0;
- for (int r = -iradius; r <= iradius; r ++) {
- int validX = rsMax((x + r), 0);
- validX = rsMin(validX, (int)(p->dimX - 1));
- float4 pf = ptrIn[validX];
- blurredPixel += pf * gPtr[0];
- gPtr++;
- }
-
- out->xyzw = convert_uchar4(blurredPixel);
-}
-
-
-static void Blur_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- float buf[4 * 2048];
- ConvolveParams *cp = (ConvolveParams *)p->usr;
- if (!cp->alloc.get()) {
- ALOGE("Blur executed without input, skipping");
- return;
- }
- DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
- const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
-
- uchar4 *out = (uchar4 *)p->out;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- float4 *fout = (float4 *)buf;
- int y = p->y;
- if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) {
- const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride;
- OneVF(fout, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, x1, x2);
- } else {
- while(x2 > x1) {
- OneV(p, fout, x1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
- fout++;
- x1++;
- }
- }
-
- x1 = xstart;
- while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) {
- OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
- out++;
- x1++;
- }
-#if defined(ARCH_ARM_HAVE_NEON)
- if ((x1 + cp->iradius) < x2) {
- rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius);
- out += (x2 - cp->iradius) - x1;
- x1 = x2 - cp->iradius;
- }
-#endif
- while(x2 > x1) {
- OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
- out++;
- x1++;
- }
-
-}
-
-void * rsdIntrinsic_InitBlur(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 2;
- funcs->setVarObj = Blur_Bind;
- funcs->setVar = Blur_SetVar;
- funcs->root = Blur_uchar4;
-
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- cp->radius = 5;
- ComputeGaussianWeights(cp);
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicColorMatrix.cpp b/driver/rsdIntrinsicColorMatrix.cpp
deleted file mode 100644
index cfe0333..0000000
--- a/driver/rsdIntrinsicColorMatrix.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- float fp[16];
- short ip[16];
- bool use3x3;
- bool useDot;
-};
-
-static void ColorMatrix_SetVar(const Context *dc, const Script *script, void * intrinsicData,
- uint32_t slot, void *data, size_t dataLength) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
-
- rsAssert(slot == 0);
- memcpy (cp->fp, data, dataLength);
- for(int ct=0; ct < 16; ct++) {
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
-
- if ((cp->ip[3] == 0) && (cp->ip[7] == 0) && (cp->ip[11] == 0) &&
- (cp->ip[12] == 0) && (cp->ip[13] == 0) && (cp->ip[14] == 0) &&
- (cp->ip[15] == 255)) {
- cp->use3x3 = true;
-
- if ((cp->ip[0] == cp->ip[1]) && (cp->ip[0] == cp->ip[2]) &&
- (cp->ip[4] == cp->ip[5]) && (cp->ip[4] == cp->ip[6]) &&
- (cp->ip[8] == cp->ip[9]) && (cp->ip[8] == cp->ip[10])) {
- cp->useDot = true;
- }
- }
-}
-
-extern "C" void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, const short *coef, uint32_t count);
-extern "C" void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, const short *coef, uint32_t count);
-extern "C" void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, const short *coef, uint32_t count);
-
-static void One(const RsForEachStubParamStruct *p, uchar4 *out,
- const uchar4 *py, const float* coeff) {
- float4 i = convert_float4(py[0]);
-
- float4 sum;
- sum.x = i.x * coeff[0] +
- i.y * coeff[4] +
- i.z * coeff[8] +
- i.w * coeff[12];
- sum.y = i.x * coeff[1] +
- i.y * coeff[5] +
- i.z * coeff[9] +
- i.w * coeff[13];
- sum.z = i.x * coeff[2] +
- i.y * coeff[6] +
- i.z * coeff[10] +
- i.w * coeff[14];
- sum.w = i.x * coeff[3] +
- i.y * coeff[7] +
- i.z * coeff[11] +
- i.w * coeff[15];
-
- sum.x = sum.x < 0 ? 0 : (sum.x > 255 ? 255 : sum.x);
- sum.y = sum.y < 0 ? 0 : (sum.y > 255 ? 255 : sum.y);
- sum.z = sum.z < 0 ? 0 : (sum.z > 255 ? 255 : sum.z);
- sum.w = sum.w < 0 ? 0 : (sum.w > 255 ? 255 : sum.w);
-
- *out = convert_uchar4(sum);
-}
-
-static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- ConvolveParams *cp = (ConvolveParams *)p->usr;
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->in;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- if(x2 > x1) {
-#if defined(ARCH_ARM_HAVE_NEON)
- int32_t len = (x2 - x1) >> 2;
- if(len > 0) {
- if (cp->use3x3) {
- if (cp->useDot) {
- rsdIntrinsicColorMatrixDot_K(out, in, cp->ip, len);
- } else {
- rsdIntrinsicColorMatrix3x3_K(out, in, cp->ip, len);
- }
- } else {
- rsdIntrinsicColorMatrix4x4_K(out, in, cp->ip, len);
- }
- x1 += len << 2;
- out += len << 2;
- in += len << 2;
- }
-#endif
-
- while(x1 != x2) {
- One(p, out++, in++, cp->fp);
- x1++;
- }
- }
-}
-
-void * rsdIntrinsic_InitColorMatrix(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 1;
- funcs->setVar = ColorMatrix_SetVar;
- funcs->root = ColorMatrix_uchar4;
-
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- cp->fp[0] = 1.f;
- cp->fp[5] = 1.f;
- cp->fp[10] = 1.f;
- cp->fp[15] = 1.f;
- for(int ct=0; ct < 16; ct++) {
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicConvolve3x3.cpp b/driver/rsdIntrinsicConvolve3x3.cpp
deleted file mode 100644
index 55f4360..0000000
--- a/driver/rsdIntrinsicConvolve3x3.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- float fp[16];
- short ip[16];
- ObjectBaseRef<Allocation> alloc;
-};
-
-static void Convolve3x3_Bind(const Context *dc, const Script *script,
- void * intrinsicData, uint32_t slot, Allocation *data) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
- rsAssert(slot == 1);
- cp->alloc.set(data);
-}
-
-static void Convolve3x3_SetVar(const Context *dc, const Script *script, void * intrinsicData,
- uint32_t slot, void *data, size_t dataLength) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
-
- rsAssert(slot == 0);
- memcpy (cp->fp, data, dataLength);
- for(int ct=0; ct < 9; ct++) {
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
-}
-
-extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, const void *y2, const short *coef, uint32_t count);
-
-
-static void ConvolveOne(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
- const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
- const float* coeff) {
-
- uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX);
-
- float4 px = convert_float4(py0[x1]) * coeff[0] +
- convert_float4(py0[x]) * coeff[1] +
- convert_float4(py0[x2]) * coeff[2] +
- convert_float4(py1[x1]) * coeff[3] +
- convert_float4(py1[x]) * coeff[4] +
- convert_float4(py1[x2]) * coeff[5] +
- convert_float4(py2[x1]) * coeff[6] +
- convert_float4(py2[x]) * coeff[7] +
- convert_float4(py2[x2]) * coeff[8];
-
- px = clamp(px, 0.f, 255.f);
- uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
- *out = o;
-}
-
-static void Convolve3x3_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- if (!cp->alloc.get()) {
- ALOGE("Convolve3x3 executed without input, skipping");
- return;
- }
- DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
- const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
-
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
- const uchar4 *py0 = (const uchar4 *)(pin + din->lod[0].stride * y2);
- const uchar4 *py1 = (const uchar4 *)(pin + din->lod[0].stride * p->y);
- const uchar4 *py2 = (const uchar4 *)(pin + din->lod[0].stride * y1);
-
- uchar4 *out = (uchar4 *)p->out;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
- if(x1 == 0) {
- ConvolveOne(p, 0, out, py0, py1, py2, cp->fp);
- x1 ++;
- out++;
- }
-
- if(x2 > x1) {
-#if defined(ARCH_ARM_HAVE_NEON)
- int32_t len = (x2 - x1 - 1) >> 1;
- if(len > 0) {
- rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->ip, len);
- x1 += len << 1;
- out += len << 1;
- }
-#endif
-
- while(x1 != x2) {
- ConvolveOne(p, x1, out, py0, py1, py2, cp->fp);
- out++;
- x1++;
- }
- }
-}
-
-void * rsdIntrinsic_InitConvolve3x3(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 2;
- funcs->setVarObj = Convolve3x3_Bind;
- funcs->setVar = Convolve3x3_SetVar;
- funcs->root = Convolve3x3_uchar4;
-
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- for(int ct=0; ct < 9; ct++) {
- cp->fp[ct] = 1.f / 9.f;
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicConvolve5x5.cpp b/driver/rsdIntrinsicConvolve5x5.cpp
deleted file mode 100644
index fc6b029..0000000
--- a/driver/rsdIntrinsicConvolve5x5.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- float fp[28];
- short ip[28];
- ObjectBaseRef<Allocation> alloc;
-};
-
-static void Convolve5x5_Bind(const Context *dc, const Script *script,
- void * intrinsicData, uint32_t slot, Allocation *data) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
- rsAssert(slot == 1);
- cp->alloc.set(data);
-}
-
-static void Convolve5x5_SetVar(const Context *dc, const Script *script, void * intrinsicData,
- uint32_t slot, void *data, size_t dataLength) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
-
- rsAssert(slot == 0);
- memcpy (cp->fp, data, dataLength);
- for(int ct=0; ct < 25; ct++) {
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
-}
-
-
-static void One(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
- const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
- const float* coeff) {
-
- uint32_t x0 = rsMax((int32_t)x-2, 0);
- uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
-
- float4 px = convert_float4(py0[x0]) * coeff[0] +
- convert_float4(py0[x1]) * coeff[1] +
- convert_float4(py0[x2]) * coeff[2] +
- convert_float4(py0[x3]) * coeff[3] +
- convert_float4(py0[x4]) * coeff[4] +
-
- convert_float4(py1[x0]) * coeff[5] +
- convert_float4(py1[x1]) * coeff[6] +
- convert_float4(py1[x2]) * coeff[7] +
- convert_float4(py1[x3]) * coeff[8] +
- convert_float4(py1[x4]) * coeff[9] +
-
- convert_float4(py2[x0]) * coeff[10] +
- convert_float4(py2[x1]) * coeff[11] +
- convert_float4(py2[x2]) * coeff[12] +
- convert_float4(py2[x3]) * coeff[13] +
- convert_float4(py2[x4]) * coeff[14] +
-
- convert_float4(py3[x0]) * coeff[15] +
- convert_float4(py3[x1]) * coeff[16] +
- convert_float4(py3[x2]) * coeff[17] +
- convert_float4(py3[x3]) * coeff[18] +
- convert_float4(py3[x4]) * coeff[19] +
-
- convert_float4(py4[x0]) * coeff[20] +
- convert_float4(py4[x1]) * coeff[21] +
- convert_float4(py4[x2]) * coeff[22] +
- convert_float4(py4[x3]) * coeff[23] +
- convert_float4(py4[x4]) * coeff[24];
-
- px = clamp(px, 0.f, 255.f);
- uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
- *out = o;
-}
-
-extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void *y1,
- const void *y2, const void *y3, const void *y4,
- const short *coef, uint32_t count);
-
-static void Convolve5x5_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- ConvolveParams *cp = (ConvolveParams *)p->usr;
- if (!cp->alloc.get()) {
- ALOGE("Convolve5x5 executed without input, skipping");
- return;
- }
- DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
- const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
-
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
-
- const uchar4 *py0 = (const uchar4 *)(pin + din->lod[0].stride * y0);
- const uchar4 *py1 = (const uchar4 *)(pin + din->lod[0].stride * y1);
- const uchar4 *py2 = (const uchar4 *)(pin + din->lod[0].stride * y2);
- const uchar4 *py3 = (const uchar4 *)(pin + din->lod[0].stride * y3);
- const uchar4 *py4 = (const uchar4 *)(pin + din->lod[0].stride * y4);
-
- uchar4 *out = (uchar4 *)p->out;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- while((x1 < x2) && (x1 < 2)) {
- One(p, x1, out, py0, py1, py2, py3, py4, cp->fp);
- out++;
- x1++;
- }
-
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 3) < x2) {
- uint32_t len = (x2 - x1 - 3) >> 1;
- rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len);
- out += len << 1;
- x1 += len << 1;
- }
-#endif
-
- while(x1 < x2) {
- One(p, x1, out, py0, py1, py2, py3, py4, cp->fp);
- out++;
- x1++;
- }
-}
-
-void * rsdIntrinsic_InitConvolve5x5(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 2;
- funcs->setVarObj = Convolve5x5_Bind;
- funcs->setVar = Convolve5x5_SetVar;
- funcs->root = Convolve5x5_uchar4;
-
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- for(int ct=0; ct < 25; ct++) {
- cp->fp[ct] = 1.f / 25.f;
- cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
- }
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicInlines.h b/driver/rsdIntrinsicInlines.h
deleted file mode 100644
index ab11b4f..0000000
--- a/driver/rsdIntrinsicInlines.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-
-typedef uint8_t uchar;
-typedef uint16_t ushort;
-typedef uint32_t uint;
-
-typedef float float2 __attribute__((ext_vector_type(2)));
-typedef float float3 __attribute__((ext_vector_type(3)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-typedef uchar uchar2 __attribute__((ext_vector_type(2)));
-typedef uchar uchar3 __attribute__((ext_vector_type(3)));
-typedef uchar uchar4 __attribute__((ext_vector_type(4)));
-typedef ushort ushort2 __attribute__((ext_vector_type(2)));
-typedef ushort ushort3 __attribute__((ext_vector_type(3)));
-typedef ushort ushort4 __attribute__((ext_vector_type(4)));
-typedef uint uint2 __attribute__((ext_vector_type(2)));
-typedef uint uint3 __attribute__((ext_vector_type(3)));
-typedef uint uint4 __attribute__((ext_vector_type(4)));
-typedef char char2 __attribute__((ext_vector_type(2)));
-typedef char char3 __attribute__((ext_vector_type(3)));
-typedef char char4 __attribute__((ext_vector_type(4)));
-typedef short short2 __attribute__((ext_vector_type(2)));
-typedef short short3 __attribute__((ext_vector_type(3)));
-typedef short short4 __attribute__((ext_vector_type(4)));
-typedef int int2 __attribute__((ext_vector_type(2)));
-typedef int int3 __attribute__((ext_vector_type(3)));
-typedef int int4 __attribute__((ext_vector_type(4)));
-typedef long long2 __attribute__((ext_vector_type(2)));
-typedef long long3 __attribute__((ext_vector_type(3)));
-typedef long long4 __attribute__((ext_vector_type(4)));
-
-enum IntrinsicEnums {
- INTRINSIC_UNDEFINED,
- INTRINSIC_CONVOLVE_3x3,
- INTRINXIC_COLORMATRIX
-
-};
-
-static inline int4 convert_int4(uchar4 i) {
- int4 f4 = {i.x, i.y, i.z, i.w};
- return f4;
-}
-
-static inline short4 convert_short4(uchar4 i) {
- short4 f4 = {i.x, i.y, i.z, i.w};
- return f4;
-}
-
-static inline float4 convert_float4(uchar4 i) {
- float4 f4 = {i.x, i.y, i.z, i.w};
- return f4;
-}
-
-static inline uchar4 convert_uchar4(short4 i) {
- uchar4 f4 = {(uchar)i.x, (uchar)i.y, (uchar)i.z, (uchar)i.w};
- return f4;
-}
-
-static inline uchar4 convert_uchar4(int4 i) {
- uchar4 f4 = {(uchar)i.x, (uchar)i.y, (uchar)i.z, (uchar)i.w};
- return f4;
-}
-
-static inline uchar4 convert_uchar4(float4 i) {
- uchar4 f4 = {(uchar)i.x, (uchar)i.y, (uchar)i.z, (uchar)i.w};
- return f4;
-}
-
-
-static inline int4 clamp(int4 amount, int low, int high) {
- int4 r;
- r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
- r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
- r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
- r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);
- return r;
-}
-
-static inline float4 clamp(float4 amount, float low, float high) {
- float4 r;
- r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
- r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
- r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
- r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);
- return r;
-}
-
-
diff --git a/driver/rsdIntrinsicLUT.cpp b/driver/rsdIntrinsicLUT.cpp
deleted file mode 100644
index 818a132..0000000
--- a/driver/rsdIntrinsicLUT.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct ConvolveParams {
- ObjectBaseRef<Allocation> lut;
-};
-
-static void LUT_Bind(const Context *dc, const Script *script,
- void * intrinsicData, uint32_t slot, Allocation *data) {
- ConvolveParams *cp = (ConvolveParams *)intrinsicData;
- rsAssert(slot == 0);
- cp->lut.set(data);
-}
-
-static void LUT_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- ConvolveParams *cp = (ConvolveParams *)p->usr;
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->in;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- DrvAllocation *din = (DrvAllocation *)cp->lut->mHal.drv;
- const uchar *tr = (const uchar *)din->lod[0].mallocPtr;
- const uchar *tg = &tr[256];
- const uchar *tb = &tg[256];
- const uchar *ta = &tb[256];
-
- while (x1 < x2) {
- uchar4 p = *in;
- uchar4 o = {tr[p.x], tg[p.y], tb[p.z], ta[p.w]};
- *out = o;
- in++;
- out++;
- x1++;
- }
-}
-
-void * rsdIntrinsic_InitLUT(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 1;
- funcs->setVarObj = LUT_Bind;
- funcs->root = LUT_uchar4;
- ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsicYuvToRGB.cpp b/driver/rsdIntrinsicYuvToRGB.cpp
deleted file mode 100644
index b3fb059..0000000
--- a/driver/rsdIntrinsicYuvToRGB.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-#include "rsdIntrinsicInlines.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-struct YuvParams {
- ObjectBaseRef<Allocation> alloc;
-};
-
-static void YuvToRGB_Bind(const Context *dc, const Script *script,
- void * intrinsicData, uint32_t slot, Allocation *data) {
- YuvParams *cp = (YuvParams *)intrinsicData;
- rsAssert(slot == 0);
- cp->alloc.set(data);
-}
-
-
-
-static uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
- short Y = ((short)y) - 16;
- short U = ((short)u) - 128;
- short V = ((short)v) - 128;
-
- short4 p;
- p.r = (Y * 298 + V * 409 + 128) >> 8;
- p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
- p.b = (Y * 298 + U * 516 + 128) >> 8;
- p.a = 255;
- if(p.r < 0) {
- p.r = 0;
- }
- if(p.r > 255) {
- p.r = 255;
- }
- if(p.g < 0) {
- p.g = 0;
- }
- if(p.g > 255) {
- p.g = 255;
- }
- if(p.b < 0) {
- p.b = 0;
- }
- if(p.b > 255) {
- p.b = 255;
- }
-
- return (uchar4){p.r, p.g, p.b, p.a};
-}
-
-
-static short YuvCoeff[] = {
- 298, 409, -100, 516, -208, 255, 0, 0,
- 16, 16, 16, 16, 16, 16, 16, 16,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 298, 298, 298, 298, 298, 298, 298, 298,
- 255, 255, 255, 255, 255, 255, 255, 255
-
-
-};
-
-extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
-
-static void YuvToRGB_uchar4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- YuvParams *cp = (YuvParams *)p->usr;
- if (!cp->alloc.get()) {
- ALOGE("YuvToRGB executed without input, skipping");
- return;
- }
- DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
- const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
-
- const uchar *Y = pin + (p->y * p->dimX);
- const uchar *uv = pin + (p->dimX * p->dimY);
- uv += (p->y>>1) * p->dimX;
-
- uchar4 *out = (uchar4 *)p->out;
- uint32_t x1 = xstart;
- uint32_t x2 = xend;
-
- if(x2 > x1) {
-#if defined(ARCH_ARM_HAVE_NEON)
- int32_t len = (x2 - x1 - 1) >> 3;
- if(len > 0) {
- rsdIntrinsicYuv_K(out, Y, uv, len, YuvCoeff);
- x1 += len << 3;
- out += len << 3;
- }
-#endif
-
- // ALOGE("y %i %i %i", p->y, x1, x2);
- while(x1 < x2) {
- uchar u = uv[(x1 & 0xffffe) + 1];
- uchar v = uv[(x1 & 0xffffe) + 0];
- *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
- out++;
- x1++;
- *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
- out++;
- x1++;
- }
- }
-}
-
-void * rsdIntrinsic_InitYuvToRGB(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsdIntriniscFuncs_t *funcs) {
-
- script->mHal.info.exportedVariableCount = 1;
- funcs->setVarObj = YuvToRGB_Bind;
- funcs->root = YuvToRGB_uchar4;
- YuvParams *cp = (YuvParams *)calloc(1, sizeof(YuvParams));
- return cp;
-}
-
-
diff --git a/driver/rsdIntrinsics.cpp b/driver/rsdIntrinsics.cpp
deleted file mode 100644
index 0f747fa..0000000
--- a/driver/rsdIntrinsics.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rsdCore.h"
-#include "rsdIntrinsics.h"
-#include "rsdAllocation.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-void * rsdIntrinsic_InitBlur(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitConvolve3x3(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitConvolve5x5(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitColorMatrix(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitLUT(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitYuvToRGB(const Context *, Script *, RsdIntriniscFuncs_t *);
-void * rsdIntrinsic_InitBlend(const Context *, Script *, RsdIntriniscFuncs_t *);
-
-static void SetVarObj(const Context *, const Script *, void *, uint32_t, Allocation *) {
- rsAssert(!"Intrinsic_SetVarObj unexpectedly called");
-}
-
-static void SetVar(const Context *, const Script *, void *, uint32_t, void *, size_t) {
- rsAssert(!"Intrinsic_Bind unexpectedly called");
-}
-
-static void Destroy(const Context *dc, const Script *script, void * intrinsicData) {
- free(intrinsicData);
-}
-
-void * rsdIntrinsic_Init(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsScriptIntrinsicID iid,
- RsdIntriniscFuncs_t *funcs) {
-
- funcs->setVarObj = SetVarObj;
- funcs->setVar = SetVar;
- funcs->destroy = Destroy;
-
- switch(iid) {
- case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
- return rsdIntrinsic_InitConvolve3x3(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
- return rsdIntrinsic_InitConvolve5x5(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
- return rsdIntrinsic_InitColorMatrix(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_LUT:
- return rsdIntrinsic_InitLUT(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_BLUR:
- return rsdIntrinsic_InitBlur(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
- return rsdIntrinsic_InitYuvToRGB(dc, script, funcs);
- case RS_SCRIPT_INTRINSIC_ID_BLEND:
- return rsdIntrinsic_InitBlend(dc, script, funcs);
-
- default:
- return NULL;
- }
- return NULL;
-}
-
-
-
diff --git a/driver/rsdIntrinsics.h b/driver/rsdIntrinsics.h
deleted file mode 100644
index 221a81a..0000000
--- a/driver/rsdIntrinsics.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef RSD_INTRINSICS_H
-#define RSD_INTRINSICS_H
-
-#include <rs_hal.h>
-#include "rsdBcc.h"
-
-void * rsdIntrinsic_Init(const android::renderscript::Context *dc,
- android::renderscript::Script *script,
- RsScriptIntrinsicID id, RsdIntriniscFuncs_t *funcs);
-
-#endif // RSD_INTRINSICS_H
-
diff --git a/driver/rsdIntrinsics_Convolve.S b/driver/rsdIntrinsics_Convolve.S
deleted file mode 100644
index 04dd8b1..0000000
--- a/driver/rsdIntrinsics_Convolve.S
+++ /dev/null
@@ -1,1524 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-
-#include <machine/cpu-features.h>
-#include <machine/asm.h>
-
-/*
- r0 = dst
- r1 = y0 base pointer
- r2 = y1 base pointer
- r3 = y2 base pointer
- sp = coeffs
- sp = length / 2
-*/
-
-ENTRY(rsdIntrinsicConvolve3x3_K)
- push {r4-r8, r10, r11, lr}
- vpush {q4-q7}
-
- /* Get the coeffs pointer from the stack and load the
- coefficients in the q0, q1 NEON registers */
- ldr r4, [sp, #32+64]
- vld1.16 {q0, q1}, [r4]
-
- /* Get count from the stack */
- ldr r4, [sp, #36+64]
-
- /* Load the frequently used immediate in a register */
- mov r5, #8
-
-1:
- /* Load and post-increase the address by r5=#8 */
- vld1.8 {q13}, [r1], r5
- vld1.8 {q14}, [r2], r5
- vld1.8 {q15}, [r3], r5
-
- /* Signal memory for data that will be used in the loop after the next */
- PLD (r1, r5)
- PLD (r2, r5)
- PLD (r3, r5)
-
- vmovl.u8 q2, d26
- vmovl.u8 q3, d27
- vmovl.u8 q4, d28
- vmovl.u8 q5, d29
- vmovl.u8 q6, d30
- vmovl.u8 q7, d31
-
-/*
- The two pixel source array is
- d4, d5, d6, d7
- d8, d9, d10, d11
- d12, d13, d14, d15
-*/
-
- vmull.s16 q8, d4, d0[0]
- vmlal.s16 q8, d5, d0[1]
- vmlal.s16 q8, d6, d0[2]
- vmlal.s16 q8, d8, d0[3]
- vmlal.s16 q8, d9, d1[0]
- vmlal.s16 q8, d10, d1[1]
- vmlal.s16 q8, d12, d1[2]
- vmlal.s16 q8, d13, d1[3]
- vmlal.s16 q8, d14, d2[0]
-
- vmull.s16 q9, d5, d0[0]
- vmlal.s16 q9, d6, d0[1]
- vmlal.s16 q9, d7, d0[2]
- vmlal.s16 q9, d9, d0[3]
- vmlal.s16 q9, d10, d1[0]
- vmlal.s16 q9, d11, d1[1]
- vmlal.s16 q9, d13, d1[2]
- vmlal.s16 q9, d14, d1[3]
- vmlal.s16 q9, d15, d2[0]
-
- vshrn.i32 d16, q8, #8
- vshrn.i32 d17, q9, #8
-
- vqmovun.s16 d16, q8
- vst1.8 d16, [r0]!
-
- /* Are we done yet? */
- subs r4, r4, #1
- bne 1b
-
- /* We're done, bye! */
- vpop {q4-q7}
- pop {r4-r8, r10, r11, lr}
- bx lr
-END(rsdIntrinsicConvolve3x3_K)
-
-/*
- r0 = dst
- r1 = src
- r2 = matrix
- r3 = length
-*/
-ENTRY(rsdIntrinsicColorMatrix4x4_K)
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- vld1.16 {q2}, [r2]!
- vld1.16 {q3}, [r2]!
-
-1:
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
-
- vmovl.u8 q12, d0 /* R */
- vmovl.u8 q13, d1 /* G */
- vmovl.u8 q14, d2 /* B */
- vmovl.u8 q15, d3 /* A */
-
- vmull.s16 q8, d24, d4[0]
- vmull.s16 q9, d24, d4[1]
- vmull.s16 q10, d24, d4[2]
- vmull.s16 q11, d24, d4[3]
-
- vmlal.s16 q8, d26, d5[0]
- vmlal.s16 q9, d26, d5[1]
- vmlal.s16 q10, d26, d5[2]
- vmlal.s16 q11, d26, d5[3]
-
- vmlal.s16 q8, d28, d6[0]
- vmlal.s16 q9, d28, d6[1]
- vmlal.s16 q10, d28, d6[2]
- vmlal.s16 q11, d28, d6[3]
-
- vmlal.s16 q8, d30, d7[0]
- vmlal.s16 q9, d30, d7[1]
- vmlal.s16 q10, d30, d7[2]
- vmlal.s16 q11, d30, d7[3]
-
- vshrn.i32 d24, q8, #8
- vshrn.i32 d26, q9, #8
- vshrn.i32 d28, q10, #8
- vshrn.i32 d30, q11, #8
-
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vqmovun.s16 d2, q14
- vqmovun.s16 d3, q15
-
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
-
- subs r3, r3, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicColorMatrix4x4_K)
-
-/*
- r0 = dst
- r1 = src
- r2 = matrix
- r3 = length
-*/
-ENTRY(rsdIntrinsicColorMatrix3x3_K)
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- vld1.16 {q2}, [r2]!
- vld1.16 {q3}, [r2]!
-
-1:
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
-
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
-
- vmull.s16 q8, d24, d4[0]
- vmull.s16 q9, d24, d4[1]
- vmull.s16 q10, d24, d4[2]
-
- vmlal.s16 q8, d26, d5[0]
- vmlal.s16 q9, d26, d5[1]
- vmlal.s16 q10, d26, d5[2]
-
- vmlal.s16 q8, d28, d6[0]
- vmlal.s16 q9, d28, d6[1]
- vmlal.s16 q10, d28, d6[2]
-
- vshrn.i32 d24, q8, #8
- vshrn.i32 d26, q9, #8
- vshrn.i32 d28, q10, #8
-
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vqmovun.s16 d2, q14
-
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
-
- subs r3, r3, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicColorMatrix3x3_K)
-
-/*
- r0 = dst
- r1 = src
- r2 = matrix
- r3 = length
-*/
-ENTRY(rsdIntrinsicColorMatrixDot_K)
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- vld1.16 {q2}, [r2]!
- vld1.16 {q3}, [r2]!
-
-1:
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
-
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
-
- vmull.s16 q8, d24, d4[0]
- vmlal.s16 q8, d26, d5[0]
- vmlal.s16 q8, d28, d6[0]
- vshrn.i32 d24, q8, #8
- vqmovun.s16 d0, q12
- vmov.u8 d1, d0
- vmov.u8 d2, d0
-
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
-
- subs r3, r3, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicColorMatrixDot_K)
-
-
-/*
-static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
- const float* gPtr, int iradius, int x1, int x2)
-
- r0 = out
- r1 = pin
- r2 = stride
- r3 = gptr
- r4 = sp, ct
- r5 = sp+4, x1
- r6 = sp+8, x2
-*/
-ENTRY(rsdIntrinsicBlurVF_K)
- push {r4-r8, r10, r11, lr}
- vpush {q4-q7}
-
- ldr r4, [sp, #32+64]
- ldr r5, [sp, #32+64 + 4]
- ldr r6, [sp, #32+64 + 8]
-
-1:
- veor q10, q10, q10 /* float4 blurredPixel = 0; */
- veor q11, q11, q11 /* float4 blurredPixel = 0; */
- add r7, r1, r5, lsl #2 /* const uchar *pi = ptrIn + x1 * 4; */
- mov r10, r3
-
- mov r11, r4
-
-2:
- vld1.32 {d2}, [r7]
- vmovl.u8 q1, d2
- vmovl.u16 q3, d2
- vmovl.u16 q4, d3
- vcvt.f32.s32 q3, q3
- vcvt.f32.s32 q4, q4
- vld1.32 {d0[0]}, [r10]!
- add r7, r7, r2
- vmla.f32 q10, q3, d0[0]
- vmla.f32 q11, q4, d0[0]
- subs r11, r11, #1
- bne 2b
-
- vst1.32 {q10}, [r0]!
- vst1.32 {q11}, [r0]!
- add r5, r5, #2
- cmp r5, r6
- bne 1b
-
-
- vpop {q4-q7}
- pop {r4-r8, r10, r11, lr}
- bx lr
-END(rsdIntrinsicBlurVF_K)
-
-/*
-static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
- const float* gPtr, int iradius, int x1, int x2)
-
- r0 = out
- r1 = pin
- r2 = gptr
- r3 = ct
- r4 = sp, x1
- r5 = sp+4, x2
-*/
-ENTRY(rsdIntrinsicBlurHF_K)
- push {r4-r8, r10, r11, lr}
- vpush {q4-q7}
-
- ldr r4, [sp, #32+64]
- ldr r5, [sp, #32+64 + 4]
-
-1:
- add r7, r1, r4, lsl #4 /* const uchar *pi = ptrIn + x1 * 4; */
- mov r10, r2
- mov r11, r3
-
- vld1.32 {q1}, [r7]!
- vld1.32 {d6[0]}, [r10]!
- vmul.f32 q0, q1, d6[0]
- sub r11, r11, #1
-
-2:
- vld1.32 {q1}, [r7]!
- vld1.32 {q2}, [r7]!
- vld1.32 {d6[0]}, [r10]!
- vld1.32 {d6[1]}, [r10]!
- vmla.f32 q0, q1, d6[0]
- vmla.f32 q0, q2, d6[1]
- subs r11, r11, #2
- bne 2b
-
- vcvt.s32.f32 q0, q0
- vmovn.u32 d0, q0
- vmovn.u16 d0, q0
-
- vst1.32 {d0[0]}, [r0]!
- add r4, r4, #1
- cmp r4, r5
- bne 1b
-
- vpop {q4-q7}
- pop {r4-r8, r10, r11, lr}
- bx lr
-END(rsdIntrinsicBlurHF_K)
-
-/*
- r0 = dst
- r1 = Y
- r2 = VU
- r3 = length (pixels / 8)
- r4 = sp, params
-
- This function converts 8 pixels per iteration
-*/
-ENTRY(rsdIntrinsicYuv_K)
- push {r4-r8, r10, r11, lr}
- vpush {q4-q7}
-
- ldr r4, [sp, #32+64]
- vld1.16 {q2}, [r4]! // mults
- vld1.16 {q3}, [r4]! // y offset
- vld1.16 {q4}, [r4]! // 128
- vdup.8 d3, d5[1]
-
-1:
- vld1.8 {d10}, [r1]!
- vld1.8 {d12}, [r2]!
- vmovl.u8 q5, d10 // Y at .16
- vmovl.u8 q6, d12 // vu at .16
-
- vsub.i16 q5, q5, q3
- vsub.i16 q6, q6, q4
- vtrn.16 d12, d13 // d12 = u, d13 = v
- vmov q7, q6
- vtrn.16 d12, d14
- vtrn.32 d12, d14
- vtrn.16 d13, d15
- vtrn.32 d13, d15
-
- vmull.s16 q8, d10, d4[0]
- vmull.s16 q11, d11, d4[0]
- vmov q9, q8
- vmov q10, q8
- vmov q12, q11
- vmov q13, q11
-
- vmlal.s16 q8, d12, d4[1]
- vmlal.s16 q9, d12, d5[0]
- vmlal.s16 q10, d13, d4[3]
- vmlal.s16 q9, d13, d4[2]
-
- vmlal.s16 q11, d14, d4[1]
- vmlal.s16 q12, d14, d5[0]
- vmlal.s16 q13, d15, d4[3]
- vmlal.s16 q12, d15, d4[2]
-
-
- vshrn.i32 d16, q8, #8
- vshrn.i32 d18, q9, #8
- vshrn.i32 d20, q10, #8
- vqmovun.s16 d0, q8
- vqmovun.s16 d1, q9
- vqmovun.s16 d2, q10
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
-
- vshrn.i32 d16, q11, #8
- vshrn.i32 d18, q12, #8
- vshrn.i32 d20, q13, #8
- vqmovun.s16 d0, q8
- vqmovun.s16 d1, q9
- vqmovun.s16 d2, q10
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
-
- subs r3, r3, #1
- bne 1b
-
- vpop {q4-q7}
- pop {r4-r8, r10, r11, lr}
- bx lr
-END(rsdIntrinsicYuv_K)
-
-/* Convolve 5x5 */
-
-/*
- r0 = dst
- r1 = y0 base pointer
- r2 = y1 base pointer
- r3 = y2 base pointer
- r4 = y3 base pointer
- r5 = y4 base pointer
- r6 = coeffs
- r7 = length
-*/
-ENTRY(rsdIntrinsicConvolve5x5_K)
- push {r4-r7, lr}
- vpush {q4-q7}
-
- /* load y3 in r4 */
- ldr r4, [sp, #20 + 64]
-
- /* load y4 in r5 */
- ldr r5, [sp, #24 + 64]
-
- /* Load the coefficients pointer */
- ldr r6, [sp, #28 + 64]
-
- /* Create the coefficients vector */
- vld1.16 {d0, d1, d2, d3}, [r6]!
- vld1.16 {d4, d5, d6}, [r6]
-
- /* load the count */
- ldr r6, [sp, #32 + 64]
-
- /* Load the frequently used immediate in a register */
- mov r7, #8
-
-1:
- /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */
- vld1.8 {d24, d25, d26}, [r1], r7 @ y0 ( y - 2 )
- vld1.8 {d27, d28, d29}, [r2], r7 @ y0 ( y - 1 )
-
- /* Signal memory for data that will be used in the loop after the next */
- PLD (r1, r7)
- PLD (r2, r7)
-
- /* Promoting the 8bit channels to 16bit */
- vmovl.u8 q9, d24
- vmovl.u8 q10, d25
- vmovl.u8 q11, d26
- vmovl.u8 q12, d27
- vmovl.u8 q13, d28
- vmovl.u8 q14, d29
-
-/*
- d18, d19, d20, d21, d22, d23,
- d24, d25
-*/
- vmull.s16 q4, d18, d0[0]
- vmlal.s16 q4, d19, d0[1]
- vmlal.s16 q4, d20, d0[2]
- vmlal.s16 q4, d21, d0[3]
- vmlal.s16 q4, d22, d1[0]
-
- vmlal.s16 q4, d24, d1[1]
- vmlal.s16 q4, d25, d1[2]
- vmlal.s16 q4, d26, d1[3]
- vmlal.s16 q4, d27, d2[0]
- vmlal.s16 q4, d28, d2[1]
-
- vmull.s16 q5, d19, d0[0]
- vmlal.s16 q5, d20, d0[1]
- vmlal.s16 q5, d21, d0[2]
- vmlal.s16 q5, d22, d0[3]
- vmlal.s16 q5, d23, d1[0]
-
- vmlal.s16 q5, d25, d1[1]
- vmlal.s16 q5, d26, d1[2]
- vmlal.s16 q5, d27, d1[3]
- vmlal.s16 q5, d28, d2[0]
- vmlal.s16 q5, d29, d2[1]
-
-
- /* Next 2 rows */
- /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */
- vld1.8 {d24, d25, d26}, [r3], r7 @ y0 ( y )
- vld1.8 {d27, d28, d29}, [r4], r7 @ y0 ( y + 1 )
-
- /* Signal memory for data that will be used in the loop after the next */
- PLD (r3, r7)
- PLD (r4, r7)
-
- /* Promoting the 8bit channels to 16bit */
- vmovl.u8 q9, d24
- vmovl.u8 q10, d25
- vmovl.u8 q11, d26
- vmovl.u8 q12, d27
- vmovl.u8 q13, d28
- vmovl.u8 q14, d29
-
-/*
- d18, d19, d20, d21, d22, d23,
- d24, d25
-*/
- vmlal.s16 q4, d18, d2[2]
- vmlal.s16 q4, d19, d2[3]
- vmlal.s16 q4, d20, d3[0]
- vmlal.s16 q4, d21, d3[1]
- vmlal.s16 q4, d22, d3[2]
-
- vmlal.s16 q4, d24, d3[3]
- vmlal.s16 q4, d25, d4[0]
- vmlal.s16 q4, d26, d4[1]
- vmlal.s16 q4, d27, d4[2]
- vmlal.s16 q4, d28, d4[3]
-
- vmlal.s16 q5, d19, d2[2]
- vmlal.s16 q5, d20, d2[3]
- vmlal.s16 q5, d21, d3[0]
- vmlal.s16 q5, d22, d3[1]
- vmlal.s16 q5, d23, d3[2]
-
- vmlal.s16 q5, d25, d3[3]
- vmlal.s16 q5, d26, d4[0]
- vmlal.s16 q5, d27, d4[1]
- vmlal.s16 q5, d28, d4[2]
- vmlal.s16 q5, d29, d4[3]
-
- /* Last row */
- /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */
- vld1.8 {d24, d25, d26}, [r5], r7 @ y0 ( y + 2 )
-
- /* Signal memory for data that will be used in the loop after the next */
- PLD (r5, r7)
-
- /* Promoting the 8bit channels to 16bit */
- vmovl.u8 q9, d24
- vmovl.u8 q10, d25
- vmovl.u8 q11, d26
-
-/*
- d18, d19, d20, d21, d22, d23,
- d24, d25
-*/
-
- vmlal.s16 q4, d18, d5[0]
- vmlal.s16 q4, d19, d5[1]
- vmlal.s16 q4, d20, d5[2]
- vmlal.s16 q4, d21, d5[3]
- vmlal.s16 q4, d22, d6[0]
-
- vmlal.s16 q5, d19, d5[0]
- vmlal.s16 q5, d20, d5[1]
- vmlal.s16 q5, d21, d5[2]
- vmlal.s16 q5, d22, d5[3]
- vmlal.s16 q5, d23, d6[0]
-
-
-
-
-/* Narrow it to a d-reg 32 -> 16 bit */
- vshrn.i32 d8, q4, #8
- vshrn.i32 d9, q5, #8
-
-/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */
- vqmovun.s16 d8, q4
-
- vst1.8 d8, [r0]! @ return the output and increase the address of r0
-
- /* Are we done? */
- subs r6, r6, #1
- bne 1b
-
- /* Yup, bye */
- vpop {q4-q7}
- pop {r4-r7, lr}
- bx lr
-
-END(rsdIntrinsicConvolve5x5_K)
-
-
-
-
-/*
- dst = src + dst * (1.0 - src.a)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendSrcOver_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vshll.u8 q12, d0, #8
- vshll.u8 q13, d1, #8
- vshll.u8 q14, d2, #8
- vmovl.u8 q6, d3
- vsub.i16 q6, q7, q6 // q6 = 1 - src.a
- vshll.u8 q15, d3, #8
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
- vmla.i16 q12, q8, q6
- vmla.i16 q13, q9, q6
- vmla.i16 q14, q10, q6
- vmla.i16 q15, q11, q6
-
- vshrn.i16 d0, q12, #8
- vshrn.i16 d1, q13, #8
- vshrn.i16 d2, q14, #8
- vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendSrcOver_K)
-
-/*
- dst = dst + src * (1.0 - dst.a)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendDstOver_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vshll.u8 q8, d0, #8
- vshll.u8 q9, d1, #8
- vshll.u8 q10, d2, #8
- vmovl.u8 q6, d3
- vsub.i16 q6, q7, q6 // q6 = 1 - dst.a
- vshll.u8 q11, d3, #8
-
-
- vmla.i16 q8, q12, q6
- vmla.i16 q9, q13, q6
- vmla.i16 q10, q14, q6
- vmla.i16 q11, q15, q6
-
- vshrn.i16 d0, q8, #8
- vshrn.i16 d1, q9, #8
- vshrn.i16 d2, q10, #8
- vshrn.i16 d3, q11, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendDstOver_K)
-
-/*
- dst = src * dst.a
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendSrcIn_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- //vmovl.u8 q8, d0
- //vmovl.u8 q9, d1
- //vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
- vmul.i16 q12, q12, q11
- vmul.i16 q13, q13, q11
- vmul.i16 q14, q14, q11
- vmul.i16 q15, q15, q11
-
- vshrn.i16 d0, q12, #8
- vshrn.i16 d1, q13, #8
- vshrn.i16 d2, q14, #8
- vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendSrcIn_K)
-
-/*
- dst = dst * src.a
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendDstIn_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- //vmovl.u8 q12, d0
- //vmovl.u8 q13, d1
- //vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
- vmul.i16 q8, q8, q15
- vmul.i16 q9, q9, q15
- vmul.i16 q10, q10, q15
- vmul.i16 q11, q11, q15
-
- vshrn.i16 d0, q8, #8
- vshrn.i16 d1, q9, #8
- vshrn.i16 d2, q10, #8
- vshrn.i16 d3, q11, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendDstIn_K)
-
-
-
-/*
- dst = src * (1.0 - dst.a)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendSrcOut_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- //vmovl.u8 q8, d0
- //vmovl.u8 q9, d1
- //vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vsub.i16 q6, q7, q11 // q6 = 1 - dst.a
- vmul.i16 q12, q12, q6
- vmul.i16 q13, q13, q6
- vmul.i16 q14, q14, q6
- vmul.i16 q15, q15, q6
-
- vshrn.i16 d0, q12, #8
- vshrn.i16 d1, q13, #8
- vshrn.i16 d2, q14, #8
- vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendSrcOut_K)
-
-
-/*
- dst = dst * (1.0 - src.a)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendDstOut_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- //vmovl.u8 q12, d0
- //vmovl.u8 q13, d1
- //vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vsub.i16 q6, q7, q15 // q6 = 1 - src.a
- vmul.i16 q12, q8, q6
- vmul.i16 q13, q9, q6
- vmul.i16 q14, q10, q6
- vmul.i16 q15, q11, q6
-
- vshrn.i16 d0, q12, #8
- vshrn.i16 d1, q13, #8
- vshrn.i16 d2, q14, #8
- vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendDstOut_K)
-
-
-/*
- dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
- dst.a = dst.a
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendSrcAtop_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vsub.i16 q6, q7, q15 // q6 = 1 - src.a
- vmul.i16 q8, q8, q6
- vmul.i16 q9, q9, q6
- vmul.i16 q10, q10, q6
-
- vmla.i16 q8, q12, q11
- vmla.i16 q9, q13, q11
- vmla.i16 q10, q14, q11
-
-
- vshrn.i16 d0, q8, #8
- vshrn.i16 d1, q9, #8
- vshrn.i16 d2, q10, #8
- //vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendSrcAtop_K)
-
-/*
- dst = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
- dst.a = src.a
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendDstAtop_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vsub.i16 q6, q7, q11 // q6 = 1 - dst.a
- vmul.i16 q12, q12, q6
- vmul.i16 q13, q13, q6
- vmul.i16 q14, q14, q6
-
- vmla.i16 q12, q8, q15
- vmla.i16 q13, q9, q15
- vmla.i16 q14, q10, q15
-
-
- vshrn.i16 d0, q12, #8
- vshrn.i16 d1, q13, #8
- vshrn.i16 d2, q14, #8
- //vshrn.i16 d3, q15, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendDstAtop_K)
-
-/*
- dst = dst ^ src
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendXor_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmov.u8 d4, d0
- vmov.u8 d5, d1
- vmov.u8 d6, d2
- vmov.u8 d7, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
-
- veor d0, d0, d4
- veor d1, d1, d5
- veor d2, d2, d6
- veor d3, d3, d7
-
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendXor_K)
-
-/*
- dst = dst * src
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendMultiply_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vmul.i16 q8, q8, q12
- vmul.i16 q9, q9, q13
- vmul.i16 q10, q10, q14
- vmul.i16 q11, q11, q15
-
- vshrn.i16 d0, q8, #8
- vshrn.i16 d1, q9, #8
- vshrn.i16 d2, q10, #8
- vshrn.i16 d3, q11, #8
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendMultiply_K)
-
-/*
- dst = min(src + dst, 1.0)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendAdd_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vadd.i16 q8, q8, q12
- vadd.i16 q9, q9, q13
- vadd.i16 q10, q10, q14
- vadd.i16 q11, q11, q15
-
- vqmovun.s16 d0, q8
- vqmovun.s16 d1, q9
- vqmovun.s16 d2, q10
- vqmovun.s16 d3, q11
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendAdd_K)
-
-
-/*
- dst = max(dst - src, 0.0)
-
- r0 = dst
- r1 = src
- r2 = length
-*/
-ENTRY(rsdIntrinsicBlendSub_K)
- .save {r4, lr}
- stmfd sp!, {r4, lr}
- vpush {q4-q7}
-
- mov r4, #255
- vdup.16 q7, r4
-
- mov r4, r0
-1:
-
- /* src */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
- vmovl.u8 q12, d0
- vmovl.u8 q13, d1
- vmovl.u8 q14, d2
- vmovl.u8 q15, d3
-
- /* dst */
- vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
- vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
- vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
- vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
- vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
- vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
- vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
- vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
- vmovl.u8 q8, d0
- vmovl.u8 q9, d1
- vmovl.u8 q10, d2
- vmovl.u8 q11, d3
-
-
- vsub.i16 q8, q8, q12
- vsub.i16 q9, q9, q13
- vsub.i16 q10, q10, q14
- vsub.i16 q11, q11, q15
-
- vqmovun.s16 d0, q8
- vqmovun.s16 d1, q9
- vqmovun.s16 d2, q10
- vqmovun.s16 d3, q11
- vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
- vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
- vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
- vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
- vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
- vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
- vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
- vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
-
- subs r2, r2, #1
- bne 1b
-
- vpop {q4-q7}
- ldmfd sp!, {r4, lr}
- bx lr
-END(rsdIntrinsicBlendSub_K)
-
diff --git a/driver/rsdMeshObj.cpp b/driver/rsdMeshObj.cpp
index 92e02be..e8df21f 100644
--- a/driver/rsdMeshObj.cpp
+++ b/driver/rsdMeshObj.cpp
@@ -151,7 +151,7 @@
mAttribs[ct].ptr = NULL;
} else {
mAttribs[ct].buffer = 0;
- mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.mallocPtrLOD0;
+ mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.lod[0].mallocPtr;
}
}
@@ -172,7 +172,7 @@
} else {
RSD_CALL_GL(glBindBuffer, GL_ELEMENT_ARRAY_BUFFER, 0);
RSD_CALL_GL(glDrawElements, mGLPrimitives[primIndex], len, GL_UNSIGNED_SHORT,
- idxAlloc->mHal.drvState.mallocPtrLOD0);
+ idxAlloc->mHal.drvState.lod[0].mallocPtr);
}
} else {
RSD_CALL_GL(glDrawArrays, mGLPrimitives[primIndex], start, len);
diff --git a/driver/rsdRuntime.h b/driver/rsdRuntime.h
deleted file mode 100644
index dc84032..0000000
--- a/driver/rsdRuntime.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef RSD_RUNTIME_STUBS_H
-#define RSD_RUNTIME_STUBS_H
-
-#include <rs_hal.h>
-
-#include "rsMutex.h"
-
-const RsdSymbolTable * rsdLookupSymbolMath(const char *sym);
-
-void* rsdLookupRuntimeStub(void* pContext, char const* name);
-
-#endif
diff --git a/driver/rsdRuntimeMath.cpp b/driver/rsdRuntimeMath.cpp
deleted file mode 100644
index ba37243..0000000
--- a/driver/rsdRuntimeMath.cpp
+++ /dev/null
@@ -1,546 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cutils/compiler.h>
-
-#include "rsContext.h"
-#include "rsScriptC.h"
-#include "rsMatrix4x4.h"
-#include "rsMatrix3x3.h"
-#include "rsMatrix2x2.h"
-
-#include "rsdCore.h"
-#include "rsdRuntime.h"
-
-
-using namespace android;
-using namespace android::renderscript;
-
-
-static float SC_exp10(float v) {
- return pow(10.f, v);
-}
-
-static float SC_fract(float v, float *iptr) {
- int i = (int)floor(v);
- iptr[0] = (float)i;
- return fmin(v - i, 0x1.fffffep-1f);
-}
-
-static float SC_log2(float v) {
- return log10(v) / log10(2.f);
-}
-
-#if 0
-static float SC_pown(float v, int p) {
- return powf(v, (float)p);
-}
-
-static float SC_powr(float v, float p) {
- return powf(v, p);
-}
-#endif
-
-float SC_rootn(float v, int r) {
- return pow(v, 1.f / r);
-}
-
-float SC_rsqrt(float v) {
- return 1.f / sqrtf(v);
-}
-
-float SC_sincos(float v, float *cosptr) {
- *cosptr = cosf(v);
- return sinf(v);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Integer
-//////////////////////////////////////////////////////////////////////////////
-
-
-static uint32_t SC_abs_i32(int32_t v) {return abs(v);}
-static uint16_t SC_abs_i16(int16_t v) {return (uint16_t)abs(v);}
-static uint8_t SC_abs_i8(int8_t v) {return (uint8_t)abs(v);}
-
-static uint32_t SC_clz_u32(uint32_t v) {return __builtin_clz(v);}
-static uint16_t SC_clz_u16(uint16_t v) {return (uint16_t)__builtin_clz(v);}
-static uint8_t SC_clz_u8(uint8_t v) {return (uint8_t)__builtin_clz(v);}
-static int32_t SC_clz_i32(int32_t v) {return (int32_t)__builtin_clz((uint32_t)v);}
-static int16_t SC_clz_i16(int16_t v) {return (int16_t)__builtin_clz(v);}
-static int8_t SC_clz_i8(int8_t v) {return (int8_t)__builtin_clz(v);}
-
-static uint32_t SC_max_u32(uint32_t v, uint32_t v2) {return rsMax(v, v2);}
-static uint16_t SC_max_u16(uint16_t v, uint16_t v2) {return rsMax(v, v2);}
-static uint8_t SC_max_u8(uint8_t v, uint8_t v2) {return rsMax(v, v2);}
-static int32_t SC_max_i32(int32_t v, int32_t v2) {return rsMax(v, v2);}
-static int16_t SC_max_i16(int16_t v, int16_t v2) {return rsMax(v, v2);}
-static int8_t SC_max_i8(int8_t v, int8_t v2) {return rsMax(v, v2);}
-
-static uint32_t SC_min_u32(uint32_t v, uint32_t v2) {return rsMin(v, v2);}
-static uint16_t SC_min_u16(uint16_t v, uint16_t v2) {return rsMin(v, v2);}
-static uint8_t SC_min_u8(uint8_t v, uint8_t v2) {return rsMin(v, v2);}
-static int32_t SC_min_i32(int32_t v, int32_t v2) {return rsMin(v, v2);}
-static int16_t SC_min_i16(int16_t v, int16_t v2) {return rsMin(v, v2);}
-static int8_t SC_min_i8(int8_t v, int8_t v2) {return rsMin(v, v2);}
-
-//////////////////////////////////////////////////////////////////////////////
-// Float util
-//////////////////////////////////////////////////////////////////////////////
-
-static float SC_clamp_f32(float amount, float low, float high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-
-static float SC_max_f32(float v, float v2) {
- return rsMax(v, v2);
-}
-
-static float SC_min_f32(float v, float v2) {
- return rsMin(v, v2);
-}
-
-static float SC_step_f32(float edge, float v) {
- if (v < edge) return 0.f;
- return 1.f;
-}
-
-static float SC_sign_f32(float value) {
- if (value > 0) return 1.f;
- if (value < 0) return -1.f;
- return value;
-}
-
-static void SC_MatrixLoadIdentity_4x4(Matrix4x4 *m) {
- m->loadIdentity();
-}
-static void SC_MatrixLoadIdentity_3x3(Matrix3x3 *m) {
- m->loadIdentity();
-}
-static void SC_MatrixLoadIdentity_2x2(Matrix2x2 *m) {
- m->loadIdentity();
-}
-
-static void SC_MatrixLoad_4x4_f(Matrix4x4 *m, const float *f) {
- m->load(f);
-}
-static void SC_MatrixLoad_3x3_f(Matrix3x3 *m, const float *f) {
- m->load(f);
-}
-static void SC_MatrixLoad_2x2_f(Matrix2x2 *m, const float *f) {
- m->load(f);
-}
-
-static void SC_MatrixLoad_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *s) {
- m->load(s);
-}
-static void SC_MatrixLoad_4x4_3x3(Matrix4x4 *m, const Matrix3x3 *s) {
- m->load(s);
-}
-static void SC_MatrixLoad_4x4_2x2(Matrix4x4 *m, const Matrix2x2 *s) {
- m->load(s);
-}
-static void SC_MatrixLoad_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *s) {
- m->load(s);
-}
-static void SC_MatrixLoad_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *s) {
- m->load(s);
-}
-
-static void SC_MatrixLoadRotate(Matrix4x4 *m, float rot, float x, float y, float z) {
- m->loadRotate(rot, x, y, z);
-}
-static void SC_MatrixLoadScale(Matrix4x4 *m, float x, float y, float z) {
- m->loadScale(x, y, z);
-}
-static void SC_MatrixLoadTranslate(Matrix4x4 *m, float x, float y, float z) {
- m->loadTranslate(x, y, z);
-}
-static void SC_MatrixRotate(Matrix4x4 *m, float rot, float x, float y, float z) {
- m->rotate(rot, x, y, z);
-}
-static void SC_MatrixScale(Matrix4x4 *m, float x, float y, float z) {
- m->scale(x, y, z);
-}
-static void SC_MatrixTranslate(Matrix4x4 *m, float x, float y, float z) {
- m->translate(x, y, z);
-}
-
-static void SC_MatrixLoadMultiply_4x4_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *lhs, const Matrix4x4 *rhs) {
- m->loadMultiply(lhs, rhs);
-}
-static void SC_MatrixLoadMultiply_3x3_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *lhs, const Matrix3x3 *rhs) {
- m->loadMultiply(lhs, rhs);
-}
-static void SC_MatrixLoadMultiply_2x2_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *lhs, const Matrix2x2 *rhs) {
- m->loadMultiply(lhs, rhs);
-}
-
-static void SC_MatrixMultiply_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *rhs) {
- m->multiply(rhs);
-}
-static void SC_MatrixMultiply_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *rhs) {
- m->multiply(rhs);
-}
-static void SC_MatrixMultiply_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *rhs) {
- m->multiply(rhs);
-}
-
-static void SC_MatrixLoadOrtho(Matrix4x4 *m, float l, float r, float b, float t, float n, float f) {
- m->loadOrtho(l, r, b, t, n, f);
-}
-static void SC_MatrixLoadFrustum(Matrix4x4 *m, float l, float r, float b, float t, float n, float f) {
- m->loadFrustum(l, r, b, t, n, f);
-}
-static void SC_MatrixLoadPerspective(Matrix4x4 *m, float fovy, float aspect, float near, float far) {
- m->loadPerspective(fovy, aspect, near, far);
-}
-
-static bool SC_MatrixInverse_4x4(Matrix4x4 *m) {
- return m->inverse();
-}
-static bool SC_MatrixInverseTranspose_4x4(Matrix4x4 *m) {
- return m->inverseTranspose();
-}
-static void SC_MatrixTranspose_4x4(Matrix4x4 *m) {
- m->transpose();
-}
-static void SC_MatrixTranspose_3x3(Matrix3x3 *m) {
- m->transpose();
-}
-static void SC_MatrixTranspose_2x2(Matrix2x2 *m) {
- m->transpose();
-}
-
-static float SC_randf(float max) {
- float r = (float)rand();
- r *= max;
- r /= RAND_MAX;
- return r;
-}
-
-static float SC_randf2(float min, float max) {
- float r = (float)rand();
- r /= RAND_MAX;
- r = r * (max - min) + min;
- return r;
-}
-
-static int SC_randi(int max) {
- return (int)SC_randf(max);
-}
-
-static int SC_randi2(int min, int max) {
- return (int)SC_randf2(min, max);
-}
-
-static float SC_frac(float v) {
- int i = (int)floor(v);
- return fmin(v - i, 0x1.fffffep-1f);
-}
-
-
-static int32_t SC_AtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
- int32_t prev;
-
- do {
- int32_t ret = android_atomic_release_cas(expectedValue, newValue, ptr);
- if (!ret) {
- // The android cas return 0 if it wrote the value. This means the
- // previous value was the expected value and we can return.
- return expectedValue;
- }
- // We didn't write the value and need to load the "previous" value.
- prev = *ptr;
-
- // A race condition exists where the expected value could appear after our cas failed
- // above. In this case loop until we have a legit previous value or the
- // write passes.
- } while (prev == expectedValue);
- return prev;
-}
-
-
-static int32_t SC_AtomicInc(volatile int32_t *ptr) {
- return android_atomic_inc(ptr);
-}
-
-static int32_t SC_AtomicDec(volatile int32_t *ptr) {
- return android_atomic_dec(ptr);
-}
-
-static int32_t SC_AtomicAdd(volatile int32_t *ptr, int32_t value) {
- return android_atomic_add(value, ptr);
-}
-
-static int32_t SC_AtomicSub(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- status = android_atomic_release_cas(prev, prev - value, ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-static int32_t SC_AtomicAnd(volatile int32_t *ptr, int32_t value) {
- return android_atomic_and(value, ptr);
-}
-
-static int32_t SC_AtomicOr(volatile int32_t *ptr, int32_t value) {
- return android_atomic_or(value, ptr);
-}
-
-static int32_t SC_AtomicXor(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- status = android_atomic_release_cas(prev, prev ^ value, ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-static uint32_t SC_AtomicUMin(volatile uint32_t *ptr, uint32_t value) {
- uint32_t prev, status;
- do {
- prev = *ptr;
- uint32_t n = rsMin(value, prev);
- status = android_atomic_release_cas((int32_t) prev, (int32_t)n, (volatile int32_t*) ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-static int32_t SC_AtomicMin(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- int32_t n = rsMin(value, prev);
- status = android_atomic_release_cas(prev, n, ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-static uint32_t SC_AtomicUMax(volatile uint32_t *ptr, uint32_t value) {
- uint32_t prev, status;
- do {
- prev = *ptr;
- uint32_t n = rsMax(value, prev);
- status = android_atomic_release_cas((int32_t) prev, (int32_t) n, (volatile int32_t*) ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-static int32_t SC_AtomicMax(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- int32_t n = rsMax(value, prev);
- status = android_atomic_release_cas(prev, n, ptr);
- } while (CC_UNLIKELY(status != 0));
- return prev;
-}
-
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Class implementation
-//////////////////////////////////////////////////////////////////////////////
-
-// llvm name mangling ref
-// <builtin-type> ::= v # void
-// ::= b # bool
-// ::= c # char
-// ::= a # signed char
-// ::= h # unsigned char
-// ::= s # short
-// ::= t # unsigned short
-// ::= i # int
-// ::= j # unsigned int
-// ::= l # long
-// ::= m # unsigned long
-// ::= x # long long, __int64
-// ::= y # unsigned long long, __int64
-// ::= f # float
-// ::= d # double
-
-static RsdSymbolTable gSyms[] = {
- { "_Z4acosf", (void *)&acosf, true },
- { "_Z5acoshf", (void *)&acoshf, true },
- { "_Z4asinf", (void *)&asinf, true },
- { "_Z5asinhf", (void *)&asinhf, true },
- { "_Z4atanf", (void *)&atanf, true },
- { "_Z5atan2ff", (void *)&atan2f, true },
- { "_Z5atanhf", (void *)&atanhf, true },
- { "_Z4cbrtf", (void *)&cbrtf, true },
- { "_Z4ceilf", (void *)&ceilf, true },
- { "_Z8copysignff", (void *)&copysignf, true },
- { "_Z3cosf", (void *)&cosf, true },
- { "_Z4coshf", (void *)&coshf, true },
- { "_Z4erfcf", (void *)&erfcf, true },
- { "_Z3erff", (void *)&erff, true },
- { "_Z3expf", (void *)&expf, true },
- { "_Z4exp2f", (void *)&exp2f, true },
- { "_Z5exp10f", (void *)&SC_exp10, true },
- { "_Z5expm1f", (void *)&expm1f, true },
- { "_Z4fabsf", (void *)&fabsf, true },
- { "_Z4fdimff", (void *)&fdimf, true },
- { "_Z5floorf", (void *)&floorf, true },
- { "_Z3fmafff", (void *)&fmaf, true },
- { "_Z4fmaxff", (void *)&fmaxf, true },
- { "_Z4fminff", (void *)&fminf, true }, // float fmin(float, float)
- { "_Z4fmodff", (void *)&fmodf, true },
- { "_Z5fractfPf", (void *)&SC_fract, true },
- { "_Z5frexpfPi", (void *)&frexpf, true },
- { "_Z5hypotff", (void *)&hypotf, true },
- { "_Z5ilogbf", (void *)&ilogbf, true },
- { "_Z5ldexpfi", (void *)&ldexpf, true },
- { "_Z6lgammaf", (void *)&lgammaf, true },
- { "_Z6lgammafPi", (void *)&lgammaf_r, true },
- { "_Z3logf", (void *)&logf, true },
- { "_Z4log2f", (void *)&SC_log2, true },
- { "_Z5log10f", (void *)&log10f, true },
- { "_Z5log1pf", (void *)&log1pf, true },
- { "_Z4logbf", (void *)&logbf, true },
- { "_Z4modffPf", (void *)&modff, true },
- //{ "_Z3nanj", (void *)&SC_nan, true },
- { "_Z9nextafterff", (void *)&nextafterf, true },
- { "_Z3powff", (void *)&powf, true },
- { "_Z9remainderff", (void *)&remainderf, true },
- { "_Z6remquoffPi", (void *)&remquof, true },
- { "_Z4rintf", (void *)&rintf, true },
- { "_Z5rootnfi", (void *)&SC_rootn, true },
- { "_Z5roundf", (void *)&roundf, true },
- { "_Z5rsqrtf", (void *)&SC_rsqrt, true },
- { "_Z3sinf", (void *)&sinf, true },
- { "_Z6sincosfPf", (void *)&SC_sincos, true },
- { "_Z4sinhf", (void *)&sinhf, true },
- { "_Z4sqrtf", (void *)&sqrtf, true },
- { "_Z3tanf", (void *)&tanf, true },
- { "_Z4tanhf", (void *)&tanhf, true },
- { "_Z6tgammaf", (void *)&tgammaf, true },
- { "_Z5truncf", (void *)&truncf, true },
-
- { "_Z3absi", (void *)&SC_abs_i32, true },
- { "_Z3abss", (void *)&SC_abs_i16, true },
- { "_Z3absc", (void *)&SC_abs_i8, true },
- { "_Z3clzj", (void *)&SC_clz_u32, true },
- { "_Z3clzt", (void *)&SC_clz_u16, true },
- { "_Z3clzh", (void *)&SC_clz_u8, true },
- { "_Z3clzi", (void *)&SC_clz_i32, true },
- { "_Z3clzs", (void *)&SC_clz_i16, true },
- { "_Z3clzc", (void *)&SC_clz_i8, true },
- { "_Z3maxjj", (void *)&SC_max_u32, true },
- { "_Z3maxtt", (void *)&SC_max_u16, true },
- { "_Z3maxhh", (void *)&SC_max_u8, true },
- { "_Z3maxii", (void *)&SC_max_i32, true },
- { "_Z3maxss", (void *)&SC_max_i16, true },
- { "_Z3maxcc", (void *)&SC_max_i8, true },
- { "_Z3minjj", (void *)&SC_min_u32, true },
- { "_Z3mintt", (void *)&SC_min_u16, true },
- { "_Z3minhh", (void *)&SC_min_u8, true },
- { "_Z3minii", (void *)&SC_min_i32, true },
- { "_Z3minss", (void *)&SC_min_i16, true },
- { "_Z3mincc", (void *)&SC_min_i8, true },
-
- { "_Z5clampfff", (void *)&SC_clamp_f32, true },
- { "_Z3maxff", (void *)&SC_max_f32, true },
- { "_Z3minff", (void *)&SC_min_f32, true },
- { "_Z4stepff", (void *)&SC_step_f32, true },
- //{ "smoothstep", (void *)&, true },
- { "_Z4signf", (void *)&SC_sign_f32, true },
-
- // matrix
- { "_Z20rsMatrixLoadIdentityP12rs_matrix4x4", (void *)&SC_MatrixLoadIdentity_4x4, true },
- { "_Z20rsMatrixLoadIdentityP12rs_matrix3x3", (void *)&SC_MatrixLoadIdentity_3x3, true },
- { "_Z20rsMatrixLoadIdentityP12rs_matrix2x2", (void *)&SC_MatrixLoadIdentity_2x2, true },
-
- { "_Z12rsMatrixLoadP12rs_matrix4x4PKf", (void *)&SC_MatrixLoad_4x4_f, true },
- { "_Z12rsMatrixLoadP12rs_matrix3x3PKf", (void *)&SC_MatrixLoad_3x3_f, true },
- { "_Z12rsMatrixLoadP12rs_matrix2x2PKf", (void *)&SC_MatrixLoad_2x2_f, true },
-
- { "_Z12rsMatrixLoadP12rs_matrix4x4PKS_", (void *)&SC_MatrixLoad_4x4_4x4, true },
- { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix3x3", (void *)&SC_MatrixLoad_4x4_3x3, true },
- { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix2x2", (void *)&SC_MatrixLoad_4x4_2x2, true },
- { "_Z12rsMatrixLoadP12rs_matrix3x3PKS_", (void *)&SC_MatrixLoad_3x3_3x3, true },
- { "_Z12rsMatrixLoadP12rs_matrix2x2PKS_", (void *)&SC_MatrixLoad_2x2_2x2, true },
-
- { "_Z18rsMatrixLoadRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadRotate, true },
- { "_Z17rsMatrixLoadScaleP12rs_matrix4x4fff", (void *)&SC_MatrixLoadScale, true },
- { "_Z21rsMatrixLoadTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixLoadTranslate, true },
- { "_Z14rsMatrixRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixRotate, true },
- { "_Z13rsMatrixScaleP12rs_matrix4x4fff", (void *)&SC_MatrixScale, true },
- { "_Z17rsMatrixTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixTranslate, true },
-
- { "_Z20rsMatrixLoadMultiplyP12rs_matrix4x4PKS_S2_", (void *)&SC_MatrixLoadMultiply_4x4_4x4_4x4, true },
- { "_Z16rsMatrixMultiplyP12rs_matrix4x4PKS_", (void *)&SC_MatrixMultiply_4x4_4x4, true },
- { "_Z20rsMatrixLoadMultiplyP12rs_matrix3x3PKS_S2_", (void *)&SC_MatrixLoadMultiply_3x3_3x3_3x3, true },
- { "_Z16rsMatrixMultiplyP12rs_matrix3x3PKS_", (void *)&SC_MatrixMultiply_3x3_3x3, true },
- { "_Z20rsMatrixLoadMultiplyP12rs_matrix2x2PKS_S2_", (void *)&SC_MatrixLoadMultiply_2x2_2x2_2x2, true },
- { "_Z16rsMatrixMultiplyP12rs_matrix2x2PKS_", (void *)&SC_MatrixMultiply_2x2_2x2, true },
-
- { "_Z17rsMatrixLoadOrthoP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadOrtho, true },
- { "_Z19rsMatrixLoadFrustumP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadFrustum, true },
- { "_Z23rsMatrixLoadPerspectiveP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadPerspective, true },
-
- { "_Z15rsMatrixInverseP12rs_matrix4x4", (void *)&SC_MatrixInverse_4x4, true },
- { "_Z24rsMatrixInverseTransposeP12rs_matrix4x4", (void *)&SC_MatrixInverseTranspose_4x4, true },
- { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_4x4, true },
- { "_Z17rsMatrixTransposeP12rs_matrix3x3", (void *)&SC_MatrixTranspose_3x3, true },
- { "_Z17rsMatrixTransposeP12rs_matrix2x2", (void *)&SC_MatrixTranspose_2x2, true },
-
- // RS Math
- { "_Z6rsRandi", (void *)&SC_randi, true },
- { "_Z6rsRandii", (void *)&SC_randi2, true },
- { "_Z6rsRandf", (void *)&SC_randf, true },
- { "_Z6rsRandff", (void *)&SC_randf2, true },
- { "_Z6rsFracf", (void *)&SC_frac, true },
-
- // Atomics
- { "_Z11rsAtomicIncPVi", (void *)&SC_AtomicInc, true },
- { "_Z11rsAtomicIncPVj", (void *)&SC_AtomicInc, true },
- { "_Z11rsAtomicDecPVi", (void *)&SC_AtomicDec, true },
- { "_Z11rsAtomicDecPVj", (void *)&SC_AtomicDec, true },
- { "_Z11rsAtomicAddPVii", (void *)&SC_AtomicAdd, true },
- { "_Z11rsAtomicAddPVjj", (void *)&SC_AtomicAdd, true },
- { "_Z11rsAtomicSubPVii", (void *)&SC_AtomicSub, true },
- { "_Z11rsAtomicSubPVjj", (void *)&SC_AtomicSub, true },
- { "_Z11rsAtomicAndPVii", (void *)&SC_AtomicAnd, true },
- { "_Z11rsAtomicAndPVjj", (void *)&SC_AtomicAnd, true },
- { "_Z10rsAtomicOrPVii", (void *)&SC_AtomicOr, true },
- { "_Z10rsAtomicOrPVjj", (void *)&SC_AtomicOr, true },
- { "_Z11rsAtomicXorPVii", (void *)&SC_AtomicXor, true },
- { "_Z11rsAtomicXorPVjj", (void *)&SC_AtomicXor, true },
- { "_Z11rsAtomicMinPVii", (void *)&SC_AtomicMin, true },
- { "_Z11rsAtomicMinPVjj", (void *)&SC_AtomicUMin, true },
- { "_Z11rsAtomicMaxPVii", (void *)&SC_AtomicMax, true },
- { "_Z11rsAtomicMaxPVjj", (void *)&SC_AtomicUMax, true },
- { "_Z11rsAtomicCasPViii", (void *)&SC_AtomicCas, true },
- { "_Z11rsAtomicCasPVjjj", (void *)&SC_AtomicCas, true },
-
- { NULL, NULL, false }
-};
-
-const RsdSymbolTable * rsdLookupSymbolMath(const char *sym) {
- const RsdSymbolTable *syms = gSyms;
-
- while (syms->mPtr) {
- if (!strcmp(syms->mName, sym)) {
- return syms;
- }
- syms++;
- }
- return NULL;
-}
-
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index 9bd1396..5141c9f 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2011-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,7 +25,6 @@
#include "rsdCore.h"
#include "rsdBcc.h"
-#include "rsdRuntime.h"
#include "rsdPath.h"
#include "rsdAllocation.h"
#include "rsdShaderCache.h"
@@ -36,11 +35,6 @@
using namespace android;
using namespace android::renderscript;
-#define GET_TLS() ScriptTLSStruct * tls = \
- (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); \
- Context * rsc = tls->mContext; \
- ScriptC * sc = (ScriptC *) tls->mScript
-
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
@@ -76,13 +70,13 @@
static void SC_AllocationSyncAll2(Allocation *a, RsAllocationUsageType source) {
- GET_TLS();
- rsrAllocationSyncAll(rsc, sc, a, source);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrAllocationSyncAll(rsc, a, source);
}
static void SC_AllocationSyncAll(Allocation *a) {
- GET_TLS();
- rsrAllocationSyncAll(rsc, sc, a, RS_ALLOCATION_USAGE_SCRIPT);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrAllocationSyncAll(rsc, a, RS_ALLOCATION_USAGE_SCRIPT);
}
static void SC_AllocationCopy1DRange(Allocation *dstAlloc,
@@ -91,7 +85,7 @@
uint32_t count,
Allocation *srcAlloc,
uint32_t srcOff, uint32_t srcMip) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
rsrAllocationCopy1DRange(rsc, dstAlloc, dstOff, dstMip, count,
srcAlloc, srcOff, srcMip);
}
@@ -103,7 +97,7 @@
Allocation *srcAlloc,
uint32_t srcXoff, uint32_t srcYoff,
uint32_t srcMip, uint32_t srcFace) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
rsrAllocationCopy2DRange(rsc, dstAlloc,
dstXoff, dstYoff, dstMip, dstFace,
width, height,
@@ -112,13 +106,13 @@
}
static void SC_AllocationIoSend(Allocation *alloc) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
rsdAllocationIoSend(rsc, alloc);
}
static void SC_AllocationIoReceive(Allocation *alloc) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
rsdAllocationIoReceive(rsc, alloc);
}
@@ -129,68 +123,68 @@
//////////////////////////////////////////////////////////////////////////////
static void SC_BindTexture(ProgramFragment *pf, uint32_t slot, Allocation *a) {
- GET_TLS();
- rsrBindTexture(rsc, sc, pf, slot, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindTexture(rsc, pf, slot, a);
}
static void SC_BindVertexConstant(ProgramVertex *pv, uint32_t slot, Allocation *a) {
- GET_TLS();
- rsrBindConstant(rsc, sc, pv, slot, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindConstant(rsc, pv, slot, a);
}
static void SC_BindFragmentConstant(ProgramFragment *pf, uint32_t slot, Allocation *a) {
- GET_TLS();
- rsrBindConstant(rsc, sc, pf, slot, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindConstant(rsc, pf, slot, a);
}
static void SC_BindSampler(ProgramFragment *pf, uint32_t slot, Sampler *s) {
- GET_TLS();
- rsrBindSampler(rsc, sc, pf, slot, s);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindSampler(rsc, pf, slot, s);
}
static void SC_BindProgramStore(ProgramStore *ps) {
- GET_TLS();
- rsrBindProgramStore(rsc, sc, ps);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindProgramStore(rsc, ps);
}
static void SC_BindProgramFragment(ProgramFragment *pf) {
- GET_TLS();
- rsrBindProgramFragment(rsc, sc, pf);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindProgramFragment(rsc, pf);
}
static void SC_BindProgramVertex(ProgramVertex *pv) {
- GET_TLS();
- rsrBindProgramVertex(rsc, sc, pv);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindProgramVertex(rsc, pv);
}
static void SC_BindProgramRaster(ProgramRaster *pr) {
- GET_TLS();
- rsrBindProgramRaster(rsc, sc, pr);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindProgramRaster(rsc, pr);
}
static void SC_BindFrameBufferObjectColorTarget(Allocation *a, uint32_t slot) {
- GET_TLS();
- rsrBindFrameBufferObjectColorTarget(rsc, sc, a, slot);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindFrameBufferObjectColorTarget(rsc, a, slot);
}
static void SC_BindFrameBufferObjectDepthTarget(Allocation *a) {
- GET_TLS();
- rsrBindFrameBufferObjectDepthTarget(rsc, sc, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindFrameBufferObjectDepthTarget(rsc, a);
}
static void SC_ClearFrameBufferObjectColorTarget(uint32_t slot) {
- GET_TLS();
- rsrClearFrameBufferObjectColorTarget(rsc, sc, slot);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrClearFrameBufferObjectColorTarget(rsc, slot);
}
static void SC_ClearFrameBufferObjectDepthTarget(Context *, Script *) {
- GET_TLS();
- rsrClearFrameBufferObjectDepthTarget(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrClearFrameBufferObjectDepthTarget(rsc);
}
static void SC_ClearFrameBufferObjectTargets(Context *, Script *) {
- GET_TLS();
- rsrClearFrameBufferObjectTargets(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrClearFrameBufferObjectTargets(rsc);
}
@@ -199,28 +193,28 @@
//////////////////////////////////////////////////////////////////////////////
static void SC_VpLoadProjectionMatrix(const rsc_Matrix *m) {
- GET_TLS();
- rsrVpLoadProjectionMatrix(rsc, sc, m);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrVpLoadProjectionMatrix(rsc, m);
}
static void SC_VpLoadModelMatrix(const rsc_Matrix *m) {
- GET_TLS();
- rsrVpLoadModelMatrix(rsc, sc, m);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrVpLoadModelMatrix(rsc, m);
}
static void SC_VpLoadTextureMatrix(const rsc_Matrix *m) {
- GET_TLS();
- rsrVpLoadTextureMatrix(rsc, sc, m);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrVpLoadTextureMatrix(rsc, m);
}
static void SC_PfConstantColor(ProgramFragment *pf, float r, float g, float b, float a) {
- GET_TLS();
- rsrPfConstantColor(rsc, sc, pf, r, g, b, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrPfConstantColor(rsc, pf, r, g, b, a);
}
static void SC_VpGetProjectionMatrix(rsc_Matrix *m) {
- GET_TLS();
- rsrVpGetProjectionMatrix(rsc, sc, m);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrVpGetProjectionMatrix(rsc, m);
}
@@ -232,7 +226,7 @@
float x2, float y2, float z2, float u2, float v2,
float x3, float y3, float z3, float u3, float v3,
float x4, float y4, float z4, float u4, float v4) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
if (!rsc->setupCheck()) {
return;
@@ -266,7 +260,6 @@
float x2, float y2, float z2,
float x3, float y3, float z3,
float x4, float y4, float z4) {
- GET_TLS();
SC_DrawQuadTexCoords(x1, y1, z1, 0, 1,
x2, y2, z2, 1, 1,
x3, y3, z3, 1, 0,
@@ -274,7 +267,7 @@
}
static void SC_DrawSpriteScreenspace(float x, float y, float z, float w, float h) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
ObjectBaseRef<const ProgramVertex> tmp(rsc->getProgramVertex());
rsc->setProgramVertex(rsc->getDefaultProgramVertex());
@@ -292,38 +285,34 @@
}
static void SC_DrawRect(float x1, float y1, float x2, float y2, float z) {
- GET_TLS();
-
SC_DrawQuad(x1, y2, z, x2, y2, z, x2, y1, z, x1, y1, z);
-
}
static void SC_DrawPath(Path *p) {
- GET_TLS();
- //rsrDrawPath(rsc, sc, p);
+ Context *rsc = RsdCpuReference::getTlsContext();
rsdPathDraw(rsc, p);
}
static void SC_DrawMesh(Mesh *m) {
- GET_TLS();
- rsrDrawMesh(rsc, sc, m);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrDrawMesh(rsc, m);
}
static void SC_DrawMeshPrimitive(Mesh *m, uint32_t primIndex) {
- GET_TLS();
- rsrDrawMeshPrimitive(rsc, sc, m, primIndex);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrDrawMeshPrimitive(rsc, m, primIndex);
}
static void SC_DrawMeshPrimitiveRange(Mesh *m, uint32_t primIndex, uint32_t start, uint32_t len) {
- GET_TLS();
- rsrDrawMeshPrimitiveRange(rsc, sc, m, primIndex, start, len);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrDrawMeshPrimitiveRange(rsc, m, primIndex, start, len);
}
static void SC_MeshComputeBoundingBox(Mesh *m,
float *minX, float *minY, float *minZ,
float *maxX, float *maxY, float *maxZ) {
- GET_TLS();
- rsrMeshComputeBoundingBox(rsc, sc, m, minX, minY, minZ, maxX, maxY, maxZ);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrMeshComputeBoundingBox(rsc, m, minX, minY, minZ, maxX, maxY, maxZ);
}
@@ -334,67 +323,67 @@
static void SC_Color(float r, float g, float b, float a) {
- GET_TLS();
- rsrColor(rsc, sc, r, g, b, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrColor(rsc, r, g, b, a);
}
static void SC_Finish() {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
rsdGLFinish(rsc);
}
static void SC_ClearColor(float r, float g, float b, float a) {
- GET_TLS();
- rsrPrepareClear(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrPrepareClear(rsc);
rsdGLClearColor(rsc, r, g, b, a);
}
static void SC_ClearDepth(float v) {
- GET_TLS();
- rsrPrepareClear(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrPrepareClear(rsc);
rsdGLClearDepth(rsc, v);
}
static uint32_t SC_GetWidth() {
- GET_TLS();
- return rsrGetWidth(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrGetWidth(rsc);
}
static uint32_t SC_GetHeight() {
- GET_TLS();
- return rsrGetHeight(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrGetHeight(rsc);
}
static void SC_DrawTextAlloc(Allocation *a, int x, int y) {
- GET_TLS();
- rsrDrawTextAlloc(rsc, sc, a, x, y);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrDrawTextAlloc(rsc, a, x, y);
}
static void SC_DrawText(const char *text, int x, int y) {
- GET_TLS();
- rsrDrawText(rsc, sc, text, x, y);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrDrawText(rsc, text, x, y);
}
static void SC_MeasureTextAlloc(Allocation *a,
int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
- GET_TLS();
- rsrMeasureTextAlloc(rsc, sc, a, left, right, top, bottom);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrMeasureTextAlloc(rsc, a, left, right, top, bottom);
}
static void SC_MeasureText(const char *text,
int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
- GET_TLS();
- rsrMeasureText(rsc, sc, text, left, right, top, bottom);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrMeasureText(rsc, text, left, right, top, bottom);
}
static void SC_BindFont(Font *f) {
- GET_TLS();
- rsrBindFont(rsc, sc, f);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrBindFont(rsc, f);
}
static void SC_FontColor(float r, float g, float b, float a) {
- GET_TLS();
- rsrFontColor(rsc, sc, r, g, b, a);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrFontColor(rsc, r, g, b, a);
}
@@ -404,41 +393,42 @@
//////////////////////////////////////////////////////////////////////////////
static void SC_SetObject(ObjectBase **dst, ObjectBase * src) {
- GET_TLS();
- rsrSetObject(rsc, sc, dst, src);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrSetObject(rsc, dst, src);
}
static void SC_ClearObject(ObjectBase **dst) {
- GET_TLS();
- rsrClearObject(rsc, sc, dst);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrClearObject(rsc, dst);
}
static bool SC_IsObject(const ObjectBase *src) {
- GET_TLS();
- return rsrIsObject(rsc, sc, src);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrIsObject(rsc, src);
}
static const Allocation * SC_GetAllocation(const void *ptr) {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
+ const Script *sc = RsdCpuReference::getTlsScript();
return rsdScriptGetAllocationForPointer(rsc, sc, ptr);
}
static void SC_ForEach_SAA(Script *target,
Allocation *in,
Allocation *out) {
- GET_TLS();
- rsrForEach(rsc, sc, target, in, out, NULL, 0, NULL);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrForEach(rsc, target, in, out, NULL, 0, NULL);
}
static void SC_ForEach_SAAU(Script *target,
Allocation *in,
Allocation *out,
const void *usr) {
- GET_TLS();
- rsrForEach(rsc, sc, target, in, out, usr, 0, NULL);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrForEach(rsc, target, in, out, usr, 0, NULL);
}
static void SC_ForEach_SAAUS(Script *target,
@@ -446,8 +436,8 @@
Allocation *out,
const void *usr,
const RsScriptCall *call) {
- GET_TLS();
- rsrForEach(rsc, sc, target, in, out, usr, 0, call);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrForEach(rsc, target, in, out, usr, 0, call);
}
static void SC_ForEach_SAAUL(Script *target,
@@ -455,8 +445,8 @@
Allocation *out,
const void *usr,
uint32_t usrLen) {
- GET_TLS();
- rsrForEach(rsc, sc, target, in, out, usr, usrLen, NULL);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrForEach(rsc, target, in, out, usr, usrLen, NULL);
}
static void SC_ForEach_SAAULS(Script *target,
@@ -465,8 +455,8 @@
const void *usr,
uint32_t usrLen,
const RsScriptCall *call) {
- GET_TLS();
- rsrForEach(rsc, sc, target, in, out, usr, usrLen, call);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ rsrForEach(rsc, target, in, out, usr, usrLen, call);
}
@@ -476,28 +466,29 @@
//////////////////////////////////////////////////////////////////////////////
static float SC_GetDt() {
- GET_TLS();
+ Context *rsc = RsdCpuReference::getTlsContext();
+ const Script *sc = RsdCpuReference::getTlsScript();
return rsrGetDt(rsc, sc);
}
time_t SC_Time(time_t *timer) {
- GET_TLS();
- return rsrTime(rsc, sc, timer);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrTime(rsc, timer);
}
tm* SC_LocalTime(tm *local, time_t *timer) {
- GET_TLS();
- return rsrLocalTime(rsc, sc, local, timer);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrLocalTime(rsc, local, timer);
}
int64_t SC_UptimeMillis() {
- GET_TLS();
- return rsrUptimeMillis(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrUptimeMillis(rsc);
}
int64_t SC_UptimeNanos() {
- GET_TLS();
- return rsrUptimeNanos(rsc, sc);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrUptimeNanos(rsc);
}
//////////////////////////////////////////////////////////////////////////////
@@ -505,179 +496,25 @@
//////////////////////////////////////////////////////////////////////////////
static uint32_t SC_ToClient2(int cmdID, void *data, int len) {
- GET_TLS();
- return rsrToClient(rsc, sc, cmdID, data, len);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrToClient(rsc, cmdID, data, len);
}
static uint32_t SC_ToClient(int cmdID) {
- GET_TLS();
- return rsrToClient(rsc, sc, cmdID, NULL, 0);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrToClient(rsc, cmdID, NULL, 0);
}
static uint32_t SC_ToClientBlocking2(int cmdID, void *data, int len) {
- GET_TLS();
- return rsrToClientBlocking(rsc, sc, cmdID, data, len);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrToClientBlocking(rsc, cmdID, data, len);
}
static uint32_t SC_ToClientBlocking(int cmdID) {
- GET_TLS();
- return rsrToClientBlocking(rsc, sc, cmdID, NULL, 0);
+ Context *rsc = RsdCpuReference::getTlsContext();
+ return rsrToClientBlocking(rsc, cmdID, NULL, 0);
}
-int SC_divsi3(int a, int b) {
- return a / b;
-}
-
-int SC_modsi3(int a, int b) {
- return a % b;
-}
-
-unsigned int SC_udivsi3(unsigned int a, unsigned int b) {
- return a / b;
-}
-
-unsigned int SC_umodsi3(unsigned int a, unsigned int b) {
- return a % b;
-}
-
-static void SC_debugF(const char *s, float f) {
- ALOGD("%s %f, 0x%08x", s, f, *((int *) (&f)));
-}
-static void SC_debugFv2(const char *s, float f1, float f2) {
- ALOGD("%s {%f, %f}", s, f1, f2);
-}
-static void SC_debugFv3(const char *s, float f1, float f2, float f3) {
- ALOGD("%s {%f, %f, %f}", s, f1, f2, f3);
-}
-static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {
- ALOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
-}
-static void SC_debugF2(const char *s, float2 f) {
- ALOGD("%s {%f, %f}", s, f.x, f.y);
-}
-static void SC_debugF3(const char *s, float3 f) {
- ALOGD("%s {%f, %f, %f}", s, f.x, f.y, f.z);
-}
-static void SC_debugF4(const char *s, float4 f) {
- ALOGD("%s {%f, %f, %f, %f}", s, f.x, f.y, f.z, f.w);
-}
-static void SC_debugD(const char *s, double d) {
- ALOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d)));
-}
-static void SC_debugFM4v4(const char *s, const float *f) {
- ALOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);
- ALOGD("%s %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
- ALOGD("%s %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]);
- ALOGD("%s %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]);
-}
-static void SC_debugFM3v3(const char *s, const float *f) {
- ALOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]);
- ALOGD("%s %f, %f, %f", s, f[1], f[4], f[7]);
- ALOGD("%s %f, %f, %f}",s, f[2], f[5], f[8]);
-}
-static void SC_debugFM2v2(const char *s, const float *f) {
- ALOGD("%s {%f, %f", s, f[0], f[2]);
- ALOGD("%s %f, %f}",s, f[1], f[3]);
-}
-static void SC_debugI8(const char *s, char c) {
- ALOGD("%s %hhd 0x%hhx", s, c, (unsigned char)c);
-}
-static void SC_debugC2(const char *s, char2 c) {
- ALOGD("%s {%hhd, %hhd} 0x%hhx 0x%hhx", s, c.x, c.y, (unsigned char)c.x, (unsigned char)c.y);
-}
-static void SC_debugC3(const char *s, char3 c) {
- ALOGD("%s {%hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z);
-}
-static void SC_debugC4(const char *s, char4 c) {
- ALOGD("%s {%hhd, %hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z, (unsigned char)c.w);
-}
-static void SC_debugU8(const char *s, unsigned char c) {
- ALOGD("%s %hhu 0x%hhx", s, c, c);
-}
-static void SC_debugUC2(const char *s, uchar2 c) {
- ALOGD("%s {%hhu, %hhu} 0x%hhx 0x%hhx", s, c.x, c.y, c.x, c.y);
-}
-static void SC_debugUC3(const char *s, uchar3 c) {
- ALOGD("%s {%hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.x, c.y, c.z);
-}
-static void SC_debugUC4(const char *s, uchar4 c) {
- ALOGD("%s {%hhu, %hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
-}
-static void SC_debugI16(const char *s, short c) {
- ALOGD("%s %hd 0x%hx", s, c, c);
-}
-static void SC_debugS2(const char *s, short2 c) {
- ALOGD("%s {%hd, %hd} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
-}
-static void SC_debugS3(const char *s, short3 c) {
- ALOGD("%s {%hd, %hd, %hd} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
-}
-static void SC_debugS4(const char *s, short4 c) {
- ALOGD("%s {%hd, %hd, %hd, %hd} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
-}
-static void SC_debugU16(const char *s, unsigned short c) {
- ALOGD("%s %hu 0x%hx", s, c, c);
-}
-static void SC_debugUS2(const char *s, ushort2 c) {
- ALOGD("%s {%hu, %hu} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
-}
-static void SC_debugUS3(const char *s, ushort3 c) {
- ALOGD("%s {%hu, %hu, %hu} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
-}
-static void SC_debugUS4(const char *s, ushort4 c) {
- ALOGD("%s {%hu, %hu, %hu, %hu} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
-}
-static void SC_debugI32(const char *s, int32_t i) {
- ALOGD("%s %d 0x%x", s, i, i);
-}
-static void SC_debugI2(const char *s, int2 i) {
- ALOGD("%s {%d, %d} 0x%x 0x%x", s, i.x, i.y, i.x, i.y);
-}
-static void SC_debugI3(const char *s, int3 i) {
- ALOGD("%s {%d, %d, %d} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
-}
-static void SC_debugI4(const char *s, int4 i) {
- ALOGD("%s {%d, %d, %d, %d} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
-}
-static void SC_debugU32(const char *s, uint32_t i) {
- ALOGD("%s %u 0x%x", s, i, i);
-}
-static void SC_debugUI2(const char *s, uint2 i) {
- ALOGD("%s {%u, %u} 0x%x 0x%x", s, i.x, i.y, i.x, i.y);
-}
-static void SC_debugUI3(const char *s, uint3 i) {
- ALOGD("%s {%u, %u, %u} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
-}
-static void SC_debugUI4(const char *s, uint4 i) {
- ALOGD("%s {%u, %u, %u, %u} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
-}
-static void SC_debugLL64(const char *s, long long ll) {
- ALOGD("%s %lld 0x%llx", s, ll, ll);
-}
-static void SC_debugL2(const char *s, long2 ll) {
- ALOGD("%s {%lld, %lld} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
-}
-static void SC_debugL3(const char *s, long3 ll) {
- ALOGD("%s {%lld, %lld, %lld} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
-}
-static void SC_debugL4(const char *s, long4 ll) {
- ALOGD("%s {%lld, %lld, %lld, %lld} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
-}
-static void SC_debugULL64(const char *s, unsigned long long ll) {
- ALOGD("%s %llu 0x%llx", s, ll, ll);
-}
-static void SC_debugUL2(const char *s, ulong2 ll) {
- ALOGD("%s {%llu, %llu} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
-}
-static void SC_debugUL3(const char *s, ulong3 ll) {
- ALOGD("%s {%llu, %llu, %llu} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
-}
-static void SC_debugUL4(const char *s, ulong4 ll) {
- ALOGD("%s {%llu, %llu, %llu, %llu} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
-}
-static void SC_debugP(const char *s, const void *p) {
- ALOGD("%s %p", s, p);
-}
//////////////////////////////////////////////////////////////////////////////
@@ -701,10 +538,7 @@
// ::= f # float
// ::= d # double
-static RsdSymbolTable gSyms[] = {
- { "memset", (void *)&memset, true },
- { "memcpy", (void *)&memcpy, true },
-
+static RsdCpuReference::CpuSymbol gSyms[] = {
// Refcounting
{ "_Z11rsSetObjectP10rs_elementS_", (void *)&SC_SetObject, true },
{ "_Z13rsClearObjectP10rs_element", (void *)&SC_ClearObject, true },
@@ -839,86 +673,24 @@
{ "_Z5colorffff", (void *)&SC_Color, false },
{ "_Z9rsgFinishv", (void *)&SC_Finish, false },
- // Debug
- { "_Z7rsDebugPKcf", (void *)&SC_debugF, true },
- { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true },
- { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true },
- { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true },
- { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugF2, true },
- { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugF3, true },
- { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugF4, true },
- { "_Z7rsDebugPKcd", (void *)&SC_debugD, true },
- { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true },
- { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true },
- { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true },
- { "_Z7rsDebugPKcc", (void *)&SC_debugI8, true },
- { "_Z7rsDebugPKcDv2_c", (void *)&SC_debugC2, true },
- { "_Z7rsDebugPKcDv3_c", (void *)&SC_debugC3, true },
- { "_Z7rsDebugPKcDv4_c", (void *)&SC_debugC4, true },
- { "_Z7rsDebugPKch", (void *)&SC_debugU8, true },
- { "_Z7rsDebugPKcDv2_h", (void *)&SC_debugUC2, true },
- { "_Z7rsDebugPKcDv3_h", (void *)&SC_debugUC3, true },
- { "_Z7rsDebugPKcDv4_h", (void *)&SC_debugUC4, true },
- { "_Z7rsDebugPKcs", (void *)&SC_debugI16, true },
- { "_Z7rsDebugPKcDv2_s", (void *)&SC_debugS2, true },
- { "_Z7rsDebugPKcDv3_s", (void *)&SC_debugS3, true },
- { "_Z7rsDebugPKcDv4_s", (void *)&SC_debugS4, true },
- { "_Z7rsDebugPKct", (void *)&SC_debugU16, true },
- { "_Z7rsDebugPKcDv2_t", (void *)&SC_debugUS2, true },
- { "_Z7rsDebugPKcDv3_t", (void *)&SC_debugUS3, true },
- { "_Z7rsDebugPKcDv4_t", (void *)&SC_debugUS4, true },
- { "_Z7rsDebugPKci", (void *)&SC_debugI32, true },
- { "_Z7rsDebugPKcDv2_i", (void *)&SC_debugI2, true },
- { "_Z7rsDebugPKcDv3_i", (void *)&SC_debugI3, true },
- { "_Z7rsDebugPKcDv4_i", (void *)&SC_debugI4, true },
- { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true },
- { "_Z7rsDebugPKcDv2_j", (void *)&SC_debugUI2, true },
- { "_Z7rsDebugPKcDv3_j", (void *)&SC_debugUI3, true },
- { "_Z7rsDebugPKcDv4_j", (void *)&SC_debugUI4, true },
- // Both "long" and "unsigned long" need to be redirected to their
- // 64-bit counterparts, since we have hacked Slang to use 64-bit
- // for "long" on Arm (to be similar to Java).
- { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true },
- { "_Z7rsDebugPKcDv2_l", (void *)&SC_debugL2, true },
- { "_Z7rsDebugPKcDv3_l", (void *)&SC_debugL3, true },
- { "_Z7rsDebugPKcDv4_l", (void *)&SC_debugL4, true },
- { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true },
- { "_Z7rsDebugPKcDv2_m", (void *)&SC_debugUL2, true },
- { "_Z7rsDebugPKcDv3_m", (void *)&SC_debugUL3, true },
- { "_Z7rsDebugPKcDv4_m", (void *)&SC_debugUL4, true },
- { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true },
- { "_Z7rsDebugPKcDv2_x", (void *)&SC_debugL2, true },
- { "_Z7rsDebugPKcDv3_x", (void *)&SC_debugL3, true },
- { "_Z7rsDebugPKcDv4_x", (void *)&SC_debugL4, true },
- { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true },
- { "_Z7rsDebugPKcDv2_y", (void *)&SC_debugUL2, true },
- { "_Z7rsDebugPKcDv3_y", (void *)&SC_debugUL3, true },
- { "_Z7rsDebugPKcDv4_y", (void *)&SC_debugUL4, true },
- { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true },
-
{ NULL, NULL, false }
};
-void* rsdLookupRuntimeStub(void* pContext, char const* name) {
+extern const RsdCpuReference::CpuSymbol * rsdLookupRuntimeStub(Context * pContext, char const* name) {
ScriptC *s = (ScriptC *)pContext;
- RsdSymbolTable *syms = gSyms;
- const RsdSymbolTable *sym = rsdLookupSymbolMath(name);
+ const RsdCpuReference::CpuSymbol *syms = gSyms;
+ const RsdCpuReference::CpuSymbol *sym = NULL;
if (!sym) {
- while (syms->mPtr) {
- if (!strcmp(syms->mName, name)) {
- sym = syms;
+ while (syms->fnPtr) {
+ if (!strcmp(syms->name, name)) {
+ return syms;
}
syms++;
}
}
- if (sym) {
- s->mHal.info.isThreadable &= sym->threadable;
- return sym->mPtr;
- }
- ALOGE("ScriptC sym lookup failed for %s", name);
return NULL;
}
diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp
index f4f0f1c..ef802a2 100644
--- a/driver/rsdScriptGroup.cpp
+++ b/driver/rsdScriptGroup.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2011-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,11 +15,8 @@
*/
#include "rsdCore.h"
+#include "../cpu_ref/rsd_cpu.h"
-#include <bcc/BCCContext.h>
-#include <bcc/Renderscript/RSCompilerDriver.h>
-#include <bcc/Renderscript/RSExecutable.h>
-#include <bcc/Renderscript/RSInfo.h>
#include "rsScript.h"
#include "rsScriptGroup.h"
@@ -31,236 +28,29 @@
using namespace android::renderscript;
-bool rsdScriptGroupInit(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg) {
- return true;
+bool rsdScriptGroupInit(const Context *rsc, ScriptGroup *sg) {
+ RsdHal *dc = (RsdHal *)rsc->mHal.drv;
+
+ sg->mHal.drv = dc->mCpuRef->createScriptGroup(sg);
+ return sg->mHal.drv != NULL;
}
-void rsdScriptGroupSetInput(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg,
- const android::renderscript::ScriptKernelID *kid,
- android::renderscript::Allocation *) {
+void rsdScriptGroupSetInput(const Context *rsc, const ScriptGroup *sg,
+ const ScriptKernelID *kid, Allocation *) {
}
-void rsdScriptGroupSetOutput(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg,
- const android::renderscript::ScriptKernelID *kid,
- android::renderscript::Allocation *) {
+void rsdScriptGroupSetOutput(const Context *rsc, const ScriptGroup *sg,
+ const ScriptKernelID *kid, Allocation *) {
}
-struct ScriptList {
- size_t count;
- Allocation *const* ins;
- bool const* inExts;
- Allocation *const* outs;
- bool const* outExts;
- const void *const* usrPtrs;
- size_t const *usrSizes;
- uint32_t const *sigs;
- const void *const* fnPtrs;
-
- const ScriptKernelID *const* kernels;
-};
-
-typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
-
-static void ScriptGroupRoot(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
-
- const ScriptList *sl = (const ScriptList *)p->usr;
- RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
- const void *oldUsr = p->usr;
-
- for(size_t ct=0; ct < sl->count; ct++) {
- ScriptGroupRootFunc_t func;
- func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mp->usr = sl->usrPtrs[ct];
-
- mp->ptrIn = NULL;
- mp->in = NULL;
- mp->ptrOut = NULL;
- mp->out = NULL;
-
- if (sl->ins[ct]) {
- DrvAllocation *drv = (DrvAllocation *)sl->ins[ct]->mHal.drv;
- mp->ptrIn = (const uint8_t *)drv->lod[0].mallocPtr;
- mp->in = mp->ptrIn;
- if (sl->inExts[ct]) {
- mp->in = mp->ptrIn + drv->lod[0].stride * p->y;
- } else {
- if (drv->lod[0].dimY > p->lid) {
- mp->in = mp->ptrIn + drv->lod[0].stride * p->lid;
- }
- }
- }
-
- if (sl->outs[ct]) {
- DrvAllocation *drv = (DrvAllocation *)sl->outs[ct]->mHal.drv;
- mp->ptrOut = (uint8_t *)drv->lod[0].mallocPtr;
- mp->out = mp->ptrOut;
- if (sl->outExts[ct]) {
- mp->out = mp->ptrOut + drv->lod[0].stride * p->y;
- } else {
- if (drv->lod[0].dimY > p->lid) {
- mp->out = mp->ptrOut + drv->lod[0].stride * p->lid;
- }
- }
- }
-
- //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(p, xstart, xend, instep, outstep);
- }
- //ALOGE("script group root");
-
- //ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- mp->usr = oldUsr;
+void rsdScriptGroupExecute(const Context *rsc, const ScriptGroup *sg) {
+ RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv;
+ sgi->execute();
}
-
-void rsdScriptGroupExecute(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg) {
-
- Vector<Allocation *> ins;
- Vector<bool> inExts;
- Vector<Allocation *> outs;
- Vector<bool> outExts;
- Vector<const ScriptKernelID *> kernels;
- bool fieldDep = false;
-
- for (size_t ct=0; ct < sg->mNodes.size(); ct++) {
- ScriptGroup::Node *n = sg->mNodes[ct];
- Script *s = n->mKernels[0]->mScript;
-
- //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
-
- for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
- if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
- //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
- s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
- }
- }
-
- for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
- const ScriptKernelID *k = n->mKernels[ct2];
- Allocation *ain = NULL;
- Allocation *aout = NULL;
- bool inExt = false;
- bool outExt = false;
-
- for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
- if (n->mInputs[ct3]->mDstKernel.get() == k) {
- ain = n->mInputs[ct3]->mAlloc.get();
- //ALOGE(" link in %p", ain);
- }
- }
- for (size_t ct3=0; ct3 < sg->mInputs.size(); ct3++) {
- if (sg->mInputs[ct3]->mKernel == k) {
- ain = sg->mInputs[ct3]->mAlloc.get();
- inExt = true;
- //ALOGE(" io in %p", ain);
- }
- }
-
- for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
- if (n->mOutputs[ct3]->mSource.get() == k) {
- aout = n->mOutputs[ct3]->mAlloc.get();
- if(n->mOutputs[ct3]->mDstField.get() != NULL) {
- fieldDep = true;
- }
- //ALOGE(" link out %p", aout);
- }
- }
- for (size_t ct3=0; ct3 < sg->mOutputs.size(); ct3++) {
- if (sg->mOutputs[ct3]->mKernel == k) {
- aout = sg->mOutputs[ct3]->mAlloc.get();
- outExt = true;
- //ALOGE(" io out %p", aout);
- }
- }
-
- if ((k->mHasKernelOutput == (aout != NULL)) &&
- (k->mHasKernelInput == (ain != NULL))) {
- ins.add(ain);
- inExts.add(inExt);
- outs.add(aout);
- outExts.add(outExt);
- kernels.add(k);
- }
- }
-
- }
-
- RsdHal * dc = (RsdHal *)rsc->mHal.drv;
- MTLaunchStruct mtls;
-
- if(fieldDep) {
- for (size_t ct=0; ct < ins.size(); ct++) {
- Script *s = kernels[ct]->mScript;
- DrvScript *drv = (DrvScript *)s->mHal.drv;
- uint32_t slot = kernels[ct]->mSlot;
-
- rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
- mtls.script = s;
-
- if (drv->mIntrinsicID) {
- mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
- mtls.fep.usr = drv->mIntrinsicData;
- } else {
- mtls.kernel = reinterpret_cast<ForEachFunc_t>(
- drv->mExecutable->getExportForeachFuncAddrs()[slot]);
- rsAssert(mtls.kernel != NULL);
- mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
- }
-
- rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ins[ct], outs[ct],
- NULL, 0, NULL, &mtls);
- }
- } else {
- ScriptList sl;
- sl.ins = ins.array();
- sl.outs = outs.array();
- sl.kernels = kernels.array();
- sl.count = kernels.size();
-
- Vector<const void *> usrPtrs;
- Vector<const void *> fnPtrs;
- Vector<uint32_t> sigs;
- for (size_t ct=0; ct < kernels.size(); ct++) {
- Script *s = kernels[ct]->mScript;
- DrvScript *drv = (DrvScript *)s->mHal.drv;
-
- if (drv->mIntrinsicID) {
- fnPtrs.add((void *)drv->mIntrinsicFuncs.root);
- usrPtrs.add(drv->mIntrinsicData);
- sigs.add(0);
- } else {
- int slot = kernels[ct]->mSlot;
- fnPtrs.add((void *)drv->mExecutable->getExportForeachFuncAddrs()[slot]);
- usrPtrs.add(NULL);
- sigs.add(drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second);
- }
- }
- sl.sigs = sigs.array();
- sl.usrPtrs = usrPtrs.array();
- sl.fnPtrs = fnPtrs.array();
- sl.inExts = inExts.array();
- sl.outExts = outExts.array();
-
- rsdScriptInvokeForEachMtlsSetup(rsc, ins[0], outs[0], NULL, 0, NULL, &mtls);
- mtls.script = NULL;
- mtls.kernel = (void (*)())&ScriptGroupRoot;
- mtls.fep.usr = &sl;
- rsdScriptLaunchThreads(rsc, true, ins[0], outs[0], NULL, 0, NULL, &mtls);
- }
-
-}
-
-void rsdScriptGroupDestroy(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg) {
+void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroup *sg) {
+ RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv;
+ delete sgi;
}
diff --git a/driver/rsdScriptGroup.h b/driver/rsdScriptGroup.h
index a817aef..ee8cd69 100644
--- a/driver/rsdScriptGroup.h
+++ b/driver/rsdScriptGroup.h
@@ -20,7 +20,7 @@
#include <rs_hal.h>
bool rsdScriptGroupInit(const android::renderscript::Context *rsc,
- const android::renderscript::ScriptGroup *sg);
+ android::renderscript::ScriptGroup *sg);
void rsdScriptGroupSetInput(const android::renderscript::Context *rsc,
const android::renderscript::ScriptGroup *sg,
const android::renderscript::ScriptKernelID *kid,
diff --git a/driver/rsdShader.cpp b/driver/rsdShader.cpp
index 3654090..0361844 100644
--- a/driver/rsdShader.cpp
+++ b/driver/rsdShader.cpp
@@ -346,9 +346,9 @@
rsAssert(0);
}
}
- ALOGE("Element size %u data=%p", elementSize, fd);
+ ALOGV("Element size %u data=%p", elementSize, fd);
fd += elementSize;
- ALOGE("New data=%p", fd);
+ ALOGV("New data=%p", fd);
}
}
@@ -524,8 +524,7 @@
continue;
}
- DrvAllocation *adrv = (DrvAllocation *)alloc->mHal.drv;
- const uint8_t *data = static_cast<const uint8_t *>(adrv->lod[0].mallocPtr);
+ const uint8_t *data = static_cast<const uint8_t *>(alloc->mHal.drvState.lod[0].mallocPtr);
const Element *e = mRSProgram->mHal.state.constantTypes[ct]->getElement();
for (uint32_t field=0; field < e->mHal.state.fieldsCount; field++) {
const Element *f = e->mHal.state.fields[field];