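Separate CPU driver implementation from reference driver.

rsdBcc.cpp no longer compiles scripts with bcc or runs the forEach
launch loops itself. Scripts and intrinsics are created through the
RsdCpuReference held by RsdHal (mCpuRef), and the other rsdScript*
entry points forward to the resulting CpuScript stored in mHal.drv.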

Change-Id: Ifb484edda665959b81d7b1f890d108bfa20a535d
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index ddcaac8..436b9b2 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -14,17 +14,12 @@
  * limitations under the License.
  */
 
+#include "../cpu_ref/rsd_cpu.h"
+
 #include "rsdCore.h"
 
-#include <bcc/BCCContext.h>
-#include <bcc/Renderscript/RSCompilerDriver.h>
-#include <bcc/Renderscript/RSExecutable.h>
-#include <bcc/Renderscript/RSInfo.h>
-
 #include "rsdBcc.h"
-#include "rsdRuntime.h"
 #include "rsdAllocation.h"
-#include "rsdIntrinsics.h"
 
 #include "rsContext.h"
 #include "rsElement.h"
@@ -38,15 +33,6 @@
 using namespace android::renderscript;
 
 
-static Script * setTLS(Script *sc) {
-    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
-    rsAssert(tls);
-    Script *old = tls->mScript;
-    tls->mScript = sc;
-    return old;
-}
-
-
 bool rsdScriptInit(const Context *rsc,
                      ScriptC *script,
                      char const *resName,
@@ -54,358 +40,26 @@
                      uint8_t const *bitcode,
                      size_t bitcodeSize,
                      uint32_t flags) {
-    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
-    //ALOGE("rsdScriptInit %p %p", rsc, script);
-
-    pthread_mutex_lock(&rsdgInitMutex);
-
-    bcc::RSExecutable *exec;
-    const bcc::RSInfo *info;
-    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
-    if (drv == NULL) {
-        goto error;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
+    RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir,
+                                                                bitcode, bitcodeSize, flags);
+    if (cs == NULL) {
+        return false;
     }
-    script->mHal.drv = drv;
-
-    drv->mCompilerContext = NULL;
-    drv->mCompilerDriver = NULL;
-    drv->mExecutable = NULL;
-
-    drv->mCompilerContext = new bcc::BCCContext();
-    if (drv->mCompilerContext == NULL) {
-        ALOGE("bcc: FAILS to create compiler context (out of memory)");
-        goto error;
-    }
-
-    drv->mCompilerDriver = new bcc::RSCompilerDriver();
-    if (drv->mCompilerDriver == NULL) {
-        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
-        goto error;
-    }
-
-    script->mHal.info.isThreadable = true;
-
-    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
-    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
-
-    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
-                                       cacheDir, resName,
-                                       (const char *)bitcode, bitcodeSize,
-                                       NULL);
-
-    if (exec == NULL) {
-        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
-        goto error;
-    }
-
-    drv->mExecutable = exec;
-
-    exec->setThreadable(script->mHal.info.isThreadable);
-    if (!exec->syncInfo()) {
-        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
-    }
-
-    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
-    drv->mRootExpand =
-        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
-    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
-    drv->mFreeChildren =
-        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
-
-    info = &drv->mExecutable->getInfo();
-    // Copy info over to runtime
-    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
-    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
-    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
-    script->mHal.info.exportedPragmaKeyList =
-        const_cast<const char**>(exec->getPragmaKeys().array());
-    script->mHal.info.exportedPragmaValueList =
-        const_cast<const char**>(exec->getPragmaValues().array());
-
-    if (drv->mRootExpand) {
-        script->mHal.info.root = drv->mRootExpand;
-    } else {
-        script->mHal.info.root = drv->mRoot;
-    }
-
-    if (script->mHal.info.exportedVariableCount) {
-        drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount];
-        memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount);
-    }
-
-    pthread_mutex_unlock(&rsdgInitMutex);
+    script->mHal.drv = cs;
+    cs->populateScript(script);
     return true;
-
-error:
-
-    pthread_mutex_unlock(&rsdgInitMutex);
-    if (drv) {
-        delete drv->mCompilerContext;
-        delete drv->mCompilerDriver;
-        delete drv->mExecutable;
-        delete[] drv->mBoundAllocs;
-        free(drv);
-    }
-    script->mHal.drv = NULL;
-    return false;
-
 }
 
 bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
-    pthread_mutex_lock(&rsdgInitMutex);
-
-    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
-    if (drv == NULL) {
-        goto error;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
+    RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e);
+    if (cs == NULL) {
+        return false;
     }
-    s->mHal.drv = drv;
-    drv->mIntrinsicID = iid;
-    drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs);
-    s->mHal.info.isThreadable = true;
-
-    pthread_mutex_unlock(&rsdgInitMutex);
+    s->mHal.drv = cs;
+    cs->populateScript(s);
     return true;
-
-error:
-    pthread_mutex_unlock(&rsdgInitMutex);
-    return false;
-}
-
-typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-
-static void wc_xy(void *usr, uint32_t idx) {
-    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
-    RsForEachStubParamStruct p;
-    memcpy(&p, &mtls->fep, sizeof(p));
-    p.lid = idx;
-    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
-    uint32_t sig = mtls->sig;
-
-#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE)
-    unsigned char buf[1024 * 8];
-#endif
-
-    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
-    while (1) {
-        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
-        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
-        uint32_t yEnd = yStart + mtls->mSliceSize;
-        yEnd = rsMin(yEnd, mtls->yEnd);
-        if (yEnd <= yStart) {
-            return;
-        }
-
-        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
-        //ALOGE("usr ptr in %p,  out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
-#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE)
-        if (mtls->fep.yStrideOut < sizeof(buf)) {
-            p.out = buf;
-            for (p.y = yStart; p.y < yEnd; p.y++) {
-                p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y);
-                fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
-                memcpy(mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y), buf, mtls->fep.yStrideOut);
-            }
-        } else
-#endif
-            {
-            for (p.y = yStart; p.y < yEnd; p.y++) {
-                p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
-                        (mtls->fep.eStrideOut * mtls->xStart);
-                p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
-                       (mtls->fep.eStrideIn * mtls->xStart);
-                fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
-            }
-        }
-    }
-}
-
-static void wc_x(void *usr, uint32_t idx) {
-    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
-    RsForEachStubParamStruct p;
-    memcpy(&p, &mtls->fep, sizeof(p));
-    p.lid = idx;
-    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
-    uint32_t sig = mtls->sig;
-
-    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
-    while (1) {
-        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
-        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
-        uint32_t xEnd = xStart + mtls->mSliceSize;
-        xEnd = rsMin(xEnd, mtls->xEnd);
-        if (xEnd <= xStart) {
-            return;
-        }
-
-        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
-        //ALOGE("usr ptr in %p,  out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
-        p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
-        p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
-        fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
-    }
-}
-
-void rsdScriptInvokeForEachMtlsSetup(const Context *rsc,
-                                     const Allocation * ain,
-                                     Allocation * aout,
-                                     const void * usr,
-                                     uint32_t usrLen,
-                                     const RsScriptCall *sc,
-                                     MTLaunchStruct *mtls) {
-
-    memset(mtls, 0, sizeof(MTLaunchStruct));
-
-    if (ain) {
-        mtls->fep.dimX = ain->getType()->getDimX();
-        mtls->fep.dimY = ain->getType()->getDimY();
-        mtls->fep.dimZ = ain->getType()->getDimZ();
-        //mtls->dimArray = ain->getType()->getDimArray();
-    } else if (aout) {
-        mtls->fep.dimX = aout->getType()->getDimX();
-        mtls->fep.dimY = aout->getType()->getDimY();
-        mtls->fep.dimZ = aout->getType()->getDimZ();
-        //mtls->dimArray = aout->getType()->getDimArray();
-    } else {
-        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
-        return;
-    }
-
-    if (!sc || (sc->xEnd == 0)) {
-        mtls->xEnd = mtls->fep.dimX;
-    } else {
-        rsAssert(sc->xStart < mtls->fep.dimX);
-        rsAssert(sc->xEnd <= mtls->fep.dimX);
-        rsAssert(sc->xStart < sc->xEnd);
-        mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
-        mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
-        if (mtls->xStart >= mtls->xEnd) return;
-    }
-
-    if (!sc || (sc->yEnd == 0)) {
-        mtls->yEnd = mtls->fep.dimY;
-    } else {
-        rsAssert(sc->yStart < mtls->fep.dimY);
-        rsAssert(sc->yEnd <= mtls->fep.dimY);
-        rsAssert(sc->yStart < sc->yEnd);
-        mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
-        mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
-        if (mtls->yStart >= mtls->yEnd) return;
-    }
-
-    mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
-    mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
-    mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
-    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
-    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
-    Context *mrsc = (Context *)rsc;
-    mtls->rsc = mrsc;
-    mtls->ain = ain;
-    mtls->aout = aout;
-    mtls->fep.usr = usr;
-    mtls->fep.usrLen = usrLen;
-    mtls->mSliceSize = 10;
-    mtls->mSliceNum = 0;
-
-    mtls->fep.ptrIn = NULL;
-    mtls->fep.eStrideIn = 0;
-
-    if (ain) {
-        DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv;
-        mtls->fep.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr;
-        mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
-        mtls->fep.yStrideIn = aindrv->lod[0].stride;
-    }
-
-    mtls->fep.ptrOut = NULL;
-    mtls->fep.eStrideOut = 0;
-    if (aout) {
-        DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv;
-        mtls->fep.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr;
-        mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
-        mtls->fep.yStrideOut = aoutdrv->lod[0].stride;
-    }
-}
-
-void rsdScriptLaunchThreads(const Context *rsc,
-                            bool isThreadable,
-                            const Allocation * ain,
-                            Allocation * aout,
-                            const void * usr,
-                            uint32_t usrLen,
-                            const RsScriptCall *sc,
-                            MTLaunchStruct *mtls) {
-
-    Context *mrsc = (Context *)rsc;
-    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
-
-    if ((dc->mWorkers.mCount >= 1) && isThreadable && !dc->mInForEach) {
-        const size_t targetByteChunk = 16 * 1024;
-        dc->mInForEach = true;
-        if (mtls->fep.dimY > 1) {
-            uint32_t s1 = mtls->fep.dimY / ((dc->mWorkers.mCount + 1) * 4);
-            uint32_t s2 = 0;
-
-            // This chooses our slice size to rate limit atomic ops to
-            // one per 16k bytes of reads/writes.
-            if (mtls->fep.yStrideOut) {
-                s2 = targetByteChunk / mtls->fep.yStrideOut;
-            } else {
-                s2 = targetByteChunk / mtls->fep.yStrideIn;
-            }
-            mtls->mSliceSize = rsMin(s1, s2);
-
-            if(mtls->mSliceSize < 1) {
-                mtls->mSliceSize = 1;
-            }
-
-            rsdLaunchThreads(mrsc, wc_xy, mtls);
-        } else {
-            uint32_t s1 = mtls->fep.dimX / ((dc->mWorkers.mCount + 1) * 4);
-            uint32_t s2 = 0;
-
-            // This chooses our slice size to rate limit atomic ops to
-            // one per 16k bytes of reads/writes.
-            if (mtls->fep.eStrideOut) {
-                s2 = targetByteChunk / mtls->fep.eStrideOut;
-            } else {
-                s2 = targetByteChunk / mtls->fep.eStrideIn;
-            }
-            mtls->mSliceSize = rsMin(s1, s2);
-
-            if(mtls->mSliceSize < 1) {
-                mtls->mSliceSize = 1;
-            }
-
-            rsdLaunchThreads(mrsc, wc_x, mtls);
-        }
-        dc->mInForEach = false;
-
-        //ALOGE("launch 1");
-    } else {
-        RsForEachStubParamStruct p;
-        memcpy(&p, &mtls->fep, sizeof(p));
-        uint32_t sig = mtls->sig;
-
-        //ALOGE("launch 3");
-        outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
-        for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
-            for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
-                for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
-                    uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
-                                      mtls->fep.dimY * p.z + p.y;
-                    p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
-                            (mtls->fep.eStrideOut * mtls->xStart);
-                    p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
-                           (mtls->fep.eStrideIn * mtls->xStart);
-                    fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
-                }
-            }
-        }
-    }
 }
 
 void rsdScriptInvokeForEach(const Context *rsc,
@@ -417,237 +71,69 @@
                             uint32_t usrLen,
                             const RsScriptCall *sc) {
 
-    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
-
-    MTLaunchStruct mtls;
-    rsdScriptInvokeForEachMtlsSetup(rsc, ain, aout, usr, usrLen, sc, &mtls);
-    mtls.script = s;
-    mtls.fep.slot = slot;
-
-    DrvScript *drv = (DrvScript *)s->mHal.drv;
-    if (drv->mIntrinsicID) {
-        mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
-        mtls.fep.usr = drv->mIntrinsicData;
-    } else {
-        rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
-        mtls.kernel = reinterpret_cast<ForEachFunc_t>(
-                          drv->mExecutable->getExportForeachFuncAddrs()[slot]);
-        rsAssert(mtls.kernel != NULL);
-        mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
-    }
-
-
-    Script * oldTLS = setTLS(s);
-    rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ain, aout, usr, usrLen, sc, &mtls);
-    setTLS(oldTLS);
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->invokeForEach(slot, ain, aout, usr, usrLen, sc);
 }
 
 
-int rsdScriptInvokeRoot(const Context *dc, Script *script) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    Script * oldTLS = setTLS(script);
-    int ret = drv->mRoot();
-    setTLS(oldTLS);
-
-    return ret;
+int rsdScriptInvokeRoot(const Context *dc, Script *s) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    return cs->invokeRoot();
 }
 
-void rsdScriptInvokeInit(const Context *dc, Script *script) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    if (drv->mInit) {
-        drv->mInit();
-    }
+void rsdScriptInvokeInit(const Context *dc, Script *s) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->invokeInit();
 }
 
-void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    if (drv->mFreeChildren) {
-        drv->mFreeChildren();
-    }
+void rsdScriptInvokeFreeChildren(const Context *dc, Script *s) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->invokeFreeChildren();
 }
 
-void rsdScriptInvokeFunction(const Context *dc, Script *script,
+void rsdScriptInvokeFunction(const Context *dc, Script *s,
                             uint32_t slot,
                             const void *params,
                             size_t paramLength) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
-
-    Script * oldTLS = setTLS(script);
-    reinterpret_cast<void (*)(const void *, uint32_t)>(
-        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
-    setTLS(oldTLS);
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->invokeFunction(slot, params, paramLength);
 }
 
-void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
+void rsdScriptSetGlobalVar(const Context *dc, const Script *s,
                            uint32_t slot, void *data, size_t dataLength) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-    //rsAssert(!script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
-
-    if (drv->mIntrinsicID) {
-        drv->mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
-        return;
-    }
-
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          drv->mExecutable->getExportVarAddrs()[slot]);
-    if (!destPtr) {
-        //ALOGV("Calling setVar on slot = %i which is null", slot);
-        return;
-    }
-
-    memcpy(destPtr, data, dataLength);
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->setGlobalVar(slot, data, dataLength);
 }
 
-void rsdScriptSetGlobalVarWithElemDims(
-        const android::renderscript::Context *dc,
-        const android::renderscript::Script *script,
-        uint32_t slot, void *data, size_t dataLength,
-        const android::renderscript::Element *elem,
-        const size_t *dims, size_t dimLength) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-        drv->mExecutable->getExportVarAddrs()[slot]);
-    if (!destPtr) {
-        //ALOGV("Calling setVar on slot = %i which is null", slot);
-        return;
-    }
-
-    // We want to look at dimension in terms of integer components,
-    // but dimLength is given in terms of bytes.
-    dimLength /= sizeof(int);
-
-    // Only a single dimension is currently supported.
-    rsAssert(dimLength == 1);
-    if (dimLength == 1) {
-        // First do the increment loop.
-        size_t stride = elem->getSizeBytes();
-        char *cVal = reinterpret_cast<char *>(data);
-        for (size_t i = 0; i < dims[0]; i++) {
-            elem->incRefs(cVal);
-            cVal += stride;
-        }
-
-        // Decrement loop comes after (to prevent race conditions).
-        char *oldVal = reinterpret_cast<char *>(destPtr);
-        for (size_t i = 0; i < dims[0]; i++) {
-            elem->decRefs(oldVal);
-            oldVal += stride;
-        }
-    }
-
-    memcpy(destPtr, data, dataLength);
+void rsdScriptSetGlobalVarWithElemDims(const Context *dc, const Script *s,
+                                       uint32_t slot, void *data, size_t dataLength,
+                                       const android::renderscript::Element *elem,
+                                       const size_t *dims, size_t dimLength) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
 }
 
-void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    //rsAssert(!script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
-
-    rsAssert(!drv->mIntrinsicID);
-
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          drv->mExecutable->getExportVarAddrs()[slot]);
-    if (!destPtr) {
-        //ALOGV("Calling setVar on slot = %i which is null", slot);
-        return;
-    }
-
-    void *ptr = NULL;
-    drv->mBoundAllocs[slot] = data;
-    if(data) {
-        DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv;
-        ptr = allocDrv->lod[0].mallocPtr;
-    }
-    memcpy(destPtr, &ptr, sizeof(void *));
+void rsdScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot, Allocation *data) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->setGlobalBind(slot, data);
 }
 
-void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-    //rsAssert(script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
-
-    if (drv->mIntrinsicID) {
-        drv->mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot,
-                                       static_cast<Allocation *>(data));
-        return;
-    }
-
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          drv->mExecutable->getExportVarAddrs()[slot]);
-    if (!destPtr) {
-        //ALOGV("Calling setVar on slot = %i which is null", slot);
-        return;
-    }
-
-    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
+void rsdScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot, ObjectBase *data) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    cs->setGlobalObj(slot, data);
 }
 
-void rsdScriptDestroy(const Context *dc, Script *script) {
-    DrvScript *drv = (DrvScript *)script->mHal.drv;
-
-    if (drv == NULL) {
-        return;
-    }
-
-    if (drv->mExecutable) {
-        Vector<void *>::const_iterator var_addr_iter =
-            drv->mExecutable->getExportVarAddrs().begin();
-        Vector<void *>::const_iterator var_addr_end =
-            drv->mExecutable->getExportVarAddrs().end();
-
-        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
-            drv->mExecutable->getInfo().getObjectSlots().begin();
-        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
-            drv->mExecutable->getInfo().getObjectSlots().end();
-
-        while ((var_addr_iter != var_addr_end) &&
-               (is_object_iter != is_object_end)) {
-            // The field address can be NULL if the script-side has optimized
-            // the corresponding global variable away.
-            ObjectBase **obj_addr =
-                reinterpret_cast<ObjectBase **>(*var_addr_iter);
-            if (*is_object_iter) {
-                if (*var_addr_iter != NULL) {
-                    rsrClearObject(dc, script, obj_addr);
-                }
-            }
-            var_addr_iter++;
-            is_object_iter++;
-        }
-    }
-
-    delete drv->mCompilerContext;
-    delete drv->mCompilerDriver;
-    delete drv->mExecutable;
-    delete[] drv->mBoundAllocs;
-    free(drv);
-    script->mHal.drv = NULL;
+void rsdScriptDestroy(const Context *dc, Script *s) {
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
+    delete cs;
+    s->mHal.drv = NULL;
 }
 
+
 Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc,
                                               const android::renderscript::Script *sc,
                                               const void *ptr) {
-    DrvScript *drv = (DrvScript *)sc->mHal.drv;
-    if (!ptr) {
-        return NULL;
-    }
-
-    for (uint32_t ct=0; ct < sc->mHal.info.exportedVariableCount; ct++) {
-        Allocation *a = drv->mBoundAllocs[ct];
-        if (!a) continue;
-        DrvAllocation *adrv = (DrvAllocation *)a->mHal.drv;
-        if (adrv->lod[0].mallocPtr == ptr) {
-            return a;
-        }
-    }
-    ALOGE("rsGetAllocation, failed to find %p", ptr);
-    return NULL;
+    RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
+    return cs->getAllocationForPointer(ptr);
 }