diff --git a/cpp/rsDispatch.cpp b/cpp/rsDispatch.cpp
index 6a1bbdd..db526fa 100644
--- a/cpp/rsDispatch.cpp
+++ b/cpp/rsDispatch.cpp
@@ -320,6 +320,11 @@
         LOG_API("Couldn't initialize dispatchTab.ScriptKernelIDCreate");
         return false;
     }
+    dispatchTab.ScriptInvokeIDCreate = (ScriptInvokeIDCreateFnPtr)dlsym(handle, "rsScriptInvokeIDCreate");
+    if (dispatchTab.ScriptInvokeIDCreate == NULL) {
+        LOG_API("Couldn't initialize dispatchTab.ScriptInvokeIDCreate");
+        return false;
+    }
     dispatchTab.ScriptFieldIDCreate = (ScriptFieldIDCreateFnPtr)dlsym(handle, "rsScriptFieldIDCreate");
     if (dispatchTab.ScriptFieldIDCreate == NULL) {
         LOG_API("Couldn't initialize dispatchTab.ScriptFieldIDCreate");
diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h
index 0d6d29f..066018d 100644
--- a/cpp/rsDispatch.h
+++ b/cpp/rsDispatch.h
@@ -82,6 +82,7 @@
 typedef RsScript (*ScriptCCreateFnPtr) (RsContext, const char*, size_t, const char*, size_t, const char*, size_t);
 typedef RsScript (*ScriptIntrinsicCreateFnPtr) (RsContext, uint32_t id, RsElement);
 typedef RsScriptKernelID (*ScriptKernelIDCreateFnPtr) (RsContext, RsScript, int, int);
+typedef RsScriptInvokeID (*ScriptInvokeIDCreateFnPtr) (RsContext, RsScript, int);
 typedef RsScriptFieldID (*ScriptFieldIDCreateFnPtr) (RsContext, RsScript, int);
 typedef RsScriptGroup (*ScriptGroupCreateFnPtr) (RsContext, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptKernelID*, size_t, RsScriptFieldID*, size_t, const RsType*, size_t);
 typedef RsScriptGroup2 (*ScriptGroup2CreateFnPtr)(RsContext, RsClosure*, size_t);
@@ -160,6 +161,7 @@
     ScriptCCreateFnPtr ScriptCCreate;
     ScriptIntrinsicCreateFnPtr ScriptIntrinsicCreate;
     ScriptKernelIDCreateFnPtr ScriptKernelIDCreate;
+    ScriptInvokeIDCreateFnPtr ScriptInvokeIDCreate;
     ScriptFieldIDCreateFnPtr ScriptFieldIDCreate;
     ScriptGroupCreateFnPtr ScriptGroupCreate;
     ScriptGroup2CreateFnPtr ScriptGroup2Create;
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 3cec0b9..a618a17 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -898,6 +898,7 @@
     mCtx->unlockMutex();
     if (mScriptSO) {
         dlclose(mScriptSO);
+        mScriptSO = nullptr;
     }
     return false;
 }
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 90907d0..18c9f73 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -31,138 +31,150 @@
 
 void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
                uint32_t xend, uint32_t outstep) {
-  const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
-  RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
-  const void **oldIns  = kparams->ins;
-  uint32_t *oldStrides = kparams->inEStrides;
+    const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
+    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
+    const void **oldIns  = kparams->ins;
+    uint32_t *oldStrides = kparams->inEStrides;
 
-  std::vector<const void*> ins(DefaultKernelArgCount);
-  std::vector<uint32_t> strides(DefaultKernelArgCount);
+    std::vector<const void*> ins(DefaultKernelArgCount);
+    std::vector<uint32_t> strides(DefaultKernelArgCount);
 
-  for (CPUClosure* cpuClosure : closures) {
-    const Closure* closure = cpuClosure->mClosure;
+    for (CPUClosure* cpuClosure : closures) {
+        const Closure* closure = cpuClosure->mClosure;
 
-    auto in_iter = ins.begin();
-    auto stride_iter = strides.begin();
+        auto in_iter = ins.begin();
+        auto stride_iter = strides.begin();
 
-    for (const auto& arg : closure->mArgs) {
-      const Allocation* a = (const Allocation*)arg;
-      const uint32_t eStride = a->mHal.state.elementSizeBytes;
-      const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
-          eStride * xstart;
-      if (kparams->dimY > 1) {
-        ptr += a->mHal.drvState.lod[0].stride * kparams->y;
-      }
-      *in_iter++ = ptr;
-      *stride_iter++ = eStride;
+        for (const auto& arg : closure->mArgs) {
+            const Allocation* a = (const Allocation*)arg;
+            const uint32_t eStride = a->mHal.state.elementSizeBytes;
+            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
+                    eStride * xstart;
+            if (kparams->dimY > 1) {
+                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
+            }
+            *in_iter++ = ptr;
+            *stride_iter++ = eStride;
+        }
+
+        mutable_kparams->ins = &ins[0];
+        mutable_kparams->inEStrides = &strides[0];
+
+        const Allocation* out = closure->mReturnValue;
+        const uint32_t ostep = out->mHal.state.elementSizeBytes;
+        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
+                ostep * xstart;
+        if (kparams->dimY > 1) {
+            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
+        }
+
+        mutable_kparams->out = (void*)ptr;
+
+        mutable_kparams->usr = cpuClosure->mUsrPtr;
+
+        cpuClosure->mFunc(kparams, xstart, xend, ostep);
     }
 
-    mutable_kparams->ins = &ins[0];
-    mutable_kparams->inEStrides = &strides[0];
-
-    const Allocation* out = closure->mReturnValue;
-    const uint32_t ostep = out->mHal.state.elementSizeBytes;
-    const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
-           ostep * xstart;
-    if (kparams->dimY > 1) {
-      ptr += out->mHal.drvState.lod[0].stride * kparams->y;
-    }
-
-    mutable_kparams->out = (void*)ptr;
-
-    mutable_kparams->usr = cpuClosure->mUsrPtr;
-
-    cpuClosure->mFunc(kparams, xstart, xend, ostep);
-  }
-
-  mutable_kparams->ins        = oldIns;
-  mutable_kparams->inEStrides = oldStrides;
-  mutable_kparams->usr        = &closures;
+    mutable_kparams->ins        = oldIns;
+    mutable_kparams->inEStrides = oldStrides;
+    mutable_kparams->usr        = &closures;
 }
 
 }  // namespace
 
 Batch::~Batch() {
-  for (CPUClosure* c : mClosures) {
-    delete c;
-  }
-  if (mScriptObj) {
-    dlclose(mScriptObj);
-  }
+    for (CPUClosure* c : mClosures) {
+        delete c;
+    }
+    if (mScriptObj) {
+        dlclose(mScriptObj);
+    }
 }
 
 bool Batch::conflict(CPUClosure* closure) const {
-  if (mClosures.empty()) {
-    return false;
-  }
+    if (mClosures.empty()) {
+        return false;
+    }
 
-  if (closure->mClosure->mKernelID.get() == nullptr ||
-      mClosures.front()->mClosure->mKernelID.get() == nullptr) {
-    // An invoke should be in a batch by itself, so it conflicts with any other
-    // closure.
-    return true;
-  }
-
-  for (const auto &p : closure->mClosure->mGlobalDeps) {
-    const Closure* dep = p.first;
-    for (CPUClosure* c : mClosures) {
-      if (c->mClosure == dep) {
-        ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", closure, dep);
+    if (closure->mClosure->mKernelID.get() == nullptr ||
+        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
+        // An invoke should be in a batch by itself, so it conflicts with any other
+        // closure.
         return true;
-      }
     }
-  }
-  for (const auto &p : closure->mClosure->mArgDeps) {
-    const Closure* dep = p.first;
-    for (CPUClosure* c : mClosures) {
-      if (c->mClosure == dep) {
-        for (const auto &p1 : *p.second) {
-          if (p1.second->get() != nullptr) {
-            ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", closure, dep);
-            return true;
-          }
+
+    for (const auto &p : closure->mClosure->mGlobalDeps) {
+        const Closure* dep = p.first;
+        for (CPUClosure* c : mClosures) {
+            if (c->mClosure == dep) {
+                ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global",
+                      closure, dep);
+                return true;
+            }
         }
-      }
     }
-  }
-  return false;
+    for (const auto &p : closure->mClosure->mArgDeps) {
+        const Closure* dep = p.first;
+        for (CPUClosure* c : mClosures) {
+            if (c->mClosure == dep) {
+                for (const auto &p1 : *p.second) {
+                    if (p1.second->get() != nullptr) {
+                        ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg",
+                              closure, dep);
+                        return true;
+                    }
+                }
+            }
+        }
+    }
+    return false;
 }
 
 CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
                                          const ScriptGroupBase *sg) :
     mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
-  Batch* batch = new Batch(this);
-  for (Closure* closure: mGroup->mClosures) {
-    const ScriptKernelID* kernelID = closure->mKernelID.get();
-    RsdCpuScriptImpl* si =
-        (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
+    rsAssert(!mGroup->mClosures.empty());
 
-    MTLaunchStruct mtls;
-    si->forEachKernelSetup(kernelID->mSlot, &mtls);
-    // TODO: Is mtls.fep.usrLen ever used?
-    CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
-                                    mtls.fep.usr, mtls.fep.usrLen);
-    if (batch->conflict(cc)) {
-      mBatches.push_back(batch);
-      batch = new Batch(this);
+    Batch* batch = new Batch(this);
+    for (Closure* closure: mGroup->mClosures) {
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        RsdCpuScriptImpl* si;
+        CPUClosure* cc;
+        if (kernelID != nullptr) {
+            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
+            MTLaunchStruct mtls;
+            si->forEachKernelSetup(kernelID->mSlot, &mtls);
+            // TODO: Is mtls.fep.usrLen ever used?
+            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
+                                mtls.fep.usr, mtls.fep.usrLen);
+        } else {
+            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(
+                    closure->mInvokeID->mScript);
+            cc = new CPUClosure(closure, si);
+        }
+
+        if (batch->conflict(cc)) {
+            mBatches.push_back(batch);
+            batch = new Batch(this);
+        }
+
+        batch->mClosures.push_back(cc);
     }
 
-    batch->mClosures.push_back(cc);
-  }
-
-  mBatches.push_back(batch);
+    rsAssert(!batch->mClosures.empty());
+    mBatches.push_back(batch);
 
 #ifndef RS_COMPATIBILITY_LIB
-  for (Batch* batch : mBatches) {
-    batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
-  }
+    for (Batch* batch : mBatches) {
+        batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
+    }
 #endif
 }
 
 CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
-  for (Batch* batch : mBatches) {
-    delete batch;
-  }
+    for (Batch* batch : mBatches) {
+        delete batch;
+    }
 }
 
 namespace {
@@ -170,78 +182,78 @@
 #ifndef RS_COMPATIBILITY_LIB
 
 string getFileName(string path) {
-  unsigned found = path.find_last_of("/\\");
-  return path.substr(found + 1);
+    const size_t found = path.find_last_of("/\\");
+    return path.substr(found + 1);  // npos + 1 wraps to 0, so a bare name is returned whole
 }
 
 void setupCompileArguments(
-    const vector<string>& inputs, const vector<int>& kernels,
-    const string& output_dir, const string& output_filename,
-    const string& rsLib, vector<const char*>* args) {
-  args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
-  args->push_back("-fPIC");
-  args->push_back("-embedRSInfo");
-  args->push_back("-mtriple");
-  args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
-  args->push_back("-bclib");
-  args->push_back(rsLib.c_str());
-  for (const string& input : inputs) {
-    args->push_back(input.c_str());
-  }
-  for (int kernel : kernels) {
-    args->push_back("-k");
-    string strKernel = std::to_string(kernel);
-    args->push_back(strKernel.c_str());
-  }
-  args->push_back("-output_path");
-  args->push_back(output_dir.c_str());
-  args->push_back("-o");
-  args->push_back(output_filename.c_str());
-  args->push_back(nullptr);
+        const vector<string>& inputs, const vector<int>& kernels,
+        const string& output_dir, const string& output_filename,
+        const string& rsLib, vector<const char*>* args) {
+    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
+    args->push_back("-fPIC");
+    args->push_back("-embedRSInfo");
+    args->push_back("-mtriple");
+    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
+    args->push_back("-bclib");
+    args->push_back(rsLib.c_str());
+    for (const string& input : inputs) {
+        args->push_back(input.c_str());
+    }
+    for (int kernel : kernels) {
+        args->push_back("-k");
+        // strdup (never freed): args keeps raw pointers, so a loop-local string would dangle.
+        args->push_back(strdup(std::to_string(kernel).c_str()));
+    }
+    args->push_back("-output_path");
+    args->push_back(output_dir.c_str());
+    args->push_back("-o");
+    args->push_back(output_filename.c_str());
+    args->push_back(nullptr);
 }
 
 string convertListToString(int n, const char* const* strs) {
-  string ret;
-  ret.append(strs[0]);
-  for (int i = 1; i < n; i++) {
-    ret.append(" ");
-    ret.append(strs[i]);
-  }
-  return ret;
+    string ret;
+    ret.append(strs[0]);
+    for (int i = 1; i < n; i++) {
+        ret.append(" ");
+        ret.append(strs[i]);
+    }
+    return ret;
 }
 
 bool fuseAndCompile(const char** arguments,
                     const string& commandLine) {
-  const pid_t pid = fork();
+    const pid_t pid = fork();
 
-  if (pid == -1) {
-    ALOGE("Couldn't fork for bcc execution");
-    return false;
-  }
+    if (pid == -1) {
+        ALOGE("Couldn't fork for bcc execution");
+        return false;
+    }
 
-  if (pid == 0) {
-    // Child process
-    ALOGV("Invoking BCC with: %s", commandLine.c_str());
-    execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
+    if (pid == 0) {
+        // Child process
+        ALOGV("Invoking BCC with: %s", commandLine.c_str());
+        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
 
-    ALOGE("execv() failed: %s", strerror(errno));
-    abort();
-    return false;
-  }
+        ALOGE("execv() failed: %s", strerror(errno));
+        abort();
+        return false;
+    }
 
-  // Parent process
-  int status = 0;
-  const pid_t w = waitpid(pid, &status, 0);
-  if (w == -1) {
-    return false;
-  }
+    // Parent process
+    int status = 0;
+    const pid_t w = waitpid(pid, &status, 0);
+    if (w == -1) {
+        return false;
+    }
 
-  if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
-    ALOGE("bcc terminated unexpectedly");
-    return false;
-  }
+    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
+        ALOGE("bcc terminated unexpectedly");
+        return false;
+    }
 
-  return true;
+    return true;
 }
 #endif
 
@@ -249,160 +261,181 @@
 
 void Batch::tryToCreateFusedKernel(const char *cacheDir) {
 #ifndef RS_COMPATIBILITY_LIB
-  if (mClosures.size() < 2) {
-    ALOGV("Compiler kernel fusion skipped due to only one or zero kernel in"
-          " a script group batch.");
-    return;
-  }
-
-  //===--------------------------------------------------------------------===//
-  // Fuse the input kernels and generate native code in an object file
-  //===--------------------------------------------------------------------===//
-
-  std::vector<string> inputFiles;
-  std::vector<int> slots;
-
-  for (CPUClosure* cpuClosure : mClosures) {
-    const Closure* closure = cpuClosure->mClosure;
-    const ScriptKernelID* kernelID = closure->mKernelID.get();
-    const Script* script = kernelID->mScript;
-
-    if (script->isIntrinsic()) {
-      return;
+    if (mClosures.size() < 2) {
+        return;
     }
 
-    const RsdCpuScriptImpl *cpuScript =
-        (const RsdCpuScriptImpl*)script->mHal.drv;
+    //===--------------------------------------------------------------------===//
+    // Fuse the input kernels and generate native code in an object file
+    //===--------------------------------------------------------------------===//
 
-    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
+    std::vector<string> inputFiles;
+    std::vector<int> slots;
 
-    inputFiles.push_back(bitcodeFilename);
-    slots.push_back(kernelID->mSlot);
-  }
+    for (CPUClosure* cpuClosure : mClosures) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        const Script* script = kernelID->mScript;
 
-  string outputPath(tempnam(cacheDir, "fused"));
-  string outputFileName = getFileName(outputPath);
-  string objFilePath(outputPath);
-  objFilePath.append(".o");
-  string rsLibPath(SYSLIBPATH"/libclcore.bc");
-  vector<const char*> arguments;
-  setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
-                        &arguments);
-  string commandLine =
-      convertListToString(arguments.size() - 1, arguments.data());
+        if (script->isIntrinsic()) {
+            return;
+        }
 
-  if (!fuseAndCompile(arguments.data(), commandLine)) {
-    return;
-  }
+        const RsdCpuScriptImpl *cpuScript =
+                (const RsdCpuScriptImpl*)script->mHal.drv;
 
-  //===--------------------------------------------------------------------===//
-  // Create and load the shared lib
-  //===--------------------------------------------------------------------===//
+        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
 
-  const char* resName = outputFileName.c_str();
+        inputFiles.push_back(bitcodeFilename);
+        slots.push_back(kernelID->mSlot);
+    }
 
-  if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
-    ALOGE("Failed to link object file '%s'", resName);
-    return;
-  }
+    char* const tmpPath = tempnam(cacheDir, "fused");  // malloc'd by libc; null on failure
+    if (tmpPath == nullptr) { return; }
+    const string outputFileName = getFileName(tmpPath);
+    free(tmpPath);  // only the basename is needed from here on
+    string rsLibPath(SYSLIBPATH"/libclcore.bc");
+    vector<const char*> arguments;
+    setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
+                          &arguments);
+    string commandLine =
+            convertListToString(arguments.size() - 1, arguments.data());
 
-  void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
-  if (mSharedObj == nullptr) {
-    ALOGE("Unable to load '%s'", resName);
-    return;
-  }
+    if (!fuseAndCompile(arguments.data(), commandLine)) {
+        return;
+    }
 
-  mExecutable = ScriptExecutable::createFromSharedObject(
-      nullptr,  // RS context. Unused.
-      mSharedObj);
+    //===--------------------------------------------------------------------===//
+    // Create and load the shared lib
+    //===--------------------------------------------------------------------===//
+
+    const char* resName = outputFileName.c_str();
+
+    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
+        ALOGE("Failed to link object file '%s'", resName);
+        return;
+    }
+
+    void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
+    if (mSharedObj == nullptr) {
+        ALOGE("Unable to load '%s'", resName);
+        return;
+    }
+
+    mExecutable = ScriptExecutable::createFromSharedObject(
+                                                           nullptr,  // RS context. Unused.
+                                                           mSharedObj);
 
 #endif  // RS_COMPATIBILITY_LIB
 }
 
 void CpuScriptGroup2Impl::execute() {
-  for (auto batch : mBatches) {
-    batch->setGlobalsForBatch();
-    batch->run();
-  }
+    for (auto batch : mBatches) {
+        batch->setGlobalsForBatch();
+        batch->run();
+    }
 }
 
 void Batch::setGlobalsForBatch() {
-  for (CPUClosure* cpuClosure : mClosures) {
-    const Closure* closure = cpuClosure->mClosure;
-    const ScriptKernelID* kernelID = closure->mKernelID.get();
-    Script* s = kernelID->mScript;
-    for (const auto& p : closure->mGlobals) {
-      const void* value = p.second.first;
-      int size = p.second.second;
-      // We use -1 size to indicate an ObjectBase rather than a primitive type
-      if (size < 0) {
-        s->setVarObj(p.first->mSlot, (ObjectBase*)value);
-      } else {
-        s->setVar(p.first->mSlot, (const void*)&value, size);
-      }
+    for (CPUClosure* cpuClosure : mClosures) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        Script* s;
+        if (kernelID != nullptr) {
+            s = kernelID->mScript;
+        } else {
+            s = cpuClosure->mClosure->mInvokeID->mScript;
+        }
+        for (const auto& p : closure->mGlobals) {
+            const void* value = p.second.first;
+            int size = p.second.second;
+            if (value == nullptr && size == 0) {
+                // This indicates the current closure depends on another closure for a
+                // global in their shared module (script). In this case we don't need to
+                // copy the value. For example, an invoke initializes a global variable
+                // which a kernel later reads.
+                continue;
+            }
+            // We use -1 size to indicate an ObjectBase rather than a primitive type
+            if (size < 0) {
+                s->setVarObj(p.first->mSlot, (ObjectBase*)value);
+            } else {
+                s->setVar(p.first->mSlot, (const void*)&value, size);
+            }
+        }
     }
-  }
 }
 
 void Batch::run() {
-  if (mExecutable != nullptr) {
+    if (mExecutable != nullptr) {
+        MTLaunchStruct mtls;
+        const CPUClosure* firstCpuClosure = mClosures.front();
+        const CPUClosure* lastCpuClosure = mClosures.back();
+
+        firstCpuClosure->mSi->forEachMtlsSetup(
+                (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
+                firstCpuClosure->mClosure->mArgs.size(),
+                lastCpuClosure->mClosure->mReturnValue,
+                nullptr, 0, nullptr, &mtls);
+
+        mtls.script = nullptr;
+        mtls.fep.usr = nullptr;
+        mtls.kernel = mExecutable->getForEachFunction(0);
+
+        mGroup->getCpuRefImpl()->launchThreads(
+                (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
+                firstCpuClosure->mClosure->mArgs.size(),
+                lastCpuClosure->mClosure->mReturnValue,
+                nullptr, &mtls);
+
+        return;
+    }
+
+    if (mClosures.size() == 1 &&
+        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
+        // This closure is for an invoke function
+        CPUClosure* cc = mClosures.front();
+        const Closure* c = cc->mClosure;
+        const ScriptInvokeID* invokeID = c->mInvokeID;
+        rsAssert(invokeID != nullptr);
+        cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
+        return;
+    }
+
+    for (CPUClosure* cpuClosure : mClosures) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->preLaunch(kernelID->mSlot,
+                                   (const Allocation**)&closure->mArgs[0],
+                                   closure->mArgs.size(), closure->mReturnValue,
+                                   cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
+                                   nullptr);
+    }
+
+    const CPUClosure* cpuClosure = mClosures.front();
+    const Closure* closure = cpuClosure->mClosure;
     MTLaunchStruct mtls;
-    const CPUClosure* firstCpuClosure = mClosures.front();
-    const CPUClosure* lastCpuClosure = mClosures.back();
 
-    firstCpuClosure->mSi->forEachMtlsSetup(
-        (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
-        firstCpuClosure->mClosure->mArgs.size(),
-        lastCpuClosure->mClosure->mReturnValue,
-        nullptr, 0, nullptr, &mtls);
+    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
+                                          closure->mArgs.size(),
+                                          closure->mReturnValue,
+                                          nullptr, 0, nullptr, &mtls)) {
 
-    mtls.script = nullptr;
-    mtls.fep.usr = nullptr;
-    mtls.kernel = mExecutable->getForEachFunction(0);
+        mtls.script = nullptr;
+        mtls.kernel = (void (*)())&groupRoot;
+        mtls.fep.usr = &mClosures;
 
-    mGroup->getCpuRefImpl()->launchThreads(
-        (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
-        firstCpuClosure->mClosure->mArgs.size(),
-        lastCpuClosure->mClosure->mReturnValue,
-        nullptr, &mtls);
+        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
+    }
 
-    return;
-  }
-
-  for (CPUClosure* cpuClosure : mClosures) {
-    const Closure* closure = cpuClosure->mClosure;
-    const ScriptKernelID* kernelID = closure->mKernelID.get();
-    cpuClosure->mSi->preLaunch(kernelID->mSlot,
-                               (const Allocation**)&closure->mArgs[0],
-                               closure->mArgs.size(), closure->mReturnValue,
-                               cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
-                               nullptr);
-  }
-
-  const CPUClosure* cpuClosure = mClosures.front();
-  const Closure* closure = cpuClosure->mClosure;
-  MTLaunchStruct mtls;
-
-  if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
-                                        closure->mArgs.size(),
-                                        closure->mReturnValue,
-                                        nullptr, 0, nullptr, &mtls)) {
-
-      mtls.script = nullptr;
-      mtls.kernel = (void (*)())&groupRoot;
-      mtls.fep.usr = &mClosures;
-
-      mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
-  }
-
-  for (CPUClosure* cpuClosure : mClosures) {
-    const Closure* closure = cpuClosure->mClosure;
-    const ScriptKernelID* kernelID = closure->mKernelID.get();
-    cpuClosure->mSi->postLaunch(kernelID->mSlot,
-                                (const Allocation**)&closure->mArgs[0],
-                                closure->mArgs.size(), closure->mReturnValue,
-                                nullptr, 0, nullptr);
-  }
+    for (CPUClosure* cpuClosure : mClosures) {
+        const Closure* closure = cpuClosure->mClosure;
+        const ScriptKernelID* kernelID = closure->mKernelID.get();
+        cpuClosure->mSi->postLaunch(kernelID->mSlot,
+                                    (const Allocation**)&closure->mArgs[0],
+                                    closure->mArgs.size(), closure->mReturnValue,
+                                    nullptr, 0, nullptr);
+    }
 }
 
 }  // namespace renderscript
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
index 53a0fe5..6b816dc 100644
--- a/cpu_ref/rsCpuScriptGroup2.h
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -20,59 +20,64 @@
 
 typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t,
                              uint32_t);
+typedef void (*InvokeFuncTy)(const void*, uint32_t);
 
 class CPUClosure {
- public:
-  CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func,
-             const void* usrPtr, const size_t usrSize) :
-      mClosure(closure), mSi(si), mFunc(func), mUsrPtr(usrPtr),
-      mUsrSize(usrSize) {}
+public:
+    CPUClosure(const Closure* closure, RsdCpuScriptImpl* si, ExpandFuncTy func,
+               const void* usrPtr, const size_t usrSize) :
+        mClosure(closure), mSi(si), mFunc(func),
+        mUsrPtr(usrPtr), mUsrSize(usrSize) {}
 
-  // It's important to do forwarding here than inheritance for unbound value
-  // binding to work.
-  const Closure* mClosure;
-  RsdCpuScriptImpl* mSi;
-  const ExpandFuncTy mFunc;
-  const void* mUsrPtr;
-  const size_t mUsrSize;
+    CPUClosure(const Closure* closure, RsdCpuScriptImpl* si) :
+        mClosure(closure), mSi(si), mFunc(nullptr),
+        mUsrPtr(nullptr), mUsrSize(0) {}
+
+    // It's important to use forwarding here rather than inheritance for unbound
+    // value binding to work.
+    const Closure* mClosure;
+    RsdCpuScriptImpl* mSi;
+    const ExpandFuncTy mFunc;
+    const void* mUsrPtr;
+    const size_t mUsrSize;
 };
 
 class CpuScriptGroup2Impl;
 
 class Batch {
- public:
-  Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr) {}
+public:
+    Batch(CpuScriptGroup2Impl* group) : mGroup(group), mExecutable(nullptr), mScriptObj(nullptr) {}
 
-  ~Batch();
+    ~Batch();
 
-  // Returns true if closure depends on any closure in this batch for a global
-  // variable
-  bool conflict(CPUClosure* closure) const;
+    // Returns true if closure cannot be added to this batch: an invoke is involved, or
+    // closure depends on a closure already in this batch via a global or an argument
+    bool conflict(CPUClosure* closure) const;
 
-  void tryToCreateFusedKernel(const char* cacheDir);
-  void setGlobalsForBatch();
-  void run();
+    void tryToCreateFusedKernel(const char* cacheDir);
+    void setGlobalsForBatch();
+    void run();
 
-  CpuScriptGroup2Impl* mGroup;
-  ScriptExecutable* mExecutable;
-  void* mScriptObj;
-  list<CPUClosure*> mClosures;
+    CpuScriptGroup2Impl* mGroup;
+    ScriptExecutable* mExecutable;
+    void* mScriptObj;
+    list<CPUClosure*> mClosures;
 };
 
 class CpuScriptGroup2Impl : public RsdCpuReference::CpuScriptGroup2 {
- public:
-  CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group);
-  virtual ~CpuScriptGroup2Impl();
+public:
+    CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, const ScriptGroupBase* group);
+    virtual ~CpuScriptGroup2Impl();
 
-  bool init();
-  virtual void execute();
+    bool init();
+    virtual void execute();
 
-  RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; }
+    RsdCpuReferenceImpl* getCpuRefImpl() const { return mCpuRefImpl; }
 
- private:
-  RsdCpuReferenceImpl* mCpuRefImpl;
-  const ScriptGroup2* mGroup;
-  list<Batch*> mBatches;
+private:
+    RsdCpuReferenceImpl* mCpuRefImpl;
+    const ScriptGroup2* mGroup;
+    list<Batch*> mBatches;
 };
 
 }  // namespace renderscript
diff --git a/rs.spec b/rs.spec
index d43839a..69b7e7f 100644
--- a/rs.spec
+++ b/rs.spec
@@ -270,6 +270,16 @@
     ret RsClosure
     }
 
+InvokeClosureCreate {
+    direct
+    param RsScriptInvokeID invokeID
+    param const void * params
+    param const RsScriptFieldID * fieldIDs
+    param const uintptr_t * values
+    param const size_t * sizes
+    ret RsClosure
+}
+
 ClosureSetArg {
   param RsClosure closureID
   param uint32_t index
@@ -306,6 +316,12 @@
     param const char * timeZone
     }
 
+ScriptInvokeIDCreate {
+    param RsScript s
+    param uint32_t slot
+    ret RsScriptInvokeID;
+    }
+
 ScriptInvoke {
     param RsScript s
     param uint32_t slot
diff --git a/rsClosure.cpp b/rsClosure.cpp
index 8530fc1..7aa0acb 100644
--- a/rsClosure.cpp
+++ b/rsClosure.cpp
@@ -27,20 +27,32 @@
       (const ScriptFieldID**)depFieldIDs));
 }
 
+RsClosure rsi_InvokeClosureCreate(Context* context, RsScriptInvokeID invokeID,
+                                  const void* params, const size_t paramLength,
+                                  const RsScriptFieldID* fieldIDs, const size_t fieldIDs_length,
+                                  const uintptr_t* values, const size_t values_length,
+                                  const size_t* sizes, const size_t sizes_length) {
+    rsAssert(fieldIDs_length == values_length && values_length == sizes_length);
+    return (RsClosure)(new Closure(
+        context, (const ScriptInvokeID*)invokeID, params, paramLength,
+        fieldIDs_length, (const ScriptFieldID**)fieldIDs, (const void**)values,
+        sizes));
+}
+
 void rsi_ClosureEval(Context* rsc, RsClosure closure) {
-  ((Closure*)closure)->eval();
+    ((Closure*)closure)->eval();
 }
 
 void rsi_ClosureSetArg(Context* rsc, RsClosure closure, uint32_t index,
                        uintptr_t value, size_t size) {
-  ((Closure*)closure)->setArg(index, (const void*)value, size);
+    ((Closure*)closure)->setArg(index, (const void*)value, size);
 }
 
 void rsi_ClosureSetGlobal(Context* rsc, RsClosure closure,
                           RsScriptFieldID fieldID, uintptr_t value,
                           size_t size) {
-  ((Closure*)closure)->setGlobal((const ScriptFieldID*)fieldID,
-                                 (const void*)value, size);
+    ((Closure*)closure)->setGlobal((const ScriptFieldID*)fieldID,
+                                   (const void*)value, size);
 }
 
 Closure::Closure(Context* context,
@@ -53,7 +65,8 @@
                  const Closure** depClosures,
                  const ScriptFieldID** depFieldIDs) :
     ObjectBase(context), mContext(context), mKernelID((ScriptKernelID*)kernelID),
-    mReturnValue(returnValue) {
+    mInvokeID(nullptr), mReturnValue(returnValue), mParams(nullptr),
+    mParamLength(0) {
   size_t i;
 
   for (i = 0; i < (size_t)numValues && fieldIDs[i] == nullptr; i++);
@@ -97,50 +110,61 @@
   }
 }
 
-Closure::~Closure() {
-  for (const auto& p : mArgDeps) {
-    auto map = p.second;
-    for (const auto& p1 : *map) {
-      delete p1.second;
+Closure::Closure(Context* context, const ScriptInvokeID* invokeID,
+                 const void* params, const size_t paramLength,
+                 const size_t numValues, const ScriptFieldID** fieldIDs,
+                 const void** values, const size_t* sizes) :
+    ObjectBase(context), mContext(context), mKernelID(nullptr), mInvokeID(invokeID),
+    mReturnValue(nullptr), mParams(params), mParamLength(paramLength) {
+    for (size_t i = 0; i < numValues; i++) {
+        mGlobals[fieldIDs[i]] = std::make_pair(values[i], sizes[i]);
     }
-    delete p.second;
-  }
+}
 
-  for (const auto& p : mGlobalDeps) {
-    auto map = p.second;
-    for (const auto& p1 : *map) {
-      delete p1.first;
-      delete p1.second;
+Closure::~Closure() {
+    for (const auto& p : mArgDeps) {
+        auto map = p.second;
+        for (const auto& p1 : *map) {
+            delete p1.second;
+        }
+        delete p.second;
     }
-    delete p.second;
-  }
+
+    for (const auto& p : mGlobalDeps) {
+        auto map = p.second;
+        for (const auto& p1 : *map) {
+            delete p1.first;
+            delete p1.second;
+        }
+        delete p.second;
+    }
 }
 
 void Closure::eval() {
-  Script *s = mKernelID->mScript;
+    Script *s = mKernelID->mScript;
 
-  for (const auto& p : mGlobals) {
-    const void* value = p.second.first;
-    int size = p.second.second;
-    // We use -1 size to indicate an ObjectBase rather than a primitive type
-    if (size < 0) {
-      s->setVarObj(p.first->mSlot, (ObjectBase*)value);
-    } else {
-      s->setVar(p.first->mSlot, (const void*)&value, size);
+    for (const auto& p : mGlobals) {
+        const void* value = p.second.first;
+        int size = p.second.second;
+        // We use -1 size to indicate an ObjectBase rather than a primitive type
+        if (size < 0) {
+            s->setVarObj(p.first->mSlot, (ObjectBase*)value);
+        } else {
+            s->setVar(p.first->mSlot, (const void*)&value, size);
+        }
     }
-  }
 
-  s->runForEach(mContext, mKernelID->mSlot, (const Allocation **)(&mArgs[0]),
-                mArgs.size(), mReturnValue, nullptr, 0, nullptr);
+    s->runForEach(mContext, mKernelID->mSlot, (const Allocation **)(&mArgs[0]),
+                  mArgs.size(), mReturnValue, nullptr, 0, nullptr);
 }
 
 void Closure::setArg(const uint32_t index, const void* value, const size_t size) {
-  mArgs[index] = value;
+    mArgs[index] = value;
 }
 
 void Closure::setGlobal(const ScriptFieldID* fieldID, const void* value,
                         const size_t size) {
-  mGlobals[fieldID] = std::make_pair(value, size);
+    mGlobals[fieldID] = std::make_pair(value, size);
 }
 
 }  // namespace renderscript
diff --git a/rsClosure.h b/rsClosure.h
index 372cd32..7f99a57 100644
--- a/rsClosure.h
+++ b/rsClosure.h
@@ -18,58 +18,78 @@
 
 class Allocation;
 class Context;
+class ObjectBase;
 class ScriptFieldID;
+class ScriptInvokeID;
 class ScriptKernelID;
 class Type;
 
 class Closure : public ObjectBase {
  public:
-  Closure(Context* context,
-          const ScriptKernelID* kernelID,
-          Allocation* returnValue,
-          const int numValues,
-          const ScriptFieldID** fieldIDs,
-          const void** values,  // Allocations or primitive (numeric) types
-          const size_t* sizes,   // size for data type. -1 indicates an allocation.
-          const Closure** depClosures,
-          const ScriptFieldID** depFieldIDs);
+    Closure(Context* context,
+            const ScriptKernelID* kernelID,
+            Allocation* returnValue,
+            const int numValues,
+            const ScriptFieldID** fieldIDs,
+            const void** values,  // Allocations or primitive (numeric) types
+            const size_t* sizes,  // size for data type. -1 indicates an allocation.
+            const Closure** depClosures,
+            const ScriptFieldID** depFieldIDs);
+    Closure(Context* context,
+            const ScriptInvokeID* invokeID,
+            const void* params,
+            const size_t paramLength,
+            const size_t numValues,
+            const ScriptFieldID** fieldIDs,
+            const void** values,  // Allocations or primitive (numeric) types
+            const size_t* sizes);  // size for data type. -1 indicates an allocation.
 
-  virtual ~Closure();
+    virtual ~Closure();
 
-  virtual void serialize(Context *rsc, OStream *stream) const {}
+    virtual void serialize(Context *rsc, OStream *stream) const {}
 
-  virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_CLOSURE; }
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_CLOSURE; }
 
-  void eval();
+    void eval();
 
-  void setArg(const uint32_t index, const void* value, const size_t size);
-  void setGlobal(const ScriptFieldID* fieldID, const void* value,
-                 const size_t size);
+    void setArg(const uint32_t index, const void* value, const size_t size);
+    void setGlobal(const ScriptFieldID* fieldID, const void* value,
+                   const size_t size);
 
-  Context* mContext;
-  const ObjectBaseRef<ScriptKernelID> mKernelID;
+    Context* mContext;
 
-  // Values referrenced in arguments and globals cannot be futures. They must be
-  // either a known value or unbound value.
-  // For now, all arguments should be Allocations.
-  vector<const void*> mArgs;
+    // If mKernelID is not null, this is a closure for a kernel. Otherwise, it is
+    // a closure for an invoke function, whose id is the next field. At least one
+    // of these fields has to be non-null.
+    const ObjectBaseRef<ScriptKernelID> mKernelID;
+    // TODO(yangni): ObjectBaseRef<ScriptInvokeID>
+    const ScriptInvokeID* mInvokeID;
 
-  // A global could be allocation or any primitive data type.
-  map<const ScriptFieldID*, pair<const void*, int>> mGlobals;
+    // Values referenced in arguments and globals cannot be futures. They must be
+    // either a known value or unbound value.
+    // For now, all arguments should be Allocations.
+    vector<const void*> mArgs;
 
-  Allocation* mReturnValue;
+    // A global could be allocation or any other data type, including primitive
+    // data types.
+    map<const ScriptFieldID*, pair<const void*, int>> mGlobals;
 
-  // All the other closures that this closure depends on
-  set<const Closure*> mDependences;
+    Allocation* mReturnValue;
 
-  // All the other closures which this closure depends on for one of its
-  // arguments, and the fields which it depends on.
-  map<const Closure*, map<int, const ObjectBaseRef<ScriptFieldID>*>*> mArgDeps;
+    // All the other closures that this closure depends on
+    set<const Closure*> mDependences;
 
-  // All the other closures that this closure depends on for one of its fields,
-  // and the fields that it depends on.
-  map<const Closure*, map<const ObjectBaseRef<ScriptFieldID>*,
-                          const ObjectBaseRef<ScriptFieldID>*>*> mGlobalDeps;
+    // All the other closures which this closure depends on for one of its
+    // arguments, and the fields which it depends on.
+    map<const Closure*, map<int, const ObjectBaseRef<ScriptFieldID>*>*> mArgDeps;
+
+    // All the other closures that this closure depends on for one of its fields,
+    // and the fields that it depends on.
+    map<const Closure*, map<const ObjectBaseRef<ScriptFieldID>*,
+            const ObjectBaseRef<ScriptFieldID>*>*> mGlobalDeps;
+
+    const void* mParams;
+    const size_t mParamLength;
 };
 
 }  // namespace renderscript
diff --git a/rsDefines.h b/rsDefines.h
index 427be03..69a62d6 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -42,6 +42,7 @@
 typedef void * RsSampler;
 typedef void * RsScript;
 typedef void * RsScriptKernelID;
+typedef void * RsScriptInvokeID;
 typedef void * RsScriptFieldID;
 typedef void * RsScriptMethodID;
 typedef void * RsScriptGroup;
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index 3fe6942..f85fffe 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -288,6 +288,8 @@
             break;
         case RS_A3D_CLASS_ID_SCRIPT_KERNEL_ID:
             break;
+        case RS_A3D_CLASS_ID_SCRIPT_INVOKE_ID:
+            break;
         case RS_A3D_CLASS_ID_SCRIPT_FIELD_ID:
             break;
         case RS_A3D_CLASS_ID_SCRIPT_METHOD_ID:
diff --git a/rsInternalDefines.h b/rsInternalDefines.h
index 81c13b5..2a3f3fd 100644
--- a/rsInternalDefines.h
+++ b/rsInternalDefines.h
@@ -165,7 +165,8 @@
     RS_A3D_CLASS_ID_SCRIPT_METHOD_ID,
     RS_A3D_CLASS_ID_SCRIPT_GROUP,
     RS_A3D_CLASS_ID_CLOSURE,
-    RS_A3D_CLASS_ID_SCRIPT_GROUP2
+    RS_A3D_CLASS_ID_SCRIPT_GROUP2,
+    RS_A3D_CLASS_ID_SCRIPT_INVOKE_ID
 };
 
 enum RsCullMode {
diff --git a/rsScript.cpp b/rsScript.cpp
index 3059833..b89c96e 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -109,37 +109,25 @@
 }
 
 ScriptKernelID::ScriptKernelID(Context *rsc, Script *s, int slot, int sig)
-        : ObjectBase(rsc) {
-
-    mScript = s;
-    mSlot = slot;
+        : IDBase(rsc, s, slot) {
     mHasKernelInput = (sig & 1) != 0;
     mHasKernelOutput = (sig & 2) != 0;
 }
 
-ScriptKernelID::~ScriptKernelID() {
-
-}
-
-void ScriptKernelID::serialize(Context *rsc, OStream *stream) const {
-
-}
-
 RsA3DClassID ScriptKernelID::getClassId() const {
     return RS_A3D_CLASS_ID_SCRIPT_KERNEL_ID;
 }
 
-ScriptFieldID::ScriptFieldID(Context *rsc, Script *s, int slot) : ObjectBase(rsc) {
-    mScript = s;
-    mSlot = slot;
+ScriptInvokeID::ScriptInvokeID(Context *rsc, Script *s, int slot)
+    : IDBase(rsc, s, slot) {
 }
 
-ScriptFieldID::~ScriptFieldID() {
-
+RsA3DClassID ScriptInvokeID::getClassId() const {
+    return RS_A3D_CLASS_ID_SCRIPT_INVOKE_ID;
 }
 
-void ScriptFieldID::serialize(Context *rsc, OStream *stream) const {
-
+ScriptFieldID::ScriptFieldID(Context *rsc, Script *s, int slot) :
+    IDBase(rsc, s, slot) {
 }
 
 RsA3DClassID ScriptFieldID::getClassId() const {
@@ -156,6 +144,12 @@
     return kid;
 }
 
+RsScriptInvokeID rsi_ScriptInvokeIDCreate(Context *rsc, RsScript vs, uint32_t slot) {
+    ScriptInvokeID *iid = new ScriptInvokeID(rsc, (Script *)vs, slot);
+    iid->incUserRef();
+    return iid;
+}
+
 RsScriptFieldID rsi_ScriptFieldIDCreate(Context *rsc, RsScript vs, int slot) {
     ScriptFieldID *fid = new ScriptFieldID(rsc, (Script *)vs, slot);
     fid->incUserRef();
diff --git a/rsScript.h b/rsScript.h
index 1ce5b41..2212032 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -32,30 +32,44 @@
 class ProgramStore;
 #endif
 
-class ScriptKernelID : public ObjectBase {
+class IDBase : public ObjectBase {
 public:
-    ScriptKernelID(Context *rsc, Script *s, int slot, int sig);
-    virtual ~ScriptKernelID();
+    IDBase(Context *rsc, Script *s, int slot) :
+        ObjectBase(rsc), mScript(s), mSlot(slot) {}
+    virtual ~IDBase() {}
 
-    virtual void serialize(Context *rsc, OStream *stream) const;
+    virtual void serialize(Context *rsc, OStream *stream) const {}
     virtual RsA3DClassID getClassId() const;
 
     Script *mScript;
     int mSlot;
+};
+
+class ScriptKernelID : public IDBase {
+public:
+    ScriptKernelID(Context *rsc, Script *s, int slot, int sig);
+    virtual ~ScriptKernelID() {}
+
+    virtual RsA3DClassID getClassId() const;
+
     bool mHasKernelInput;
     bool mHasKernelOutput;
 };
 
-class ScriptFieldID : public ObjectBase {
+class ScriptInvokeID : public IDBase {
+public:
+    ScriptInvokeID(Context *rsc, Script *s, int slot);
+    virtual ~ScriptInvokeID() {}
+
+    virtual RsA3DClassID getClassId() const;
+};
+
+class ScriptFieldID : public IDBase {
 public:
     ScriptFieldID(Context *rsc, Script *s, int slot);
-    virtual ~ScriptFieldID();
+    virtual ~ScriptFieldID() {}
 
-    virtual void serialize(Context *rsc, OStream *stream) const;
     virtual RsA3DClassID getClassId() const;
-
-    Script *mScript;
-    int mSlot;
 };
 
 class Script : public ObjectBase {
diff --git a/rsScriptGroup2.cpp b/rsScriptGroup2.cpp
index 488c752..df97c9e 100644
--- a/rsScriptGroup2.cpp
+++ b/rsScriptGroup2.cpp
@@ -6,22 +6,22 @@
 namespace renderscript {
 
 void ScriptGroup2::execute(Context* rsc) {
-  if (rsc->mHal.funcs.scriptgroup.execute) {
-    rsc->mHal.funcs.scriptgroup.execute(rsc, this);
-  }
+    if (rsc->mHal.funcs.scriptgroup.execute) {
+        rsc->mHal.funcs.scriptgroup.execute(rsc, this);
+    }
 }
 
 RsScriptGroup2 rsi_ScriptGroup2Create(Context* rsc, const char* cacheDir,
                                       size_t cacheDirLength,
                                       RsClosure* closures, size_t numClosures) {
-  ScriptGroup2* group = new ScriptGroup2(rsc, cacheDir, (Closure**)closures, numClosures);
+    ScriptGroup2* group = new ScriptGroup2(rsc, cacheDir, (Closure**)closures, numClosures);
 
-  // Create a device-specific implementation by calling the device driver
-  if (rsc->mHal.funcs.scriptgroup.init) {
-    rsc->mHal.funcs.scriptgroup.init(rsc, group);
-  }
+    // Create a device-specific implementation by calling the device driver
+    if (rsc->mHal.funcs.scriptgroup.init) {
+        rsc->mHal.funcs.scriptgroup.init(rsc, group);
+    }
 
-  return group;
+    return group;
 }
 
 }  // namespace renderscript
diff --git a/rsScriptGroup2.h b/rsScriptGroup2.h
index d514261..d4a64a0 100644
--- a/rsScriptGroup2.h
+++ b/rsScriptGroup2.h
@@ -14,24 +14,24 @@
 
 class ScriptGroup2 : public ScriptGroupBase {
  public:
-  /*
-    TODO:
-    Inputs and outputs are set and retrieved in Java runtime.
-    They are opaque in the C++ runtime.
-    For better compiler optimizations (of a script group), we need to include
-    input and output information in the C++ runtime.
-   */
-  ScriptGroup2(Context* rsc, const char* cacheDir, Closure** closures,
-               size_t numClosures) :
-      ScriptGroupBase(rsc), mCacheDir(cacheDir),
-      mClosures(closures, closures + numClosures) {}
-  virtual ~ScriptGroup2() {}
+    /*
+      TODO:
+      Inputs and outputs are set and retrieved in Java runtime.
+      They are opaque in the C++ runtime.
+      For better compiler optimizations (of a script group), we need to include
+      input and output information in the C++ runtime.
+    */
+    ScriptGroup2(Context* rsc, const char* cacheDir, Closure** closures,
+                 size_t numClosures) :
+        ScriptGroupBase(rsc), mCacheDir(cacheDir),
+        mClosures(closures, closures + numClosures) {}
+    virtual ~ScriptGroup2() {}
 
-  virtual SG_API_Version getApiVersion() const { return SG_V2; }
-  virtual void execute(Context* rsc);
+    virtual SG_API_Version getApiVersion() const { return SG_V2; }
+    virtual void execute(Context* rsc);
 
-  const std::string mCacheDir;
-  std::list<Closure*> mClosures;
+    const std::string mCacheDir;
+    std::list<Closure*> mClosures;
 };
 
 }  // namespace renderscript
