Collapse code paths for single- and multi-input kernels.

This patch simplifies the RenderScript driver and CPU reference implementation
by removing the distinction between single- and multi-input kernels in many
places.  The distinction is kept in some places to preserve backwards
compatibility.  Collapsing the two paths permits the deletion of some functions
and struct members that are no longer needed.  Several related functions were
also cleaned up.

Change-Id: Id70a223ea5e3aa2b0b935b2b7f9af933339ae8a4
diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp
index 5a7fffd..8437c99 100644
--- a/cpu_ref/rsCpuIntrinsic.cpp
+++ b/cpu_ref/rsCpuIntrinsic.cpp
@@ -73,54 +73,29 @@
 }
 
 
-void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation * ain,
-                                      Allocation * aout, const void * usr,
-                                      uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation ** ains,
+                                      uint32_t inLen, Allocation * aout,
+                                      const void * usr, uint32_t usrLen,
+                                      const RsScriptCall *sc) {
 }
 
-void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation * ain,
-                                       Allocation * aout, const void * usr,
-                                       uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation ** ains,
+                                       uint32_t inLen, Allocation * aout,
+                                       const void * usr, uint32_t usrLen,
+                                       const RsScriptCall *sc) {
 }
 
 void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot,
-                                          const Allocation * ain,
+                                          const Allocation ** ains,
+                                          uint32_t inLen,
                                           Allocation * aout,
                                           const void * usr,
                                           uint32_t usrLen,
                                           const RsScriptCall *sc) {
 
     MTLaunchStruct mtls;
-    preLaunch(slot, ain, aout, usr, usrLen, sc);
 
-    forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
-    mtls.script = this;
-    mtls.fep.slot = slot;
-
-    mtls.kernel = (void (*)())mRootPtr;
-    mtls.fep.usr = this;
-
-    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
-    mCtx->launchThreads(ain, aout, sc, &mtls);
-    mCtx->setTLS(oldTLS);
-
-    postLaunch(slot, ain, aout, usr, usrLen, sc);
-}
-
-void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot,
-                                               const Allocation ** ains,
-                                               uint32_t inLen,
-                                               Allocation * aout,
-                                               const void * usr,
-                                               uint32_t usrLen,
-                                               const RsScriptCall *sc) {
-
-    MTLaunchStruct mtls;
-    /*
-     * FIXME: Possibly create new preLaunch and postLaunch functions that take
-     *        all of the input allocation pointers.
-     */
-    preLaunch(slot, ains[0], aout, usr, usrLen, sc);
+    preLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
 
     forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
     mtls.script = this;
@@ -133,7 +108,7 @@
     mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
     mCtx->setTLS(oldTLS);
 
-    postLaunch(slot, ains[0], aout, usr, usrLen, sc);
+    postLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
 }
 
 void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {