Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "rsCpuCore.h" |
| 18 | #include "rsCpuScript.h" |
| 19 | #include "rsCpuScriptGroup.h" |
| 20 | |
| 21 | #include <malloc.h> |
| 22 | #include "rsContext.h" |
| 23 | |
| 24 | #include <sys/types.h> |
| 25 | #include <sys/resource.h> |
| 26 | #include <sched.h> |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 27 | #include <sys/syscall.h> |
| 28 | #include <string.h> |
Stephen Hines | b0934b6 | 2013-07-03 17:27:38 -0700 | [diff] [blame] | 29 | #include <unistd.h> |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 30 | |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 31 | #include <stdio.h> |
| 32 | #include <stdlib.h> |
| 33 | #include <fcntl.h> |
| 34 | |
Stephen Hines | b0934b6 | 2013-07-03 17:27:38 -0700 | [diff] [blame] | 35 | #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB) |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 36 | #include <cutils/properties.h> |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 37 | #include "utils/StopWatch.h" |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 38 | #endif |
| 39 | |
| 40 | #ifdef RS_SERVER |
| 41 | // Android exposes gettid(), standard Linux does not |
| 42 | static pid_t gettid() { |
| 43 | return syscall(SYS_gettid); |
| 44 | } |
| 45 | #endif |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 46 | |
| 47 | using namespace android; |
| 48 | using namespace android::renderscript; |
| 49 | |
| 50 | typedef void (*outer_foreach_t)( |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 51 | const android::renderscript::RsExpandKernelParams *, |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 52 | uint32_t x1, uint32_t x2, uint32_t outstep); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 53 | |
| 54 | |
| 55 | static pthread_key_t gThreadTLSKey = 0; |
| 56 | static uint32_t gThreadTLSKeyCount = 0; |
| 57 | static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER; |
| 58 | |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 59 | bool android::renderscript::gArchUseSIMD = false; |
| 60 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 61 | RsdCpuReference::~RsdCpuReference() { |
| 62 | } |
| 63 | |
| 64 | RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major, |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 65 | uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn |
| 66 | #ifndef RS_COMPATIBILITY_LIB |
Stephen Hines | 1d47662 | 2013-03-29 22:08:49 -0700 | [diff] [blame] | 67 | , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback, |
Stephen Hines | 0051132 | 2014-01-31 11:20:23 -0800 | [diff] [blame] | 68 | RSSelectRTCallback pSelectRTCallback, |
| 69 | const char *pBccPluginName |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 70 | #endif |
| 71 | ) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 72 | |
| 73 | RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc); |
| 74 | if (!cpu) { |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 75 | return nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 76 | } |
| 77 | if (!cpu->init(version_major, version_minor, lfn, slfn)) { |
| 78 | delete cpu; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 79 | return nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 80 | } |
Stephen Hines | f218bf1 | 2013-02-12 19:32:38 -0800 | [diff] [blame] | 81 | |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 82 | #ifndef RS_COMPATIBILITY_LIB |
Stephen Hines | f218bf1 | 2013-02-12 19:32:38 -0800 | [diff] [blame] | 83 | cpu->setLinkRuntimeCallback(pLinkRuntimeCallback); |
Stephen Hines | 1d47662 | 2013-03-29 22:08:49 -0700 | [diff] [blame] | 84 | cpu->setSelectRTCallback(pSelectRTCallback); |
Stephen Hines | 0051132 | 2014-01-31 11:20:23 -0800 | [diff] [blame] | 85 | if (pBccPluginName) { |
| 86 | cpu->setBccPluginName(pBccPluginName); |
| 87 | } |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 88 | #endif |
Stephen Hines | f218bf1 | 2013-02-12 19:32:38 -0800 | [diff] [blame] | 89 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 90 | return cpu; |
| 91 | } |
| 92 | |
| 93 | |
| 94 | Context * RsdCpuReference::getTlsContext() { |
| 95 | ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); |
| 96 | return tls->mContext; |
| 97 | } |
| 98 | |
| 99 | const Script * RsdCpuReference::getTlsScript() { |
| 100 | ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); |
| 101 | return tls->mScript; |
| 102 | } |
| 103 | |
Stephen Hines | f218bf1 | 2013-02-12 19:32:38 -0800 | [diff] [blame] | 104 | pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 105 | |
| 106 | //////////////////////////////////////////////////////////// |
| 107 | /// |
| 108 | |
| 109 | RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) { |
| 110 | mRSC = rsc; |
| 111 | |
| 112 | version_major = 0; |
| 113 | version_minor = 0; |
| 114 | mInForEach = false; |
| 115 | memset(&mWorkers, 0, sizeof(mWorkers)); |
| 116 | memset(&mTlsStruct, 0, sizeof(mTlsStruct)); |
| 117 | mExit = false; |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 118 | #ifndef RS_COMPATIBILITY_LIB |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 119 | mLinkRuntimeCallback = nullptr; |
| 120 | mSelectRTCallback = nullptr; |
| 121 | mSetupCompilerCallback = nullptr; |
Jason Sams | cadfac4 | 2013-03-06 18:09:08 -0800 | [diff] [blame] | 122 | #endif |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 123 | } |
| 124 | |
| 125 | |
| 126 | void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) { |
| 127 | RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc; |
| 128 | |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 129 | uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 130 | |
| 131 | //ALOGV("RS helperThread starting %p idx=%i", dc, idx); |
| 132 | |
| 133 | dc->mWorkers.mLaunchSignals[idx].init(); |
| 134 | dc->mWorkers.mNativeThreadId[idx] = gettid(); |
| 135 | |
| 136 | memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct)); |
| 137 | int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct); |
| 138 | if (status) { |
| 139 | ALOGE("pthread_setspecific %i", status); |
| 140 | } |
| 141 | |
| 142 | #if 0 |
| 143 | typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; |
| 144 | cpu_set_t cpuset; |
| 145 | memset(&cpuset, 0, sizeof(cpuset)); |
| 146 | cpuset.bits[idx / 64] |= 1ULL << (idx % 64); |
| 147 | int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], |
| 148 | sizeof(cpuset), &cpuset); |
| 149 | ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); |
| 150 | #endif |
| 151 | |
| 152 | while (!dc->mExit) { |
| 153 | dc->mWorkers.mLaunchSignals[idx].wait(); |
| 154 | if (dc->mWorkers.mLaunchCallback) { |
| 155 | // idx +1 is used because the calling thread is always worker 0. |
| 156 | dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1); |
| 157 | } |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 158 | __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 159 | dc->mWorkers.mCompleteSignal.set(); |
| 160 | } |
| 161 | |
| 162 | //ALOGV("RS helperThread exited %p idx=%i", dc, idx); |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 163 | return nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 164 | } |
| 165 | |
| 166 | void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { |
| 167 | mWorkers.mLaunchData = data; |
| 168 | mWorkers.mLaunchCallback = cbk; |
Tim Murray | 4d252d6 | 2012-11-29 14:37:59 -0800 | [diff] [blame] | 169 | |
| 170 | // fast path for very small launches |
| 171 | MTLaunchStruct *mtls = (MTLaunchStruct *)data; |
| 172 | if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) { |
| 173 | if (mWorkers.mLaunchCallback) { |
| 174 | mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); |
| 175 | } |
| 176 | return; |
| 177 | } |
| 178 | |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 179 | mWorkers.mRunningCount = mWorkers.mCount; |
| 180 | __sync_synchronize(); |
| 181 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 182 | for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { |
| 183 | mWorkers.mLaunchSignals[ct].set(); |
| 184 | } |
| 185 | |
| 186 | // We use the calling thread as one of the workers so we can start without |
| 187 | // the delay of the thread wakeup. |
| 188 | if (mWorkers.mLaunchCallback) { |
Tim Murray | 4d252d6 | 2012-11-29 14:37:59 -0800 | [diff] [blame] | 189 | mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 190 | } |
| 191 | |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 192 | while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 193 | mWorkers.mCompleteSignal.wait(); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | |
| 198 | void RsdCpuReferenceImpl::lockMutex() { |
| 199 | pthread_mutex_lock(&gInitMutex); |
| 200 | } |
| 201 | |
| 202 | void RsdCpuReferenceImpl::unlockMutex() { |
| 203 | pthread_mutex_unlock(&gInitMutex); |
| 204 | } |
| 205 | |
/**
 * Reads the beginning of a file into a caller-supplied buffer.
 *
 * Keeps calling read() until the buffer is full or end-of-file is reached, so
 * a short read (common for procfs files, which are handed out in chunks) no
 * longer truncates the result. Reads interrupted by a signal are retried.
 *
 * @param pathname  file to open read-only.
 * @param buffer    destination; it is NOT NUL-terminated by this function.
 * @param buffsize  capacity of buffer in bytes.
 * @return number of bytes stored (0 for an empty file or a zero-sized
 *         buffer), or -1 if the file cannot be opened or nothing could be
 *         read because of an error.
 */
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    const int fd = open(pathname, O_RDONLY);
    if (fd < 0)
        return -1;

    // The byte count is returned as an int, so never accept more than the
    // largest positive int.
    const size_t maxReturnable = static_cast<size_t>(~0U >> 1);
    const size_t limit = (buffsize < maxReturnable) ? buffsize : maxReturnable;

    size_t total = 0;
    bool failed = false;
    while (total < limit) {
        const ssize_t got = read(fd, buffer + total, limit - total);
        if (got > 0) {
            total += static_cast<size_t>(got);
        } else if (got == 0) {
            break;                      // end of file
        } else if (errno != EINTR) {
            failed = true;              // real error; EINTR simply retries
            break;
        }
    }

    close(fd);

    // Keep whatever was read before an error; only report failure when the
    // buffer holds nothing at all.
    if (failed && total == 0)
        return -1;
    return static_cast<int>(total);
}
| 223 | |
| 224 | static void GetCpuInfo() { |
| 225 | char cpuinfo[4096]; |
| 226 | int cpuinfo_len; |
| 227 | |
| 228 | cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, sizeof cpuinfo); |
| 229 | if (cpuinfo_len < 0) /* should not happen */ { |
| 230 | return; |
| 231 | } |
| 232 | |
Jason Sams | 074424a | 2014-05-22 13:30:03 -0700 | [diff] [blame] | 233 | #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS) |
| 234 | gArchUseSIMD = (!!strstr(cpuinfo, " neon")) || |
| 235 | (!!strstr(cpuinfo, " asimd")); |
Rose, James | 7b7060c | 2014-04-22 12:08:06 +0800 | [diff] [blame] | 236 | #elif defined(ARCH_X86_HAVE_SSSE3) |
| 237 | gArchUseSIMD = !!strstr(cpuinfo, " ssse3"); |
| 238 | #endif |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 239 | } |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 240 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 241 | bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor, |
| 242 | sym_lookup_t lfn, script_lookup_t slfn) { |
| 243 | |
| 244 | mSymLookupFn = lfn; |
| 245 | mScriptLookupFn = slfn; |
| 246 | |
| 247 | lockMutex(); |
| 248 | if (!gThreadTLSKeyCount) { |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 249 | int status = pthread_key_create(&gThreadTLSKey, nullptr); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 250 | if (status) { |
| 251 | ALOGE("Failed to init thread tls key."); |
| 252 | unlockMutex(); |
| 253 | return false; |
| 254 | } |
| 255 | } |
| 256 | gThreadTLSKeyCount++; |
| 257 | unlockMutex(); |
| 258 | |
| 259 | mTlsStruct.mContext = mRSC; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 260 | mTlsStruct.mScript = nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 261 | int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct); |
| 262 | if (status) { |
| 263 | ALOGE("pthread_setspecific %i", status); |
| 264 | } |
| 265 | |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 266 | GetCpuInfo(); |
Jason Sams | f5ef8df | 2013-08-06 13:49:25 -0700 | [diff] [blame] | 267 | |
Jason Sams | 77d57a3 | 2014-10-23 17:43:53 -0700 | [diff] [blame^] | 268 | int cpu = sysconf(_SC_NPROCESSORS_CONF); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 269 | if(mRSC->props.mDebugMaxThreads) { |
| 270 | cpu = mRSC->props.mDebugMaxThreads; |
| 271 | } |
| 272 | if (cpu < 2) { |
| 273 | mWorkers.mCount = 0; |
| 274 | return true; |
| 275 | } |
| 276 | |
| 277 | // Subtract one from the cpu count because we also use the command thread as a worker. |
| 278 | mWorkers.mCount = (uint32_t)(cpu - 1); |
| 279 | |
Jason Sams | 8ca358a | 2013-03-19 13:59:40 -0700 | [diff] [blame] | 280 | ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 281 | |
| 282 | mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); |
| 283 | mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); |
| 284 | mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 285 | mWorkers.mLaunchCallback = nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 286 | |
| 287 | mWorkers.mCompleteSignal.init(); |
| 288 | |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 289 | mWorkers.mRunningCount = mWorkers.mCount; |
| 290 | mWorkers.mLaunchCount = 0; |
| 291 | __sync_synchronize(); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 292 | |
| 293 | pthread_attr_t threadAttr; |
| 294 | status = pthread_attr_init(&threadAttr); |
| 295 | if (status) { |
| 296 | ALOGE("Failed to init thread attribute."); |
| 297 | return false; |
| 298 | } |
| 299 | |
| 300 | for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { |
| 301 | status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); |
| 302 | if (status) { |
| 303 | mWorkers.mCount = ct; |
| 304 | ALOGE("Created fewer than expected number of RS threads."); |
| 305 | break; |
| 306 | } |
| 307 | } |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 308 | while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 309 | usleep(100); |
| 310 | } |
| 311 | |
| 312 | pthread_attr_destroy(&threadAttr); |
| 313 | return true; |
| 314 | } |
| 315 | |
| 316 | |
| 317 | void RsdCpuReferenceImpl::setPriority(int32_t priority) { |
| 318 | for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { |
| 319 | setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority); |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | RsdCpuReferenceImpl::~RsdCpuReferenceImpl() { |
| 324 | mExit = true; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 325 | mWorkers.mLaunchData = nullptr; |
| 326 | mWorkers.mLaunchCallback = nullptr; |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 327 | mWorkers.mRunningCount = mWorkers.mCount; |
| 328 | __sync_synchronize(); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 329 | for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { |
| 330 | mWorkers.mLaunchSignals[ct].set(); |
| 331 | } |
| 332 | void *res; |
| 333 | for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { |
| 334 | pthread_join(mWorkers.mThreadId[ct], &res); |
| 335 | } |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 336 | rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0); |
Jens Gulin | 07ef704 | 2014-02-19 18:16:01 +0100 | [diff] [blame] | 337 | free(mWorkers.mThreadId); |
| 338 | free(mWorkers.mNativeThreadId); |
| 339 | delete[] mWorkers.mLaunchSignals; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 340 | |
| 341 | // Global structure cleanup. |
| 342 | lockMutex(); |
| 343 | --gThreadTLSKeyCount; |
| 344 | if (!gThreadTLSKeyCount) { |
| 345 | pthread_key_delete(gThreadTLSKey); |
| 346 | } |
| 347 | unlockMutex(); |
| 348 | |
| 349 | } |
| 350 | |
| 351 | typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 352 | typedef void (*walk_loop_t)(MTLaunchStruct*, |
| 353 | RsExpandKernelParams&, |
| 354 | outer_foreach_t); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 355 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 356 | |
| 357 | static void walk_wrapper(void* usr, uint32_t idx, walk_loop_t walk_loop) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 358 | MTLaunchStruct *mtls = (MTLaunchStruct *)usr; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 359 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 360 | uint32_t inLen = mtls->fep.inLen; |
| 361 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 362 | RsExpandKernelParams kparams; |
| 363 | kparams.takeFields(mtls->fep); |
| 364 | |
| 365 | // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram |
| 366 | kparams.lid = idx; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 367 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 368 | if (inLen > 0) { |
| 369 | // Allocate space for our input base pointers. |
| 370 | kparams.ins = (const void**)alloca(inLen * sizeof(void*)); |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 371 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 372 | // Allocate space for our input stride information. |
| 373 | kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t)); |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 374 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 375 | // Fill our stride information. |
| 376 | for (int inIndex = inLen; --inIndex >= 0;) { |
| 377 | kparams.inEStrides[inIndex] = mtls->fep.inStrides[inIndex].eStride; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 378 | } |
| 379 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 380 | |
| 381 | outer_foreach_t fn = (outer_foreach_t) mtls->kernel; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 382 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 383 | walk_loop(mtls, kparams, fn); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 384 | } |
| 385 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 386 | static void walk_2d(void *usr, uint32_t idx) { |
| 387 | walk_wrapper(usr, idx, [](MTLaunchStruct *mtls, |
| 388 | RsExpandKernelParams &kparams, |
| 389 | outer_foreach_t fn) { |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 390 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 391 | while (1) { |
| 392 | uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); |
| 393 | uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; |
| 394 | uint32_t yEnd = yStart + mtls->mSliceSize; |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 395 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 396 | yEnd = rsMin(yEnd, mtls->yEnd); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 397 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 398 | if (yEnd <= yStart) { |
| 399 | return; |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 400 | } |
| 401 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 402 | for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) { |
| 403 | kparams.out = mtls->fep.outPtr + |
| 404 | (mtls->fep.outStride.yStride * kparams.y) + |
| 405 | (mtls->fep.outStride.eStride * mtls->xStart); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 406 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 407 | for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) { |
| 408 | StridePair &strides = mtls->fep.inStrides[inIndex]; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 409 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 410 | kparams.ins[inIndex] = |
| 411 | mtls->fep.inPtrs[inIndex] + |
| 412 | (strides.yStride * kparams.y) + |
| 413 | (strides.eStride * mtls->xStart); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 414 | } |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 415 | |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 416 | fn(&kparams, mtls->xStart, mtls->xEnd, |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 417 | mtls->fep.outStride.eStride); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 418 | } |
| 419 | } |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 420 | }); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 421 | } |
| 422 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 423 | static void walk_1d(void *usr, uint32_t idx) { |
| 424 | walk_wrapper(usr, idx, [](MTLaunchStruct *mtls, |
| 425 | RsExpandKernelParams &kparams, |
| 426 | outer_foreach_t fn) { |
| 427 | |
| 428 | while (1) { |
| 429 | uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); |
| 430 | uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; |
| 431 | uint32_t xEnd = xStart + mtls->mSliceSize; |
| 432 | |
| 433 | xEnd = rsMin(xEnd, mtls->xEnd); |
| 434 | |
| 435 | if (xEnd <= xStart) { |
| 436 | return; |
| 437 | } |
| 438 | |
| 439 | kparams.out = mtls->fep.outPtr + |
| 440 | (mtls->fep.outStride.eStride * xStart); |
| 441 | |
| 442 | for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) { |
| 443 | StridePair &strides = mtls->fep.inStrides[inIndex]; |
| 444 | |
| 445 | kparams.ins[inIndex] = |
| 446 | mtls->fep.inPtrs[inIndex] + (strides.eStride * xStart); |
| 447 | } |
| 448 | |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 449 | fn(&kparams, xStart, xEnd, mtls->fep.outStride.eStride); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 450 | } |
| 451 | }); |
| 452 | } |
| 453 | |
| 454 | |
| 455 | void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains, |
| 456 | uint32_t inLen, |
| 457 | Allocation* aout, |
| 458 | const RsScriptCall* sc, |
| 459 | MTLaunchStruct* mtls) { |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 460 | |
| 461 | //android::StopWatch kernel_time("kernel time"); |
| 462 | |
| 463 | if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) { |
| 464 | const size_t targetByteChunk = 16 * 1024; |
| 465 | mInForEach = true; |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 466 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 467 | if (mtls->fep.dimY > 1) { |
| 468 | uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4); |
| 469 | uint32_t s2 = 0; |
| 470 | |
| 471 | // This chooses our slice size to rate limit atomic ops to |
| 472 | // one per 16k bytes of reads/writes. |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 473 | if (mtls->fep.outStride.yStride) { |
| 474 | s2 = targetByteChunk / mtls->fep.outStride.yStride; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 475 | } else { |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 476 | // We know that there is either an output or an input. |
| 477 | s2 = targetByteChunk / mtls->fep.inStrides[0].yStride; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 478 | } |
| 479 | mtls->mSliceSize = rsMin(s1, s2); |
| 480 | |
| 481 | if(mtls->mSliceSize < 1) { |
| 482 | mtls->mSliceSize = 1; |
| 483 | } |
| 484 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 485 | launchThreads(walk_2d, mtls); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 486 | } else { |
| 487 | uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4); |
| 488 | uint32_t s2 = 0; |
| 489 | |
| 490 | // This chooses our slice size to rate limit atomic ops to |
| 491 | // one per 16k bytes of reads/writes. |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 492 | if (mtls->fep.outStride.eStride) { |
| 493 | s2 = targetByteChunk / mtls->fep.outStride.eStride; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 494 | } else { |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 495 | // We know that there is either an output or an input. |
| 496 | s2 = targetByteChunk / mtls->fep.inStrides[0].eStride; |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 497 | } |
| 498 | mtls->mSliceSize = rsMin(s1, s2); |
| 499 | |
| 500 | if (mtls->mSliceSize < 1) { |
| 501 | mtls->mSliceSize = 1; |
| 502 | } |
| 503 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 504 | launchThreads(walk_1d, mtls); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 505 | } |
| 506 | mInForEach = false; |
| 507 | |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 508 | } else { |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 509 | RsExpandKernelParams kparams; |
| 510 | kparams.takeFields(mtls->fep); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 511 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 512 | if (inLen > 0) { |
| 513 | // Allocate space for our input base pointers. |
| 514 | kparams.ins = (const void**)alloca(inLen * sizeof(void*)); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 515 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 516 | // Allocate space for our input stride information. |
| 517 | kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t)); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 518 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 519 | // Fill our stride information. |
| 520 | for (int inIndex = inLen; --inIndex >= 0;) { |
| 521 | kparams.inEStrides[inIndex] = |
| 522 | mtls->fep.inStrides[inIndex].eStride; |
| 523 | } |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 524 | } |
| 525 | |
| 526 | //ALOGE("launch 3"); |
| 527 | outer_foreach_t fn = (outer_foreach_t) mtls->kernel; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 528 | for (uint32_t arrayIndex = mtls->arrayStart; |
| 529 | arrayIndex < mtls->arrayEnd; arrayIndex++) { |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 530 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 531 | for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd; |
| 532 | kparams.z++) { |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 533 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 534 | for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd; |
| 535 | kparams.y++) { |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 536 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 537 | uint32_t offset = |
| 538 | mtls->fep.dimY * mtls->fep.dimZ * arrayIndex + |
| 539 | mtls->fep.dimY * kparams.z + kparams.y; |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 540 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 541 | kparams.out = mtls->fep.outPtr + |
| 542 | (mtls->fep.outStride.yStride * offset) + |
| 543 | (mtls->fep.outStride.eStride * mtls->xStart); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 544 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 545 | for (int inIndex = inLen; --inIndex >= 0;) { |
| 546 | StridePair &strides = mtls->fep.inStrides[inIndex]; |
| 547 | |
| 548 | kparams.ins[inIndex] = |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 549 | mtls->fep.inPtrs[inIndex] + |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 550 | (strides.yStride * offset) + |
| 551 | (strides.eStride * mtls->xStart); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 552 | } |
| 553 | |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 554 | fn(&kparams, mtls->xStart, mtls->xEnd, |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 555 | mtls->fep.outStride.eStride); |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 556 | } |
| 557 | } |
| 558 | } |
Chris Wailes | 4b3c34e | 2014-06-11 12:00:29 -0700 | [diff] [blame] | 559 | } |
| 560 | } |
| 561 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 562 | RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) { |
| 563 | //ALOGE("setTls %p", sc); |
| 564 | ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); |
| 565 | rsAssert(tls); |
| 566 | RsdCpuScriptImpl *old = tls->mImpl; |
| 567 | tls->mImpl = sc; |
| 568 | tls->mContext = mRSC; |
| 569 | if (sc) { |
| 570 | tls->mScript = sc->getScript(); |
| 571 | } else { |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 572 | tls->mScript = nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 573 | } |
| 574 | return old; |
| 575 | } |
| 576 | |
| 577 | const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) { |
| 578 | return mSymLookupFn(mRSC, name); |
| 579 | } |
| 580 | |
| 581 | |
| 582 | RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s, |
| 583 | char const *resName, char const *cacheDir, |
| 584 | uint8_t const *bitcode, size_t bitcodeSize, |
| 585 | uint32_t flags) { |
| 586 | |
| 587 | RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s); |
Stephen Hines | 0051132 | 2014-01-31 11:20:23 -0800 | [diff] [blame] | 588 | if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags |
| 589 | #ifndef RS_COMPATIBILITY_LIB |
| 590 | , getBccPluginName() |
| 591 | #endif |
| 592 | )) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 593 | delete i; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 594 | return nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 595 | } |
| 596 | return i; |
| 597 | } |
| 598 | |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 599 | extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx, |
| 600 | const Script *s, const Element *e); |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 601 | extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, |
| 602 | const Script *s, const Element *e); |
| 603 | extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx, |
| 604 | const Script *s, const Element *e); |
| 605 | extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx, |
| 606 | const Script *s, const Element *e); |
| 607 | extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, |
| 608 | const Script *s, const Element *e); |
| 609 | extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, |
| 610 | const Script *s, const Element *e); |
| 611 | extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx, |
| 612 | const Script *s, const Element *e); |
| 613 | extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, |
| 614 | const Script *s, const Element *e); |
Jason Sams | 2282e28 | 2013-06-17 16:52:01 -0700 | [diff] [blame] | 615 | extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, |
| 616 | const Script *s, const Element *e); |
Jason Sams | 39ab94a | 2014-04-16 17:14:05 -0700 | [diff] [blame] | 617 | extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, |
| 618 | const Script *s, const Element *e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 619 | |
| 620 | RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s, |
| 621 | RsScriptIntrinsicID iid, Element *e) { |
| 622 | |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 623 | RsdCpuScriptImpl *i = nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 624 | switch (iid) { |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 625 | case RS_SCRIPT_INTRINSIC_ID_3DLUT: |
| 626 | i = rsdIntrinsic_3DLUT(this, s, e); |
| 627 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 628 | case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 629 | i = rsdIntrinsic_Convolve3x3(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 630 | break; |
| 631 | case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 632 | i = rsdIntrinsic_ColorMatrix(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 633 | break; |
| 634 | case RS_SCRIPT_INTRINSIC_ID_LUT: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 635 | i = rsdIntrinsic_LUT(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 636 | break; |
| 637 | case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 638 | i = rsdIntrinsic_Convolve5x5(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 639 | break; |
| 640 | case RS_SCRIPT_INTRINSIC_ID_BLUR: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 641 | i = rsdIntrinsic_Blur(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 642 | break; |
| 643 | case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 644 | i = rsdIntrinsic_YuvToRGB(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 645 | break; |
| 646 | case RS_SCRIPT_INTRINSIC_ID_BLEND: |
Jason Sams | c905efd | 2012-11-26 15:20:18 -0800 | [diff] [blame] | 647 | i = rsdIntrinsic_Blend(this, s, e); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 648 | break; |
Jason Sams | 2282e28 | 2013-06-17 16:52:01 -0700 | [diff] [blame] | 649 | case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM: |
| 650 | i = rsdIntrinsic_Histogram(this, s, e); |
| 651 | break; |
Jason Sams | 39ab94a | 2014-04-16 17:14:05 -0700 | [diff] [blame] | 652 | case RS_SCRIPT_INTRINSIC_ID_RESIZE: |
| 653 | i = rsdIntrinsic_Resize(this, s, e); |
| 654 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 655 | |
| 656 | default: |
| 657 | rsAssert(0); |
| 658 | } |
| 659 | |
| 660 | return i; |
| 661 | } |
| 662 | |
| 663 | RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) { |
| 664 | CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); |
| 665 | if (!sgi->init()) { |
| 666 | delete sgi; |
Chris Wailes | 44bef6f | 2014-08-12 13:51:10 -0700 | [diff] [blame] | 667 | return nullptr; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 668 | } |
| 669 | return sgi; |
| 670 | } |