blob: 2492c22e983bf9ea82850edf5aaa42f34f70f3de [file] [log] [blame]
Jason Sams709a0972012-11-15 18:18:04 -08001/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19#include "rsCpuScriptGroup.h"
Yang Ni1ffd86b2015-01-07 09:16:40 -080020#include "rsCpuScriptGroup2.h"
Jason Sams709a0972012-11-15 18:18:04 -080021
22#include <malloc.h>
23#include "rsContext.h"
24
25#include <sys/types.h>
26#include <sys/resource.h>
27#include <sched.h>
Jason Sams709a0972012-11-15 18:18:04 -080028#include <sys/syscall.h>
29#include <string.h>
Stephen Hinesb0934b62013-07-03 17:27:38 -070030#include <unistd.h>
Tim Murray0b575de2013-03-15 15:56:43 -070031
Jason Samsf5ef8df2013-08-06 13:49:25 -070032#include <stdio.h>
33#include <stdlib.h>
34#include <fcntl.h>
35
Stephen Hinesb0934b62013-07-03 17:27:38 -070036#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
Tim Murray0b575de2013-03-15 15:56:43 -070037#include <cutils/properties.h>
Jason Sams709a0972012-11-15 18:18:04 -080038#include "utils/StopWatch.h"
Tim Murray0b575de2013-03-15 15:56:43 -070039#endif
40
#ifdef RS_SERVER
// Android's libc exposes gettid(); a standard Linux host build does not, so
// for server builds we issue the raw system call ourselves.
static pid_t gettid() {
    const long tid = syscall(SYS_gettid);
    return tid;
}
#endif
Jason Sams709a0972012-11-15 18:18:04 -080047
48using namespace android;
49using namespace android::renderscript;
50
51typedef void (*outer_foreach_t)(
Chris Wailes80ef6932014-07-08 11:22:18 -070052 const android::renderscript::RsExpandKernelParams *,
Chris Wailes9ed79102014-07-25 15:53:28 -070053 uint32_t x1, uint32_t x2, uint32_t outstep);
Jason Sams709a0972012-11-15 18:18:04 -080054
55
56static pthread_key_t gThreadTLSKey = 0;
57static uint32_t gThreadTLSKeyCount = 0;
58static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER;
59
Jason Samsf5ef8df2013-08-06 13:49:25 -070060bool android::renderscript::gArchUseSIMD = false;
61
Jason Sams709a0972012-11-15 18:18:04 -080062RsdCpuReference::~RsdCpuReference() {
63}
64
65RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
Jason Samscadfac42013-03-06 18:09:08 -080066 uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
Stephen Hines1d476622013-03-29 22:08:49 -070067 , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
Stephen Hines00511322014-01-31 11:20:23 -080068 RSSelectRTCallback pSelectRTCallback,
69 const char *pBccPluginName
Jason Samscadfac42013-03-06 18:09:08 -080070 ) {
Jason Sams709a0972012-11-15 18:18:04 -080071
72 RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
73 if (!cpu) {
Chris Wailes44bef6f2014-08-12 13:51:10 -070074 return nullptr;
Jason Sams709a0972012-11-15 18:18:04 -080075 }
76 if (!cpu->init(version_major, version_minor, lfn, slfn)) {
77 delete cpu;
Chris Wailes44bef6f2014-08-12 13:51:10 -070078 return nullptr;
Jason Sams709a0972012-11-15 18:18:04 -080079 }
Stephen Hinesf218bf12013-02-12 19:32:38 -080080
81 cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
Stephen Hines1d476622013-03-29 22:08:49 -070082 cpu->setSelectRTCallback(pSelectRTCallback);
Stephen Hines00511322014-01-31 11:20:23 -080083 if (pBccPluginName) {
84 cpu->setBccPluginName(pBccPluginName);
85 }
Stephen Hinesf218bf12013-02-12 19:32:38 -080086
Jason Sams709a0972012-11-15 18:18:04 -080087 return cpu;
88}
89
90
91Context * RsdCpuReference::getTlsContext() {
92 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
93 return tls->mContext;
94}
95
96const Script * RsdCpuReference::getTlsScript() {
97 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
98 return tls->mScript;
99}
100
Stephen Hinesf218bf12013-02-12 19:32:38 -0800101pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; }
Jason Sams709a0972012-11-15 18:18:04 -0800102
103////////////////////////////////////////////////////////////
104///
105
106RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) {
107 mRSC = rsc;
108
109 version_major = 0;
110 version_minor = 0;
111 mInForEach = false;
112 memset(&mWorkers, 0, sizeof(mWorkers));
113 memset(&mTlsStruct, 0, sizeof(mTlsStruct));
114 mExit = false;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700115 mLinkRuntimeCallback = nullptr;
116 mSelectRTCallback = nullptr;
117 mSetupCompilerCallback = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800118}
119
120
121void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
122 RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc;
123
Tim Murray0b575de2013-03-15 15:56:43 -0700124 uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1);
Jason Sams709a0972012-11-15 18:18:04 -0800125
126 //ALOGV("RS helperThread starting %p idx=%i", dc, idx);
127
128 dc->mWorkers.mLaunchSignals[idx].init();
129 dc->mWorkers.mNativeThreadId[idx] = gettid();
130
131 memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct));
132 int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct);
133 if (status) {
134 ALOGE("pthread_setspecific %i", status);
135 }
136
137#if 0
138 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
139 cpu_set_t cpuset;
140 memset(&cpuset, 0, sizeof(cpuset));
141 cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
142 int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
143 sizeof(cpuset), &cpuset);
144 ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
145#endif
146
147 while (!dc->mExit) {
148 dc->mWorkers.mLaunchSignals[idx].wait();
149 if (dc->mWorkers.mLaunchCallback) {
150 // idx +1 is used because the calling thread is always worker 0.
151 dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1);
152 }
Tim Murray0b575de2013-03-15 15:56:43 -0700153 __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1);
Jason Sams709a0972012-11-15 18:18:04 -0800154 dc->mWorkers.mCompleteSignal.set();
155 }
156
157 //ALOGV("RS helperThread exited %p idx=%i", dc, idx);
Chris Wailes44bef6f2014-08-12 13:51:10 -0700158 return nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800159}
160
161void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
162 mWorkers.mLaunchData = data;
163 mWorkers.mLaunchCallback = cbk;
Tim Murray4d252d62012-11-29 14:37:59 -0800164
165 // fast path for very small launches
166 MTLaunchStruct *mtls = (MTLaunchStruct *)data;
Jason Samsbf2111d2015-01-26 18:13:41 -0800167 if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) {
Tim Murray4d252d62012-11-29 14:37:59 -0800168 if (mWorkers.mLaunchCallback) {
169 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
170 }
171 return;
172 }
173
Tim Murray0b575de2013-03-15 15:56:43 -0700174 mWorkers.mRunningCount = mWorkers.mCount;
175 __sync_synchronize();
176
Jason Sams709a0972012-11-15 18:18:04 -0800177 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
178 mWorkers.mLaunchSignals[ct].set();
179 }
180
181 // We use the calling thread as one of the workers so we can start without
182 // the delay of the thread wakeup.
183 if (mWorkers.mLaunchCallback) {
Tim Murray4d252d62012-11-29 14:37:59 -0800184 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
Jason Sams709a0972012-11-15 18:18:04 -0800185 }
186
Tim Murray0b575de2013-03-15 15:56:43 -0700187 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
Jason Sams709a0972012-11-15 18:18:04 -0800188 mWorkers.mCompleteSignal.wait();
189 }
190}
191
192
193void RsdCpuReferenceImpl::lockMutex() {
194 pthread_mutex_lock(&gInitMutex);
195}
196
197void RsdCpuReferenceImpl::unlockMutex() {
198 pthread_mutex_unlock(&gInitMutex);
199}
200
// Reads up to `buffsize` bytes from the beginning of `pathname` into `buffer`
// using a single read(), retried only while it is interrupted (EINTR).
//
// Returns the number of bytes read, or -1 when the file cannot be opened or
// the read fails.  The data is NOT NUL-terminated.
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    const int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        return -1;
    }

    int bytesRead;
    for (;;) {
        bytesRead = read(fd, buffer, buffsize);
        if (bytesRead >= 0 || errno != EINTR) {
            break;
        }
    }

    close(fd);
    return bytesRead;
}
218
219static void GetCpuInfo() {
220 char cpuinfo[4096];
221 int cpuinfo_len;
222
223 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, sizeof cpuinfo);
224 if (cpuinfo_len < 0) /* should not happen */ {
225 return;
226 }
227
Jason Sams074424a2014-05-22 13:30:03 -0700228#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
229 gArchUseSIMD = (!!strstr(cpuinfo, " neon")) ||
230 (!!strstr(cpuinfo, " asimd"));
Rose, James7b7060c2014-04-22 12:08:06 +0800231#elif defined(ARCH_X86_HAVE_SSSE3)
232 gArchUseSIMD = !!strstr(cpuinfo, " ssse3");
233#endif
Jason Samsf5ef8df2013-08-06 13:49:25 -0700234}
Jason Samsf5ef8df2013-08-06 13:49:25 -0700235
Jason Sams709a0972012-11-15 18:18:04 -0800236bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor,
237 sym_lookup_t lfn, script_lookup_t slfn) {
238
239 mSymLookupFn = lfn;
240 mScriptLookupFn = slfn;
241
242 lockMutex();
243 if (!gThreadTLSKeyCount) {
Chris Wailes44bef6f2014-08-12 13:51:10 -0700244 int status = pthread_key_create(&gThreadTLSKey, nullptr);
Jason Sams709a0972012-11-15 18:18:04 -0800245 if (status) {
246 ALOGE("Failed to init thread tls key.");
247 unlockMutex();
248 return false;
249 }
250 }
251 gThreadTLSKeyCount++;
252 unlockMutex();
253
254 mTlsStruct.mContext = mRSC;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700255 mTlsStruct.mScript = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800256 int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct);
257 if (status) {
258 ALOGE("pthread_setspecific %i", status);
259 }
260
Jason Samsf5ef8df2013-08-06 13:49:25 -0700261 GetCpuInfo();
Jason Samsf5ef8df2013-08-06 13:49:25 -0700262
Jason Sams77d57a32014-10-23 17:43:53 -0700263 int cpu = sysconf(_SC_NPROCESSORS_CONF);
Jason Sams709a0972012-11-15 18:18:04 -0800264 if(mRSC->props.mDebugMaxThreads) {
265 cpu = mRSC->props.mDebugMaxThreads;
266 }
267 if (cpu < 2) {
268 mWorkers.mCount = 0;
269 return true;
270 }
271
272 // Subtract one from the cpu count because we also use the command thread as a worker.
273 mWorkers.mCount = (uint32_t)(cpu - 1);
274
Jason Sams8ca358a2013-03-19 13:59:40 -0700275 ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
Jason Sams709a0972012-11-15 18:18:04 -0800276
277 mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
278 mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
279 mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
Chris Wailes44bef6f2014-08-12 13:51:10 -0700280 mWorkers.mLaunchCallback = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800281
282 mWorkers.mCompleteSignal.init();
283
Tim Murray0b575de2013-03-15 15:56:43 -0700284 mWorkers.mRunningCount = mWorkers.mCount;
285 mWorkers.mLaunchCount = 0;
286 __sync_synchronize();
Jason Sams709a0972012-11-15 18:18:04 -0800287
288 pthread_attr_t threadAttr;
289 status = pthread_attr_init(&threadAttr);
290 if (status) {
291 ALOGE("Failed to init thread attribute.");
292 return false;
293 }
294
295 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
296 status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
297 if (status) {
298 mWorkers.mCount = ct;
299 ALOGE("Created fewer than expected number of RS threads.");
300 break;
301 }
302 }
Tim Murray0b575de2013-03-15 15:56:43 -0700303 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) {
Jason Sams709a0972012-11-15 18:18:04 -0800304 usleep(100);
305 }
306
307 pthread_attr_destroy(&threadAttr);
308 return true;
309}
310
311
312void RsdCpuReferenceImpl::setPriority(int32_t priority) {
313 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
314 setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority);
315 }
316}
317
318RsdCpuReferenceImpl::~RsdCpuReferenceImpl() {
319 mExit = true;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700320 mWorkers.mLaunchData = nullptr;
321 mWorkers.mLaunchCallback = nullptr;
Tim Murray0b575de2013-03-15 15:56:43 -0700322 mWorkers.mRunningCount = mWorkers.mCount;
323 __sync_synchronize();
Jason Sams709a0972012-11-15 18:18:04 -0800324 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
325 mWorkers.mLaunchSignals[ct].set();
326 }
327 void *res;
328 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
329 pthread_join(mWorkers.mThreadId[ct], &res);
330 }
Tim Murray0b575de2013-03-15 15:56:43 -0700331 rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0);
Jens Gulin07ef7042014-02-19 18:16:01 +0100332 free(mWorkers.mThreadId);
333 free(mWorkers.mNativeThreadId);
334 delete[] mWorkers.mLaunchSignals;
Jason Sams709a0972012-11-15 18:18:04 -0800335
336 // Global structure cleanup.
337 lockMutex();
338 --gThreadTLSKeyCount;
339 if (!gThreadTLSKeyCount) {
340 pthread_key_delete(gThreadTLSKey);
341 }
342 unlockMutex();
343
344}
345
346typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
Jason Samsc0d68472015-01-20 14:29:52 -0800347typedef void (*walk_loop_t)(const MTLaunchStruct*,
348 RsExpandKernelDriverInfo,
Chris Wailesf3712132014-07-16 15:18:30 -0700349 outer_foreach_t);
Jason Sams709a0972012-11-15 18:18:04 -0800350
Jason Samsc0d68472015-01-20 14:29:52 -0800351static void kparamSetup(RsExpandKernelParams *kparams, const RsExpandKernelDriverInfo *fep) {
352 //ALOGE("kp usr %p", fep->usr);
353 //ALOGE("kp slot %i", fep->slot);
354 //ALOGE("kp dim %i %i %i", fep->dim.x, fep->dim.y, fep->dim.z);
355 //ALOGE("kp lid %i", fep->lid);
356 //ALOGE("kp in[0] stide %i ptr %p", fep->inStride[0], fep->inPtr[0]);
357 //ALOGE("kp out[0] ptr %p", fep->outPtr[0]);
358 //ALOGE("kp loc %i %i %i", fep->current.x, fep->current.y, fep->current.z);
Chris Wailesf3712132014-07-16 15:18:30 -0700359
Jason Samsc0d68472015-01-20 14:29:52 -0800360 kparams->usr = fep->usr;
361 kparams->slot = fep->slot;
362 kparams->dimX = fep->dim.x;
363 kparams->dimY = fep->dim.y;
364 kparams->dimZ = fep->dim.z;
365 kparams->lid = fep->lid;
366 kparams->inEStrides = (uint32_t *)&fep->inStride[0];
367 kparams->ins = (const void **)&fep->inPtr[0];
368 kparams->out = fep->outPtr[0];
369 kparams->y = fep->current.y;
370 kparams->z = fep->current.z;
Jason Sams709a0972012-11-15 18:18:04 -0800371}
372
Jason Samsbf2111d2015-01-26 18:13:41 -0800373static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
Jason Samsc0d68472015-01-20 14:29:52 -0800374 uint32_t x, uint32_t y,
375 uint32_t z = 0, uint32_t lod = 0,
376 RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
377 uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) {
378
379 for (uint32_t i = 0; i < fep->inLen; i++) {
380 fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
381 }
382
383 if (mtls->aout[0] != nullptr) {
384 fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4);
385 }
386}
387
// Peels one dimension off a linear slice number.
//
// For a non-empty range [start, end) the coordinate start + val % (end - start)
// is stored in *p and the quotient val / (end - start) is returned for the
// next dimension.  For an empty or inverted range *p is set to start and val
// is returned unchanged.
static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end) {
    if (end > start) {
        const uint32_t extent = end - start;
        *p = start + (val % extent);
        return val / extent;
    }

    *p = start;
    return val;
}
400
401static bool SelectOuterSlice(MTLaunchStruct* mtls, uint32_t sliceNum) {
402
403 uint32_t r = sliceNum;
404 r = sliceInt(&mtls->fep.current.z, r, mtls->start.z, mtls->end.z);
405 r = sliceInt(&mtls->fep.current.lod, r, mtls->start.lod, mtls->end.lod);
406 r = sliceInt(&mtls->fep.current.face, r, mtls->start.face, mtls->end.face);
407 r = sliceInt(&mtls->fep.current.array[0], r, mtls->start.array[0], mtls->end.array[0]);
408 r = sliceInt(&mtls->fep.current.array[1], r, mtls->start.array[1], mtls->end.array[1]);
409 r = sliceInt(&mtls->fep.current.array[2], r, mtls->start.array[2], mtls->end.array[2]);
410 r = sliceInt(&mtls->fep.current.array[3], r, mtls->start.array[3], mtls->end.array[3]);
411 return r == 0;
412}
413
414
415static void walk_general(void *usr, uint32_t idx) {
416 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
417 RsExpandKernelDriverInfo fep = mtls->fep;
418 fep.lid = idx;
419 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
420
421
422 while(1) {
423 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
424
425 if (!SelectOuterSlice(mtls, slice)) {
426 return;
427 }
428
429 for (mtls->fep.current.y = mtls->start.y;
430 mtls->fep.current.y < mtls->end.y;
431 mtls->fep.current.y++) {
432
433 FepPtrSetup(mtls, &mtls->fep, mtls->start.x,
434 mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod,
435 (RsAllocationCubemapFace)mtls->fep.current.face,
436 mtls->fep.current.array[0], mtls->fep.current.array[1],
437 mtls->fep.current.array[2], mtls->fep.current.array[3]);
438
439 RsExpandKernelParams kparams;
440 kparamSetup(&kparams, &mtls->fep);
441 fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
442 }
443 }
444
445}
Jason Samsc0d68472015-01-20 14:29:52 -0800446
Chris Wailesf3712132014-07-16 15:18:30 -0700447static void walk_2d(void *usr, uint32_t idx) {
Jason Samsc0d68472015-01-20 14:29:52 -0800448 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
449 RsExpandKernelDriverInfo fep = mtls->fep;
450 fep.lid = idx;
Jason Samsbf2111d2015-01-26 18:13:41 -0800451 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700452
Jason Samsc0d68472015-01-20 14:29:52 -0800453 while (1) {
454 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
Jason Samsbf2111d2015-01-26 18:13:41 -0800455 uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize;
Jason Samsc0d68472015-01-20 14:29:52 -0800456 uint32_t yEnd = yStart + mtls->mSliceSize;
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700457
Jason Samsbf2111d2015-01-26 18:13:41 -0800458 yEnd = rsMin(yEnd, mtls->end.y);
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700459
Jason Samsc0d68472015-01-20 14:29:52 -0800460 if (yEnd <= yStart) {
461 return;
Stephen Hines4b2bea32014-08-13 17:32:10 +0000462 }
Jason Samsc0d68472015-01-20 14:29:52 -0800463
464 for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) {
Jason Samsbf2111d2015-01-26 18:13:41 -0800465 FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y);
Jason Samsc0d68472015-01-20 14:29:52 -0800466
467 RsExpandKernelParams kparams;
468 kparamSetup(&kparams, &fep);
469
Jason Samsbf2111d2015-01-26 18:13:41 -0800470 fn(&kparams, mtls->start.x, mtls->end.x, fep.outStride[0]);
Jason Samsc0d68472015-01-20 14:29:52 -0800471 }
472 }
Stephen Hines4b2bea32014-08-13 17:32:10 +0000473}
474
Chris Wailesf3712132014-07-16 15:18:30 -0700475static void walk_1d(void *usr, uint32_t idx) {
Jason Samsc0d68472015-01-20 14:29:52 -0800476 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
477 RsExpandKernelDriverInfo fep = mtls->fep;
478 fep.lid = idx;
Jason Samsbf2111d2015-01-26 18:13:41 -0800479 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
Chris Wailesf3712132014-07-16 15:18:30 -0700480
Jason Samsc0d68472015-01-20 14:29:52 -0800481 while (1) {
482 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
Jason Samsbf2111d2015-01-26 18:13:41 -0800483 uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize;
Jason Samsc0d68472015-01-20 14:29:52 -0800484 uint32_t xEnd = xStart + mtls->mSliceSize;
Chris Wailesf3712132014-07-16 15:18:30 -0700485
Jason Samsbf2111d2015-01-26 18:13:41 -0800486 xEnd = rsMin(xEnd, mtls->end.x);
Chris Wailesf3712132014-07-16 15:18:30 -0700487
Jason Samsc0d68472015-01-20 14:29:52 -0800488 if (xEnd <= xStart) {
489 return;
Chris Wailesf3712132014-07-16 15:18:30 -0700490 }
Jason Samsc0d68472015-01-20 14:29:52 -0800491
Jason Samsbf2111d2015-01-26 18:13:41 -0800492 FepPtrSetup(mtls, &fep, xStart, 0);
Jason Samsc0d68472015-01-20 14:29:52 -0800493
494 RsExpandKernelParams kparams;
495 kparamSetup(&kparams, &fep);
496
Jason Samsc0d68472015-01-20 14:29:52 -0800497 fn(&kparams, xStart, xEnd, fep.outStride[0]);
498 }
Chris Wailesf3712132014-07-16 15:18:30 -0700499}
500
Chris Wailesf3712132014-07-16 15:18:30 -0700501void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
502 uint32_t inLen,
503 Allocation* aout,
504 const RsScriptCall* sc,
505 MTLaunchStruct* mtls) {
Stephen Hines4b2bea32014-08-13 17:32:10 +0000506
507 //android::StopWatch kernel_time("kernel time");
508
Jason Samsbf2111d2015-01-26 18:13:41 -0800509 bool outerDims = (mtls->start.z != mtls->end.z) ||
510 (mtls->start.face != mtls->end.face) ||
511 (mtls->start.lod != mtls->end.lod) ||
512 (mtls->start.array[0] != mtls->end.array[0]) ||
513 (mtls->start.array[1] != mtls->end.array[1]) ||
514 (mtls->start.array[2] != mtls->end.array[2]) ||
515 (mtls->start.array[3] != mtls->end.array[3]);
516
Stephen Hines4b2bea32014-08-13 17:32:10 +0000517 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
518 const size_t targetByteChunk = 16 * 1024;
519 mInForEach = true;
Chris Wailesf3712132014-07-16 15:18:30 -0700520
Jason Samsbf2111d2015-01-26 18:13:41 -0800521 if (outerDims) {
522 // No fancy logic for chunk size
523 mtls->mSliceSize = 1;
524 launchThreads(walk_general, mtls);
525 } else if (mtls->fep.dim.y > 1) {
Jason Samsc0d68472015-01-20 14:29:52 -0800526 uint32_t s1 = mtls->fep.dim.y / ((mWorkers.mCount + 1) * 4);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000527 uint32_t s2 = 0;
528
529 // This chooses our slice size to rate limit atomic ops to
530 // one per 16k bytes of reads/writes.
Jason Samsc0d68472015-01-20 14:29:52 -0800531 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->mHal.drvState.lod[0].stride) {
532 s2 = targetByteChunk / mtls->aout[0]->mHal.drvState.lod[0].stride;
Stephen Hines4b2bea32014-08-13 17:32:10 +0000533 } else {
Chris Wailesf3712132014-07-16 15:18:30 -0700534 // We know that there is either an output or an input.
Jason Samsc0d68472015-01-20 14:29:52 -0800535 s2 = targetByteChunk / mtls->ains[0]->mHal.drvState.lod[0].stride;
Stephen Hines4b2bea32014-08-13 17:32:10 +0000536 }
537 mtls->mSliceSize = rsMin(s1, s2);
538
539 if(mtls->mSliceSize < 1) {
540 mtls->mSliceSize = 1;
541 }
542
Chris Wailesf3712132014-07-16 15:18:30 -0700543 launchThreads(walk_2d, mtls);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000544 } else {
Jason Samsc0d68472015-01-20 14:29:52 -0800545 uint32_t s1 = mtls->fep.dim.x / ((mWorkers.mCount + 1) * 4);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000546 uint32_t s2 = 0;
547
548 // This chooses our slice size to rate limit atomic ops to
549 // one per 16k bytes of reads/writes.
Jason Samsc0d68472015-01-20 14:29:52 -0800550 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->getType()->getElementSizeBytes()) {
551 s2 = targetByteChunk / mtls->aout[0]->getType()->getElementSizeBytes();
Stephen Hines4b2bea32014-08-13 17:32:10 +0000552 } else {
Chris Wailesf3712132014-07-16 15:18:30 -0700553 // We know that there is either an output or an input.
Jason Samsc0d68472015-01-20 14:29:52 -0800554 s2 = targetByteChunk / mtls->ains[0]->getType()->getElementSizeBytes();
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700555 }
556 mtls->mSliceSize = rsMin(s1, s2);
557
558 if (mtls->mSliceSize < 1) {
559 mtls->mSliceSize = 1;
560 }
561
Chris Wailesf3712132014-07-16 15:18:30 -0700562 launchThreads(walk_1d, mtls);
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700563 }
564 mInForEach = false;
565
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700566 } else {
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700567 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
Jason Samsbf2111d2015-01-26 18:13:41 -0800568 uint32_t slice = 0;
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700569
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700570
Jason Samsbf2111d2015-01-26 18:13:41 -0800571 while(SelectOuterSlice(mtls, slice++)) {
572 for (mtls->fep.current.y = mtls->start.y;
573 mtls->fep.current.y < mtls->end.y;
574 mtls->fep.current.y++) {
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700575
Jason Samsbf2111d2015-01-26 18:13:41 -0800576 FepPtrSetup(mtls, &mtls->fep, mtls->start.x,
577 mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod,
578 (RsAllocationCubemapFace) mtls->fep.current.face,
579 mtls->fep.current.array[0], mtls->fep.current.array[1],
580 mtls->fep.current.array[2], mtls->fep.current.array[3]);
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700581
Jason Samsbf2111d2015-01-26 18:13:41 -0800582 RsExpandKernelParams kparams;
583 kparamSetup(&kparams, &mtls->fep);
584 fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700585 }
586 }
Chris Wailes4b3c34e2014-06-11 12:00:29 -0700587 }
588}
589
Jason Sams709a0972012-11-15 18:18:04 -0800590RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) {
591 //ALOGE("setTls %p", sc);
592 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey);
593 rsAssert(tls);
594 RsdCpuScriptImpl *old = tls->mImpl;
595 tls->mImpl = sc;
596 tls->mContext = mRSC;
597 if (sc) {
598 tls->mScript = sc->getScript();
599 } else {
Chris Wailes44bef6f2014-08-12 13:51:10 -0700600 tls->mScript = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800601 }
602 return old;
603}
604
605const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) {
606 return mSymLookupFn(mRSC, name);
607}
608
609
610RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s,
611 char const *resName, char const *cacheDir,
612 uint8_t const *bitcode, size_t bitcodeSize,
613 uint32_t flags) {
614
615 RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
Stephen Hines00511322014-01-31 11:20:23 -0800616 if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags
Stephen Hines00511322014-01-31 11:20:23 -0800617 , getBccPluginName()
Stephen Hines00511322014-01-31 11:20:23 -0800618 )) {
Jason Sams709a0972012-11-15 18:18:04 -0800619 delete i;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700620 return nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800621 }
622 return i;
623}
624
Jason Sams7c4b8882013-01-04 10:50:05 -0800625extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
626 const Script *s, const Element *e);
Jason Samsc905efd2012-11-26 15:20:18 -0800627extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
628 const Script *s, const Element *e);
629extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
630 const Script *s, const Element *e);
631extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx,
632 const Script *s, const Element *e);
633extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx,
634 const Script *s, const Element *e);
635extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx,
636 const Script *s, const Element *e);
637extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
638 const Script *s, const Element *e);
639extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
640 const Script *s, const Element *e);
Jason Sams2282e282013-06-17 16:52:01 -0700641extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx,
642 const Script *s, const Element *e);
Jason Sams39ab94a2014-04-16 17:14:05 -0700643extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx,
644 const Script *s, const Element *e);
Tim Murray64c682b2015-01-09 12:08:43 -0800645extern RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
646 const Script *s, const Element *e);
Jason Sams709a0972012-11-15 18:18:04 -0800647
648RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
649 RsScriptIntrinsicID iid, Element *e) {
650
Chris Wailes44bef6f2014-08-12 13:51:10 -0700651 RsdCpuScriptImpl *i = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800652 switch (iid) {
Jason Sams7c4b8882013-01-04 10:50:05 -0800653 case RS_SCRIPT_INTRINSIC_ID_3DLUT:
654 i = rsdIntrinsic_3DLUT(this, s, e);
655 break;
Jason Sams709a0972012-11-15 18:18:04 -0800656 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
Jason Samsc905efd2012-11-26 15:20:18 -0800657 i = rsdIntrinsic_Convolve3x3(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800658 break;
659 case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX:
Jason Samsc905efd2012-11-26 15:20:18 -0800660 i = rsdIntrinsic_ColorMatrix(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800661 break;
662 case RS_SCRIPT_INTRINSIC_ID_LUT:
Jason Samsc905efd2012-11-26 15:20:18 -0800663 i = rsdIntrinsic_LUT(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800664 break;
665 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5:
Jason Samsc905efd2012-11-26 15:20:18 -0800666 i = rsdIntrinsic_Convolve5x5(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800667 break;
668 case RS_SCRIPT_INTRINSIC_ID_BLUR:
Jason Samsc905efd2012-11-26 15:20:18 -0800669 i = rsdIntrinsic_Blur(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800670 break;
671 case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB:
Jason Samsc905efd2012-11-26 15:20:18 -0800672 i = rsdIntrinsic_YuvToRGB(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800673 break;
674 case RS_SCRIPT_INTRINSIC_ID_BLEND:
Jason Samsc905efd2012-11-26 15:20:18 -0800675 i = rsdIntrinsic_Blend(this, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800676 break;
Jason Sams2282e282013-06-17 16:52:01 -0700677 case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM:
678 i = rsdIntrinsic_Histogram(this, s, e);
679 break;
Jason Sams39ab94a2014-04-16 17:14:05 -0700680 case RS_SCRIPT_INTRINSIC_ID_RESIZE:
681 i = rsdIntrinsic_Resize(this, s, e);
682 break;
Tim Murray64c682b2015-01-09 12:08:43 -0800683#if !defined(RS_COMPATIBILITY_LIB)
684 case RS_SCRIPT_INTRINSIC_ID_BLAS:
685 i = rsdIntrinsic_BLAS(this, s, e);
686 break;
687#endif
Jason Sams709a0972012-11-15 18:18:04 -0800688
689 default:
690 rsAssert(0);
691 }
692
693 return i;
694}
695
Yang Ni1ffd86b2015-01-07 09:16:40 -0800696void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) {
697 switch (sg->getApiVersion()) {
698 case ScriptGroupBase::SG_V1: {
699 CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg);
700 if (!sgi->init()) {
Jason Sams709a0972012-11-15 18:18:04 -0800701 delete sgi;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700702 return nullptr;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800703 }
704 return sgi;
Jason Sams709a0972012-11-15 18:18:04 -0800705 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800706 case ScriptGroupBase::SG_V2: {
707 return new CpuScriptGroup2Impl(this, sg);
708 }
709 }
710 return nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800711}