blob: f3b7146d8e0d859b8d3ab2a19e7d27084d45b2ab [file] [log] [blame]
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsCpuIntrinsic.h"
18#include "rsCpuIntrinsicInlines.h"
19
Jason Sams2282e282013-06-17 16:52:01 -070020namespace android {
21namespace renderscript {
22
23
24class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
25public:
Stephen Hinesc060f142015-05-13 19:26:09 -070026 void populateScript(Script *) override;
27 void invokeFreeChildren() override;
Jason Sams2282e282013-06-17 16:52:01 -070028
Stephen Hinesc060f142015-05-13 19:26:09 -070029 void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
30 void setGlobalObj(uint32_t slot, ObjectBase *data) override;
Jason Sams2282e282013-06-17 16:52:01 -070031
Stephen Hinesc060f142015-05-13 19:26:09 -070032 ~RsdCpuScriptIntrinsicHistogram() override;
Jason Sams2282e282013-06-17 16:52:01 -070033 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
Chris Wailesf3712132014-07-16 15:18:30 -070036 void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Jason Sams2282e282013-06-17 16:52:01 -070037 Allocation * aout, const void * usr,
38 uint32_t usrLen, const RsScriptCall *sc);
Chris Wailesf3712132014-07-16 15:18:30 -070039 void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Jason Sams2282e282013-06-17 16:52:01 -070040 Allocation * aout, const void * usr,
41 uint32_t usrLen, const RsScriptCall *sc);
42
43
44 float mDot[4];
45 int mDotI[4];
46 int *mSums;
47 ObjectBaseRef<Allocation> mAllocOut;
48
David Grossb0abb142015-03-12 15:23:03 -070049 static void kernelP1U4(const RsExpandKernelDriverInfo *info,
Chris Wailes9ed79102014-07-25 15:53:28 -070050 uint32_t xstart, uint32_t xend,
51 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070052 static void kernelP1U3(const RsExpandKernelDriverInfo *info,
Chris Wailes9ed79102014-07-25 15:53:28 -070053 uint32_t xstart, uint32_t xend,
54 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070055 static void kernelP1U2(const RsExpandKernelDriverInfo *info,
Chris Wailes9ed79102014-07-25 15:53:28 -070056 uint32_t xstart, uint32_t xend,
57 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070058 static void kernelP1U1(const RsExpandKernelDriverInfo *info,
Chris Wailes9ed79102014-07-25 15:53:28 -070059 uint32_t xstart, uint32_t xend,
60 uint32_t outstep);
Jason Sams2282e282013-06-17 16:52:01 -070061
David Grossb0abb142015-03-12 15:23:03 -070062 static void kernelP1L4(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -070063 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070064 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070065 static void kernelP1L3(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -070066 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070067 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070068 static void kernelP1L2(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -070069 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070070 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070071 static void kernelP1L1(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -070072 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070073 uint32_t outstep);
Jason Sams75adb822013-10-22 11:43:54 -070074
Jason Sams2282e282013-06-17 16:52:01 -070075};
76
Jason Sams2282e282013-06-17 16:52:01 -070077void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
78 rsAssert(slot == 1);
79 mAllocOut.set(static_cast<Allocation *>(data));
80}
81
82void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
83 rsAssert(slot == 0);
84 rsAssert(dataLength == 16);
85 memcpy(mDot, data, 16);
86 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
87 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
88 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
89 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
90}
91
92
93
Chris Wailesf3712132014-07-16 15:18:30 -070094void
95RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
96 const Allocation ** ains,
97 uint32_t inLen, Allocation * aout,
98 const void * usr, uint32_t usrLen,
99 const RsScriptCall *sc) {
Jason Sams2282e282013-06-17 16:52:01 -0700100
101 const uint32_t threads = mCtx->getThreadCount();
Jason Sams75adb822013-10-22 11:43:54 -0700102 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
Jason Sams2282e282013-06-17 16:52:01 -0700103
104 switch (slot) {
105 case 0:
Jason Sams75adb822013-10-22 11:43:54 -0700106 switch(vSize) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700107 case 1:
Jason Sams2282e282013-06-17 16:52:01 -0700108 mRootPtr = &kernelP1U1;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700109 break;
110 case 2:
111 mRootPtr = &kernelP1U2;
112 break;
113 case 3:
114 mRootPtr = &kernelP1U3;
Jason Sams75adb822013-10-22 11:43:54 -0700115 vSize = 4;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700116 break;
117 case 4:
Jason Sams2282e282013-06-17 16:52:01 -0700118 mRootPtr = &kernelP1U4;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700119 break;
Jason Sams2282e282013-06-17 16:52:01 -0700120 }
121 break;
122 case 1:
Chris Wailesf3712132014-07-16 15:18:30 -0700123 switch(ains[0]->getType()->getElement()->getVectorSize()) {
Jason Sams75adb822013-10-22 11:43:54 -0700124 case 1:
125 mRootPtr = &kernelP1L1;
126 break;
127 case 2:
128 mRootPtr = &kernelP1L2;
129 break;
130 case 3:
131 mRootPtr = &kernelP1L3;
132 break;
133 case 4:
134 mRootPtr = &kernelP1L4;
135 break;
136 }
Jason Sams2282e282013-06-17 16:52:01 -0700137 break;
138 }
Jason Sams75adb822013-10-22 11:43:54 -0700139 memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
Jason Sams2282e282013-06-17 16:52:01 -0700140}
141
Chris Wailesf3712132014-07-16 15:18:30 -0700142void
143RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
144 const Allocation ** ains,
145 uint32_t inLen, Allocation * aout,
146 const void * usr, uint32_t usrLen,
147 const RsScriptCall *sc) {
Jason Sams2282e282013-06-17 16:52:01 -0700148
149 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
150 uint32_t threads = mCtx->getThreadCount();
151 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
152
Jason Sams75adb822013-10-22 11:43:54 -0700153 if (vSize == 3) vSize = 4;
154
Jason Sams2282e282013-06-17 16:52:01 -0700155 for (uint32_t ct=0; ct < (256 * vSize); ct++) {
156 o[ct] = mSums[ct];
157 for (uint32_t t=1; t < threads; t++) {
Tim Murray6de1d832013-11-13 17:13:37 -0800158 o[ct] += mSums[ct + (256 * vSize * t)];
Jason Sams2282e282013-06-17 16:52:01 -0700159 }
160 }
161}
162
David Grossb0abb142015-03-12 15:23:03 -0700163void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
Jason Sams2282e282013-06-17 16:52:01 -0700164 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700165 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700166
David Grossb0abb142015-03-12 15:23:03 -0700167 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
168 uchar *in = (uchar *)info->inPtr[0];
169 int * sums = &cp->mSums[256 * 4 * info->lid];
Jason Sams2282e282013-06-17 16:52:01 -0700170
171 for (uint32_t x = xstart; x < xend; x++) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700172 sums[(in[0] << 2) ] ++;
173 sums[(in[1] << 2) + 1] ++;
174 sums[(in[2] << 2) + 2] ++;
175 sums[(in[3] << 2) + 3] ++;
David Grossb0abb142015-03-12 15:23:03 -0700176 in += info->inStride[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700177 }
178}
179
David Grossb0abb142015-03-12 15:23:03 -0700180void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
Jason Samsb68ba7e2013-06-18 16:29:39 -0700181 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700182 uint32_t outstep) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700183
David Grossb0abb142015-03-12 15:23:03 -0700184 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
185 uchar *in = (uchar *)info->inPtr[0];
186 int * sums = &cp->mSums[256 * 4 * info->lid];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700187
188 for (uint32_t x = xstart; x < xend; x++) {
189 sums[(in[0] << 2) ] ++;
190 sums[(in[1] << 2) + 1] ++;
191 sums[(in[2] << 2) + 2] ++;
David Grossb0abb142015-03-12 15:23:03 -0700192 in += info->inStride[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700193 }
194}
195
David Grossb0abb142015-03-12 15:23:03 -0700196void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
Jason Samsb68ba7e2013-06-18 16:29:39 -0700197 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700198 uint32_t outstep) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700199
David Grossb0abb142015-03-12 15:23:03 -0700200 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
201 uchar *in = (uchar *)info->inPtr[0];
202 int * sums = &cp->mSums[256 * 2 * info->lid];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700203
204 for (uint32_t x = xstart; x < xend; x++) {
Jason Sams75adb822013-10-22 11:43:54 -0700205 sums[(in[0] << 1) ] ++;
206 sums[(in[1] << 1) + 1] ++;
David Grossb0abb142015-03-12 15:23:03 -0700207 in += info->inStride[0];
Jason Sams2282e282013-06-17 16:52:01 -0700208 }
209}
210
David Grossb0abb142015-03-12 15:23:03 -0700211void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -0700212 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700213 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700214
David Grossb0abb142015-03-12 15:23:03 -0700215 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
216 uchar *in = (uchar *)info->inPtr[0];
217 int * sums = &cp->mSums[256 * info->lid];
Jason Sams2282e282013-06-17 16:52:01 -0700218
219 for (uint32_t x = xstart; x < xend; x++) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700220 int t = (cp->mDotI[0] * in[0]) +
221 (cp->mDotI[1] * in[1]) +
222 (cp->mDotI[2] * in[2]) +
223 (cp->mDotI[3] * in[3]);
Jason Sams75adb822013-10-22 11:43:54 -0700224 sums[(t + 0x7f) >> 8] ++;
David Grossb0abb142015-03-12 15:23:03 -0700225 in += info->inStride[0];
Jason Sams75adb822013-10-22 11:43:54 -0700226 }
227}
228
David Grossb0abb142015-03-12 15:23:03 -0700229void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -0700230 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700231 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700232
David Grossb0abb142015-03-12 15:23:03 -0700233 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
234 uchar *in = (uchar *)info->inPtr[0];
235 int * sums = &cp->mSums[256 * info->lid];
Jason Sams75adb822013-10-22 11:43:54 -0700236
237 for (uint32_t x = xstart; x < xend; x++) {
238 int t = (cp->mDotI[0] * in[0]) +
239 (cp->mDotI[1] * in[1]) +
240 (cp->mDotI[2] * in[2]);
241 sums[(t + 0x7f) >> 8] ++;
David Grossb0abb142015-03-12 15:23:03 -0700242 in += info->inStride[0];
Jason Sams75adb822013-10-22 11:43:54 -0700243 }
244}
245
David Grossb0abb142015-03-12 15:23:03 -0700246void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -0700247 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700248 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700249
David Grossb0abb142015-03-12 15:23:03 -0700250 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
251 uchar *in = (uchar *)info->inPtr[0];
252 int * sums = &cp->mSums[256 * info->lid];
Jason Sams75adb822013-10-22 11:43:54 -0700253
254 for (uint32_t x = xstart; x < xend; x++) {
255 int t = (cp->mDotI[0] * in[0]) +
256 (cp->mDotI[1] * in[1]);
257 sums[(t + 0x7f) >> 8] ++;
David Grossb0abb142015-03-12 15:23:03 -0700258 in += info->inStride[0];
Jason Sams75adb822013-10-22 11:43:54 -0700259 }
260}
261
David Grossb0abb142015-03-12 15:23:03 -0700262void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
Jason Sams75adb822013-10-22 11:43:54 -0700263 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700264 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700265
David Grossb0abb142015-03-12 15:23:03 -0700266 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
267 uchar *in = (uchar *)info->inPtr[0];
268 int * sums = &cp->mSums[256 * info->lid];
Jason Sams75adb822013-10-22 11:43:54 -0700269
270 for (uint32_t x = xstart; x < xend; x++) {
271 int t = (cp->mDotI[0] * in[0]);
272 sums[(t + 0x7f) >> 8] ++;
David Grossb0abb142015-03-12 15:23:03 -0700273 in += info->inStride[0];
Jason Sams2282e282013-06-17 16:52:01 -0700274 }
275}
276
David Grossb0abb142015-03-12 15:23:03 -0700277void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
Jason Sams2282e282013-06-17 16:52:01 -0700278 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700279 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700280
David Grossb0abb142015-03-12 15:23:03 -0700281 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
282 uchar *in = (uchar *)info->inPtr[0];
283 int * sums = &cp->mSums[256 * info->lid];
Jason Sams75adb822013-10-22 11:43:54 -0700284
285 for (uint32_t x = xstart; x < xend; x++) {
286 sums[in[0]] ++;
David Grossb0abb142015-03-12 15:23:03 -0700287 in += info->inStride[0];
Jason Sams75adb822013-10-22 11:43:54 -0700288 }
Jason Sams2282e282013-06-17 16:52:01 -0700289}
290
291
292RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
293 const Script *s, const Element *e)
Tim Murray6de1d832013-11-13 17:13:37 -0800294 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
Jason Sams2282e282013-06-17 16:52:01 -0700295
Chris Wailes44bef6f2014-08-12 13:51:10 -0700296 mRootPtr = nullptr;
Jason Sams2282e282013-06-17 16:52:01 -0700297 mSums = new int[256 * 4 * mCtx->getThreadCount()];
298 mDot[0] = 0.299f;
299 mDot[1] = 0.587f;
300 mDot[2] = 0.114f;
301 mDot[3] = 0;
302 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
303 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
304 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
305 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
306}
307
308RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
309 if (mSums) {
310 delete []mSums;
311 }
312}
313
314void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
315 s->mHal.info.exportedVariableCount = 2;
316}
317
318void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
319}
320
Jason Sams2282e282013-06-17 16:52:01 -0700321RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
322
323 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
324}
Chih-Hung Hsieh462de212016-11-16 11:33:57 -0800325
326} // namespace renderscript
327} // namespace android