blob: a2cd8e51c2a8f0097e1f4b49738ba1a65a27681c [file] [log] [blame]
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
Jason Sams709a0972012-11-15 18:18:04 -080021namespace android {
22namespace renderscript {
23
24
25class RsdCpuScriptIntrinsicConvolve3x3 : public RsdCpuScriptIntrinsic {
26public:
Stephen Hinesc060f142015-05-13 19:26:09 -070027 void populateScript(Script *) override;
28 void invokeFreeChildren() override;
Jason Sams709a0972012-11-15 18:18:04 -080029
Stephen Hinesc060f142015-05-13 19:26:09 -070030 void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
31 void setGlobalObj(uint32_t slot, ObjectBase *data) override;
Jason Sams709a0972012-11-15 18:18:04 -080032
Stephen Hinesc060f142015-05-13 19:26:09 -070033 ~RsdCpuScriptIntrinsicConvolve3x3() override;
Jason Samsc905efd2012-11-26 15:20:18 -080034 RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
Jason Sams709a0972012-11-15 18:18:04 -080035
36protected:
Jason Samsc905efd2012-11-26 15:20:18 -080037 float mFp[16];
Chih-Hung Hsiehc9a8d132018-08-17 13:37:59 -070038 int16_t mIp[16];
Jason Samsc905efd2012-11-26 15:20:18 -080039 ObjectBaseRef<const Allocation> mAlloc;
40 ObjectBaseRef<const Element> mElement;
Jason Sams709a0972012-11-15 18:18:04 -080041
David Grossb0abb142015-03-12 15:23:03 -070042 static void kernelU1(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070043 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070044 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070045 static void kernelU2(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070046 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070047 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070048 static void kernelU4(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070049 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070050 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070051 static void kernelF1(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070052 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070053 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070054 static void kernelF2(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070055 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070056 uint32_t outstep);
David Grossb0abb142015-03-12 15:23:03 -070057 static void kernelF4(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -070058 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070059 uint32_t outstep);
Jason Sams709a0972012-11-15 18:18:04 -080060};
61
Jason Sams709a0972012-11-15 18:18:04 -080062void RsdCpuScriptIntrinsicConvolve3x3::setGlobalObj(uint32_t slot, ObjectBase *data) {
63 rsAssert(slot == 1);
Jason Samsc905efd2012-11-26 15:20:18 -080064 mAlloc.set(static_cast<Allocation *>(data));
Jason Sams709a0972012-11-15 18:18:04 -080065}
66
67void RsdCpuScriptIntrinsicConvolve3x3::setGlobalVar(uint32_t slot, const void *data,
68 size_t dataLength) {
69 rsAssert(slot == 0);
Jason Samsc905efd2012-11-26 15:20:18 -080070 memcpy (&mFp, data, dataLength);
Jason Sams709a0972012-11-15 18:18:04 -080071 for(int ct=0; ct < 9; ct++) {
Jason Sams3b35d772013-06-25 17:47:02 -070072 if (mFp[ct] >= 0) {
Chih-Hung Hsiehc9a8d132018-08-17 13:37:59 -070073 mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f);
Jason Sams3b35d772013-06-25 17:47:02 -070074 } else {
Chih-Hung Hsiehc9a8d132018-08-17 13:37:59 -070075 mIp[ct] = (int16_t)(mFp[ct] * 256.f - 0.5f);
Jason Sams3b35d772013-06-25 17:47:02 -070076 }
Jason Sams709a0972012-11-15 18:18:04 -080077 }
78}
79
// Externally implemented 3x3 convolution routine with C linkage.
// It takes a destination pointer, three input row pointers, the int16
// coefficient table and a count. The caller in this file advances x and the
// output pointer by two pixels per unit of `count`.
extern "C" void rsdIntrinsicConvolve3x3_K(void *dst,
                                          const void *y0,
                                          const void *y1,
                                          const void *y2,
                                          const int16_t *coef,
                                          uint32_t count);
Jason Sams709a0972012-11-15 18:18:04 -080082
83
David Grossb0abb142015-03-12 15:23:03 -070084static void ConvolveOneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
Jason Sams3b35d772013-06-25 17:47:02 -070085 const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
86 const float* coeff) {
Jason Sams709a0972012-11-15 18:18:04 -080087
88 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -070089 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams709a0972012-11-15 18:18:04 -080090
91 float4 px = convert_float4(py0[x1]) * coeff[0] +
92 convert_float4(py0[x]) * coeff[1] +
93 convert_float4(py0[x2]) * coeff[2] +
94 convert_float4(py1[x1]) * coeff[3] +
95 convert_float4(py1[x]) * coeff[4] +
96 convert_float4(py1[x2]) * coeff[5] +
97 convert_float4(py2[x1]) * coeff[6] +
98 convert_float4(py2[x]) * coeff[7] +
99 convert_float4(py2[x2]) * coeff[8];
100
Miao Wang4283f572014-11-17 14:59:39 -0800101 px = clamp(px + 0.5f, 0.f, 255.f);
Jason Sams709a0972012-11-15 18:18:04 -0800102 uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
103 *out = o;
104}
105
David Grossb0abb142015-03-12 15:23:03 -0700106static void ConvolveOneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
Jason Sams3b35d772013-06-25 17:47:02 -0700107 const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
108 const float* coeff) {
109
110 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -0700111 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams3b35d772013-06-25 17:47:02 -0700112
113 float2 px = convert_float2(py0[x1]) * coeff[0] +
114 convert_float2(py0[x]) * coeff[1] +
115 convert_float2(py0[x2]) * coeff[2] +
116 convert_float2(py1[x1]) * coeff[3] +
117 convert_float2(py1[x]) * coeff[4] +
118 convert_float2(py1[x2]) * coeff[5] +
119 convert_float2(py2[x1]) * coeff[6] +
120 convert_float2(py2[x]) * coeff[7] +
121 convert_float2(py2[x2]) * coeff[8];
122
Miao Wang4283f572014-11-17 14:59:39 -0800123 px = clamp(px + 0.5f, 0.f, 255.f);
Jason Sams3b35d772013-06-25 17:47:02 -0700124 *out = convert_uchar2(px);
125}
126
David Grossb0abb142015-03-12 15:23:03 -0700127static void ConvolveOneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
Jason Sams3b35d772013-06-25 17:47:02 -0700128 const uchar *py0, const uchar *py1, const uchar *py2,
129 const float* coeff) {
130
131 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -0700132 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams3b35d772013-06-25 17:47:02 -0700133
134 float px = ((float)py0[x1]) * coeff[0] +
135 ((float)py0[x]) * coeff[1] +
136 ((float)py0[x2]) * coeff[2] +
137 ((float)py1[x1]) * coeff[3] +
138 ((float)py1[x]) * coeff[4] +
139 ((float)py1[x2]) * coeff[5] +
140 ((float)py2[x1]) * coeff[6] +
141 ((float)py2[x]) * coeff[7] +
142 ((float)py2[x2]) * coeff[8];
Miao Wang4283f572014-11-17 14:59:39 -0800143 *out = clamp(px + 0.5f, 0.f, 255.f);
Jason Sams3b35d772013-06-25 17:47:02 -0700144}
145
David Grossb0abb142015-03-12 15:23:03 -0700146static void ConvolveOneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
Jason Sams3b35d772013-06-25 17:47:02 -0700147 const float4 *py0, const float4 *py1, const float4 *py2,
148 const float* coeff) {
149
150 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -0700151 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams3b35d772013-06-25 17:47:02 -0700152 *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
153 (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
154 (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
155}
156
David Grossb0abb142015-03-12 15:23:03 -0700157static void ConvolveOneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
Jason Sams3b35d772013-06-25 17:47:02 -0700158 const float2 *py0, const float2 *py1, const float2 *py2,
159 const float* coeff) {
160
161 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -0700162 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams3b35d772013-06-25 17:47:02 -0700163 *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
164 (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
165 (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
166}
167
David Grossb0abb142015-03-12 15:23:03 -0700168static void ConvolveOneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
Jason Sams3b35d772013-06-25 17:47:02 -0700169 const float *py0, const float *py1, const float *py2,
170 const float* coeff) {
171
172 uint32_t x1 = rsMax((int32_t)x-1, 0);
David Grossb0abb142015-03-12 15:23:03 -0700173 uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
Jason Sams3b35d772013-06-25 17:47:02 -0700174 *out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
175 (py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
176 (py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
177}
178
David Grossb0abb142015-03-12 15:23:03 -0700179void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700180 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700181 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700182 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams709a0972012-11-15 18:18:04 -0800183
Jason Samsc905efd2012-11-26 15:20:18 -0800184 if (!cp->mAlloc.get()) {
Jason Sams709a0972012-11-15 18:18:04 -0800185 ALOGE("Convolve3x3 executed without input, skipping");
186 return;
187 }
Jason Samsc905efd2012-11-26 15:20:18 -0800188 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
189 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
Jason Sams709a0972012-11-15 18:18:04 -0800190
David Grossb0abb142015-03-12 15:23:03 -0700191 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
192 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams709a0972012-11-15 18:18:04 -0800193 const uchar4 *py0 = (const uchar4 *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700194 const uchar4 *py1 = (const uchar4 *)(pin + stride * info->current.y);
Jason Sams709a0972012-11-15 18:18:04 -0800195 const uchar4 *py2 = (const uchar4 *)(pin + stride * y1);
196
David Grossb0abb142015-03-12 15:23:03 -0700197 uchar4 *out = (uchar4 *)info->outPtr[0];
Jason Sams709a0972012-11-15 18:18:04 -0800198 uint32_t x1 = xstart;
199 uint32_t x2 = xend;
200 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700201 ConvolveOneU4(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams709a0972012-11-15 18:18:04 -0800202 x1 ++;
203 out++;
204 }
205
206 if(x2 > x1) {
Jason Sams074424a2014-05-22 13:30:03 -0700207#if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3)
Jason Samsf5ef8df2013-08-06 13:49:25 -0700208 if (gArchUseSIMD) {
209 int32_t len = (x2 - x1 - 1) >> 1;
210 if(len > 0) {
211 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
212 x1 += len << 1;
213 out += len << 1;
214 }
Jason Sams709a0972012-11-15 18:18:04 -0800215 }
216#endif
217
218 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700219 ConvolveOneU4(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700220 out++;
221 x1++;
222 }
223 }
224}
225
David Grossb0abb142015-03-12 15:23:03 -0700226void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700227 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700228 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700229 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams3b35d772013-06-25 17:47:02 -0700230
231 if (!cp->mAlloc.get()) {
232 ALOGE("Convolve3x3 executed without input, skipping");
233 return;
234 }
235 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
236 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
237
David Grossb0abb142015-03-12 15:23:03 -0700238 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
239 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams3b35d772013-06-25 17:47:02 -0700240 const uchar2 *py0 = (const uchar2 *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700241 const uchar2 *py1 = (const uchar2 *)(pin + stride * info->current.y);
Jason Sams3b35d772013-06-25 17:47:02 -0700242 const uchar2 *py2 = (const uchar2 *)(pin + stride * y1);
243
David Grossb0abb142015-03-12 15:23:03 -0700244 uchar2 *out = (uchar2 *)info->outPtr[0];
Jason Sams3b35d772013-06-25 17:47:02 -0700245 uint32_t x1 = xstart;
246 uint32_t x2 = xend;
247 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700248 ConvolveOneU2(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700249 x1 ++;
250 out++;
251 }
252
253 if(x2 > x1) {
254#if 0//defined(ARCH_ARM_HAVE_NEON)
255 int32_t len = (x2 - x1 - 1) >> 1;
256 if(len > 0) {
257 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
258 x1 += len << 1;
259 out += len << 1;
260 }
261#endif
262
263 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700264 ConvolveOneU2(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700265 out++;
266 x1++;
267 }
268 }
269}
270
David Grossb0abb142015-03-12 15:23:03 -0700271void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700272 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700273 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700274 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams3b35d772013-06-25 17:47:02 -0700275
276 if (!cp->mAlloc.get()) {
277 ALOGE("Convolve3x3 executed without input, skipping");
278 return;
279 }
280 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
281 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
282
David Grossb0abb142015-03-12 15:23:03 -0700283 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
284 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams3b35d772013-06-25 17:47:02 -0700285 const uchar *py0 = (const uchar *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700286 const uchar *py1 = (const uchar *)(pin + stride * info->current.y);
Jason Sams3b35d772013-06-25 17:47:02 -0700287 const uchar *py2 = (const uchar *)(pin + stride * y1);
288
David Grossb0abb142015-03-12 15:23:03 -0700289 uchar *out = (uchar *)info->outPtr[0];
Jason Sams3b35d772013-06-25 17:47:02 -0700290 uint32_t x1 = xstart;
291 uint32_t x2 = xend;
292 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700293 ConvolveOneU1(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700294 x1 ++;
295 out++;
296 }
297
298 if(x2 > x1) {
299#if 0//defined(ARCH_ARM_HAVE_NEON)
300 int32_t len = (x2 - x1 - 1) >> 1;
301 if(len > 0) {
302 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
303 x1 += len << 1;
304 out += len << 1;
305 }
306#endif
307
308 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700309 ConvolveOneU1(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700310 out++;
311 x1++;
312 }
313 }
314}
315
David Grossb0abb142015-03-12 15:23:03 -0700316void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700317 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700318 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700319 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams3b35d772013-06-25 17:47:02 -0700320
321 if (!cp->mAlloc.get()) {
322 ALOGE("Convolve3x3 executed without input, skipping");
323 return;
324 }
325 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
326 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
327
David Grossb0abb142015-03-12 15:23:03 -0700328 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
329 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams3b35d772013-06-25 17:47:02 -0700330 const float4 *py0 = (const float4 *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700331 const float4 *py1 = (const float4 *)(pin + stride * info->current.y);
Jason Sams3b35d772013-06-25 17:47:02 -0700332 const float4 *py2 = (const float4 *)(pin + stride * y1);
333
David Grossb0abb142015-03-12 15:23:03 -0700334 float4 *out = (float4 *)info->outPtr[0];
Jason Sams3b35d772013-06-25 17:47:02 -0700335 uint32_t x1 = xstart;
336 uint32_t x2 = xend;
337 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700338 ConvolveOneF4(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700339 x1 ++;
340 out++;
341 }
342
343 if(x2 > x1) {
344#if 0//defined(ARCH_ARM_HAVE_NEON)
345 int32_t len = (x2 - x1 - 1) >> 1;
346 if(len > 0) {
347 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
348 x1 += len << 1;
349 out += len << 1;
350 }
351#endif
352
353 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700354 ConvolveOneF4(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700355 out++;
356 x1++;
357 }
358 }
359}
360
David Grossb0abb142015-03-12 15:23:03 -0700361void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700362 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700363 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700364 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams3b35d772013-06-25 17:47:02 -0700365
366 if (!cp->mAlloc.get()) {
367 ALOGE("Convolve3x3 executed without input, skipping");
368 return;
369 }
370 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
371 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
372
David Grossb0abb142015-03-12 15:23:03 -0700373 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
374 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams3b35d772013-06-25 17:47:02 -0700375 const float2 *py0 = (const float2 *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700376 const float2 *py1 = (const float2 *)(pin + stride * info->current.y);
Jason Sams3b35d772013-06-25 17:47:02 -0700377 const float2 *py2 = (const float2 *)(pin + stride * y1);
378
David Grossb0abb142015-03-12 15:23:03 -0700379 float2 *out = (float2 *)info->outPtr[0];
Jason Sams3b35d772013-06-25 17:47:02 -0700380 uint32_t x1 = xstart;
381 uint32_t x2 = xend;
382 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700383 ConvolveOneF2(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700384 x1 ++;
385 out++;
386 }
387
388 if(x2 > x1) {
389#if 0//defined(ARCH_ARM_HAVE_NEON)
390 int32_t len = (x2 - x1 - 1) >> 1;
391 if(len > 0) {
392 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
393 x1 += len << 1;
394 out += len << 1;
395 }
396#endif
397
398 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700399 ConvolveOneF2(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700400 out++;
401 x1++;
402 }
403 }
404}
David Grossb0abb142015-03-12 15:23:03 -0700405void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelDriverInfo *info,
Jason Sams3b35d772013-06-25 17:47:02 -0700406 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700407 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -0700408 RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
Jason Sams3b35d772013-06-25 17:47:02 -0700409
410 if (!cp->mAlloc.get()) {
411 ALOGE("Convolve3x3 executed without input, skipping");
412 return;
413 }
414 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
415 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
416
David Grossb0abb142015-03-12 15:23:03 -0700417 uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
418 uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
Jason Sams3b35d772013-06-25 17:47:02 -0700419 const float *py0 = (const float *)(pin + stride * y2);
David Grossb0abb142015-03-12 15:23:03 -0700420 const float *py1 = (const float *)(pin + stride * info->current.y);
Jason Sams3b35d772013-06-25 17:47:02 -0700421 const float *py2 = (const float *)(pin + stride * y1);
422
David Grossb0abb142015-03-12 15:23:03 -0700423 float *out = (float *)info->outPtr[0];
Jason Sams3b35d772013-06-25 17:47:02 -0700424 uint32_t x1 = xstart;
425 uint32_t x2 = xend;
426 if(x1 == 0) {
David Grossb0abb142015-03-12 15:23:03 -0700427 ConvolveOneF1(info, 0, out, py0, py1, py2, cp->mFp);
Jason Sams3b35d772013-06-25 17:47:02 -0700428 x1 ++;
429 out++;
430 }
431
432 if(x2 > x1) {
433#if 0//defined(ARCH_ARM_HAVE_NEON)
434 int32_t len = (x2 - x1 - 1) >> 1;
435 if(len > 0) {
436 rsdIntrinsicConvolve3x3_K(out, &py0[x1-1], &py1[x1-1], &py2[x1-1], cp->mIp, len);
437 x1 += len << 1;
438 out += len << 1;
439 }
440#endif
441
442 while(x1 != x2) {
David Grossb0abb142015-03-12 15:23:03 -0700443 ConvolveOneF1(info, x1, out, py0, py1, py2, cp->mFp);
Jason Sams709a0972012-11-15 18:18:04 -0800444 out++;
445 x1++;
446 }
447 }
448}
449
450RsdCpuScriptIntrinsicConvolve3x3::RsdCpuScriptIntrinsicConvolve3x3(
Jason Samsc905efd2012-11-26 15:20:18 -0800451 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
452 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3) {
Jason Sams709a0972012-11-15 18:18:04 -0800453
Jason Sams3b35d772013-06-25 17:47:02 -0700454 if (e->getType() == RS_TYPE_FLOAT_32) {
455 switch(e->getVectorSize()) {
456 case 1:
457 mRootPtr = &kernelF1;
458 break;
459 case 2:
460 mRootPtr = &kernelF2;
461 break;
462 case 3:
463 case 4:
464 mRootPtr = &kernelF4;
465 break;
466 }
467 } else {
468 switch(e->getVectorSize()) {
469 case 1:
470 mRootPtr = &kernelU1;
471 break;
472 case 2:
473 mRootPtr = &kernelU2;
474 break;
475 case 3:
476 case 4:
477 mRootPtr = &kernelU4;
478 break;
479 }
480 }
Jason Sams709a0972012-11-15 18:18:04 -0800481 for(int ct=0; ct < 9; ct++) {
Jason Samsc905efd2012-11-26 15:20:18 -0800482 mFp[ct] = 1.f / 9.f;
Chih-Hung Hsiehc9a8d132018-08-17 13:37:59 -0700483 mIp[ct] = (int16_t)(mFp[ct] * 256.f + 0.5f);
Jason Sams709a0972012-11-15 18:18:04 -0800484 }
485}
486
487RsdCpuScriptIntrinsicConvolve3x3::~RsdCpuScriptIntrinsicConvolve3x3() {
488}
489
490void RsdCpuScriptIntrinsicConvolve3x3::populateScript(Script *s) {
491 s->mHal.info.exportedVariableCount = 2;
492}
493
494void RsdCpuScriptIntrinsicConvolve3x3::invokeFreeChildren() {
Jason Samsc905efd2012-11-26 15:20:18 -0800495 mAlloc.clear();
Jason Sams709a0972012-11-15 18:18:04 -0800496}
497
Jason Samsc905efd2012-11-26 15:20:18 -0800498RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
Jason Sams709a0972012-11-15 18:18:04 -0800499
Jason Samsc905efd2012-11-26 15:20:18 -0800500 return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s, e);
Jason Sams709a0972012-11-15 18:18:04 -0800501}
Chih-Hung Hsieh462de212016-11-16 11:33:57 -0800502
503} // namespace renderscript
504} // namespace android