| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| #include "rsCpuIntrinsic.h" |
| #include "rsCpuIntrinsicInlines.h" |
| |
| using namespace android; |
| using namespace android::renderscript; |
| |
| namespace android { |
| namespace renderscript { |
| |
| |
| class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic { |
| public: |
| virtual void populateScript(Script *); |
| virtual void invokeFreeChildren(); |
| |
| virtual void setGlobalObj(uint32_t slot, ObjectBase *data); |
| |
| virtual ~RsdCpuScriptIntrinsicResize(); |
| RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); |
| |
| virtual void preLaunch(uint32_t slot, const Allocation * ain, |
| Allocation * aout, const void * usr, |
| uint32_t usrLen, const RsScriptCall *sc); |
| |
| float scaleX; |
| float scaleY; |
| |
| protected: |
| ObjectBaseRef<const Allocation> mAlloc; |
| ObjectBaseRef<const Element> mElement; |
| |
| static void kernelU1(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep); |
| static void kernelU2(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep); |
| static void kernelU4(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep); |
| }; |
| |
| } |
| } |
| |
| |
| void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) { |
| rsAssert(slot == 0); |
| mAlloc.set(static_cast<Allocation *>(data)); |
| } |
| |
| |
| extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, |
| const void *y2, const short *coef, uint32_t count); |
| |
| static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) { |
| return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 |
| + x * (3.f * (p1 - p2) + p3 - p0))); |
| } |
| |
| static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { |
| return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 |
| + x * (3.f * (p1 - p2) + p3 - p0))); |
| } |
| |
| static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { |
| return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 |
| + x * (3.f * (p1 - p2) + p3 - p0))); |
| } |
| |
| static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, |
| float xf, float yf, int width) { |
| int startx = (int) floor(xf - 2); |
| xf = xf - floor(xf); |
| int maxx = width - 1; |
| int xs0 = rsMax(0, startx + 0); |
| int xs1 = rsMax(0, startx + 1); |
| int xs2 = rsMin(maxx, startx + 2); |
| int xs3 = rsMin(maxx, startx + 3); |
| |
| float4 p0 = cubicInterpolate(convert_float4(yp0[xs0]), |
| convert_float4(yp0[xs1]), |
| convert_float4(yp0[xs2]), |
| convert_float4(yp0[xs3]), xf); |
| |
| float4 p1 = cubicInterpolate(convert_float4(yp1[xs0]), |
| convert_float4(yp1[xs1]), |
| convert_float4(yp1[xs2]), |
| convert_float4(yp1[xs3]), xf); |
| |
| float4 p2 = cubicInterpolate(convert_float4(yp2[xs0]), |
| convert_float4(yp2[xs1]), |
| convert_float4(yp2[xs2]), |
| convert_float4(yp2[xs3]), xf); |
| |
| float4 p3 = cubicInterpolate(convert_float4(yp3[xs0]), |
| convert_float4(yp3[xs1]), |
| convert_float4(yp3[xs2]), |
| convert_float4(yp3[xs3]), xf); |
| |
| float4 p = cubicInterpolate(p0, p1, p2, p3, yf); |
| p = clamp(p, 0.f, 255.f); |
| return convert_uchar4(p); |
| } |
| |
| static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, |
| float xf, float yf, int width) { |
| int startx = (int) floor(xf - 2); |
| xf = xf - floor(xf); |
| int maxx = width - 1; |
| int xs0 = rsMax(0, startx + 0); |
| int xs1 = rsMax(0, startx + 1); |
| int xs2 = rsMin(maxx, startx + 2); |
| int xs3 = rsMin(maxx, startx + 3); |
| |
| float2 p0 = cubicInterpolate(convert_float2(yp0[xs0]), |
| convert_float2(yp0[xs1]), |
| convert_float2(yp0[xs2]), |
| convert_float2(yp0[xs3]), xf); |
| |
| float2 p1 = cubicInterpolate(convert_float2(yp1[xs0]), |
| convert_float2(yp1[xs1]), |
| convert_float2(yp1[xs2]), |
| convert_float2(yp1[xs3]), xf); |
| |
| float2 p2 = cubicInterpolate(convert_float2(yp2[xs0]), |
| convert_float2(yp2[xs1]), |
| convert_float2(yp2[xs2]), |
| convert_float2(yp2[xs3]), xf); |
| |
| float2 p3 = cubicInterpolate(convert_float2(yp3[xs0]), |
| convert_float2(yp3[xs1]), |
| convert_float2(yp3[xs2]), |
| convert_float2(yp3[xs3]), xf); |
| |
| float2 p = cubicInterpolate(p0, p1, p2, p3, yf); |
| p = clamp(p, 0.f, 255.f); |
| return convert_uchar2(p); |
| } |
| |
| static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, |
| float xf, float yf, int width) { |
| int startx = (int) floor(xf - 2); |
| xf = xf - floor(xf); |
| int maxx = width - 1; |
| int xs0 = rsMax(0, startx + 0); |
| int xs1 = rsMax(0, startx + 1); |
| int xs2 = rsMin(maxx, startx + 2); |
| int xs3 = rsMin(maxx, startx + 3); |
| |
| float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], |
| (float)yp0[xs2], (float)yp0[xs3], xf); |
| float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], |
| (float)yp1[xs2], (float)yp1[xs3], xf); |
| float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], |
| (float)yp2[xs2], (float)yp2[xs3], xf); |
| float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], |
| (float)yp3[xs2], (float)yp3[xs3], xf); |
| |
| float p = cubicInterpolate(p0, p1, p2, p3, yf); |
| p = clamp(p, 0.f, 255.f); |
| return (uchar)p; |
| } |
| |
| void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep) { |
| RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; |
| |
| if (!cp->mAlloc.get()) { |
| ALOGE("Resize executed without input, skipping"); |
| return; |
| } |
| const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; |
| const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; |
| const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; |
| const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; |
| |
| float yf = p->y * cp->scaleY; |
| int starty = (int) floor(yf - 2); |
| yf = yf - floor(yf); |
| int maxy = srcHeight - 1; |
| int ys0 = rsMax(0, starty + 0); |
| int ys1 = rsMax(0, starty + 1); |
| int ys2 = rsMin(maxy, starty + 2); |
| int ys3 = rsMin(maxy, starty + 3); |
| |
| const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); |
| const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); |
| const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); |
| const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); |
| |
| uchar4 *out = ((uchar4 *)p->out) + xstart; |
| uint32_t x1 = xstart; |
| uint32_t x2 = xend; |
| |
| while(x1 < x2) { |
| float xf = x1 * cp->scaleX; |
| *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); |
| out++; |
| x1++; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep) { |
| RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; |
| |
| if (!cp->mAlloc.get()) { |
| ALOGE("Resize executed without input, skipping"); |
| return; |
| } |
| const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; |
| const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; |
| const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; |
| const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; |
| |
| float yf = p->y * cp->scaleY; |
| int starty = (int) floor(yf - 2); |
| yf = yf - floor(yf); |
| int maxy = srcHeight - 1; |
| int ys0 = rsMax(0, starty + 0); |
| int ys1 = rsMax(0, starty + 1); |
| int ys2 = rsMin(maxy, starty + 2); |
| int ys3 = rsMin(maxy, starty + 3); |
| |
| const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); |
| const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); |
| const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); |
| const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); |
| |
| uchar2 *out = ((uchar2 *)p->out) + xstart; |
| uint32_t x1 = xstart; |
| uint32_t x2 = xend; |
| |
| while(x1 < x2) { |
| float xf = x1 * cp->scaleX; |
| *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); |
| out++; |
| x1++; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p, |
| uint32_t xstart, uint32_t xend, |
| uint32_t instep, uint32_t outstep) { |
| RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; |
| |
| if (!cp->mAlloc.get()) { |
| ALOGE("Resize executed without input, skipping"); |
| return; |
| } |
| const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; |
| const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; |
| const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; |
| const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; |
| |
| float yf = p->y * cp->scaleY; |
| int starty = (int) floor(yf - 2); |
| yf = yf - floor(yf); |
| int maxy = srcHeight - 1; |
| int ys0 = rsMax(0, starty + 0); |
| int ys1 = rsMax(0, starty + 1); |
| int ys2 = rsMin(maxy, starty + 2); |
| int ys3 = rsMin(maxy, starty + 3); |
| |
| const uchar *yp0 = pin + stride * ys0; |
| const uchar *yp1 = pin + stride * ys1; |
| const uchar *yp2 = pin + stride * ys2; |
| const uchar *yp3 = pin + stride * ys3; |
| |
| uchar *out = ((uchar *)p->out) + xstart; |
| uint32_t x1 = xstart; |
| uint32_t x2 = xend; |
| |
| while(x1 < x2) { |
| float xf = x1 * cp->scaleX; |
| *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); |
| out++; |
| x1++; |
| } |
| } |
| |
| RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( |
| RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) |
| : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) { |
| |
| } |
| |
| RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { |
| } |
| |
| void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain, |
| Allocation * aout, const void * usr, |
| uint32_t usrLen, const RsScriptCall *sc) |
| { |
| if (!mAlloc.get()) { |
| ALOGE("Resize executed without input, skipping"); |
| return; |
| } |
| const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY; |
| const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX; |
| const size_t stride = mAlloc->mHal.drvState.lod[0].stride; |
| |
| switch(mAlloc->getType()->getElement()->getVectorSize()) { |
| case 1: |
| mRootPtr = &kernelU1; |
| break; |
| case 2: |
| mRootPtr = &kernelU2; |
| break; |
| case 3: |
| case 4: |
| mRootPtr = &kernelU4; |
| break; |
| } |
| |
| scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX; |
| scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY; |
| |
| } |
| |
| void RsdCpuScriptIntrinsicResize::populateScript(Script *s) { |
| s->mHal.info.exportedVariableCount = 1; |
| } |
| |
| void RsdCpuScriptIntrinsicResize::invokeFreeChildren() { |
| mAlloc.clear(); |
| } |
| |
| |
| RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { |
| |
| return new RsdCpuScriptIntrinsicResize(ctx, s, e); |
| } |
| |
| |