| /* |
| * Copyright (C) 2012 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef RSD_CPU_CORE_H |
| #define RSD_CPU_CORE_H |
| |
| #include "rsd_cpu.h" |
| #include "rsSignal.h" |
| #include "rsContext.h" |
| #include "rsCppUtils.h" |
| #include "rsElement.h" |
| #include "rsScriptC.h" |
| #include "rsCpuCoreRuntime.h" |
| |
| namespace android { |
| namespace renderscript { |
| |
| // Whether the CPU we're running on supports SIMD instructions |
| extern bool gArchUseSIMD; |
| |
| // Function types found in RenderScript code |
| typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); |
| typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other); |
| typedef void (*ReduceInitializerFunc_t)(uint8_t *accum); |
| typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); |
| typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); |
| typedef void (*InvokeFunc_t)(void *params); |
| typedef void (*InitOrDtorFunc_t)(void); |
| typedef int (*RootFunc_t)(void); |
| |
| struct ReduceDescription { |
| ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function |
| ReduceInitializerFunc_t initFunc; // user initializer function |
| ReduceCombinerFunc_t combFunc; // user combiner function |
| ReduceOutConverterFunc_t outFunc; // user outconverter function |
| size_t accumSize; // accumulator datum size, in bytes |
| }; |
| |
// Signature of the internal driver callback used to execute a kernel.
// `usr` is an opaque pointer handed through to the callback; `idx` is an
// index whose exact meaning is defined by the caller (not visible here).
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);

// Forward declarations for types this header only refers to by pointer
// before (or without) seeing their definitions.
class RsdCpuReferenceImpl;
class RsdCpuScriptImpl;
| |
| struct ScriptTLSStruct { |
| android::renderscript::Context * mContext; |
| const android::renderscript::Script * mScript; |
| RsdCpuScriptImpl *mImpl; |
| }; |
| |
| // MTLaunchStruct passes information about a multithreaded kernel launch. |
| struct MTLaunchStructCommon { |
| RsdCpuReferenceImpl *rs; |
| RsdCpuScriptImpl *script; |
| |
| uint32_t mSliceSize; |
| volatile int mSliceNum; |
| bool isThreadable; |
| |
| // Boundary information about the launch |
| RsLaunchDimensions start; |
| RsLaunchDimensions end; |
| // Points to MTLaunchStructForEach::fep::dim or |
| // MTLaunchStructReduce::redp::dim. |
| RsLaunchDimensions *dimPtr; |
| }; |
| |
| struct MTLaunchStructForEach : public MTLaunchStructCommon { |
| // Driver info structure |
| RsExpandKernelDriverInfo fep; |
| |
| ForEachFunc_t kernel; |
| const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; |
| Allocation *aout[RS_KERNEL_INPUT_LIMIT]; |
| }; |
| |
| struct MTLaunchStructReduce : public MTLaunchStructCommon { |
| // Driver info structure |
| RsExpandKernelDriverInfo redp; |
| |
| const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; |
| |
| ReduceAccumulatorFunc_t accumFunc; |
| ReduceInitializerFunc_t initFunc; |
| ReduceCombinerFunc_t combFunc; |
| ReduceOutConverterFunc_t outFunc; |
| |
| size_t accumSize; // accumulator datum size in bytes |
| |
| size_t accumStride; // stride between accumulators in accumAlloc (below) |
| |
| // These fields are used for managing accumulator data items in a |
| // multithreaded execution. |
| // |
| // Let the number of threads be N. |
| // Let Outc be true iff there is an outconverter. |
| // |
| // accumAlloc is a pointer to a single allocation of (N - !Outc) |
| // accumulators. (If there is no outconverter, then the output |
| // allocation acts as an accumulator.) It is created at kernel |
| // launch time. Within that allocation, the distance between the |
| // start of adjacent accumulators is accumStride bytes -- this |
| // might be the same as accumSize, or it might be larger, if we |
| // are attempting to avoid false sharing. |
| // |
| // accumCount is an atomic counter of how many accumulators have |
| // been grabbed by threads. It is initialized to zero at kernel |
| // launch time. See accumPtr for further description. |
| // |
| // accumPtr is pointer to an array of N pointers to accumulators. |
| // The array is created at kernel launch time, and each element is |
| // initialized to nullptr. When a particular thread goes to work, |
| // that thread obtains its accumulator from its entry in this |
| // array. If the entry is nullptr, that thread needs to obtain an |
| // accumulator, and initialize its entry in the array accordingly. |
| // It does so via atomic access (fetch-and-add) to accumCount. |
| // - If Outc, then the fetched value is used as an index into |
| // accumAlloc. |
| // - If !Outc, then |
| // - If the fetched value is zero, then this thread gets the |
| // output allocation for its accumulator. |
| // - If the fetched value is nonzero, then (fetched value - 1) |
| // is used as an index into accumAlloc. |
| uint8_t *accumAlloc; |
| uint8_t **accumPtr; |
| uint32_t accumCount; |
| |
| // Logging control |
| uint32_t logReduce; |
| }; |
| |
| class RsdCpuReferenceImpl : public RsdCpuReference { |
| public: |
| ~RsdCpuReferenceImpl() override; |
| RsdCpuReferenceImpl(Context *); |
| |
| void lockMutex(); |
| void unlockMutex(); |
| |
| bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); |
| void setPriority(int32_t priority) override; |
| virtual void launchThreads(WorkerCallback_t cbk, void *data); |
| static void * helperThreadProc(void *vrsc); |
| RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); |
| |
| Context * getContext() {return mRSC;} |
| uint32_t getThreadCount() const { |
| return mWorkers.mCount + 1; |
| } |
| |
| // Launch foreach kernel |
| void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, |
| const RsScriptCall *sc, MTLaunchStructForEach *mtls); |
| |
| // Launch a general reduce kernel |
| void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout, |
| MTLaunchStructReduce *mtls); |
| |
| CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, |
| uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; |
| CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override; |
| void* createScriptGroup(const ScriptGroupBase *sg) override; |
| |
| const RsdCpuReference::CpuSymbol *symLookup(const char *); |
| |
| RsdCpuReference::CpuScript *lookupScript(const Script *s) { |
| return mScriptLookupFn(mRSC, s); |
| } |
| |
| void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) { |
| mSelectRTCallback = pSelectRTCallback; |
| } |
| RSSelectRTCallback getSelectRTCallback() { |
| return mSelectRTCallback; |
| } |
| |
| virtual void setBccPluginName(const char *name) { |
| mBccPluginName.setTo(name); |
| } |
| virtual const char *getBccPluginName() const { |
| return mBccPluginName.string(); |
| } |
| bool getInKernel() override { return mInKernel; } |
| |
| // Set to true if we should embed global variable information in the code. |
| void setEmbedGlobalInfo(bool v) override { |
| mEmbedGlobalInfo = v; |
| } |
| |
| // Returns true if we should embed global variable information in the code. |
| bool getEmbedGlobalInfo() const override { |
| return mEmbedGlobalInfo; |
| } |
| |
| // Set to true if we should skip constant (immutable) global variables when |
| // potentially embedding information about globals. |
| void setEmbedGlobalInfoSkipConstant(bool v) override { |
| mEmbedGlobalInfoSkipConstant = v; |
| } |
| |
| // Returns true if we should skip constant (immutable) global variables when |
| // potentially embedding information about globals. |
| bool getEmbedGlobalInfoSkipConstant() const override { |
| return mEmbedGlobalInfoSkipConstant; |
| } |
| |
| protected: |
| Context *mRSC; |
| uint32_t version_major; |
| uint32_t version_minor; |
| //bool mHasGraphics; |
| bool mInKernel; // Is a parallel kernel execution underway? |
| |
| struct Workers { |
| volatile int mRunningCount; |
| volatile int mLaunchCount; |
| uint32_t mCount; |
| pthread_t *mThreadId; |
| pid_t *mNativeThreadId; |
| Signal mCompleteSignal; |
| Signal *mLaunchSignals; |
| WorkerCallback_t mLaunchCallback; |
| void *mLaunchData; |
| }; |
| Workers mWorkers; |
| bool mExit; |
| sym_lookup_t mSymLookupFn; |
| script_lookup_t mScriptLookupFn; |
| |
| ScriptTLSStruct mTlsStruct; |
| |
| RSSelectRTCallback mSelectRTCallback; |
| String8 mBccPluginName; |
| |
| // Specifies whether we should embed global variable information in the |
| // code via special RS variables that can be examined later by the driver. |
| // Defaults to true. |
| bool mEmbedGlobalInfo; |
| |
| // Specifies whether we should skip constant (immutable) global variables |
| // when potentially embedding information about globals. |
| // Defaults to true. |
| bool mEmbedGlobalInfoSkipConstant; |
| |
| long mPageSize; |
| |
| // Launch a general reduce kernel |
| void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, |
| MTLaunchStructReduce *mtls); |
| void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, |
| MTLaunchStructReduce *mtls); |
| }; |
| |
| |
| } |
| } |
| |
| #endif |