| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define LOG_TAG "CpuExecutor" |
| |
| #include "CpuExecutor.h" |
| |
| #include "LSHProjection.h" |
| #include "NeuralNetworks.h" |
| #include "Operations.h" |
#include "RNN.h"

#include <new>
| |
| namespace android { |
| namespace nn { |
| |
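// Maps the shared memory region described by |hidlMemory| and caches both the IMemory
// handle and the base pointer of the mapping, so that operand data can later be
// addressed as an offset from |buffer|.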
| bool RunTimePoolInfo::set(const hidl_memory& hidlMemory) { |
| memory = mapMemory(hidlMemory); |
| if (memory == nullptr) { |
| LOG(ERROR) << "Can't map shared memory."; |
| return false; |
| } |
| memory->update(); |
| buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer())); |
| if (buffer == nullptr) { |
| LOG(ERROR) << "Can't access shared memory."; |
| return false; |
| } |
| return true; |
| } |
| |
// Copies the type, dimensions, scale, and offset from |shape| into |info| and, if the
// operand has no buffer yet, allocates one large enough to hold its data.
| static bool allocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) { |
| info->type = shape.type; |
| info->dimensions = shape.dimensions; |
| info->scale = shape.scale; |
| info->offset = shape.offset; |
    if (info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        // Use the non-throwing form of new so that the nullptr check below is meaningful.
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
| return true; |
| } |
| |
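// Reads a scalar operand's value by reinterpreting the first element of its buffer as T.
// The caller must ensure that T matches the operand's declared type.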
template <typename T>
static T getScalarData(const RunTimeOperandInfo& info) {
    const T* data = reinterpret_cast<const T*>(info.buffer);
    return data[0];
}
| |
// The .pools entries of the model and the request are not used here; the caller has
// already mapped them into the runTimePoolInfos that are passed in.
| int CpuExecutor::run(const Model& model, const Request& request, |
| const std::vector<RunTimePoolInfo>& runTimePoolInfos) { |
| LOG(DEBUG) << "CpuExecutor::run()"; |
| LOG(DEBUG) << "model: " << toString(model); |
| LOG(DEBUG) << "request: " << toString(request); |
| |
| mModel = &model; |
| mRequest = &request; // TODO check if mRequest is needed |
    if (!initializeRunTimeInfo(runTimePoolInfos)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // The model stores its operations in execution order.
| for (const auto& operation : model.operations) { |
| int n = executeOperation(operation); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| return n; |
| } |
| } |
| mModel = nullptr; |
| mRequest = nullptr; |
| LOG(DEBUG) << "Completed run normally"; |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
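// Builds the per-operand runtime table from the model, then overlays the dimensions and
// buffer locations that the request supplies for the model's inputs and outputs.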
| bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos) { |
| LOG(DEBUG) << "CpuExecutor::initializeRunTimeInfo"; |
| const size_t count = mModel->operands.size(); |
| mOperands.resize(count); |
| for (size_t i = 0; i < count; i++) { |
| const Operand& from = mModel->operands[i]; |
| if (!setRunTimeOperandInfo(i, from.dimensions, from.location, from.numberOfConsumers, |
| runTimePoolInfos)) { |
| return false; |
| } |
| mOperands[i].type = from.type; |
| mOperands[i].scale = from.scale; |
| mOperands[i].offset = from.zeroPoint; |
| } |
| |
| nnAssert(mModel->inputIndexes.size() == mRequest->inputs.size()); |
| for (size_t i = 0; i < mModel->inputIndexes.size(); i++) { |
| const InputOutputInfo& from = mRequest->inputs[i]; |
| if (!setRunTimeOperandInfo(mModel->inputIndexes[i], from.dimensions, from.location, 0, |
| runTimePoolInfos)) { |
| return false; |
| } |
| } |
| nnAssert(mModel->outputIndexes.size() == mRequest->outputs.size()); |
| for (size_t i = 0; i < mModel->outputIndexes.size(); i++) { |
| const InputOutputInfo& from = mRequest->outputs[i]; |
| if (!setRunTimeOperandInfo(mModel->outputIndexes[i], from.dimensions, from.location, 0, |
| runTimePoolInfos)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
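// Resolves where an operand's data lives, based on its DataLocation:
// - RUN_TIME:   a temporary; the buffer is allocated lazily by allocateIfNeeded() and
//               freed once its last consumer has executed (tracked via numberOfUsesLeft).
// - SAME_BLOCK: a constant stored inline in the model's operandValues blob.
// - otherwise:  an offset into one of the caller-provided runtime memory pools.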
| bool CpuExecutor::setRunTimeOperandInfo(uint32_t operandIndex, |
| const std::vector<uint32_t>& dimensions, |
| const DataLocation& location, uint32_t useCount, |
| const std::vector<RunTimePoolInfo>& runTimePoolInfos) { |
    LOG(DEBUG) << "CpuExecutor::setRunTimeOperandInfo(" << operandIndex << ", "
               << toString(dimensions) << ", " << toString(location) << ")";
| |
| RunTimeOperandInfo& to = mOperands[operandIndex]; |
| if (dimensions.size() > 0) { |
| to.dimensions = dimensions; |
| } |
| if (location.poolIndex == RUN_TIME) { |
| to.buffer = nullptr; |
| to.numberOfUsesLeft = useCount; |
| } else if (location.poolIndex == SAME_BLOCK) { |
| to.buffer = const_cast<uint8_t*>(&mModel->operandValues[location.offset]); |
| to.numberOfUsesLeft = 0; |
| } else { |
| if (location.poolIndex >= runTimePoolInfos.size()) { |
            LOG(ERROR) << "For operand " << operandIndex << ", got poolIndex "
                       << location.poolIndex << " but only " << runTimePoolInfos.size()
                       << " pools are available";
| return false; |
| } |
| auto& r = runTimePoolInfos[location.poolIndex]; |
| to.buffer = r.buffer + location.offset; |
| to.numberOfUsesLeft = 0; |
| } |
| to.length = location.length; |
| return true; |
| } |
| |
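// Called after an operation has executed: decrements the remaining-use count of each of
// its inputs and frees the temporary buffers of operands that have no consumers left.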
| void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) { |
| for (uint32_t i : inputs) { |
| auto& info = mOperands[i]; |
        // A use count of zero means the operand is a constant or a model input/output;
        // its buffer is not owned by the executor, so there is nothing to free here.
| if (info.numberOfUsesLeft == 0) { |
| continue; |
| } |
| nnAssert(mModel->operands[i].location.poolIndex == RUN_TIME); |
| info.numberOfUsesLeft--; |
| if (info.numberOfUsesLeft == 0) { |
| nnAssert(info.buffer != nullptr); |
| delete[] info.buffer; |
| info.buffer = nullptr; |
| } |
| } |
| } |
| |
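// Dispatches a single operation to the corresponding CPU kernel. Most cases follow the
// same pattern: validate the parameter count, call the op's *Prepare() function to
// compute the output shape, allocate the output buffer if needed, then run the float32
// or quant8 kernel selected by operation.opTuple.operandType.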
| int CpuExecutor::executeOperation(const Operation& operation) { |
| LOG(DEBUG) << "CpuExecutor::executeOperation(" << toString(operation) << ")"; |
| const auto& ins = operation.inputs; |
| const auto& outs = operation.outputs; |
| bool success = false; |
| |
| // Function to verify that the number of input and output parameters |
| // matches what is expected. |
| auto parameterCountIs = [&ins, &outs, &operation](size_t expectedIns, |
| size_t expectedOuts) -> bool { |
| if (ins.size() != expectedIns || outs.size() != expectedOuts) { |
            LOG(ERROR) << getOperationName(operation.opTuple.operationType)
                       << ": Invalid number of inputs (" << ins.size() << ", expected "
                       << expectedIns << ") or outputs (" << outs.size() << ", expected "
                       << expectedOuts << ")";
| return false; |
| } |
| return true; |
| }; |
| |
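    // Each case below sets |success|. A case that does not handle the given operandType
    // leaves it false, which is reported as ANEURALNETWORKS_OP_FAILED after the switch.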
| switch (operation.opTuple.operationType) { |
| case OperationType::ADD: { |
| if (!parameterCountIs(3, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& in1 = mOperands[ins[0]]; |
| const RunTimeOperandInfo& in2 = mOperands[ins[1]]; |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]); |
| |
| RunTimeOperandInfo& out = mOperands[outs[0]]; |
| Shape outShape = out.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = addPrepare(in1.shape(), in2.shape(), &outShape) && |
| allocateIfNeeded(&out, outShape) && |
| addFloat32(reinterpret_cast<const float*>(in1.buffer), |
| reinterpret_cast<const float*>(in2.buffer), |
| activation, |
| reinterpret_cast<float*>(out.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::MUL: { |
| if (!parameterCountIs(3, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& in1 = mOperands[ins[0]]; |
| const RunTimeOperandInfo& in2 = mOperands[ins[1]]; |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]); |
| |
| RunTimeOperandInfo& out = mOperands[outs[0]]; |
| Shape outShape = out.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = mulPrepare(in1.shape(), in2.shape(), &outShape) && |
| allocateIfNeeded(&out, outShape) && |
| mulFloat32(reinterpret_cast<const float*>(in1.buffer), |
| reinterpret_cast<const float*>(in2.buffer), |
| activation, |
| reinterpret_cast<float*>(out.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::FLOOR: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = floorPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| floorFloat32(reinterpret_cast<const float*>(input.buffer), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::DEQUANTIZE: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = dequantizePrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| dequantizeQuant8ToFloat32( |
| reinterpret_cast<const uint8_t*>(input.buffer), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::DEPTHWISE_CONV: { |
| if (!parameterCountIs(8, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| const RunTimeOperandInfo& filter = mOperands[ins[1]]; |
| const RunTimeOperandInfo& bias = mOperands[ins[2]]; |
| |
| int32_t padding = getScalarData<int32_t>(mOperands[ins[3]]); |
| int32_t stride_width = getScalarData<int32_t>(mOperands[ins[4]]); |
| int32_t stride_height = getScalarData<int32_t>(mOperands[ins[5]]); |
| int32_t depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[7]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(), |
| padding, stride_width, stride_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<const float*>(filter.buffer), |
| filter.shape(), |
| reinterpret_cast<const float*>(bias.buffer), |
| bias.shape(), |
| padding, stride_width, stride_height, |
| depth_multiplier, activation, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(), |
| padding, stride_width, stride_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<const uint8_t*>(filter.buffer), |
| filter.shape(), |
| reinterpret_cast<const int32_t*>(bias.buffer), |
| bias.shape(), |
| padding, stride_width, stride_height, |
| depth_multiplier, activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::CONV: { |
| if (!parameterCountIs(7, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| const RunTimeOperandInfo& filter = mOperands[ins[1]]; |
| const RunTimeOperandInfo& bias = mOperands[ins[2]]; |
| |
| int32_t padding = getScalarData<int32_t>(mOperands[ins[3]]); |
| int32_t stride_width = getScalarData<int32_t>(mOperands[ins[4]]); |
| int32_t stride_height = getScalarData<int32_t>(mOperands[ins[5]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[6]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = convPrepare(input.shape(), filter.shape(), bias.shape(), |
| padding, stride_width, stride_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(), |
| reinterpret_cast<const float*>(filter.buffer), filter.shape(), |
| reinterpret_cast<const float*>(bias.buffer), bias.shape(), |
| padding, stride_width, stride_height, activation, |
| reinterpret_cast<float*>(output.buffer), outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = convPrepare(input.shape(), filter.shape(), bias.shape(), |
| padding, stride_width, stride_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| convQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<const uint8_t*>(filter.buffer), |
| filter.shape(), |
| reinterpret_cast<const int32_t*>(bias.buffer), |
| bias.shape(), |
| padding, stride_width, stride_height, activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::AVERAGE_POOL: { |
| if (!parameterCountIs(7, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| |
| int32_t padding = getScalarData<int32_t>(mOperands[ins[1]]); |
| int32_t stride_width = getScalarData<int32_t>(mOperands[ins[2]]); |
| int32_t stride_height = getScalarData<int32_t>(mOperands[ins[3]]); |
| int32_t filter_width = getScalarData<int32_t>(mOperands[ins[4]]); |
| int32_t filter_height = getScalarData<int32_t>(mOperands[ins[5]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[6]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericPoolingPrepare(input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| averagePoolFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, activation, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericPoolingPrepare(input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::L2_POOL: { |
| if (!parameterCountIs(7, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| |
| int32_t padding = getScalarData<int32_t>(mOperands[ins[1]]); |
| int32_t stride_width = getScalarData<int32_t>(mOperands[ins[2]]); |
| int32_t stride_height = getScalarData<int32_t>(mOperands[ins[3]]); |
| int32_t filter_width = getScalarData<int32_t>(mOperands[ins[4]]); |
| int32_t filter_height = getScalarData<int32_t>(mOperands[ins[5]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[6]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericPoolingPrepare(input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| l2PoolFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, activation, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::MAX_POOL: { |
| if (!parameterCountIs(7, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| |
| int32_t padding = getScalarData<int32_t>(mOperands[ins[1]]); |
| int32_t stride_width = getScalarData<int32_t>(mOperands[ins[2]]); |
| int32_t stride_height = getScalarData<int32_t>(mOperands[ins[3]]); |
| int32_t filter_width = getScalarData<int32_t>(mOperands[ins[4]]); |
| int32_t filter_height = getScalarData<int32_t>(mOperands[ins[5]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[6]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericPoolingPrepare(input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| maxPoolFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, activation, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericPoolingPrepare(input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| padding, stride_width, stride_height, |
| filter_width, filter_height, activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::RELU: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| reluFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::RELU1: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| relu1Float32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::RELU6: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| relu6Float32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::TANH: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| tanhFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::LOGISTIC: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| logisticFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::SOFTMAX: { |
| if (!parameterCountIs(2, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| RunTimeOperandInfo& input = mOperands[ins[0]]; |
| float beta = getScalarData<float>(mOperands[ins[1]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| softmaxFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| beta, |
| reinterpret_cast<float*>(output.buffer), |
                                         outShape);
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericActivationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| beta, |
| reinterpret_cast<uint8_t*>(output.buffer), |
                                        outShape);
| } |
| } break; |
| case OperationType::FULLY_CONNECTED: { |
| if (!parameterCountIs(4, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& weights = mOperands[ins[1]]; |
| RunTimeOperandInfo& bias = mOperands[ins[2]]; |
| |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(), |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<const float*>(weights.buffer), |
| weights.shape(), |
| reinterpret_cast<const float*>(bias.buffer), |
| bias.shape(), |
| activation, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(), |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<const uint8_t*>(weights.buffer), |
| weights.shape(), |
| reinterpret_cast<const int32_t*>(bias.buffer), |
| bias.shape(), |
| activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::CONCATENATION: { |
| if (outs.size() != 1 || ins.size() < 3) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| int numInputTensors = ins.size() - 2; |
| int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]); |
| int32_t activation = getScalarData<int32_t>(mOperands[ins[numInputTensors+1]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| std::vector<Shape> inputShapes(numInputTensors); |
| std::vector<const float*> inputDataPtrs(numInputTensors); |
| |
                for (int i = 0; i < numInputTensors; i++) {
| RunTimeOperandInfo& input = mOperands[ins[i]]; |
| inputShapes[i] = input.shape(); |
| inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer); |
| } |
| success = concatenationPrepare(inputShapes, axis, &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| concatenationFloat32(inputDataPtrs, inputShapes, |
| axis, activation, |
| reinterpret_cast<float*>(output.buffer), outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| std::vector<Shape> inputShapes(numInputTensors); |
| std::vector<const uint8_t*> inputDataPtrs(numInputTensors); |
| |
                for (int i = 0; i < numInputTensors; i++) {
| RunTimeOperandInfo& input = mOperands[ins[i]]; |
| inputShapes[i] = input.shape(); |
| inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer); |
| } |
| success = concatenationPrepare(inputShapes, axis, &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| concatenationQuant8(inputDataPtrs, inputShapes, |
| axis, activation, |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::L2_NORMALIZATION: { |
| if (!parameterCountIs(1, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericNormalizationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| l2normFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) { |
| success = genericNormalizationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<uint8_t*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::LOCAL_RESPONSE_NORMALIZATION: { |
| if (!parameterCountIs(5, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]); |
| float bias = getScalarData<float>(mOperands[ins[2]]); |
| float alpha = getScalarData<float>(mOperands[ins[3]]); |
| float beta = getScalarData<float>(mOperands[ins[4]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = genericNormalizationPrepare(input.shape(), &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| radius, bias, alpha, beta, |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::RESHAPE: { |
| if (!parameterCountIs(2, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| const RunTimeOperandInfo& targetShape = mOperands[ins[1]]; |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| success = reshapePrepare(input.shape(), |
| reinterpret_cast<const int32_t*>(targetShape.buffer), |
| getNumberOfElements(targetShape.shape()), |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| reshapeGeneric(reinterpret_cast<const void*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<void*>(output.buffer), |
| outShape); |
| } break; |
| case OperationType::RESIZE_BILINEAR: { |
| if (!parameterCountIs(3, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| int32_t height = getScalarData<int32_t>(mOperands[ins[1]]); |
| int32_t width = getScalarData<int32_t>(mOperands[ins[2]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) { |
| success = resizeBilinearPrepare(input.shape(), |
| height, width, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer), |
| input.shape(), |
| reinterpret_cast<float*>(output.buffer), |
| outShape); |
| } |
| } break; |
| case OperationType::DEPTH_TO_SPACE: { |
| if (!parameterCountIs(2, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| success = depthToSpacePrepare(input.shape(), |
| blockSize, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| depthToSpaceGeneric(input.buffer, |
| input.shape(), |
| blockSize, |
| output.buffer, |
| outShape); |
| } break; |
| case OperationType::SPACE_TO_DEPTH: { |
| if (!parameterCountIs(2, 1)) { |
| return ANEURALNETWORKS_BAD_DATA; |
| } |
| const RunTimeOperandInfo& input = mOperands[ins[0]]; |
| int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]); |
| |
| RunTimeOperandInfo& output = mOperands[outs[0]]; |
| Shape outShape = output.shape(); |
| |
| success = spaceToDepthPrepare(input.shape(), |
| blockSize, |
| &outShape) && |
| allocateIfNeeded(&output, outShape) && |
| spaceToDepthGeneric(input.buffer, |
| input.shape(), |
| blockSize, |
| output.buffer, |
| outShape); |
| } break; |
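        // LSH_PROJECTION and RNN are handled by dedicated classes that pull their
        // operands directly from mOperands; parameter validation is assumed to happen
        // inside the respective Eval() implementations.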
| case OperationType::LSH_PROJECTION: { |
| LSHProjection lsh(operation, mOperands); |
| success = lsh.Eval(); |
| } break; |
| case OperationType::RNN: { |
| RNN rnn_cell(operation, mOperands); |
| success = rnn_cell.Eval(); |
| } break; |
| default: |
| nnAssert(false); |
| break; |
| } |
| if (!success) { |
| LOG(ERROR) << getOperationName(operation.opTuple.operationType) << " failed."; |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| freeNoLongerUsedOperands(ins); |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| } // namespace nn |
| } // namespace android |