/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include "SVDF.h"

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"

#include "Tracing.h"

namespace android {
namespace nn {

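// SVDF is a stateful layer: each input frame is projected through
// weights_feature into a per-filter memory (the state), and that memory is then
// filtered over time with weights_time. The constructor only caches pointers to
// the operation's operands; all shape validation happens in Prepare().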
SVDF::SVDF(const Operation& operation,
           std::vector<RunTimeOperandInfo>& operands) {
    NNTRACE_TRANS("SVDF::SVDF");
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarData<int>(
            *GetInput(operation, operands, kActivationParam)));

    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}

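// Prepare() validates the operand count and tensor dimensions, then derives the
// shapes of the two outputs: the output state ([batch_size, memory_size *
// num_filters]) and the output itself ([batch_size, num_units]).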
bool SVDF::Prepare(const Operation& operation,
                   std::vector<RunTimeOperandInfo>& operands,
                   Shape* stateShape,
                   Shape* outputShape) {
    NNTRACE_TRANS("SVDF::Prepare");
    // Check that we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);

    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    NN_CHECK_EQ(NumOutputs(operation), 2);

    const RunTimeOperandInfo* input =
            GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo* weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo* weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check that the tensor dimensions are consistent with each other and with
    // the input configuration.
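    // Expected shapes, as implied by the checks below:
    //   input:           [batch_size, input_size]
    //   weights_feature: [num_filters, input_size]
    //   weights_time:    [num_filters, memory_size]
    //   bias (optional): [num_units], where num_filters == num_units * rank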
    const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
    NN_CHECK_EQ(num_filters % rank, 0);
    const uint32_t num_units = num_filters / rank;
    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    const RunTimeOperandInfo* bias =
            GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
    const Shape& inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = { batch_size, memory_size * num_filters };
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = { batch_size, num_units };
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}

bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
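            // There is no float16 kernel: convert every operand buffer to
            // float32, run EvalFloat32, then convert the two outputs back.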
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer),
                                    &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(
                    getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());
            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}

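// Float32 evaluation. One SVDF time step proceeds as follows:
//   1. Copy state_in to state_out and write the current frame's feature
//      projection (input * weights_feature) into the newest state slot.
//   2. For every filter, dot-product its memory_size state row with the
//      matching row of weights_time.
//   3. Initialize the output with the bias (or zeros), accumulate each group of
//      `rank` filter results into its unit, and apply the fused activation.
//   4. Age the state by one position so the oldest activation drops out.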
void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

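    // The state holds, per batch, num_filters rows of memory_size past
    // activations. Work on a copy so that state_in is left untouched and every
    // update below goes to state_out.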
    memcpy(outputStateData, inputStateData,
           sizeof(float) * batch_size * memory_size * num_filters);
    // Compute conv1d(inputs, weights_feature).
    // First zero the slot in each filter's state row that will receive the
    // current cycle's activation.
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0f;
        }
    }
    // The state's right most column is used to save the current cycle's
    // activation. This is achieved by starting at outputStateData[memory_size - 1]
    // and using a stride equal to memory_size.
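    // For example, with memory_size = 3 and num_filters = 2, one batch's state
    // is laid out as [f0@t-2, f0@t-1, f0@t, f1@t-2, f1@t-1, f1@t]; the new
    // activations land at indices memory_size - 1 and 2 * memory_size - 1.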
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size,
            &outputStateData[memory_size - 1], memory_size);

    // Compute matmul(state, weights_time).
    // For each batch, take the dot product of every filter's memory_size state
    // row with the matching row of weights_time; the num_filters results per
    // batch are collected in the scratch buffer.
    std::vector<float> scratch(batch_size * num_filters);
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters,
                scratch_ptr_batch, /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
    } else {
        tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units);
    }

    // Reduction sum: accumulate each group of `rank` filter results into its
    // unit's output.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
                                                 rank);
    }

    // Apply activation.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                                      params_.activation_, output_ptr_batch);
    }

    // Age the state: shift each filter's state row left by one position so the
    // oldest activation drops out, and zero the newest slot for the next cycle.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
                                                  /*shift_value=*/0.0f);
            state_out_ptr_batch += memory_size;
        }
    }
}

} // namespace nn
} // namespace android