// common/cpu_operations/SVDF.cpp - platform/packages/modules/NeuralNetworks - Git at Google

 /*
  * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #define LOG_TAG "Operations"

 #include "SVDF.h"

 #include <tensorflow/lite/kernels/internal/tensor_utils.h>

 #include <algorithm>
 #include <vector>

 #include "CpuExecutor.h"
 #include "CpuOperationUtils.h"
 #include "Tracing.h"

 namespace android {
 namespace nn {

 // Looks up and caches the operand pointers this operation works on, and reads
 // the two scalar parameters (rank and fused activation) into params_.
 //
 // operation: the operation whose inputs and outputs are resolved.
 // operands:  operand table forwarded to GetInput()/GetOutput().
 SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
     NNTRACE_TRANS("SVDF::SVDF");

     // Output operands.
     output_ = GetOutput(operation, operands, kOutputTensor);
     state_out_ = GetOutput(operation, operands, kStateOutTensor);

     // Tensor inputs. bias_ is cached unconditionally; the evaluation code
     // checks IsNullInput(bias_) before reading it.
     input_ = GetInput(operation, operands, kInputTensor);
     state_in_ = GetInput(operation, operands, kStateInTensor);
     bias_ = GetInput(operation, operands, kBiasTensor);
     weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
     weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);

     // Scalar parameters. The fallback values are 0 for the rank and
     // kTfLiteActNone for the activation.
     // NOTE(review): presumably the fallback applies when the operand carries
     // no data -- confirm against getScalarDataWithDefault().
     const auto& rankInfo = *GetInput(operation, operands, kRankParam);
     const auto& activationInfo = *GetInput(operation, operands, kActivationParam);
     const int activationValue = getScalarDataWithDefault<int>(
             activationInfo, TfLiteFusedActivation::kTfLiteActNone);
     params_.rank_ = getScalarDataWithDefault<int>(rankInfo, 0);
     params_.activation_ = static_cast<ActivationFn>(activationValue);
 }

 bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
                    Shape* outputShape) {
     NNTRACE_TRANS("SVDF::Prepare");
     // Check we have all the inputs and outputs we need.
     const int num_inputs = NumInputsWithValues(operation, operands);

     NN_CHECK(num_inputs == 6 || num_inputs == 7);
     constexpr int requiredInputs[] = {
             kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
             kRankParam,   kActivationParam,
     };
     for (const int requiredInput : requiredInputs) {
         NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
                 << "required input " << requiredInput << " is omitted";
     }
     NN_CHECK_EQ(NumOutputs(operation), 2);

     // Check that the scalar operands' buffers are large enough.
     const auto& rankOperand = *GetInput(operation, operands, kRankParam);
     NN_RET_CHECK(rankOperand.length >= sizeof(int));
     const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
     NN_RET_CHECK(activationOperand.length >= sizeof(int));

     const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
     const RunTimeOperandInfo* weights_feature =
             GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
     const RunTimeOperandInfo* weights_time =
             GetInput(operation, operands, SVDF::kWeightsTimeTensor);

     // Check all the parameters of tensor match within themselves and match the
     // input configuration.
     const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
     const uint32_t batch_size = SizeOfDimension(input, 0);
     const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
     NN_CHECK_EQ(num_filters % rank, 0u);
     const uint32_t num_units = num_filters / rank;
     const uint32_t memory_size = SizeOfDimension(weights_time, 1);
     NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
     NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

     const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
     if (!IsNullInput(bias)) {
         NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
     }

     // Resize state.
     const Shape& inputShape = input->shape();
     stateShape->type = inputShape.type;
     stateShape->dimensions = {batch_size, memory_size * num_filters};
     stateShape->offset = inputShape.offset;
     stateShape->scale = inputShape.scale;

     // Resize output.
     outputShape->type = inputShape.type;
     outputShape->dimensions = {batch_size, num_units};
     outputShape->offset = inputShape.offset;
     outputShape->scale = inputShape.scale;

     return true;
 }

 bool SVDF::Eval() {
     NNTRACE_TRANS("SVDF::Eval");
     switch (input_->type) {
         case OperandType::TENSOR_FLOAT16: {
             std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
             convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
             std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
             convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                     &inputStateDataFloat32);
             std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
             if (!IsNullInput(bias_)) {
                 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                         &biasDataFloat32);
             }
             std::vector<float> weightsFeatureDataFloat32(
                     getNumberOfElements(weights_feature_->shape()));
             convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                     &weightsFeatureDataFloat32);
             std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
             convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                     &weightsTimeDataFloat32);
             std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
             std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

             EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                         biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                         weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                         outputStateDataFloat32.data());
             convertFloat32ToFloat16(outputDataFloat32,
                                     reinterpret_cast<_Float16*>(output_->buffer));
             convertFloat32ToFloat16(outputStateDataFloat32,
                                     reinterpret_cast<_Float16*>(state_out_->buffer));
             break;
         }
         case OperandType::TENSOR_FLOAT32: {
             EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                         reinterpret_cast<float*>(state_in_->buffer),
                         reinterpret_cast<float*>(bias_->buffer),
                         reinterpret_cast<float*>(weights_feature_->buffer),
                         reinterpret_cast<float*>(weights_time_->buffer),
                         reinterpret_cast<float*>(output_->buffer),
                         reinterpret_cast<float*>(state_out_->buffer));
             break;
         }
         default: {
             LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
             return false;
         }
     }
     return true;
 }

 // Float32 SVDF kernel.
 //
 // Sizes are taken from the cached operands: batch_size and input_size from
 // input_, num_filters from weights_feature_, memory_size from weights_time_,
 // and num_units = num_filters / params_.rank_ (so the rank must be non-zero).
 //
 // inputData:          input activations (batch_size * input_size floats read).
 // inputStateData:     incoming state (batch_size * num_filters * memory_size
 //                     floats are copied).
 // biasData:           bias; only read when bias_ is not a null input.
 // weightsFeatureData: feature weights (num_filters rows of input_size).
 // weightsTimeData:    time weights (num_filters rows of memory_size).
 // outputData:         out; batch_size * num_units results.
 // outputStateData:    out; updated state, same layout as inputStateData.
 void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                        const float* weightsFeatureData, const float* weightsTimeData,
                        float* outputData, float* outputStateData) {
     NNTRACE_COMP("SVDF::EvalFloat32");

     const int rank = params_.rank_;
     const int batch_size = SizeOfDimension(input_, 0);
     const int input_size = SizeOfDimension(input_, 1);
     const int num_filters = SizeOfDimension(weights_feature_, 0);
     const int num_units = num_filters / rank;
     const int memory_size = SizeOfDimension(weights_time_, 1);

     // Work on a copy of the incoming state so the input state is left untouched.
     memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
     // Compute conv1d(inputs, weights_feature).
     // First clear the newest slot (index memory_size - 1) of every filter.
     for (int b = 0; b < batch_size; b++) {
         float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
         for (int c = 0; c < num_filters; c++) {
             float* state_ptr = state_ptr_batch + c * memory_size;
             state_ptr[memory_size - 1] = 0.0;
         }
     }

     // Clear scratch (the matmul is accumulative).
     // Heap-backed storage instead of a variable-length array: VLAs are not
     // standard C++, and the size here is determined by the model, so a large
     // batch_size * num_filters could overflow the stack.
     std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters, 0.0f);
     tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
             weightsFeatureData, num_filters, input_size, inputData, batch_size, scratch.data());

     // Copy the latest activation from scratch into activation_state:
     // The last, i.e. (memory_size-1)th entry for each batch, and filter.
     for (int i = 0; i < batch_size * num_filters; ++i) {
         outputStateData[i * memory_size + memory_size - 1] = scratch[i];
     }

     // Begin ApplyTimeWeightsBiasAndActivation
     // Compute matmul(state, weights_time).
     for (int b = 0; b < batch_size; b++) {
         float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
         float* scratch_ptr_batch = scratch.data() + b * num_filters;
         tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                 weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch);
     }

     // Reduction sum
     tflite::tensor_utils::ReductionSumVector(scratch.data(), outputData, batch_size * num_units,
                                              rank);

     // Add bias if provided.
     if (!IsNullInput(bias_)) {
         tflite::tensor_utils::VectorBatchVectorAdd(biasData, num_units, batch_size, outputData);
     }

     // Apply activation.
     tflite::tensor_utils::ApplyActivationToVector(
             outputData, batch_size * num_units,
             static_cast<TfLiteFusedActivation>(params_.activation_), outputData);
     // Finished ApplyTimeWeightsBiasAndActivation

     // Shift the state: within each filter's memory, move entries
     // [1, memory_size) down to [0, memory_size - 1) and clear the last slot.
     for (int b = 0; b < batch_size; b++) {
         float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
         for (int f = 0; f < num_filters; f++) {
             std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,
                       state_out_ptr_batch);
             state_out_ptr_batch[memory_size - 1] = 0.0;
             state_out_ptr_batch += memory_size;
         }
     }
 }

 }  // namespace nn
 }  // namespace android
	/*
	* Copyright (C) 2017 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#define LOG_TAG "Operations"

	#include "SVDF.h"

	#include <tensorflow/lite/kernels/internal/tensor_utils.h>

	#include <algorithm>
	#include <vector>

	#include "CpuExecutor.h"
	#include "CpuOperationUtils.h"
	#include "Tracing.h"

	namespace android {
	namespace nn {

	// Looks up and caches the operand pointers this operation works on, and reads
	// the two scalar parameters (rank and fused activation) into params_.
	//
	// operation: the operation whose inputs and outputs are resolved.
	// operands:  operand table forwarded to GetInput()/GetOutput().
	SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
	    NNTRACE_TRANS("SVDF::SVDF");

	    // Output operands.
	    output_ = GetOutput(operation, operands, kOutputTensor);
	    state_out_ = GetOutput(operation, operands, kStateOutTensor);

	    // Tensor inputs. bias_ is cached unconditionally; the evaluation code
	    // checks IsNullInput(bias_) before reading it.
	    input_ = GetInput(operation, operands, kInputTensor);
	    state_in_ = GetInput(operation, operands, kStateInTensor);
	    bias_ = GetInput(operation, operands, kBiasTensor);
	    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
	    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);

	    // Scalar parameters. The fallback values are 0 for the rank and
	    // kTfLiteActNone for the activation.
	    // NOTE(review): presumably the fallback applies when the operand carries
	    // no data -- confirm against getScalarDataWithDefault().
	    const auto& rankInfo = *GetInput(operation, operands, kRankParam);
	    const auto& activationInfo = *GetInput(operation, operands, kActivationParam);
	    const int activationValue = getScalarDataWithDefault<int>(
	            activationInfo, TfLiteFusedActivation::kTfLiteActNone);
	    params_.rank_ = getScalarDataWithDefault<int>(rankInfo, 0);
	    params_.activation_ = static_cast<ActivationFn>(activationValue);
	}

	// Validates the operand configuration of an SVDF operation and computes the
	// shapes of its two outputs.
	//
	// Checks performed, in order:
	//   - 6 or 7 inputs have values, and every required input is present;
	//   - the operation has exactly 2 outputs;
	//   - the rank and activation scalars are at least sizeof(int) bytes long;
	//   - rank is strictly positive (it is used as a divisor);
	//   - num_filters is a multiple of rank, and the tensor dimensions agree
	//     with each other (and with the bias, when one is supplied).
	//
	// operation:   the operation being prepared.
	// operands:    operand table forwarded to GetInput().
	// stateShape:  out; set to {batch_size, memory_size * num_filters}.
	// outputShape: out; set to {batch_size, num_units}, num_units = num_filters / rank.
	// Both output shapes copy type, offset and scale from the input tensor.
	//
	// Returns true when every check passes; failed checks are handled by the
	// NN_CHECK* / NN_RET_CHECK macros.
	bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
	                   Shape* outputShape) {
	    NNTRACE_TRANS("SVDF::Prepare");
	    // Check we have all the inputs and outputs we need.
	    const int num_inputs = NumInputsWithValues(operation, operands);

	    NN_CHECK(num_inputs == 6 || num_inputs == 7);
	    constexpr int requiredInputs[] = {
	            kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
	            kRankParam,   kActivationParam,
	    };
	    for (const int requiredInput : requiredInputs) {
	        NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
	                << "required input " << requiredInput << " is omitted";
	    }
	    NN_CHECK_EQ(NumOutputs(operation), 2);

	    // Check that the scalar operands' buffers are large enough.
	    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
	    NN_RET_CHECK(rankOperand.length >= sizeof(int));
	    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
	    NN_RET_CHECK(activationOperand.length >= sizeof(int));

	    const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
	    const RunTimeOperandInfo* weights_feature =
	            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
	    const RunTimeOperandInfo* weights_time =
	            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

	    // Check all the parameters of tensor match within themselves and match the
	    // input configuration.
	    const int rank = getScalarData<int>(rankOperand);
	    // rank is a divisor below: zero would divide by zero, and a negative value
	    // would be converted to a huge unsigned divisor and yield a bogus
	    // num_units. Reject both up front.
	    NN_RET_CHECK(rank > 0) << "rank must be positive, got " << rank;
	    const uint32_t batch_size = SizeOfDimension(input, 0);
	    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
	    NN_CHECK_EQ(num_filters % rank, 0u);
	    const uint32_t num_units = num_filters / rank;
	    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
	    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
	    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

	    // The bias is optional; only validate it when it was supplied.
	    const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
	    if (!IsNullInput(bias)) {
	        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
	    }

	    // Resize state.
	    const Shape& inputShape = input->shape();
	    stateShape->type = inputShape.type;
	    stateShape->dimensions = {batch_size, memory_size * num_filters};
	    stateShape->offset = inputShape.offset;
	    stateShape->scale = inputShape.scale;

	    // Resize output.
	    outputShape->type = inputShape.type;
	    outputShape->dimensions = {batch_size, num_units};
	    outputShape->offset = inputShape.offset;
	    outputShape->scale = inputShape.scale;

	    return true;
	}

	bool SVDF::Eval() {
	NNTRACE_TRANS("SVDF::Eval");
	switch (input_->type) {
	case OperandType::TENSOR_FLOAT16: {
	std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
	convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
	std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
	convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
	&inputStateDataFloat32);
	std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
	if (!IsNullInput(bias_)) {
	convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
	&biasDataFloat32);
	}
	std::vector<float> weightsFeatureDataFloat32(
	getNumberOfElements(weights_feature_->shape()));
	convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
	&weightsFeatureDataFloat32);
	std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
	convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
	&weightsTimeDataFloat32);
	std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
	std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

	EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
	biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
	weightsTimeDataFloat32.data(), outputDataFloat32.data(),
	outputStateDataFloat32.data());
	convertFloat32ToFloat16(outputDataFloat32,
	reinterpret_cast<_Float16*>(output_->buffer));
	convertFloat32ToFloat16(outputStateDataFloat32,
	reinterpret_cast<_Float16*>(state_out_->buffer));
	break;
	}
	case OperandType::TENSOR_FLOAT32: {
	EvalFloat32(reinterpret_cast<float*>(input_->buffer),
	reinterpret_cast<float*>(state_in_->buffer),
	reinterpret_cast<float*>(bias_->buffer),
	reinterpret_cast<float*>(weights_feature_->buffer),
	reinterpret_cast<float*>(weights_time_->buffer),
	reinterpret_cast<float*>(output_->buffer),
	reinterpret_cast<float*>(state_out_->buffer));
	break;
	}
	default: {
	LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
	return false;
	}
	}
	return true;
	}

	// Float32 SVDF kernel.
	//
	// Sizes are taken from the cached operands: batch_size and input_size from
	// input_, num_filters from weights_feature_, memory_size from weights_time_,
	// and num_units = num_filters / params_.rank_ (so the rank must be non-zero).
	//
	// inputData:          input activations (batch_size * input_size floats read).
	// inputStateData:     incoming state (batch_size * num_filters * memory_size
	//                     floats are copied).
	// biasData:           bias; only read when bias_ is not a null input.
	// weightsFeatureData: feature weights (num_filters rows of input_size).
	// weightsTimeData:    time weights (num_filters rows of memory_size).
	// outputData:         out; batch_size * num_units results.
	// outputStateData:    out; updated state, same layout as inputStateData.
	void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
	                       const float* weightsFeatureData, const float* weightsTimeData,
	                       float* outputData, float* outputStateData) {
	    NNTRACE_COMP("SVDF::EvalFloat32");

	    const int rank = params_.rank_;
	    const int batch_size = SizeOfDimension(input_, 0);
	    const int input_size = SizeOfDimension(input_, 1);
	    const int num_filters = SizeOfDimension(weights_feature_, 0);
	    const int num_units = num_filters / rank;
	    const int memory_size = SizeOfDimension(weights_time_, 1);

	    // Work on a copy of the incoming state so the input state is left untouched.
	    memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
	    // Compute conv1d(inputs, weights_feature).
	    // First clear the newest slot (index memory_size - 1) of every filter.
	    for (int b = 0; b < batch_size; b++) {
	        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
	        for (int c = 0; c < num_filters; c++) {
	            float* state_ptr = state_ptr_batch + c * memory_size;
	            state_ptr[memory_size - 1] = 0.0;
	        }
	    }

	    // Clear scratch (the matmul is accumulative).
	    // Heap-backed storage instead of a variable-length array: VLAs are not
	    // standard C++, and the size here is determined by the model, so a large
	    // batch_size * num_filters could overflow the stack.
	    std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters, 0.0f);
	    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
	            weightsFeatureData, num_filters, input_size, inputData, batch_size, scratch.data());

	    // Copy the latest activation from scratch into activation_state:
	    // The last, i.e. (memory_size-1)th entry for each batch, and filter.
	    for (int i = 0; i < batch_size * num_filters; ++i) {
	        outputStateData[i * memory_size + memory_size - 1] = scratch[i];
	    }

	    // Begin ApplyTimeWeightsBiasAndActivation
	    // Compute matmul(state, weights_time).
	    for (int b = 0; b < batch_size; b++) {
	        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
	        float* scratch_ptr_batch = scratch.data() + b * num_filters;
	        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
	                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch);
	    }

	    // Reduction sum
	    tflite::tensor_utils::ReductionSumVector(scratch.data(), outputData, batch_size * num_units,
	                                             rank);

	    // Add bias if provided.
	    if (!IsNullInput(bias_)) {
	        tflite::tensor_utils::VectorBatchVectorAdd(biasData, num_units, batch_size, outputData);
	    }

	    // Apply activation.
	    tflite::tensor_utils::ApplyActivationToVector(
	            outputData, batch_size * num_units,
	            static_cast<TfLiteFusedActivation>(params_.activation_), outputData);
	    // Finished ApplyTimeWeightsBiasAndActivation

	    // Shift the state: within each filter's memory, move entries
	    // [1, memory_size) down to [0, memory_size - 1) and clear the last slot.
	    for (int b = 0; b < batch_size; b++) {
	        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
	        for (int f = 0; f < num_filters; f++) {
	            std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,
	                      state_out_ptr_batch);
	            state_out_ptr_batch[memory_size - 1] = 0.0;
	            state_out_ptr_batch += memory_size;
	        }
	    }
	}

	} // namespace nn
	} // namespace android