/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Operations"
#include "FullyConnected.h"
#include <vector>
#include "OperationResolver.h"
#include "Tracing.h"
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
#include <tensorflow/lite/kernels/internal/types.h>
#pragma clang diagnostic pop
#include "CpuOperationUtils.h"
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
namespace android {
namespace nn {
namespace fully_connected {
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
namespace {
// executionMutex is used to protect concurrent access to non-thread-safe resources
// such as gemmlowp::GemmContext.
// std::mutex is safe for pthreads on Android.
static std::mutex executionMutex;
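
// Float32 path: computes output = activation(input * transpose(weights) + bias)
// using the TFLite kernels. The reference kernel is used as a workaround where the
// optimized kernel is known to produce wrong results (see b/80425683 below).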
bool fullyConnectedFloat32(const float* inputData, const Shape& inputShape,
const float* weightsData, const Shape& weightsShape,
const float* biasData, const Shape& biasShape, int32_t activation,
float* outputData, const Shape& outputShape) {
NNTRACE_TRANS("fullyConnectedFloat32");
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
// b/80425683: the optimized implementation produces incorrect results when the
// number of input elements is the square of batch_size.
uint32_t batch_size = getSizeOfDimension(outputShape, 0);
uint32_t input_n_elements = getNumberOfElements(inputShape);
if (batch_size * batch_size == input_n_elements) {
NNTRACE_COMP_SWITCH("reference_ops::FullyConnected");
tflite::reference_ops::FullyConnected(inputData, convertShapeToDims(inputShape),
weightsData, convertShapeToDims(weightsShape),
biasData, convertShapeToDims(biasShape),
output_activation_min, output_activation_max,
outputData, convertShapeToDims(outputShape));
} else {
NNTRACE_COMP_SWITCH("optimized_ops::FullyConnected");
tflite::optimized_ops::FullyConnected(inputData, convertShapeToDims(inputShape),
weightsData, convertShapeToDims(weightsShape),
biasData, convertShapeToDims(biasShape),
output_activation_min, output_activation_max,
outputData, convertShapeToDims(outputShape));
}
return true;
}
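
// Float16 path: no native half-precision kernel is used here; the operands are
// converted to float32, the float32 path is reused, and the result is converted
// back to float16.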
bool fullyConnectedFloat16(const _Float16* inputData, const Shape& inputShape,
const _Float16* weightsData, const Shape& weightsShape,
const _Float16* biasData, const Shape& biasShape, int32_t activation,
_Float16* outputData, const Shape& outputShape) {
NNTRACE_TRANS("fullyConnectedFloat16");
std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
convertFloat16ToFloat32(inputData, &inputDataFloat32);
std::vector<float> weightsDataFloat32(getNumberOfElements(weightsShape));
convertFloat16ToFloat32(weightsData, &weightsDataFloat32);
std::vector<float> biasDataFloat32(getNumberOfElements(biasShape));
convertFloat16ToFloat32(biasData, &biasDataFloat32);
std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
fullyConnectedFloat32(inputDataFloat32.data(), inputShape, weightsDataFloat32.data(),
weightsShape, biasDataFloat32.data(), biasShape, activation,
outputDataFloat32.data(), outputShape);
convertFloat32ToFloat16(outputDataFloat32, outputData);
return true;
}
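
// Asymmetric uint8 path: the real output multiplier derived from the input, weights,
// and output scales is quantized to a fixed-point multiplier plus shift, and the
// matmul runs on the legacy optimized kernel backed by a shared gemmlowp context.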
bool fullyConnectedQuant8(const uint8_t* inputData, const Shape& inputShape,
const uint8_t* weightsData, const Shape& weightsShape,
const int32_t* biasData, const Shape& biasShape, int32_t activation,
uint8_t* outputData, const Shape& outputShape) {
NNTRACE_TRANS("fullyConnectedQuant8");
int32_t inputOffset = -inputShape.offset;
int32_t weightsOffset = -weightsShape.offset;
int32_t outputOffset = outputShape.offset;
double realMultiplier = 0.0;
int32_t outputMultiplier = 0;
int32_t outputShift = 0;
int32_t outputActivationMin = 0;
int32_t outputActivationMax = 0;
NN_RET_CHECK(GetQuantizedConvolutionMultiplier(inputShape, weightsShape, biasShape, outputShape,
&realMultiplier));
int exponent;
NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
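// QuantizeMultiplier yields a left-shift exponent; the legacy optimized kernel
// expects output_shift as a right shift, hence the negation.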
outputShift = -exponent;
CalculateActivationRangeUint8(activation, outputShape, &outputActivationMin,
&outputActivationMax);
static gemmlowp::GemmContext gemmContext;
// Prevent concurrent executions that access gemmContext.
std::unique_lock<std::mutex> lock(executionMutex);
// Let gemmlowp automatically decide how many threads to use.
gemmContext.set_max_num_threads(0);
NNTRACE_COMP_SWITCH("optimized_ops::FullyConnected");
tflite::optimized_ops::FullyConnected(inputData, convertShapeToDims(inputShape), inputOffset,
weightsData, convertShapeToDims(weightsShape),
weightsOffset, biasData, convertShapeToDims(biasShape),
outputOffset, outputMultiplier, outputShift,
outputActivationMin, outputActivationMax, outputData,
convertShapeToDims(outputShape), &gemmContext);
return true;
}
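
// Asymmetric signed int8 path (TENSOR_QUANT8_ASYMM_SIGNED): quantization parameters
// are packed into FullyConnectedParams and the TFLite reference integer kernel is used.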
bool fullyConnectedQuant8(const int8_t* inputData, const Shape& inputShape,
const int8_t* weightsData, const Shape& weightsShape,
const int32_t* biasData, const Shape& biasShape, int32_t activation,
int8_t* outputData, const Shape& outputShape) {
NNTRACE_TRANS("fullyConnectedQuant8Signed");
double realMultiplier = 0.0;
int32_t outputMultiplier = 0;
int32_t outputShift = 0;
int32_t outputActivationMin = 0;
int32_t outputActivationMax = 0;
NN_RET_CHECK(GetQuantizedConvolutionMultiplier(inputShape, weightsShape, biasShape, outputShape,
&realMultiplier));
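// Here the quantized exponent is used directly: reference_integer_ops treats
// output_shift as a left-shift exponent, so no negation is needed.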
NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &outputShift));
CalculateActivationRangeInt8(activation, outputShape, &outputActivationMin,
&outputActivationMax);
tflite::FullyConnectedParams params;
params.input_offset = -inputShape.offset;
params.weights_offset = -weightsShape.offset;
params.output_offset = outputShape.offset;
params.output_multiplier = outputMultiplier;
params.output_shift = outputShift;
params.quantized_activation_min = outputActivationMin;
params.quantized_activation_max = outputActivationMax;
NNTRACE_COMP_SWITCH("reference_integer_ops::FullyConnected");
tflite::reference_integer_ops::FullyConnected(
params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(weightsShape), weightsData, convertShapeToTflshape(biasShape),
biasData, convertShapeToTflshape(outputShape), outputData);
return true;
}
} // namespace
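
// Validates that the input, weights, and bias shapes are mutually consistent and
// infers the output shape before execution.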
bool prepare(IOperationExecutionContext* context) {
Shape input = context->getInputShape(kInputTensor);
Shape weights = context->getInputShape(kWeightsTensor);
Shape bias = context->getInputShape(kBiasTensor);
Shape output = context->getOutputShape(kOutputTensor);
NN_RET_CHECK(validateShapes(input, weights, bias, &output));
return context->setOutputShape(kOutputTensor, output);
}
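
// Dispatches to the kernel matching the input tensor's operand type. Zero-sized
// outputs are a no-op; zero-sized inputs are explicitly allowed at registration below.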
bool execute(IOperationExecutionContext* context) {
// Bypass execution in the case of zero-sized input.
if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
switch (context->getInputType(kInputTensor)) {
case OperandType::TENSOR_FLOAT32:
return fullyConnectedFloat32(context->getInputBuffer<float>(kInputTensor),
context->getInputShape(kInputTensor),
context->getInputBuffer<float>(kWeightsTensor),
context->getInputShape(kWeightsTensor),
context->getInputBuffer<float>(kBiasTensor),
context->getInputShape(kBiasTensor),
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<float>(kOutputTensor),
context->getOutputShape(kOutputTensor));
case OperandType::TENSOR_FLOAT16:
return fullyConnectedFloat16(context->getInputBuffer<_Float16>(kInputTensor),
context->getInputShape(kInputTensor),
context->getInputBuffer<_Float16>(kWeightsTensor),
context->getInputShape(kWeightsTensor),
context->getInputBuffer<_Float16>(kBiasTensor),
context->getInputShape(kBiasTensor),
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<_Float16>(kOutputTensor),
context->getOutputShape(kOutputTensor));
case OperandType::TENSOR_QUANT8_ASYMM:
return fullyConnectedQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
context->getInputShape(kInputTensor),
context->getInputBuffer<uint8_t>(kWeightsTensor),
context->getInputShape(kWeightsTensor),
context->getInputBuffer<int32_t>(kBiasTensor),
context->getInputShape(kBiasTensor),
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<uint8_t>(kOutputTensor),
context->getOutputShape(kOutputTensor));
case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
return fullyConnectedQuant8(context->getInputBuffer<int8_t>(kInputTensor),
context->getInputShape(kInputTensor),
context->getInputBuffer<int8_t>(kWeightsTensor),
context->getInputShape(kWeightsTensor),
context->getInputBuffer<int32_t>(kBiasTensor),
context->getInputShape(kBiasTensor),
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<int8_t>(kOutputTensor),
context->getOutputShape(kOutputTensor));
default:
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
}
}
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
} // namespace fully_connected
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(FULLY_CONNECTED, fully_connected::prepare,
fully_connected::execute, .allowZeroSizedInput = true);
} // namespace nn
} // namespace android