Initial implementation of the following quantized ops:
- CONV_QUANT8
- DEPTHWISE_CONV_QUANT8
- AVERAGE_POOL_QUANT8
- MAX_POOL_QUANT8
- LOGISTIC_QUANT8
Additionally, added functions to plumb through quantization
parameters.
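For context, TENSOR_QUANT8_ASYMM operands use the standard asymmetric
affine mapping

    real_value = scale * (quantized_value - zeroPoint)

which is why RunTimeOperandInfo and Shape now carry scale/offset next to
the type and dimensions.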
Bug: 63905942
Test: mm
Test: end-to-end quantized MobileNet test passes
Change-Id: Ib2753c68bf2c51467ae1c158b45541bcfdf10789
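A rough sketch of how the new requantization helpers fit together; the
function names are the ones added in OperationsUtils.cpp and the
surrounding variable names mirror convQuant8 in Conv2D.cpp:

    // The int32 accumulators carry scale input_scale * filter_scale, so
    // requantizing to the output scale uses the multiplier
    // input_scale * filter_scale / output_scale, which is expected to be
    // < 1 and is encoded as a fixed-point multiplier plus a right shift.
    float real_multiplier = 0.0f;
    int32_t output_multiplier = 0;
    int32_t output_shift = 0;
    int32_t output_activation_min = 0;
    int32_t output_activation_max = 0;
    GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape,
                                     outputShape, &real_multiplier);
    QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
                                     &output_shift);
    CalculateActivationRangeUint8(activation, outputShape,
                                  &output_activation_min,
                                  &output_activation_max);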
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index 5e36da9..897105e 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -28,6 +28,8 @@
static bool allocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
info->type = shape.type;
info->dimensions = shape.dimensions;
+ info->scale = shape.scale;
+ info->offset = shape.offset;
if (info->buffer == nullptr) {
uint32_t length = sizeOfData(info->type, info->dimensions);
info->buffer = new uint8_t[length];
@@ -78,6 +80,8 @@
return false;
}
mOperands[i].type = from.type;
+ mOperands[i].scale = from.scale;
+ mOperands[i].offset = from.zeroPoint;
}
nnAssert(mModel->inputIndexes.size() == mRequest->inputs.size());
@@ -159,16 +163,18 @@
auto parameterCountIs = [&ins, &outs, &operation](size_t expectedIns,
size_t expectedOuts) -> bool {
if (ins.size() != expectedIns || outs.size() != expectedOuts) {
- LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of ins "
- << ins.size() << " / " << expectedIns << " and outs " << outs.size() << " / "
+ LOG(ERROR) << getOperationName(operation.opTuple.operationType)
+ << ": Invalid number of ins "
+ << ins.size() << " / " << expectedIns
+ << " and outs " << outs.size() << " / "
<< expectedOuts;
return false;
}
return true;
};
- switch (operation.type) { // static_cast<OperationType>(operation.type)) {
- case OperationType::ADD_FLOAT32: {
+ switch (operation.opTuple.operationType) {
+ case OperationType::ADD: {
if (!parameterCountIs(2, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
@@ -177,13 +183,16 @@
RunTimeOperandInfo& out = mOperands[outs[0]];
Shape outShape = out.shape();
- success = addTensorsFloat32Prepare(in1.shape(), in2.shape(), &outShape) &&
- allocateIfNeeded(&out, outShape) &&
- addTensorsFloat32(reinterpret_cast<const float*>(in1.buffer),
- reinterpret_cast<const float*>(in2.buffer),
- reinterpret_cast<float*>(out.buffer), outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = addTensorsPrepare(in1.shape(), in2.shape(), &outShape) &&
+ allocateIfNeeded(&out, outShape) &&
+ addTensorsFloat32(reinterpret_cast<const float*>(in1.buffer),
+ reinterpret_cast<const float*>(in2.buffer),
+ reinterpret_cast<float*>(out.buffer),
+ outShape);
+ }
} break;
- case OperationType::DEPTHWISE_CONV_FLOAT32: {
+ case OperationType::DEPTHWISE_CONV: {
if (!parameterCountIs(8, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
@@ -200,23 +209,40 @@
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = depthwiseConvFloat32Prepare(input.shape(), filter.shape(), bias.shape(),
- padding, stride_width, stride_height,
- &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
- input.shape(),
- reinterpret_cast<const float*>(filter.buffer),
- filter.shape(),
- reinterpret_cast<const float*>(bias.buffer),
- bias.shape(),
- padding, stride_width, stride_height,
- depth_multiplier, activation,
- reinterpret_cast<float*>(output.buffer),
- outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
+ padding, stride_width, stride_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<const float*>(filter.buffer),
+ filter.shape(),
+ reinterpret_cast<const float*>(bias.buffer),
+ bias.shape(),
+ padding, stride_width, stride_height,
+ depth_multiplier, activation,
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) {
+ success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
+ padding, stride_width, stride_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<const uint8_t*>(filter.buffer),
+ filter.shape(),
+ reinterpret_cast<const int32_t*>(bias.buffer),
+ bias.shape(),
+ padding, stride_width, stride_height,
+ depth_multiplier, activation,
+ reinterpret_cast<uint8_t*>(output.buffer),
+ outShape);
+ }
} break;
- case OperationType::CONV_FLOAT32: {
+ case OperationType::CONV: {
if (!parameterCountIs(7, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
@@ -232,22 +258,37 @@
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = convFloat32Prepare(input.shape(), filter.shape(), bias.shape(),
- padding, stride_width, stride_height,
- &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- reinterpret_cast<const float*>(filter.buffer), filter.shape(),
- reinterpret_cast<const float*>(bias.buffer), bias.shape(),
- padding, stride_width, stride_height, activation,
- reinterpret_cast<float*>(output.buffer), outShape);
-
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = convPrepare(input.shape(), filter.shape(), bias.shape(),
+ padding, stride_width, stride_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
+ reinterpret_cast<const float*>(filter.buffer), filter.shape(),
+ reinterpret_cast<const float*>(bias.buffer), bias.shape(),
+ padding, stride_width, stride_height, activation,
+ reinterpret_cast<float*>(output.buffer), outShape);
+ } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) {
+ success = convPrepare(input.shape(), filter.shape(), bias.shape(),
+ padding, stride_width, stride_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<const uint8_t*>(filter.buffer),
+ filter.shape(),
+ reinterpret_cast<const int32_t*>(bias.buffer),
+ bias.shape(),
+ padding, stride_width, stride_height, activation,
+ reinterpret_cast<uint8_t*>(output.buffer),
+ outShape);
+ }
} break;
- case OperationType::AVERAGE_POOL_FLOAT32: {
+ case OperationType::AVERAGE_POOL: {
if (!parameterCountIs(7, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
int32_t padding = getInt32ScalarData(mOperands[ins[1]]);
int32_t stride_width = getInt32ScalarData(mOperands[ins[2]]);
@@ -259,129 +300,186 @@
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = genericPoolingFloat32Prepare(input.shape(),
- padding, stride_width, stride_height,
- filter_width, filter_height,
- &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- averagePoolFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericPoolingPrepare(input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) {
+ success = genericPoolingPrepare(input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+ input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<uint8_t*>(output.buffer),
+ outShape);
+ }
+ } break;
+ case OperationType::L2_POOL: {
+ if (!parameterCountIs(7, 1)) {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
+
+ int32_t padding = getInt32ScalarData(mOperands[ins[1]]);
+ int32_t stride_width = getInt32ScalarData(mOperands[ins[2]]);
+ int32_t stride_height = getInt32ScalarData(mOperands[ins[3]]);
+ int32_t filter_width = getInt32ScalarData(mOperands[ins[4]]);
+ int32_t filter_height = getInt32ScalarData(mOperands[ins[5]]);
+ int32_t activation = getInt32ScalarData(mOperands[ins[6]]);
+
+ RunTimeOperandInfo& output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericPoolingPrepare(input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ }
+ } break;
+ case OperationType::MAX_POOL: {
+ if (!parameterCountIs(7, 1)) {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
+
+ int32_t padding = getInt32ScalarData(mOperands[ins[1]]);
+ int32_t stride_width = getInt32ScalarData(mOperands[ins[2]]);
+ int32_t stride_height = getInt32ScalarData(mOperands[ins[3]]);
+ int32_t filter_width = getInt32ScalarData(mOperands[ins[4]]);
+ int32_t filter_height = getInt32ScalarData(mOperands[ins[5]]);
+ int32_t activation = getInt32ScalarData(mOperands[ins[6]]);
+
+ RunTimeOperandInfo& output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericPoolingPrepare(input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
padding, stride_width, stride_height,
filter_width, filter_height, activation,
- reinterpret_cast<float*>(output.buffer), outShape);
-
- } break;
- case OperationType::L2_POOL_FLOAT32: {
- if (!parameterCountIs(7, 1)) {
- return ANEURALNETWORKS_BAD_DATA;
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) {
+ success = genericPoolingPrepare(input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height,
+ &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+ input.shape(),
+ padding, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<uint8_t*>(output.buffer),
+ outShape);
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
-
- int32_t padding = getInt32ScalarData(mOperands[ins[1]]);
- int32_t stride_width = getInt32ScalarData(mOperands[ins[2]]);
- int32_t stride_height = getInt32ScalarData(mOperands[ins[3]]);
- int32_t filter_width = getInt32ScalarData(mOperands[ins[4]]);
- int32_t filter_height = getInt32ScalarData(mOperands[ins[5]]);
- int32_t activation = getInt32ScalarData(mOperands[ins[6]]);
-
- RunTimeOperandInfo& output = mOperands[outs[0]];
- Shape outShape = output.shape();
-
- success = genericPoolingFloat32Prepare(input.shape(),
- padding, stride_width, stride_height,
- filter_width, filter_height,
- &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- l2PoolFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- padding, stride_width, stride_height,
- filter_width, filter_height, activation,
- reinterpret_cast<float*>(output.buffer), outShape);
} break;
- case OperationType::MAX_POOL_FLOAT32: {
- if (!parameterCountIs(7, 1)) {
- return ANEURALNETWORKS_BAD_DATA;
- }
- const RunTimeOperandInfo& input = mOperands[ins[0]];
-
- int32_t padding = getInt32ScalarData(mOperands[ins[1]]);
- int32_t stride_width = getInt32ScalarData(mOperands[ins[2]]);
- int32_t stride_height = getInt32ScalarData(mOperands[ins[3]]);
- int32_t filter_width = getInt32ScalarData(mOperands[ins[4]]);
- int32_t filter_height = getInt32ScalarData(mOperands[ins[5]]);
- int32_t activation = getInt32ScalarData(mOperands[ins[6]]);
-
- RunTimeOperandInfo& output = mOperands[outs[0]];
- Shape outShape = output.shape();
-
- success = genericPoolingFloat32Prepare(input.shape(),
- padding, stride_width, stride_height,
- filter_width, filter_height,
- &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- maxPoolFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- padding, stride_width, stride_height,
- filter_width, filter_height, activation,
- reinterpret_cast<float*>(output.buffer), outShape);
-
- } break;
- case OperationType::RELU_FLOAT32: {
+ case OperationType::RELU: {
if (!parameterCountIs(1, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = genericActivationFloat32Prepare(input.shape(), &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- reluFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- reinterpret_cast<float*>(output.buffer), outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericActivationPrepare(input.shape(), &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ reluFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ }
} break;
- case OperationType::RELU6_FLOAT32: {
+ case OperationType::RELU6: {
if (!parameterCountIs(1, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = genericActivationFloat32Prepare(input.shape(), &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- relu6Float32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- reinterpret_cast<float*>(output.buffer), outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericActivationPrepare(input.shape(), &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ relu6Float32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ }
} break;
- case OperationType::TANH_FLOAT32: {
+ case OperationType::TANH: {
if (!parameterCountIs(1, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = genericActivationFloat32Prepare(input.shape(), &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- tanhFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- reinterpret_cast<float*>(output.buffer), outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericActivationPrepare(input.shape(), &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ tanhFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ }
} break;
- case OperationType::LOGISTIC_FLOAT32: {
+ case OperationType::LOGISTIC: {
if (!parameterCountIs(1, 1)) {
return ANEURALNETWORKS_BAD_DATA;
}
- const RunTimeOperandInfo& input = mOperands[ins[0]];
+ const RunTimeOperandInfo& input = mOperands[ins[0]];
RunTimeOperandInfo& output = mOperands[outs[0]];
Shape outShape = output.shape();
- success = genericActivationFloat32Prepare(input.shape(), &outShape) &&
- allocateIfNeeded(&output, outShape) &&
- logisticFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
- reinterpret_cast<float*>(output.buffer), outShape);
+ if (operation.opTuple.operandType == OperandType::TENSOR_FLOAT32) {
+ success = genericActivationPrepare(input.shape(), &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ logisticFloat32(reinterpret_cast<const float*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<float*>(output.buffer),
+ outShape);
+ } else if (operation.opTuple.operandType == OperandType::TENSOR_QUANT8_ASYMM) {
+ success = genericActivationPrepare(input.shape(), &outShape) &&
+ allocateIfNeeded(&output, outShape) &&
+ logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
+ input.shape(),
+ reinterpret_cast<uint8_t*>(output.buffer),
+ outShape);
+ }
} break;
default:
nnAssert(false);
break;
}
if (!success) {
- LOG(ERROR) << getOperationName(operation.type) << " failed.";
+ LOG(ERROR) << getOperationName(operation.opTuple.operationType) << " failed.";
return ANEURALNETWORKS_OP_FAILED;
}
diff --git a/common/OperationsUtils.cpp b/common/OperationsUtils.cpp
index 401f7e6..b2ce63a 100644
--- a/common/OperationsUtils.cpp
+++ b/common/OperationsUtils.cpp
@@ -17,8 +17,11 @@
#define LOG_TAG "OperationsUtils"
#include "OperationsUtils.h"
+#include "Operations.h"
#include "Utils.h"
+#include <cmath>
+
namespace android {
namespace nn {
@@ -62,5 +65,102 @@
return shape.dimensions[dimensionIdx];
}
+
+void QuantizeMultiplierSmallerThanOne(double double_multiplier,
+ int32_t* quantized_multiplier,
+ int32_t* right_shift) {
+ CHECK(double_multiplier >= 0.);
+ CHECK(double_multiplier < 1.);
+ if (double_multiplier == 0.) {
+ *quantized_multiplier = 0;
+ *right_shift = 0;
+ return;
+ }
+ CHECK(double_multiplier > 0.);
+ const double q = std::frexp(double_multiplier, right_shift);
+ *right_shift *= -1;
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ CHECK(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31)) {
+ q_fixed /= 2;
+ --*right_shift;
+ }
+ CHECK_GE(*right_shift, 0);
+ CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier,
+ int32_t* quantized_multiplier,
+ int* left_shift) {
+ CHECK(double_multiplier > 1.);
+ const double q = std::frexp(double_multiplier, left_shift);
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ CHECK(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31)) {
+ q_fixed /= 2;
+ ++*left_shift;
+ }
+ CHECK_GE(*left_shift, 0);
+ CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void GetQuantizedConvolutionMultipler(const Shape& inputShape,
+ const Shape& filterShape,
+ const Shape& biasShape,
+ const Shape& outputShape,
+ float* multiplier) {
+ const float input_product_scale = inputShape.scale * filterShape.scale;
+ const float bias_scale = biasShape.scale;
+ const float output_scale = outputShape.scale;
+
+ // The following conditions must be guaranteed by the training pipeline.
+ CHECK(std::abs(input_product_scale - bias_scale) <=
+ 1e-6 * std::min(input_product_scale, bias_scale));
+ CHECK(input_product_scale >= 0);
+ CHECK(input_product_scale < output_scale);
+ *multiplier = input_product_scale / output_scale;
+}
+
+void CalculateActivationRangeUint8(int32_t activation,
+ const Shape& outputShape,
+ int32_t* act_min,
+ int32_t* act_max) {
+ const int32_t qmin = std::numeric_limits<uint8_t>::min();
+ const int32_t qmax = std::numeric_limits<uint8_t>::max();
+
+ const auto scale = outputShape.scale;
+ const auto zero_point = outputShape.offset;
+
+ auto quantize = [scale, zero_point](float f) {
+ return zero_point + static_cast<int32_t>(std::round(f / scale));
+ };
+
+ if (activation == kActivationRelu) {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = qmax;
+ } else if (activation == kActivationRelu6) {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = std::min(qmax, quantize(6.0));
+ } else if (activation == kActivationRelu1) {
+ *act_min = std::max(qmin, quantize(-1.0));
+ *act_max = std::min(qmax, quantize(1.0));
+ } else {
+ *act_min = qmin;
+ *act_max = qmax;
+ }
+}
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1ll << (31 - input_integer_bits)) /
+ (1ll << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
} // namespace nn
} // namespace android
diff --git a/common/Utils.cpp b/common/Utils.cpp
index a050dfd..5a4bf9a 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -37,7 +37,7 @@
"UINT32",
"TENSOR_FLOAT16",
"TENSOR_FLOAT32",
- "TENSOR_SIMMETRICAL_QUANT8",
+ "TENSOR_QUANT8_ASYMM",
};
// TODO Check if this useful
@@ -47,39 +47,39 @@
};
const char* kOperationNames[ANEURALNETWORKS_NUMBER_OPERATION_TYPES] = {
- "AVERAGE_POOL_FLOAT32",
- "CONCATENATION_FLOAT32",
- "CONV_FLOAT32",
- "DEPTHWISE_CONV_FLOAT32",
- "MAX_POOL_FLOAT32",
- "L2_POOL_FLOAT32",
- "DEPTH_TO_SPACE_FLOAT32",
- "SPACE_TO_DEPTH_FLOAT32",
- "LOCAL_RESPONSE_NORMALIZATION_FLOAT32",
- "SOFTMAX_FLOAT32",
- "RESHAPE_FLOAT32",
- "SPLIT_FLOAT32",
- "FAKE_QUANT_FLOAT32",
- "ADD_FLOAT32",
- "FULLY_CONNECTED_FLOAT32",
- "CAST_FLOAT32",
- "MUL_FLOAT32",
- "L2_NORMALIZATION_FLOAT32",
- "LOGISTIC_FLOAT32",
- "RELU_FLOAT32",
- "RELU6_FLOAT32",
- "RELU1_FLOAT32",
- "TANH_FLOAT32",
- "DEQUANTIZE_FLOAT32",
- "FLOOR_FLOAT32",
- "GATHER_FLOAT32",
- "RESIZE_BILINEAR_FLOAT32",
- "LSH_PROJECTION_FLOAT32",
- "LSTM_FLOAT32",
- "SVDF_FLOAT32",
- "RNN_FLOAT32",
- "N_GRAM_FLOAT32",
- "LOOKUP_FLOAT32",
+ "AVERAGE_POOL",
+ "CONCATENATION",
+ "CONV",
+ "DEPTHWISE_CONV",
+ "MAX_POOL",
+ "L2_POOL",
+ "DEPTH_TO_SPACE",
+ "SPACE_TO_DEPTH",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "SOFTMAX",
+ "RESHAPE",
+ "SPLIT",
+ "FAKE_QUANT",
+ "ADD",
+ "FULLY_CONNECTED",
+ "CAST",
+ "MUL",
+ "L2_NORMALIZATION",
+ "LOGISTIC",
+ "RELU",
+ "RELU6",
+ "RELU1",
+ "TANH",
+ "DEQUANTIZE",
+ "FLOOR",
+ "GATHER",
+ "RESIZE_BILINEAR",
+ "LSH_PROJECTION",
+ "LSTM",
+ "SVDF",
+ "RNN",
+ "N_GRAM",
+ "LOOKUP",
};
const char* getOperationName(OperationType type) {
@@ -191,8 +191,8 @@
static bool validOperations(const hidl_vec<Operation>& operations, size_t operandCount) {
for (auto& op : operations) {
- if (static_cast<uint32_t>(op.type) >= HAL_NUM_OPERATION_TYPES) {
- LOG(ERROR) << "Invalid operation type " << toString(op.type);
+ if (static_cast<uint32_t>(op.opTuple.operationType) >= HAL_NUM_OPERATION_TYPES) {
+ LOG(ERROR) << "Invalid operation type " << toString(op.opTuple.operationType);
return false;
}
if (!validOperandIndexes(op.inputs, operandCount) ||
diff --git a/common/include/CpuExecutor.h b/common/include/CpuExecutor.h
index ae49406..8065e00 100644
--- a/common/include/CpuExecutor.h
+++ b/common/include/CpuExecutor.h
@@ -36,6 +36,9 @@
// to pass together with the dimension to the functions implementing
// the operators.
std::vector<uint32_t> dimensions;
+
+ float scale;
+ int32_t offset;
// Where the operand's data is stored. Check the corresponding
// location information in the model to figure out if this points
// to memory we have allocated for an temporary operand.
@@ -50,7 +53,9 @@
Shape shape() const {
return Shape{.type = type,
- .dimensions = dimensions};
+ .dimensions = dimensions,
+ .scale = scale,
+ .offset = offset};
}
};
diff --git a/common/include/HalInterfaces.h b/common/include/HalInterfaces.h
index 8e341ab..670d826 100644
--- a/common/include/HalInterfaces.h
+++ b/common/include/HalInterfaces.h
@@ -40,6 +40,7 @@
using ::android::hardware::neuralnetworks::V1_0::Operand;
using ::android::hardware::neuralnetworks::V1_0::OperandType;
using ::android::hardware::neuralnetworks::V1_0::Operation;
+using ::android::hardware::neuralnetworks::V1_0::OperationTuple;
using ::android::hardware::neuralnetworks::V1_0::OperationType;
using ::android::hardware::neuralnetworks::V1_0::PerformanceInfo;
using ::android::hardware::neuralnetworks::V1_0::Request;
diff --git a/common/include/Operations.h b/common/include/Operations.h
index 3774355..bfe5b06 100644
--- a/common/include/Operations.h
+++ b/common/include/Operations.h
@@ -33,13 +33,14 @@
enum ActivationFn {
kActivationNone = 0,
kActivationRelu = 1,
+ kActivationRelu1 = 2,
kActivationRelu6 = 3,
};
-bool addTensorsFloat32Prepare(const Shape& in1, const Shape& in2, Shape* out1);
+bool addTensorsPrepare(const Shape& in1, const Shape& in2, Shape* out1);
bool addTensorsFloat32(const float* in1, const float* in2, float* out, const Shape& shape);
-bool depthwiseConvFloat32Prepare(const Shape& input,
+bool depthwiseConvPrepare(const Shape& input,
const Shape& filter,
const Shape& bias,
int32_t padding,
@@ -51,28 +52,45 @@
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t depth_multiplier, int32_t activation,
float* outputData, const Shape& outputShape);
+bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape,
+ const uint8_t* filterData, const Shape& filterShape,
+ const int32_t* biasData, const Shape& biasShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape);
-bool convFloat32Prepare(const Shape& input,
- const Shape& filter,
- const Shape& bias,
- int32_t padding,
- int32_t stride_width, int32_t stride_height,
- Shape* output);
+bool convPrepare(const Shape& input,
+ const Shape& filter,
+ const Shape& bias,
+ int32_t padding,
+ int32_t stride_width, int32_t stride_height,
+ Shape* output);
bool convFloat32(const float* inputData, const Shape& inputShape,
const float* filterData, const Shape& filterShape,
const float* biasData, const Shape& biasShape,
- int32_t padding, int32_t stride_width, int32_t stride_height, int32_t activation,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t activation,
float* outputData, const Shape& outputShape);
+bool convQuant8(const uint8_t* inputData, const Shape& inputShape,
+ const uint8_t* filterData, const Shape& filterShape,
+ const int32_t* biasData, const Shape& biasShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ uint8_t* outputData, const Shape& outputShape);
-bool genericPoolingFloat32Prepare(const Shape& input,
- int32_t padding,
- int32_t stride_width, int32_t stride_height,
- int32_t filter_width, int32_t filter_height,
- Shape* output);
+bool genericPoolingPrepare(const Shape& input,
+ int32_t padding,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ Shape* output);
bool averagePoolFloat32(const float* inputData, const Shape& inputShape,
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
float* outputData, const Shape& outputShape);
+bool averagePoolQuant8(const uint8_t* inputData, const Shape& inputShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape);
bool l2PoolFloat32(const float* inputData, const Shape& inputShape,
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
@@ -81,8 +99,12 @@
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
float* outputData, const Shape& outputShape);
+bool maxPoolQuant8(const uint8_t* inputData, const Shape& inputShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape);
-bool genericActivationFloat32Prepare(const Shape& input, Shape* output);
+bool genericActivationPrepare(const Shape& input, Shape* output);
bool reluFloat32(const float* inputData, const Shape& inputShape,
float* outputData, const Shape& outputShape);
bool relu6Float32(const float* inputData, const Shape& inputShape,
@@ -91,7 +113,8 @@
float* outputData, const Shape& outputShape);
bool logisticFloat32(const float* inputData, const Shape& inputShape,
float* outputData, const Shape& outputShape);
-
+bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape,
+ uint8_t* outputData, const Shape& outputShape);
} // namespace nn
} // namespace android
diff --git a/common/include/OperationsUtils.h b/common/include/OperationsUtils.h
index 586046a..3be7479 100644
--- a/common/include/OperationsUtils.h
+++ b/common/include/OperationsUtils.h
@@ -29,6 +29,8 @@
struct Shape {
OperandType type;
std::vector<uint32_t> dimensions;
+ float scale;
+ int32_t offset;
};
// Verifies that the two shapes are the same.
@@ -47,9 +49,35 @@
inline uint32_t ComputePadding(uint32_t stride, uint32_t in_size, uint32_t filter_size,
uint32_t out_size) {
- return ((out_size - 1) * stride + filter_size - in_size) / 2;
+ uint32_t tmp = (out_size - 1) * stride + filter_size;
+ if (tmp > in_size) {
+ return (tmp - in_size) / 2;
+ } else {
+ return 0;
+ }
}
+void QuantizeMultiplierSmallerThanOne(double double_multiplier,
+ int32_t* quantized_multiplier,
+ int32_t* right_shift);
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier,
+ int32_t* quantized_multiplier,
+ int* left_shift);
+
+void GetQuantizedConvolutionMultipler(const Shape& inputShape,
+ const Shape& filterShape,
+ const Shape& biasShape,
+ const Shape& outputShape,
+ float* multiplier);
+
+void CalculateActivationRangeUint8(int32_t activation,
+ const Shape& outputShape,
+ int32_t* act_min,
+ int32_t* act_max);
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
} // namespace nn
} // namespace android
diff --git a/common/operations/Activation.cpp b/common/operations/Activation.cpp
index c96584b..224b6b7 100644
--- a/common/operations/Activation.cpp
+++ b/common/operations/Activation.cpp
@@ -18,13 +18,12 @@
#include "OperationsUtils.h"
#include "internal/optimized/optimized_ops.h"
-#include "internal/reference/reference_ops.h"
namespace android {
namespace nn {
-bool genericActivationFloat32Prepare(const Shape& input,
- Shape* output) {
+bool genericActivationPrepare(const Shape& input,
+ Shape* output) {
DCHECK_EQ(getNumberOfDimensions(input), 4);
return SetShape(input, output);
}
@@ -65,5 +64,31 @@
return true;
}
+bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape,
+ uint8_t* outputData, const Shape& outputShape) {
+ int numElements = getNumberOfElements(inputShape);
+ static constexpr int kInputIntegerBits = 4;
+
+ const double input_real_multiplier =
+ inputShape.scale *
+ static_cast<double>(1 << (31 - kInputIntegerBits));
+
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ QuantizeMultiplierGreaterThanOne(input_real_multiplier,
+ &input_multiplier,
+ &input_left_shift);
+ int32_t input_range_radius =
+ CalculateInputRadius(kInputIntegerBits, input_left_shift);
+
+ optimized_ops::Logistic(
+ inputData, convertShapeToDims(inputShape),
+ inputShape.offset, input_range_radius,
+ input_multiplier, input_left_shift,
+ outputData, convertShapeToDims(outputShape));
+
+ return true;
+}
+
} // namespace nn
} // namespace android
diff --git a/common/operations/Conv2D.cpp b/common/operations/Conv2D.cpp
index aff2dfb..c68cc93 100644
--- a/common/operations/Conv2D.cpp
+++ b/common/operations/Conv2D.cpp
@@ -18,7 +18,6 @@
#include "OperationsUtils.h"
#include "internal/optimized/optimized_ops.h"
-#include "internal/reference/reference_ops.h"
namespace android {
namespace nn {
@@ -27,12 +26,12 @@
static constexpr int kStaticBufferSize = 1605632;
static char static_scratch_buffer[kStaticBufferSize];
-bool convFloat32Prepare(const Shape& input,
- const Shape& filter,
- const Shape& bias,
- int32_t padding,
- int32_t stride_width, int32_t stride_height,
- Shape* output) {
+bool convPrepare(const Shape& input,
+ const Shape& filter,
+ const Shape& bias,
+ int32_t padding,
+ int32_t stride_width, int32_t stride_height,
+ Shape* output) {
DCHECK_EQ(getNumberOfDimensions(input), 4);
DCHECK_EQ(getNumberOfDimensions(filter), 4);
DCHECK_EQ(getNumberOfDimensions(bias), 1);
@@ -66,45 +65,50 @@
return true;
}
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
+ uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(inputShape, 3); \
+ \
+ uint32_t paddingHeight = \
+ ComputePadding(stride_height, height, filterHeight, outHeight); \
+ uint32_t paddingWidth = \
+ ComputePadding(stride_width, width, filterWidth, outWidth); \
+ \
+ Dims<4> im2colDim; \
+ im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \
+ im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \
+ im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \
+ im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \
+ \
+ im2colDim.strides[0] = 1; \
+ for (int i=1; i<4; i++) { \
+ im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \
+ } \
+ \
+ Type* im2colData = nullptr; \
+ int im2colByteSize = sizeof(Type); \
+ for (int i=0; i<4; i++) { \
+ im2colByteSize *= im2colDim.sizes[i]; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } else { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ }
+
+
bool convFloat32(const float* inputData, const Shape& inputShape,
const float* filterData, const Shape& filterShape,
const float* biasData, const Shape& biasShape,
int32_t padding, int32_t stride_width, int32_t stride_height, int32_t activation,
float* outputData, const Shape& outputShape) {
- uint32_t height = getSizeOfDimension(inputShape, 1);
- uint32_t width = getSizeOfDimension(inputShape, 2);
- uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
- uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
- uint32_t outHeight = getSizeOfDimension(outputShape, 1);
- uint32_t outWidth = getSizeOfDimension(outputShape, 2);
- uint32_t inDepth = getSizeOfDimension(inputShape, 3);
- uint32_t paddingHeight =
- ComputePadding(stride_height, height, filterHeight, outHeight);
- uint32_t paddingWidth =
- ComputePadding(stride_width, width, filterWidth, outWidth);
-
- Dims<4> im2colDim;
- im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0);
- im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1);
- im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2);
- im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth;
-
- im2colDim.strides[0] = 1;
- for (int i=1; i<4; i++) {
- im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1];
- }
-
- float* im2colData = nullptr;
- int im2colByteSize = sizeof(float);
- for (int i=0; i<4; i++) {
- im2colByteSize *= im2colDim.sizes[i];
- }
- if (im2colByteSize <= kStaticBufferSize) {
- im2colData = reinterpret_cast<float *>(static_scratch_buffer);
- } else {
- im2colData = new (std::nothrow) float[im2colByteSize / sizeof(float)];
- }
+ ANDROID_NN_CONV_PARAMETERS(float)
#define ANDROID_NN_CONV(activation) \
optimized_ops::Conv<FusedActivationFunctionType::activation>( \
@@ -133,5 +137,62 @@
return true;
}
+bool convQuant8(const uint8_t* inputData, const Shape& inputShape,
+ const uint8_t* filterData, const Shape& filterShape,
+ const int32_t* biasData, const Shape& biasShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape) {
+
+ ANDROID_NN_CONV_PARAMETERS(uint8_t)
+
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+
+ GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape,
+ outputShape, &real_multiplier);
+ QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
+ &output_shift);
+ CalculateActivationRangeUint8(activation, outputShape,
+ &output_activation_min,
+ &output_activation_max);
+
+ static gemmlowp::GemmContext gemm_context;
+
+ int32_t inputOffset = -inputShape.offset;
+ int32_t filterOffset = -filterShape.offset;
+ int32_t outputOffset = outputShape.offset;
+ #define ANDROID_NN_CONV(activation) \
+ optimized_ops::Conv<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), inputOffset, \
+ filterData, convertShapeToDims(filterShape), filterOffset, \
+ biasData, convertShapeToDims(biasShape), \
+ stride_width, paddingWidth, paddingHeight, \
+ outputOffset, output_multiplier, output_shift, \
+ output_activation_min, output_activation_max, \
+ outputData, convertShapeToDims(outputShape), \
+ im2colData, im2colDim, &gemm_context)
+
+ if (activation == kActivationNone) {
+ ANDROID_NN_CONV(kNone);
+ }
+ if (activation == kActivationRelu) {
+ ANDROID_NN_CONV(kRelu);
+ }
+ if (activation == kActivationRelu6) {
+ ANDROID_NN_CONV(kRelu6);
+ }
+
+ #undef ANDROID_NN_CONV
+
+ if (im2colByteSize > kStaticBufferSize) {
+ delete[] im2colData;
+ }
+ return true;
+}
+
+#undef ANDROID_NN_CONV_PARAMETERS
} // namespace nn
} // namespace android
diff --git a/common/operations/DepthwiseConv2D.cpp b/common/operations/DepthwiseConv2D.cpp
index 2ab4c15..68012f2 100644
--- a/common/operations/DepthwiseConv2D.cpp
+++ b/common/operations/DepthwiseConv2D.cpp
@@ -18,17 +18,17 @@
#include "OperationsUtils.h"
#include "internal/optimized/depthwiseconv_float.h"
-#include "internal/reference/depthwiseconv_float.h"
+#include "internal/optimized/depthwiseconv_uint8.h"
namespace android {
namespace nn {
-bool depthwiseConvFloat32Prepare(const Shape& input,
- const Shape& filter,
- const Shape& bias,
- int32_t padding,
- int32_t stride_width, int32_t stride_height,
- Shape* output) {
+bool depthwiseConvPrepare(const Shape& input,
+ const Shape& filter,
+ const Shape& bias,
+ int32_t padding,
+ int32_t stride_width, int32_t stride_height,
+ Shape* output) {
DCHECK_EQ(getNumberOfDimensions(input), 4);
DCHECK_EQ(getNumberOfDimensions(filter), 4);
DCHECK_EQ(getNumberOfDimensions(bias), 1);
@@ -61,23 +61,28 @@
return true;
}
+
+#define ANDROID_NN_DEPTHWISE_CONV_PARAMETERS \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
+ uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ \
+ uint32_t paddingHeight = \
+ ComputePadding(stride_height, height, filterHeight, outHeight); \
+ uint32_t paddingWidth = \
+ ComputePadding(stride_width, width, filterWidth, outWidth);
+
bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape,
const float* filterData, const Shape& filterShape,
const float* biasData, const Shape& biasShape,
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t depth_multiplier, int32_t activation,
float* outputData, const Shape& outputShape) {
- uint32_t height = getSizeOfDimension(inputShape, 1);
- uint32_t width = getSizeOfDimension(inputShape, 2);
- uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
- uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
- uint32_t outHeight = getSizeOfDimension(outputShape, 1);
- uint32_t outWidth = getSizeOfDimension(outputShape, 2);
- uint32_t paddingHeight =
- ComputePadding(stride_height, height, filterHeight, outHeight);
- uint32_t paddingWidth =
- ComputePadding(stride_width, width, filterWidth, outWidth);
+ ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
#define ANDROID_NN_DEPTHWISE_CONV(activation) \
optimized_ops::DepthwiseConv<FusedActivationFunctionType::activation>( \
@@ -102,5 +107,58 @@
return true;
}
+
+bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape,
+ const uint8_t* filterData, const Shape& filterShape,
+ const int32_t* biasData, const Shape& biasShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape) {
+
+ ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
+
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+
+ GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape,
+ outputShape, &real_multiplier);
+ QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
+ &output_shift);
+ CalculateActivationRangeUint8(activation, outputShape,
+ &output_activation_min,
+ &output_activation_max);
+
+ int32_t inputOffset = -inputShape.offset;
+ int32_t filterOffset = -filterShape.offset;
+ int32_t outputOffset = outputShape.offset;
+ #define ANDROID_NN_DEPTHWISE_CONV(activation) \
+ optimized_ops::DepthwiseConv<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), inputOffset, \
+ filterData, convertShapeToDims(filterShape), filterOffset, \
+ biasData, convertShapeToDims(biasShape), \
+ stride_width, paddingWidth, paddingHeight, depth_multiplier, \
+ outputOffset, output_multiplier, output_shift, \
+ output_activation_min, output_activation_max, \
+ outputData, convertShapeToDims(outputShape))
+
+ if (activation == kActivationNone) {
+ ANDROID_NN_DEPTHWISE_CONV(kNone);
+ }
+ if (activation == kActivationRelu) {
+ ANDROID_NN_DEPTHWISE_CONV(kRelu);
+ }
+ if (activation == kActivationRelu6) {
+ ANDROID_NN_DEPTHWISE_CONV(kRelu6);
+ }
+
+ #undef ANDROID_NN_DEPTHWISE_CONV
+
+ return true;
+}
+
+#undef ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
} // namespace nn
} // namespace android
diff --git a/common/operations/Pooling.cpp b/common/operations/Pooling.cpp
index 0a328b3..bb20be5 100644
--- a/common/operations/Pooling.cpp
+++ b/common/operations/Pooling.cpp
@@ -18,16 +18,15 @@
#include "OperationsUtils.h"
#include "internal/optimized/optimized_ops.h"
-#include "internal/reference/reference_ops.h"
namespace android {
namespace nn {
-bool genericPoolingFloat32Prepare(const Shape& input,
- int32_t padding,
- int32_t stride_width, int32_t stride_height,
- int32_t filter_width, int32_t filter_height,
- Shape* output) {
+bool genericPoolingPrepare(const Shape& input,
+ int32_t padding,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ Shape* output) {
DCHECK_EQ(getNumberOfDimensions(input), 4);
DCHECK_EQ(stride_width, stride_height);
@@ -54,19 +53,23 @@
return true;
}
+#define ANDROID_NN_POOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(inputShape, 1); \
+ uint32_t width = getSizeOfDimension(inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \
+ \
+ uint32_t paddingHeight = \
+ ComputePadding(stride_height, height, filter_height, outHeight); \
+ uint32_t paddingWidth = \
+ ComputePadding(stride_width, width, filter_width, outWidth);
+
bool averagePoolFloat32(const float* inputData, const Shape& inputShape,
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
float* outputData, const Shape& outputShape) {
- uint32_t height = getSizeOfDimension(inputShape, 1);
- uint32_t width = getSizeOfDimension(inputShape, 2);
- uint32_t outHeight = getSizeOfDimension(outputShape, 1);
- uint32_t outWidth = getSizeOfDimension(outputShape, 2);
- uint32_t paddingHeight =
- ComputePadding(stride_height, height, filter_height, outHeight);
- uint32_t paddingWidth =
- ComputePadding(stride_width, width, filter_width, outWidth);
+ ANDROID_NN_POOLING_PARAMETERS
#define ANDROID_NN_AVERAGE_POOL(activation) \
optimized_ops::AveragePool<FusedActivationFunctionType::activation>( \
@@ -90,19 +93,49 @@
return true;
}
+bool averagePoolQuant8(const uint8_t* inputData, const Shape& inputShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape) {
+
+ ANDROID_NN_POOLING_PARAMETERS
+
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+
+ CalculateActivationRangeUint8(activation, outputShape,
+ &output_activation_min,
+ &output_activation_max);
+
+ #define ANDROID_NN_AVERAGE_POOL(activation) \
+ optimized_ops::AveragePool<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), \
+ stride_width, paddingWidth, paddingHeight, \
+ filter_width, filter_height, \
+ output_activation_min, output_activation_max, \
+ outputData, convertShapeToDims(outputShape))
+
+ if (activation == kActivationNone) {
+ ANDROID_NN_AVERAGE_POOL(kNone);
+ }
+ if (activation == kActivationRelu) {
+ ANDROID_NN_AVERAGE_POOL(kRelu);
+ }
+ if (activation == kActivationRelu6) {
+ ANDROID_NN_AVERAGE_POOL(kRelu6);
+ }
+
+ #undef ANDROID_NN_AVERAGE_POOL
+
+ return true;
+}
+
bool l2PoolFloat32(const float* inputData, const Shape& inputShape,
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
float* outputData, const Shape& outputShape) {
- uint32_t height = getSizeOfDimension(inputShape, 1);
- uint32_t width = getSizeOfDimension(inputShape, 2);
- uint32_t outHeight = getSizeOfDimension(outputShape, 1);
- uint32_t outWidth = getSizeOfDimension(outputShape, 2);
- uint32_t paddingHeight =
- ComputePadding(stride_height, height, filter_height, outHeight);
- uint32_t paddingWidth =
- ComputePadding(stride_width, width, filter_width, outWidth);
+ ANDROID_NN_POOLING_PARAMETERS
#define ANDROID_NN_L2_POOL(activation) \
optimized_ops::L2Pool<FusedActivationFunctionType::activation>( \
@@ -130,15 +163,8 @@
int32_t padding, int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height, int32_t activation,
float* outputData, const Shape& outputShape) {
- uint32_t height = getSizeOfDimension(inputShape, 1);
- uint32_t width = getSizeOfDimension(inputShape, 2);
- uint32_t outHeight = getSizeOfDimension(outputShape, 1);
- uint32_t outWidth = getSizeOfDimension(outputShape, 2);
- uint32_t paddingHeight =
- ComputePadding(stride_height, height, filter_height, outHeight);
- uint32_t paddingWidth =
- ComputePadding(stride_width, width, filter_width, outWidth);
+ ANDROID_NN_POOLING_PARAMETERS
#define ANDROID_NN_MAX_POOL(activation) \
optimized_ops::MaxPool<FusedActivationFunctionType::activation>( \
@@ -162,7 +188,43 @@
return true;
}
+bool maxPoolQuant8(const uint8_t* inputData, const Shape& inputShape,
+ int32_t padding, int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height, int32_t activation,
+ uint8_t* outputData, const Shape& outputShape) {
+ ANDROID_NN_POOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+
+ CalculateActivationRangeUint8(activation, outputShape,
+ &output_activation_min,
+ &output_activation_max);
+
+ #define ANDROID_NN_MAX_POOL(activation) \
+ optimized_ops::MaxPool<FusedActivationFunctionType::activation>( \
+ inputData, convertShapeToDims(inputShape), \
+ stride_width, paddingWidth, paddingHeight, \
+ filter_width, filter_height, \
+ output_activation_min, output_activation_max, \
+ outputData, convertShapeToDims(outputShape))
+
+ if (activation == kActivationNone) {
+ ANDROID_NN_MAX_POOL(kNone);
+ }
+ if (activation == kActivationRelu) {
+ ANDROID_NN_MAX_POOL(kRelu);
+ }
+ if (activation == kActivationRelu6) {
+ ANDROID_NN_MAX_POOL(kRelu6);
+ }
+
+ #undef ANDROID_NN_MAX_POOL
+
+ return true;
+}
+
+#undef ANDROID_NN_POOLING_PARAMETERS
} // namespace nn
} // namespace android
diff --git a/common/operations/SimpleMath.cpp b/common/operations/SimpleMath.cpp
index 36b1cb0..882304a 100644
--- a/common/operations/SimpleMath.cpp
+++ b/common/operations/SimpleMath.cpp
@@ -24,7 +24,7 @@
namespace android {
namespace nn {
-bool addTensorsFloat32Prepare(const Shape& in1, const Shape& in2, Shape* out) {
+bool addTensorsPrepare(const Shape& in1, const Shape& in2, Shape* out) {
return SameShape(in1, in2) && SetShape(in1, out);
}