Initial implementation of the following quantized ops.
- CONV_QUANT8
- DEPTHWISE_CONV_QUANT8
- AVERAGE_POOL_QUANT8
- MAX_POOL_QUANT8
- LOGISTIC_QUANT8
Additionally, added functions to plumb through quantization
parameters.
Bug: 63905942
Test: mm
Test: end-to-end MobileNet quantized test pass
Change-Id: Ib2753c68bf2c51467ae1c158b45541bcfdf10789
diff --git a/runtime/ModelBuilder.cpp b/runtime/ModelBuilder.cpp
index 0d6fce7..1d386c0 100644
--- a/runtime/ModelBuilder.cpp
+++ b/runtime/ModelBuilder.cpp
@@ -40,6 +40,9 @@
mOperands.resize(idx + 1);
auto& entry = mOperands[idx];
entry.type = static_cast<OperandType>(type.type);
+ entry.scale = type.scale;
+ entry.zeroPoint = type.offset;
+
// TODO entry.numberOfConsumers = 0;
setFromIntList(&entry.dimensions, type.dimensions);
entry.location = {.poolIndex = static_cast<uint32_t>(LocationValues::LOCATION_AT_RUN_TIME),
@@ -89,7 +92,8 @@
}
mOperations.resize(operationIndex + 1);
auto& entry = mOperations[operationIndex];
- entry.type = static_cast<OperationType>(type);
+ entry.opTuple = {static_cast<OperationType>(type),
+ static_cast<OperandType>(mOperands[inputs->data[0]].type)};
setFromIntList(&entry.inputs, *inputs);
setFromIntList(&entry.outputs, *outputs);
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 57e29aa..c6f2fae 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -40,8 +40,8 @@
"ANEURALNETWORKS_TENSOR_FLOAT16 may have changed");
static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 9,
"ANEURALNETWORKS_TENSOR_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 == 10,
- "ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 may have changed");
+static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 10,
+ "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM may have changed");
// Ensure that the constants are compatible with the values defined in the hal files.
static_assert(static_cast<uint32_t>(OperandType::FLOAT16) == ANEURALNETWORKS_FLOAT16,
@@ -64,9 +64,9 @@
"TENSOR_FLOAT16 != ANEURALNETWORKS_TENSOR_FLOAT16");
static_assert(static_cast<uint32_t>(OperandType::TENSOR_FLOAT32) == ANEURALNETWORKS_TENSOR_FLOAT32,
"TENSOR_FLOAT32 != ANEURALNETWORKS_TENSOR_FLOAT32");
-static_assert(static_cast<uint32_t>(OperandType::TENSOR_SYMMETRICAL_QUANT8) ==
- ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8,
- "TENSOR_SYMMETRICAL_QUANT8 != ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8");
+static_assert(static_cast<uint32_t>(OperandType::TENSOR_QUANT8_ASYMM) ==
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+ "TENSOR_QUANT8_ASYMM != ANEURALNETWORKS_TENSOR_QUANT8_ASYMM");
using namespace android::nn;
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index b09ad5e..bea2948 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -44,7 +44,7 @@
ANEURALNETWORKS_UINT32 = 7,
ANEURALNETWORKS_TENSOR_FLOAT16 = 8,
ANEURALNETWORKS_TENSOR_FLOAT32 = 9,
- ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 = 10,
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 10,
ANEURALNETWORKS_NUMBER_DATA_TYPES = 11
};
@@ -210,7 +210,7 @@
* TODO: revisit once we have a final representation for quantization.
*/
float scale;
- float offset;
+ int32_t offset;
} ANeuralNetworksOperandType;
/**
diff --git a/runtime/include/NeuralNetworksWrapper.h b/runtime/include/NeuralNetworksWrapper.h
index 5b8e30c..b9b2a14 100644
--- a/runtime/include/NeuralNetworksWrapper.h
+++ b/runtime/include/NeuralNetworksWrapper.h
@@ -21,6 +21,8 @@
#include "NeuralNetworks.h"
+#include <limits>
+#include <math.h>
#include <vector>
namespace android {
@@ -38,7 +39,7 @@
UINT32 = ANEURALNETWORKS_UINT32,
TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
- TENSOR_SYMMETRICAL_QUANT8 = ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8,
+ TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
};
enum class ExecutePreference {
@@ -62,9 +63,34 @@
OperandType(Type type, const std::vector<uint32_t>& d) : dimensions(d) {
operandType.type = static_cast<uint32_t>(type);
+ operandType.scale = 0.0f;
+ operandType.offset = 0;
+
operandType.dimensions.count = static_cast<uint32_t>(dimensions.size());
operandType.dimensions.data = dimensions.data();
}
+
+    // Delegate so type/dimensions are initialized; the old form constructed
+    // and discarded a temporary, leaving *this uninitialized.
+    OperandType(Type type, float scale,
+                const std::vector<uint32_t>& d) : OperandType(type, d) {
+        operandType.scale = scale;
+    }
+
+    OperandType(Type type, float f_min, float f_max,
+                const std::vector<uint32_t>& d) : OperandType(type, d) {
+        uint8_t q_min = std::numeric_limits<uint8_t>::min();
+        uint8_t q_max = std::numeric_limits<uint8_t>::max();
+        float range = q_max - q_min;
+        float scale = (f_max - f_min) / range;
+        // Clamp in float space BEFORE narrowing; casting to uint8_t first
+        // would wrap out-of-range zero-points instead of saturating them.
+        int32_t offset = static_cast<int32_t>(
+                fmin(q_max, fmax(q_min, round(q_min - f_min / scale))));
+
+        operandType.scale = scale;
+        operandType.offset = offset;
+    }
+
};
inline Result Initialize() {