Initial implementation of the following quantized ops.
- CONV_QUANT8
- DEPTHWISE_CONV_QUANT8
- AVERAGE_POOL_QUANT8
- MAX_POOL_QUANT8
- LOGISTIC_QUANT8
Additionally, added functions to plumb through quantization
parameters.
Bug: 63905942
Test: mm
Test: end-to-end MobileNet quantized test pass
Change-Id: Ib2753c68bf2c51467ae1c158b45541bcfdf10789
diff --git a/runtime/ModelBuilder.cpp b/runtime/ModelBuilder.cpp
index 0d6fce7..1d386c0 100644
--- a/runtime/ModelBuilder.cpp
+++ b/runtime/ModelBuilder.cpp
@@ -40,6 +40,9 @@
mOperands.resize(idx + 1);
auto& entry = mOperands[idx];
entry.type = static_cast<OperandType>(type.type);
+ entry.scale = type.scale;
+ entry.zeroPoint = type.offset;
+
// TODO entry.numberOfConsumers = 0;
setFromIntList(&entry.dimensions, type.dimensions);
entry.location = {.poolIndex = static_cast<uint32_t>(LocationValues::LOCATION_AT_RUN_TIME),
@@ -89,7 +92,8 @@
}
mOperations.resize(operationIndex + 1);
auto& entry = mOperations[operationIndex];
- entry.type = static_cast<OperationType>(type);
+ entry.opTuple = {static_cast<OperationType>(type),
+ static_cast<OperandType>(mOperands[inputs->data[0]].type)};
setFromIntList(&entry.inputs, *inputs);
setFromIntList(&entry.outputs, *outputs);
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 57e29aa..c6f2fae 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -40,8 +40,8 @@
"ANEURALNETWORKS_TENSOR_FLOAT16 may have changed");
static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 9,
"ANEURALNETWORKS_TENSOR_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 == 10,
- "ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 may have changed");
+static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 10,
+ "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM may have changed");
// Ensure that the constants are compatible with the values defined in the hal files.
static_assert(static_cast<uint32_t>(OperandType::FLOAT16) == ANEURALNETWORKS_FLOAT16,
@@ -64,9 +64,9 @@
"TENSOR_FLOAT16 != ANEURALNETWORKS_TENSOR_FLOAT16");
static_assert(static_cast<uint32_t>(OperandType::TENSOR_FLOAT32) == ANEURALNETWORKS_TENSOR_FLOAT32,
"TENSOR_FLOAT32 != ANEURALNETWORKS_TENSOR_FLOAT32");
-static_assert(static_cast<uint32_t>(OperandType::TENSOR_SYMMETRICAL_QUANT8) ==
- ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8,
- "TENSOR_SYMMETRICAL_QUANT8 != ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8");
+static_assert(static_cast<uint32_t>(OperandType::TENSOR_QUANT8_ASYMM) ==
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+ "TENSOR_QUANT8_ASYMM != ANEURALNETWORKS_TENSOR_QUANT8_ASYMM");
using namespace android::nn;
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index b09ad5e..bea2948 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -44,7 +44,7 @@
ANEURALNETWORKS_UINT32 = 7,
ANEURALNETWORKS_TENSOR_FLOAT16 = 8,
ANEURALNETWORKS_TENSOR_FLOAT32 = 9,
- ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8 = 10,
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 10,
ANEURALNETWORKS_NUMBER_DATA_TYPES = 11
};
@@ -210,7 +210,7 @@
* TODO: revisit once we have a final representation for quantization.
*/
float scale;
- float offset;
+ int32_t offset;
} ANeuralNetworksOperandType;
/**
diff --git a/runtime/include/NeuralNetworksWrapper.h b/runtime/include/NeuralNetworksWrapper.h
index 5b8e30c..b9b2a14 100644
--- a/runtime/include/NeuralNetworksWrapper.h
+++ b/runtime/include/NeuralNetworksWrapper.h
@@ -21,6 +21,8 @@
#include "NeuralNetworks.h"
+#include <limits>
+#include <math.h>
#include <vector>
namespace android {
@@ -38,7 +39,7 @@
UINT32 = ANEURALNETWORKS_UINT32,
TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
- TENSOR_SYMMETRICAL_QUANT8 = ANEURALNETWORKS_TENSOR_SYMMETRICAL_QUANT8,
+ TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
};
enum class ExecutePreference {
@@ -62,9 +63,34 @@
OperandType(Type type, const std::vector<uint32_t>& d) : dimensions(d) {
operandType.type = static_cast<uint32_t>(type);
+ operandType.scale = 0.0f;
+ operandType.offset = 0;
+
operandType.dimensions.count = static_cast<uint32_t>(dimensions.size());
operandType.dimensions.data = dimensions.data();
}
+
+    // Delegate so type/dimensions are initialized; the old form constructed
+    // and discarded a temporary, leaving *this uninitialized.
+    OperandType(Type type, float scale,
+                const std::vector<uint32_t>& d) : OperandType(type, d) {
+        operandType.scale = scale;
+    }
+
+    OperandType(Type type, float f_min, float f_max,
+                const std::vector<uint32_t>& d) : OperandType(type, d) {
+        uint8_t q_min = std::numeric_limits<uint8_t>::min();
+        uint8_t q_max = std::numeric_limits<uint8_t>::max();
+        float range = q_max - q_min;
+        float scale = (f_max - f_min) / range;
+        // Clamp in float space BEFORE narrowing; casting to uint8_t first
+        // would wrap out-of-range zero-points instead of saturating them.
+        int32_t offset = static_cast<int32_t>(
+                fmin(q_max, fmax(q_min, round(q_min - f_min / scale))));
+
+        operandType.scale = scale;
+        operandType.offset = offset;
+    }
+
};
inline Result Initialize() {