Add folders for lq.quantize and lq.dequantize (#654)

lgeiger · Tombana · web-flow · commit 3e1b148e3fce · 2021-06-08T17:05:08.000+01:00
Co-Authored-By: Tom Bannink <Tombana@users.noreply.github.com>

Co-authored-by: Tom Bannink <Tombana@users.noreply.github.com>
diff --git a/configure.py b/configure.py
@@ -72,6 +72,7 @@ def get_input(question):
 
 def get_from_env_or_user_or_default(environ_cp, var_name, ask_for_var, var_default):
     """Get var_name either from env, or user or default.
+
     If var_name has been set as environment variable, use the preset value, else
     ask for user input. If no input is provided, the default is used.
     Args:
@@ -101,6 +102,7 @@ def get_var(
     no_reply=None,
 ):
     """Get boolean input from user.
+
     If var_name is not set in env, ask user to enable query_item or not. If the
     response is empty, use the default.
     Args:
@@ -380,6 +382,7 @@ def setup_python(environ_cp):
 
 def set_cc_opt_flags(environ_cp):
     """Set up architecture-dependent optimization flags.
+
     Also append CC optimization flags to bazel.rc.
     Args:
       environ_cp: copy of the os.environ.
diff --git a/larq_compute_engine/core/BUILD b/larq_compute_engine/core/BUILD
@@ -7,6 +7,9 @@ cc_library(
     hdrs = [
         "types.h",
     ],
+    deps = [
+        "@org_tensorflow//tensorflow/lite/kernels/internal:cppmath",
+    ],
 )
 
 cc_library(
diff --git a/larq_compute_engine/core/types.h b/larq_compute_engine/core/types.h
@@ -6,6 +6,8 @@
 #include <limits>
 #include <type_traits>
 
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+
 namespace compute_engine {
 namespace core {
 
@@ -17,6 +19,12 @@ namespace core {
 #define LCE_UNLIKELY(condition) (condition)
 #endif
 
+// FORCE_INLINE: on compilers that define __GNUC__ this expands to `inline`
+// plus the `always_inline` attribute; on every other compiler it is just a
+// plain `inline`.
+#if defined(__GNUC__)
+#define FORCE_INLINE __attribute__((always_inline)) inline
+#else
+#define FORCE_INLINE inline
+#endif
+
 // Check that 0 <= index < limit using a single comparison, assuming
 // that 0 <= limit if Index is signed.  Intended for use in performance
 // critical contexts where 0 <= index < limit is almost always true.
@@ -38,6 +46,53 @@ inline int xor_popcount(const TBitpacked& a, const TBitpacked& b) {
   return std::bitset<bitpacking_bitwidth>(a ^ b).count();
 }
 
+// Saturating conversion from int32 to int8: inputs below -128 become -128 and
+// inputs above 127 become 127; everything in between is returned unchanged.
+inline std::int8_t saturate(std::int32_t x) {
+#ifdef __arm__
+  // `ssat` with an 8-bit width performs the signed saturation directly.
+  std::int8_t y;
+  asm("ssat %[y], #8, %[x]\n" : [y] "=r"(y) : [x] "r"(x));
+  return y;
+#else
+  constexpr std::int32_t kLowest = std::numeric_limits<std::int8_t>::lowest();
+  constexpr std::int32_t kHighest = std::numeric_limits<std::int8_t>::max();
+  const std::int32_t clamped = std::max(kLowest, std::min(x, kHighest));
+  return static_cast<std::int8_t>(clamped);
+#endif
+}
+
+// Shifts `x` right by the compile-time amount `shift` and then saturates the
+// result to the int8 range [-128, 127].
+template <int shift>
+inline std::int8_t shift_saturate(std::int32_t x) {
+#ifdef __arm__
+  // `ssat` accepts an `asr` operand, so shift and saturation are one
+  // instruction; `shift` is passed as an immediate.
+  std::int8_t y;
+  asm("ssat %[y], #8, %[x], asr %[shift]\n"
+      : [y] "=r"(y)
+      : [x] "r"(x), [shift] "i"(shift));
+  return y;
+#else
+  constexpr std::int32_t kLowest = std::numeric_limits<std::int8_t>::lowest();
+  constexpr std::int32_t kHighest = std::numeric_limits<std::int8_t>::max();
+  const std::int32_t shifted = x >> shift;
+  const std::int32_t clamped = std::max(kLowest, std::min(shifted, kHighest));
+  return static_cast<std::int8_t>(clamped);
+#endif
+}
+
+// Round-to-nearest. Handling of ties is allowed to be anything, as discussed in
+// https://github.com/tensorflow/tensorflow/issues/25087
+//
+// Returns `x` rounded to an integer as a 32-bit signed value. On Thumb targets
+// with a hardware VFP unit a two-instruction assembly sequence is used; all
+// other targets defer to `tflite::TfLiteRound`.
+inline std::int32_t round(float x) {
+#if defined(__thumb__) && defined(__VFP_FP__) && !defined(__SOFTFP__)
+  // The `vcvtr` instruction follows the IEEE 754 rounding standard which
+  // rounds halfway points to the nearest *even* integer.
+  // NOTE(review): `vcvtr` honours the rounding mode configured in the FPSCR;
+  // the statement above presumes the default round-to-nearest mode is active.
+  std::int32_t y;
+  // `vcvtr` converts in place, overwriting the FPU register that holds `x`.
+  // The operand is therefore declared read-write ("+t"): the compiler assumes
+  // input-only operands still hold their original value after the asm.
+  // The "t" means `x` will be in an FPU register.
+  asm("vcvtr.s32.f32 %[x], %[x] \n"
+      "vmov %[y], %[x] \n"
+      : [y] "=r"(y), [x] "+t"(x));
+  return y;
+#else
+  return ::tflite::TfLiteRound(x);
+#endif
+}
+
 template <typename T, typename S>
 constexpr T CeilDiv(T a, S b) {
   return (a + b - 1) / b;
diff --git a/larq_compute_engine/mlir/BUILD b/larq_compute_engine/mlir/BUILD
@@ -134,6 +134,21 @@ gentbl(
     ],
 )
 
+# Constant-folding helpers that bitpack / unpack MLIR dense attributes.
+cc_library(
+    name = "larq_compute_engine_bitpack",
+    srcs = [
+        "transforms/bitpack.cc",
+    ],
+    hdrs = [
+        "transforms/bitpack.h",
+    ],
+    deps = [
+        "//larq_compute_engine/core:types",
+        "//larq_compute_engine/core/bitpacking:bitpack",
+        "@llvm-project//mlir:IR",
+        # bitpack.cc includes mlir/Dialect/Quant/QuantTypes.h directly.
+        "@llvm-project//mlir:QuantOps",
+    ],
+)
+
 cc_library(
     name = "larq_compute_engine",
     srcs = [
@@ -147,6 +162,7 @@ cc_library(
         "transforms/passes.h",
     ],
     deps = [
+        ":larq_compute_engine_bitpack",
         "//larq_compute_engine/core/bitpacking:bitpack",
         "@flatbuffers",
         "@llvm-project//mlir:QuantOps",
@@ -225,8 +241,7 @@ cc_library(
     ],
     deps = [
         ":larq_compute_engine",
-        "//larq_compute_engine/core:types",
-        "//larq_compute_engine/core/bitpacking:bitpack",
+        ":larq_compute_engine_bitpack",
         "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite",
         "@org_tensorflow//tensorflow/compiler/mlir/tensorflow",
     ],
diff --git a/larq_compute_engine/mlir/ir/lce_ops.cc b/larq_compute_engine/mlir/ir/lce_ops.cc
@@ -2,6 +2,7 @@
 
 #include "flatbuffers/flexbuffers.h"
 #include "larq_compute_engine/core/bitpacking/bitpack.h"
+#include "larq_compute_engine/mlir/transforms/bitpack.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 static tflite::Padding ConvertPaddingAttr(llvm::StringRef str) {
@@ -67,6 +68,18 @@ void QuantizeOp::build(OpBuilder& builder, OperationState& state, Value x) {
   state.addTypes(RankedTensorType::get(shape, builder.getIntegerType(32)));
 }
 
+// Folds lq.Quantize when its operand is a known constant by bitpacking that
+// constant; yields a null result (no fold) when the operand is not constant.
+OpFoldResult QuantizeOp::fold(ArrayRef<Attribute> operands) {
+  Attribute input = operands[0];
+  if (!input) return nullptr;
+
+  mlir::OpBuilder builder(getOperation());
+  return mlir::TFL::Bitpack(&builder, input);
+}
+
+// Folds lq.Dequantize when its operand is a known constant by unpacking that
+// constant into the op's result type; yields a null result (no fold) when the
+// operand is not constant.
+OpFoldResult DequantizeOp::fold(ArrayRef<Attribute> operands) {
+  Attribute packed = operands[0];
+  if (!packed) return nullptr;
+
+  return mlir::TFL::Unpack(packed, getType().cast<ShapedType>());
+}
+
 void LarqDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
diff --git a/larq_compute_engine/mlir/ir/lce_ops.td b/larq_compute_engine/mlir/ir/lce_ops.td
@@ -82,6 +82,8 @@ Converts floating point or integer tensors to binarized bitpacked tensors.
   );
 
   let builders = [OpBuilder<(ins "Value":$x)>];
+
+  let hasFolder = 1;
 }
 
 def LQ_DequantizeOp : LQ_Op<"Dequantize", [NoSideEffect]> {
@@ -98,6 +100,8 @@ Converts binarized bitpacked tensors to floating point or integer tensors.
   let results = (outs
     TensorOf<[BF16, F16, F32, F64, I32, I64, QI8, QI16]>:$y
   );
+
+  let hasFolder = 1;
 }
 
 def LQ_Bconv2dOp : LQ_Op<"Bconv2d", [NoSideEffect]> {
diff --git a/larq_compute_engine/mlir/lce_mlir_opt.cc b/larq_compute_engine/mlir/lce_mlir_opt.cc
@@ -2,10 +2,12 @@
 #include "mlir/Dialect/Quant/QuantOps.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Support/MlirOptMain.h"
+#include "mlir/Transforms/Passes.h"
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 
 int main(int argc, char** argv) {
+  mlir::registerTransformsPasses();
   mlir::DialectRegistry registry;
   registry.insert<mlir::StandardOpsDialect, mlir::quant::QuantizationDialect,
                   mlir::TF::TensorFlowDialect, mlir::TFL::TensorFlowLiteDialect,
diff --git a/larq_compute_engine/mlir/tests/const-fold.mlir b/larq_compute_engine/mlir/tests/const-fold.mlir
@@ -0,0 +1,27 @@
+// RUN: lce-tf-opt %s -canonicalize | FileCheck %s
+
+// Constant folding of lq.Quantize: each row of 32 positive floats is expected
+// to fold into the packed i32 word 0, and each row of 32 negative floats into
+// the packed i32 word -1.
+// CHECK-LABEL: @quantize
+func @quantize() -> (tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>) {
+  %pos = constant dense< 0.5> : tensor<1x1x2x32xf32>
+  %neg = constant dense<-0.5> : tensor<1x1x2x32xf32>
+  %0 = "lq.Quantize"(%pos) {} : (tensor<1x1x2x32xf32>) -> tensor<1x1x2x1xi32>
+  %1 = "lq.Quantize"(%neg) {} : (tensor<1x1x2x32xf32>) -> tensor<1x1x2x1xi32>
+  return %0, %1 : tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>
+
+  // CHECK: %[[neg:.*]] = constant dense<-1> : tensor<1x1x2x1xi32>
+  // CHECK: %[[pos:.*]] = constant dense<0> : tensor<1x1x2x1xi32>
+  // CHECK: return %[[pos]], %[[neg]] : tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>
+}
+
+// Constant folding of lq.Dequantize: the packed i32 word 0 is expected to
+// fold into 32 floats equal to 1.0, and the packed i32 word -1 into 32 floats
+// equal to -1.0.
+// CHECK-LABEL: @dequantize
+func @dequantize() -> (tensor<1x1x2x32xf32>, tensor<1x1x2x32xf32>) {
+  %pos = constant dense<0> : tensor<1x1x2x1xi32>
+  %neg = constant dense<-1> : tensor<1x1x2x1xi32>
+  %0 = "lq.Dequantize"(%pos) {} : (tensor<1x1x2x1xi32>) -> tensor<1x1x2x32xf32>
+  %1 = "lq.Dequantize"(%neg) {} : (tensor<1x1x2x1xi32>) -> tensor<1x1x2x32xf32>
+  return %0, %1 : tensor<1x1x2x32xf32>, tensor<1x1x2x32xf32>
+
+  // CHECK: %[[neg:.*]] = constant dense<-1.000000e+00> : tensor<1x1x2x32xf32>
+  // CHECK: %[[pos:.*]] = constant dense<1.000000e+00> : tensor<1x1x2x32xf32>
+  // CHECK: return %[[pos]], %[[neg]] : tensor<1x1x2x32xf32>, tensor<1x1x2x32xf32>
+}
diff --git a/larq_compute_engine/mlir/transforms/bitpack.cc b/larq_compute_engine/mlir/transforms/bitpack.cc
@@ -0,0 +1,111 @@
+#include "larq_compute_engine/mlir/transforms/bitpack.h"
+
+#include <cmath>
+#include <vector>
+
+#include "larq_compute_engine/core/bitpacking/bitpack.h"
+#include "larq_compute_engine/core/types.h"
+#include "mlir/Dialect/Quant/QuantTypes.h"
+
+namespace mlir {
+namespace TFL {
+
+using compute_engine::core::bitpacking_bitwidth;
+using compute_engine::core::round;
+using compute_engine::core::saturate;
+using compute_engine::core::TBitpacked;
+using namespace compute_engine::core::bitpacking;
+
+// Constant-folds the bitpacking of a rank-4 f32 tensor along its last
+// dimension.
+//
+// The first three dimensions of `x` are flattened into rows; each row of
+// `shape[3]` floats is packed by `bitpack_matrix` into
+// `GetBitpackedSize(shape[3])` words of type `TBitpacked`.
+//
+// Args:
+//   builder: used to create the integer element type of the result.
+//   x: the attribute to pack; may be null.
+// Returns:
+//   A dense attribute of shape {shape[0], shape[1], shape[2], packed_channels}
+//   with `bitpacking_bitwidth`-bit integer elements, or a null attribute when
+//   `x` is null, is not a dense elements attribute, is not rank 4, or does not
+//   have f32 elements.
+DenseElementsAttr Bitpack(mlir::Builder* builder, Attribute x) {
+  if (!x) return nullptr;
+
+  // Only dense constants can be read element by element below.
+  auto dense_attr = x.dyn_cast<DenseElementsAttr>();
+  if (!dense_attr) return nullptr;
+
+  // ShapedType is something like tensor<1x2x3xf32> and element_type is f32
+  auto shaped_type = dense_attr.getType();
+  auto shape = shaped_type.getShape();
+  auto element_type = shaped_type.getElementType();
+
+  // The indexing below reads shape[0..3]; refuse to fold any other rank
+  // instead of reading past the end of `shape`.
+  if (shape.size() != 4) return nullptr;
+
+  // Constant-folding the bitpacking of int8 tensors is currently not
+  // supported.
+  if (!element_type.isF32()) return nullptr;
+
+  int num_rows = shape[0] * shape[1] * shape[2];
+  int unpacked_channels = shape[3];
+  int packed_channels = GetBitpackedSize(unpacked_channels);
+
+  // Materialize the attribute into a contiguous buffer for bitpack_matrix.
+  std::vector<float> old_values;
+  old_values.reserve(num_rows * unpacked_channels);
+  for (float value : dense_attr.getValues<float>()) {
+    old_values.push_back(value);
+  }
+  assert(old_values.size() ==
+         static_cast<std::size_t>(num_rows * unpacked_channels));
+
+  std::vector<TBitpacked> new_values(num_rows * packed_channels);
+  bitpack_matrix(old_values.data(), num_rows, unpacked_channels,
+                 new_values.data());
+
+  RankedTensorType out_tensor_type =
+      RankedTensorType::get({shape[0], shape[1], shape[2], packed_channels},
+                            builder->getIntegerType(bitpacking_bitwidth));
+
+  return DenseElementsAttr::get<TBitpacked>(out_tensor_type, new_values);
+}
+
+// Constant-folds the unpacking of a rank-4 bitpacked tensor into
+// `result_type`.
+//
+// Args:
+//   x: the bitpacked attribute to expand; may be null.
+//   result_type: the tensor type of the unpacked result. Its element type must
+//     be f32 or a uniform quantized type with 8-bit storage.
+// Returns:
+//   A dense attribute of type `result_type`, or a null attribute when `x` is
+//   null or not a dense elements attribute, `result_type` has no static shape,
+//   either shape is not rank 4, the shapes do not correspond (leading three
+//   dimensions equal, last input dimension equal to the bitpacked size of the
+//   last output dimension), or the result element type is unsupported.
+DenseElementsAttr Unpack(Attribute x, ShapedType result_type) {
+  if (!x) return nullptr;
+  if (!result_type.hasStaticShape()) return nullptr;
+
+  // Only dense constants can be read element by element below.
+  auto packed_attr = x.dyn_cast<DenseElementsAttr>();
+  if (!packed_attr) return nullptr;
+
+  auto input_shape = packed_attr.getType().getShape();
+  auto output_shape = result_type.getShape();
+  auto output_type = result_type.getElementType();
+
+  // The indexing below reads dimensions 0..3 of both shapes.
+  if (input_shape.size() != 4 || output_shape.size() != 4) return nullptr;
+
+  // Only f32 and int8-stored uniform quantized results can be produced here.
+  // Any other element type the op allows is left unfolded rather than being
+  // force-cast to a quantized type.
+  const bool is_float_output = output_type.isF32();
+  auto quant_type = output_type.dyn_cast<mlir::quant::UniformQuantizedType>();
+  if (!is_float_output &&
+      (!quant_type || quant_type.getStorageTypeIntegralWidth() != 8)) {
+    return nullptr;
+  }
+
+  int num_rows = output_shape[0] * output_shape[1] * output_shape[2];
+  int unpacked_channels = output_shape[3];
+  int packed_channels = GetBitpackedSize(unpacked_channels);
+  if (input_shape[0] != output_shape[0] || input_shape[1] != output_shape[1] ||
+      input_shape[2] != output_shape[2] || input_shape[3] != packed_channels) {
+    return nullptr;
+  }
+
+  // Materialize the attribute into a contiguous buffer for unpack_matrix.
+  std::vector<TBitpacked> old_values;
+  old_values.reserve(num_rows * packed_channels);
+  for (TBitpacked word : packed_attr.getValues<TBitpacked>()) {
+    old_values.push_back(word);
+  }
+  assert(old_values.size() ==
+         static_cast<std::size_t>(num_rows * packed_channels));
+
+  if (is_float_output) {
+    std::vector<float> new_values(num_rows * unpacked_channels);
+
+    unpack_matrix(old_values.data(), num_rows, unpacked_channels,
+                  new_values.data());
+
+    return DenseElementsAttr::get<float>(result_type, new_values);
+  }
+
+  const double scale = quant_type.getScale();
+  const int zero_point = quant_type.getZeroPoint();
+
+  // Quantized representations of +1.0 (for a 0 bit) and -1.0 (for a 1 bit).
+  std::int8_t zero_bit_result = saturate(zero_point + round(+1.0 / scale));
+  std::int8_t one_bit_result = saturate(zero_point + round(-1.0 / scale));
+
+  std::vector<std::int8_t> new_values(num_rows * unpacked_channels);
+
+  unpack_matrix(old_values.data(), num_rows, unpacked_channels,
+                new_values.data(), zero_bit_result, one_bit_result);
+
+  return DenseElementsAttr::get<std::int8_t>(result_type, new_values);
+}
+
+}  // namespace TFL
+}  // namespace mlir
diff --git a/larq_compute_engine/mlir/transforms/bitpack.h b/larq_compute_engine/mlir/transforms/bitpack.h
@@ -0,0 +1,18 @@
+#ifndef LARQ_COMPUTE_ENGINE_MLIR_TRANSFORMS_BITPACK_H_
+#define LARQ_COMPUTE_ENGINE_MLIR_TRANSFORMS_BITPACK_H_
+
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+
+namespace mlir {
+namespace TFL {
+
+// Bitpacks the constant tensor attribute `x` along its fourth dimension and
+// returns the packed words as a dense integer attribute whose element type is
+// created through `builder`. Returns a null attribute if `x` is null or its
+// element type is not f32.
+DenseElementsAttr Bitpack(mlir::Builder* builder, Attribute x);
+
+// Expands the bitpacked constant tensor attribute `x` into a dense attribute
+// of type `result_type` (f32 elements, or int8 values for a uniform quantized
+// element type). Returns a null attribute if `x` is null, `result_type` has
+// no static shape, or the shapes of `x` and `result_type` do not correspond.
+DenseElementsAttr Unpack(Attribute x, ShapedType result_type);
+
+}  // namespace TFL
+}  // namespace mlir
+
+#endif
diff --git a/larq_compute_engine/mlir/transforms/bitpack_weights.cc b/larq_compute_engine/mlir/transforms/bitpack_weights.cc
diff --git a/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td b/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,9 @@ cc_library(`
`7`	`7`	`hdrs = [`
`8`	`8`	`"types.h",`
`9`	`9`	`],`
	`10`	`+ deps = [`
	`11`	`+ "@org_tensorflow//tensorflow/lite/kernels/internal:cppmath",`
	`12`	`+ ],`
`10`	`13`	`)`
`11`	`14`
`12`	`15`	`cc_library(`
Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,8 @@ Converts floating point or integer tensors to binarized bitpacked tensors.`
`82`	`82`	`);`
`83`	`83`
`84`	`84`	`let builders = [OpBuilder<(ins "Value":$x)>];`
	`85`	`+`
	`86`	`+ let hasFolder = 1;`
`85`	`87`	`}`
`86`	`88`
`87`	`89`	`def LQ_DequantizeOp : LQ_Op<"Dequantize", [NoSideEffect]> {`
`@@ -98,6 +100,8 @@ Converts binarized bitpacked tensors to floating point or integer tensors.`
`98`	`100`	`let results = (outs`
`99`	`101`	`TensorOf<[BF16, F16, F32, F64, I32, I64, QI8, QI16]>:$y`
`100`	`102`	`);`
	`103`	`+`
	`104`	`+ let hasFolder = 1;`
`101`	`105`	`}`
`102`	`106`
`103`	`107`	`def LQ_Bconv2dOp : LQ_Op<"Bconv2d", [NoSideEffect]> {`