Bazel BUILD file for XNNPACK
- Support Linux (x86-64)
- Support Android (ARMv7, ARM64, x86, x86-64)
PiperOrigin-RevId: 272677580
diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 0000000..ea28201
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,46 @@
+# Basic build settings
+build --jobs 128
+
+# Sets the default Apple platform to macOS.
+build --apple_platform_type=macos
+
+# Android configs.
+build:android --crosstool_top=//external:android/crosstool
+build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:android --linkopt=-ldl
+build:android --linkopt=-Wl,--gc-sections
+
+build:android_arm --config=android
+build:android_arm --cpu=armeabi-v7a
+build:android_arm --fat_apk_cpu=armeabi-v7a
+
+build:android_arm64 --config=android
+build:android_arm64 --cpu=arm64-v8a
+build:android_arm64 --fat_apk_cpu=arm64-v8a
+
+# iOS configs.
+build:ios --apple_platform_type=ios
+
+build:ios_i386 --config=ios
+build:ios_i386 --cpu=ios_i386
+build:ios_i386 --watchos_cpus=i386
+
+build:ios_x86_64 --config=ios
+build:ios_x86_64 --cpu=ios_x86_64
+build:ios_x86_64 --watchos_cpus=i386
+
+build:ios_armv7 --config=ios
+build:ios_armv7 --cpu=ios_armv7
+build:ios_armv7 --watchos_cpus=armv7k
+
+build:ios_arm64 --config=ios
+build:ios_arm64 --cpu=ios_arm64
+build:ios_arm64 --watchos_cpus=armv7k
+
+build:ios_arm64e --config=ios
+build:ios_arm64e --cpu=ios_arm64e
+build:ios_arm64e --watchos_cpus=armv7k
+
+build:ios_fat --config=ios
+build:ios_fat --ios_multi_cpus=armv7,arm64
+build:ios_fat --watchos_cpus=armv7k
diff --git a/BUILD b/BUILD
new file mode 100644
index 0000000..7bbec91
--- /dev/null
+++ b/BUILD
@@ -0,0 +1,1580 @@
+# Copyright 2019 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Description:
+# XNNPACK - optimized floating-point neural network operators library
+
+load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_benchmark", "xnnpack_binary", "xnnpack_cc_library", "xnnpack_min_size_copts", "xnnpack_optional_armcl_copts", "xnnpack_optional_armcl_deps", "xnnpack_optional_gemmlowp_copts", "xnnpack_optional_gemmlowp_deps", "xnnpack_optional_ruy_copts", "xnnpack_optional_ruy_deps", "xnnpack_optional_tflite_copts", "xnnpack_optional_tflite_deps", "xnnpack_std_copts", "xnnpack_unit_test", "xnnpack_visibility")
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+OPERATOR_BENCHMARK_DEPS = [
+ ":XNNPACK",
+ ":bench_utils",
+ "@cpuinfo",
+ "@pthreadpool",
+]
+
+MICROKERNEL_BENCHMARK_DEPS = [
+ ":ukernels",
+ ":bench_utils",
+ "@cpuinfo",
+ "@FP16",
+ "@pthreadpool",
+]
+
+MICROKERNEL_TEST_DEPS = [
+ ":ukernels",
+ "@cpuinfo",
+ "@FP16",
+ "@pthreadpool",
+]
+
+OPERATOR_TEST_DEPS = [
+ ":XNNPACK",
+ "@FP16",
+ "@pthreadpool",
+]
+
+OPERATOR_SRCS = [
+ "src/add.c",
+ "src/argmax-pooling.c",
+ "src/average-pooling.c",
+ "src/channel-pad.c",
+ "src/channel-shuffle.c",
+ "src/clamp.c",
+ "src/convolution-spnchw.c",
+ "src/convolution.c",
+ "src/deconvolution.c",
+ "src/fully-connected.c",
+ "src/global-average-pooling-spnchw.c",
+ "src/global-average-pooling.c",
+ "src/hardswish.c",
+ "src/leaky-relu.c",
+ "src/max-pooling.c",
+ "src/prelu.c",
+ "src/sigmoid.c",
+ "src/softargmax.c",
+ "src/unpooling.c",
+]
+
+SCALAR_UKERNELS = [
+ "src/f32-argmaxpool/mp9p8q-scalar.c",
+ "src/f32-argmaxpool/up4-scalar.c",
+ "src/f32-argmaxpool/up9-scalar.c",
+ "src/f32-avgpool/mp9p8q-scalar.c",
+ "src/f32-avgpool/up9-scalar.c",
+ "src/f32-clamp/scalar.c",
+ "src/f32-dwconv/up1x25-scalar.c",
+ "src/f32-dwconv/up1x4-scalar.c",
+ "src/f32-dwconv/up1x9-scalar.c",
+ "src/f32-gavgpool/mp7p7q-scalar.c",
+ "src/f32-gavgpool/up7-scalar.c",
+ "src/f32-gemm/1x4-scalar.c",
+ "src/f32-gemm/2x4-scalar.c",
+ "src/f32-gemm/4x2-scalar.c",
+ "src/f32-gemm/4x4-scalar.c",
+ "src/f32-gemminc/1x4-scalar.c",
+ "src/f32-gemminc/2x4-scalar.c",
+ "src/f32-gemminc/4x4-scalar.c",
+ "src/f32-hswish/scalar.c",
+ "src/f32-igemm/1x4-scalar.c",
+ "src/f32-igemm/2x4-scalar.c",
+ "src/f32-igemm/4x2-scalar.c",
+ "src/f32-igemm/4x4-scalar.c",
+ "src/f32-maxpool/9p8q-scalar.c",
+ "src/f32-pavgpool/mp9p8q-scalar.c",
+ "src/f32-pavgpool/up9-scalar.c",
+ "src/f32-ppmm/2x4-scalar.c",
+ "src/f32-ppmm/3x3-scalar.c",
+ "src/f32-ppmm/4x2-scalar.c",
+ "src/f32-ppmm/4x4-scalar.c",
+ "src/f32-prelu/x4-scalar.c",
+ "src/f32-rmax/scalar.c",
+ "src/f32-spmm/1x1-scalar-pipelined.c",
+ "src/f32-spmm/1x1-scalar-unroll2.c",
+ "src/f32-spmm/1x1-scalar.c",
+ "src/f32-spmm/2x1-scalar-pipelined.c",
+ "src/f32-spmm/2x1-scalar-unroll2.c",
+ "src/f32-spmm/2x1-scalar.c",
+ "src/f32-spmm/4x1-scalar-pipelined.c",
+ "src/f32-spmm/4x1-scalar-unroll2.c",
+ "src/f32-spmm/4x1-scalar.c",
+ "src/f32-spmm/8x1-scalar-pipelined.c",
+ "src/f32-spmm/8x1-scalar-unroll2.c",
+ "src/f32-spmm/8x1-scalar.c",
+ "src/f32-vadd/scalar.c",
+ "src/f32-vmul/scalar.c",
+ "src/f32-vmulcaddc/c1-scalar-x2.c",
+ "src/f32-vsub/scalar.c",
+ "src/q8-avgpool/mp9p8q-scalar.c",
+ "src/q8-avgpool/up9-scalar.c",
+ "src/q8-dwconv/up1x9-scalar.c",
+ "src/q8-gavgpool/mp7p7q-scalar.c",
+ "src/q8-gavgpool/up7-scalar.c",
+ "src/q8-gemm/2x2-scalar.c",
+ "src/q8-igemm/2x2-scalar.c",
+ "src/q8-vadd/scalar.c",
+ "src/u8-clamp/scalar.c",
+ "src/u8-lut32norm/scalar.c",
+ "src/u8-maxpool/9p8q-scalar.c",
+ "src/u8-rmax/scalar.c",
+ "src/x32-packx/x2-scalar.c",
+ "src/x32-packx/x3-scalar.c",
+ "src/x32-packx/x4-scalar.c",
+ "src/x32-pad/x2-scalar.c",
+ "src/x32-unpool/scalar.c",
+ "src/x32-zip/x2-scalar.c",
+ "src/x32-zip/x3-scalar.c",
+ "src/x32-zip/x4-scalar.c",
+ "src/x32-zip/xm-scalar.c",
+ "src/x8-lut/scalar.c",
+ "src/x8-zip/x2-scalar.c",
+ "src/x8-zip/x3-scalar.c",
+ "src/x8-zip/x4-scalar.c",
+ "src/x8-zip/xm-scalar.c",
+]
+
+PSIMD_UKERNELS = [
+ "src/f32-argmaxpool/mp9p8q-psimd.c",
+ "src/f32-argmaxpool/up4-psimd.c",
+ "src/f32-argmaxpool/up9-psimd.c",
+ "src/f32-avgpool/mp9p8q-psimd.c",
+ "src/f32-avgpool/up9-psimd.c",
+ "src/f32-clamp/psimd.c",
+ "src/f32-dwconv/up4x25-psimd.c",
+ "src/f32-dwconv/up4x4-psimd.c",
+ "src/f32-dwconv/up4x9-psimd.c",
+ "src/f32-gavgpool/mp7p7q-psimd.c",
+ "src/f32-gavgpool/up7-psimd.c",
+ "src/f32-gemm/1x8-psimd-loadsplat.c",
+ "src/f32-gemm/1x8-psimd-splat.c",
+ "src/f32-gemm/1x8s4-psimd.c",
+ "src/f32-gemm/4x8-psimd-loadsplat.c",
+ "src/f32-gemm/4x8-psimd-splat.c",
+ "src/f32-gemm/4x8s4-psimd.c",
+ "src/f32-gemm/6x8-psimd-loadsplat.c",
+ "src/f32-gemm/6x8-psimd-splat.c",
+ "src/f32-gemm/6x8s4-psimd.c",
+ "src/f32-gemminc/1x8-psimd-loadsplat.c",
+ "src/f32-gemminc/1x8-psimd-splat.c",
+ "src/f32-gemminc/1x8s4-psimd.c",
+ "src/f32-gemminc/4x8-psimd-loadsplat.c",
+ "src/f32-gemminc/4x8-psimd-splat.c",
+ "src/f32-gemminc/4x8s4-psimd.c",
+ "src/f32-gemminc/6x8-psimd-loadsplat.c",
+ "src/f32-gemminc/6x8-psimd-splat.c",
+ "src/f32-gemminc/6x8s4-psimd.c",
+ "src/f32-hswish/psimd.c",
+ "src/f32-igemm/1x8-psimd-loadsplat.c",
+ "src/f32-igemm/1x8-psimd-splat.c",
+ "src/f32-igemm/1x8s4-psimd.c",
+ "src/f32-igemm/4x2c4-psimd.c",
+ "src/f32-igemm/4x8-psimd-loadsplat.c",
+ "src/f32-igemm/4x8-psimd-splat.c",
+ "src/f32-igemm/4x8s4-psimd.c",
+ "src/f32-igemm/6x8-psimd-loadsplat.c",
+ "src/f32-igemm/6x8-psimd-splat.c",
+ "src/f32-igemm/6x8s4-psimd.c",
+ "src/f32-maxpool/9p8q-psimd.c",
+ "src/f32-pavgpool/mp9p8q-psimd.c",
+ "src/f32-pavgpool/up9-psimd.c",
+ "src/f32-ppmm/4x8-psimd.c",
+ "src/f32-prelu/x4-psimd.c",
+ "src/f32-vadd/psimd.c",
+ "src/f32-vmul/psimd.c",
+ "src/f32-vmulcaddc/c4-psimd-x2.c",
+ "src/f32-vsub/psimd.c",
+ "src/x32-packx/x4-psimd.c",
+ "src/x32-pad/x2-psimd.c",
+ "src/x32-unpool/psimd.c",
+ "src/x32-zip/x2-psimd.c",
+ "src/x32-zip/x3-psimd.c",
+ "src/x32-zip/x4-psimd.c",
+ "src/x32-zip/xm-psimd.c",
+]
+
+# ISA-specific micro-kernels
+NEON_UKERNELS = [
+ "src/f32-avgpool/mp9p8q-neon.c",
+ "src/f32-avgpool/up9-neon.c",
+ "src/f32-clamp/neon.c",
+ "src/f32-dwconv/up4x9-neon.c",
+ "src/f32-gavgpool-spchw/neon-x4.c",
+ "src/f32-gavgpool/mp7p7q-neon.c",
+ "src/f32-gavgpool/up7-neon.c",
+ "src/f32-gemm/1x8-neon-ld64.c",
+ "src/f32-gemm/4x12-neon-ld64.c",
+ "src/f32-gemm/4x2-neon-ld64.c",
+ "src/f32-gemm/4x8-neon-ld128.c",
+ "src/f32-gemm/4x8-neon-ld64.c",
+ "src/f32-gemm/5x8-neon-ld64.c",
+ "src/f32-gemm/6x8-neon-ld64.c",
+ "src/f32-gemminc/1x8-neon-ld64.c",
+ "src/f32-gemminc/4x12-neon-ld64.c",
+ "src/f32-gemminc/4x8-neon-ld128.c",
+ "src/f32-gemminc/4x8-neon-ld64.c",
+ "src/f32-gemminc/5x8-neon-ld64.c",
+ "src/f32-gemminc/6x8-neon-ld64.c",
+ "src/f32-hswish/neon.c",
+ "src/f32-igemm/1x8-neon-ld64.c",
+ "src/f32-igemm/4x12-neon-ld64.c",
+ "src/f32-igemm/4x2-neon-ld64.c",
+ "src/f32-igemm/4x4-neon-ld64.c",
+ "src/f32-igemm/4x8-neon-ld128.c",
+ "src/f32-igemm/4x8-neon-ld64.c",
+ "src/f32-igemm/6x8-neon-ld64.c",
+ "src/f32-pavgpool/mp9p8q-neon.c",
+ "src/f32-pavgpool/up9-neon.c",
+ "src/f32-ppmm/4x8-neon.c",
+ "src/f32-ppmm/8x8-neon.c",
+ "src/f32-rmax/neon.c",
+ "src/f32-vmulcaddc/c4-neon-x2.c",
+ "src/q8-avgpool/mp9p8q-neon.c",
+ "src/q8-avgpool/up9-neon.c",
+ "src/q8-dwconv/up8x9-neon.c",
+ "src/q8-gavgpool/mp7p7q-neon.c",
+ "src/q8-gavgpool/up7-neon.c",
+ "src/q8-gemm/4x8-neon.c",
+ "src/q8-gemm/8x8-neon.c",
+ "src/q8-igemm/4x8-neon.c",
+ "src/q8-igemm/8x8-neon.c",
+ "src/q8-vadd/neon.c",
+ "src/u8-clamp/neon.c",
+ "src/u8-maxpool/9p8q-neon.c",
+ "src/u8-rmax/neon.c",
+ "src/x32-packx/x4-neon-st4.c",
+ "src/x32-pad/x2-neon.c",
+ "src/x32-zip/x2-neon.c",
+ "src/x32-zip/x3-neon.c",
+ "src/x32-zip/x4-neon.c",
+ "src/x32-zip/xm-neon.c",
+ "src/x8-zip/x2-neon.c",
+ "src/x8-zip/x3-neon.c",
+ "src/x8-zip/x4-neon.c",
+ "src/x8-zip/xm-neon.c",
+]
+
+NEONFMA_UKERNELS = [
+ "src/f32-dwconv/up4x9-neonfma.c",
+ "src/f32-dwconv/up8x9-neonfma.c",
+ "src/f32-gemm/1x8-neonfma-ld64.c",
+ "src/f32-gemm/4x12-neonfma-ld64.c",
+ "src/f32-gemm/4x2-neonfma-ld64.c",
+ "src/f32-gemm/4x8-neonfma-ld128.c",
+ "src/f32-gemm/4x8-neonfma-ld64.c",
+ "src/f32-gemm/5x8-neonfma-ld64.c",
+ "src/f32-gemm/6x8-neonfma-ld64.c",
+ "src/f32-gemminc/1x8-neonfma-ld64.c",
+ "src/f32-gemminc/4x12-neonfma-ld64.c",
+ "src/f32-gemminc/4x8-neonfma-ld128.c",
+ "src/f32-gemminc/4x8-neonfma-ld64.c",
+ "src/f32-gemminc/5x8-neonfma-ld64.c",
+ "src/f32-gemminc/6x8-neonfma-ld64.c",
+ "src/f32-hswish/neonfma.c",
+ "src/f32-igemm/4x12-neonfma-ld64.c",
+ "src/f32-igemm/4x2-neonfma-ld64.c",
+ "src/f32-igemm/4x4-neonfma-ld64.c",
+ "src/f32-igemm/4x8-neonfma-ld128.c",
+ "src/f32-igemm/4x8-neonfma-ld64.c",
+ "src/f32-igemm/6x8-neonfma-ld64.c",
+ "src/f32-ppmm/4x8-neonfma.c",
+ "src/f32-ppmm/8x8-neonfma.c",
+ "src/f32-vmulcaddc/c4-neonfma-x2.c",
+]
+
+AARCH64_NEONFMA_UKERNELS = [
+ "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
+ "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
+ "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
+ "src/f32-dwconv-spchw/3x3p1-neonfma.c",
+ "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
+ "src/f32-dwconv-spchw/5x5p2-neonfma.c",
+ "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
+ "src/f32-spmm/12x1-neonfma.c",
+ "src/f32-spmm/12x2-neonfma.c",
+ "src/f32-spmm/12x4-neonfma.c",
+ "src/f32-spmm/16x1-neonfma-pipelined.c",
+ "src/f32-spmm/16x1-neonfma-unroll2.c",
+ "src/f32-spmm/16x1-neonfma.c",
+ "src/f32-spmm/16x2-neonfma.c",
+ "src/f32-spmm/16x4-neonfma.c",
+ "src/f32-spmm/4x1-neonfma-pipelined.c",
+ "src/f32-spmm/4x1-neonfma-unroll2.c",
+ "src/f32-spmm/4x1-neonfma.c",
+ "src/f32-spmm/4x2-neonfma.c",
+ "src/f32-spmm/4x4-neonfma.c",
+ "src/f32-spmm/8x1-neonfma-pipelined.c",
+ "src/f32-spmm/8x1-neonfma-unroll2.c",
+ "src/f32-spmm/8x1-neonfma.c",
+ "src/f32-spmm/8x2-neonfma.c",
+ "src/f32-spmm/8x4-neonfma.c",
+]
+
+AARCH64_NEONFP16ARITH_UKERNELS = [
+ "src/f16-gemm/4x8-neonfp16arith-ld64.c",
+ "src/f16-gemm/6x8-neonfp16arith-ld64.c",
+ "src/f16-gemm/8x8-neonfp16arith-ld64.c",
+]
+
+SSE_UKERNELS = [
+ "src/f32-avgpool/mp9p8q-sse.c",
+ "src/f32-avgpool/up9-sse.c",
+ "src/f32-clamp/sse.c",
+ "src/f32-dwconv-spchw/3x3p1-sse.c",
+ "src/f32-dwconv-spchw/3x3s2p1-sse.c",
+ "src/f32-dwconv/up4x25-sse.c",
+ "src/f32-dwconv/up4x4-sse.c",
+ "src/f32-dwconv/up4x9-sse.c",
+ "src/f32-gavgpool-spchw/sse-x4.c",
+ "src/f32-gavgpool/mp7p7q-sse.c",
+ "src/f32-gavgpool/up7-sse.c",
+ "src/f32-gemm/1x8-sse-dup.c",
+ "src/f32-gemm/1x8-sse-load1.c",
+ "src/f32-gemm/1x8s4-sse.c",
+ "src/f32-gemm/4x8-sse-dup.c",
+ "src/f32-gemm/4x8-sse-load1.c",
+ "src/f32-gemm/4x8s4-sse.c",
+ "src/f32-gemminc/1x8-sse-dup.c",
+ "src/f32-gemminc/1x8-sse-load1.c",
+ "src/f32-gemminc/1x8s4-sse.c",
+ "src/f32-gemminc/4x8-sse-dup.c",
+ "src/f32-gemminc/4x8-sse-load1.c",
+ "src/f32-gemminc/4x8s4-sse.c",
+ "src/f32-hswish/sse.c",
+ "src/f32-igemm/1x8-sse-dup.c",
+ "src/f32-igemm/1x8-sse-load1.c",
+ "src/f32-igemm/1x8s4-sse.c",
+ "src/f32-igemm/4x2c4-sse.c",
+ "src/f32-igemm/4x8-sse-dup.c",
+ "src/f32-igemm/4x8-sse-load1.c",
+ "src/f32-igemm/4x8s4-sse.c",
+ "src/f32-maxpool/9p8q-sse.c",
+ "src/f32-pavgpool/mp9p8q-sse.c",
+ "src/f32-pavgpool/up9-sse.c",
+ "src/f32-ppmm/4x8-sse.c",
+ "src/f32-prelu/x4-sse.c",
+ "src/f32-rmax/sse.c",
+ "src/f32-spmm/4x1-sse.c",
+ "src/f32-spmm/8x1-sse.c",
+ "src/f32-vadd/sse.c",
+ "src/f32-vmul/sse.c",
+ "src/f32-vmulcaddc/c4-sse-x2.c",
+ "src/f32-vsub/sse.c",
+ "src/x32-packx/x4-sse.c",
+]
+
+SSE2_UKERNELS = [
+ "src/f32-argmaxpool/mp9p8q-sse2.c",
+ "src/f32-argmaxpool/up4-sse2.c",
+ "src/f32-argmaxpool/up9-sse2.c",
+ "src/q8-avgpool/mp9p8q-sse2.c",
+ "src/q8-avgpool/up9-sse2.c",
+ "src/q8-dwconv/up8x9-sse2.c",
+ "src/q8-gavgpool/mp7p7q-sse2.c",
+ "src/q8-gavgpool/up7-sse2.c",
+ "src/q8-gemm/2x4c8-sse2.c",
+ "src/q8-gemm/4x4c2-sse2.c",
+ "src/q8-igemm/4x4c2-sse2.c",
+ "src/q8-vadd/sse2.c",
+ "src/u8-clamp/sse2.c",
+ "src/u8-maxpool/9p8q-sse2.c",
+ "src/u8-rmax/sse2.c",
+ "src/x32-pad/x2-sse2.c",
+ "src/x32-zip/x2-sse2.c",
+ "src/x32-zip/x3-sse2.c",
+ "src/x32-zip/x4-sse2.c",
+ "src/x32-zip/xm-sse2.c",
+ "src/x8-zip/x2-sse2.c",
+ "src/x8-zip/x3-sse2.c",
+ "src/x8-zip/x4-sse2.c",
+ "src/x8-zip/xm-sse2.c",
+]
+
+AVX_UKERNELS = [
+ "src/f32-rmax/avx.c",
+]
+
+AVX512F_UKERNELS = [
+ "src/f32-rmax/avx512f.c",
+]
+
+AARCH32_ASM_UKERNELS = [
+ "src/q8-dwconv/up8x9-aarch32-neon.S",
+]
+
+AARCH64_ASM_UKERNELS = [
+ "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
+ "src/f32-dwconv/up4x9-aarch64-neonfma.S",
+ "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
+ "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
+ "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
+ "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
+ "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
+ "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
+ "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
+ "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
+ "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
+ "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
+ "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
+ "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
+ "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
+ "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
+ "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
+]
+
+INTERNAL_MICROKERNEL_HDRS = [
+ "src/xnnpack/argmaxpool.h",
+ "src/xnnpack/avgpool.h",
+ "src/xnnpack/clamp.h",
+ "src/xnnpack/common.h",
+ "src/xnnpack/conv.h",
+ "src/xnnpack/dwconv.h",
+ "src/xnnpack/gavgpool.h",
+ "src/xnnpack/gemm.h",
+ "src/xnnpack/hswish.h",
+ "src/xnnpack/igemm.h",
+ "src/xnnpack/lut.h",
+ "src/xnnpack/math.h",
+ "src/xnnpack/maxpool.h",
+ "src/xnnpack/packx.h",
+ "src/xnnpack/pad.h",
+ "src/xnnpack/params.h",
+ "src/xnnpack/pavgpool.h",
+ "src/xnnpack/ppmm.h",
+ "src/xnnpack/prelu.h",
+ "src/xnnpack/rmax.h",
+ "src/xnnpack/scalar-utils.h",
+ "src/xnnpack/spmm.h",
+ "src/xnnpack/unpool.h",
+ "src/xnnpack/vadd.h",
+ "src/xnnpack/vmul.h",
+ "src/xnnpack/vmulcaddc.h",
+ "src/xnnpack/vsub.h",
+ "src/xnnpack/zip.h",
+]
+
+INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
+ "include/xnnpack.h",
+ "src/xnnpack/allocator.h",
+ "src/xnnpack/compute.h",
+ "src/xnnpack/im2col.h",
+ "src/xnnpack/indirection.h",
+ "src/xnnpack/log.h",
+ "src/xnnpack/operator.h",
+ "src/xnnpack/pack.h",
+ "src/xnnpack/requantization.h",
+ "src/xnnpack/requantization-stubs.h",
+]
+
+MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
+ "src/xnnpack/requantization.h",
+ "include/xnnpack.h",
+]
+
+MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
+ "src/xnnpack/isa-checks.h",
+ "src/xnnpack/requantization.h",
+ "include/xnnpack.h",
+]
+
+OPERATOR_TEST_PARAMS_HDRS = [
+ "src/xnnpack/params.h",
+ "src/xnnpack/common.h",
+]
+
+WEIGHTS_PACK_HDRS = [
+ "src/xnnpack/pack.h",
+ "src/xnnpack/operator.h",
+ "src/xnnpack/compute.h",
+]
+
+xnnpack_cc_library(
+ name = "scalar_ukernels",
+ srcs = SCALAR_UKERNELS,
+ hdrs = INTERNAL_HDRS,
+ aarch32_copts = ["-marm"],
+ copts = xnnpack_std_copts(),
+ deps = [
+ "@FP16",
+ "@FXdiv",
+ ],
+)
+
+xnnpack_cc_library(
+ name = "psimd_ukernels",
+ srcs = PSIMD_UKERNELS,
+ hdrs = INTERNAL_HDRS,
+ aarch32_copts = [
+ "-marm",
+ "-mfpu=neon",
+ ],
+ copts = xnnpack_std_copts(),
+ optimized_copts = [
+ "-O3",
+ "-ffast-math",
+ ],
+ deps = [
+ "@FP16",
+ "@psimd",
+ ],
+)
+
+xnnpack_cc_library(
+ name = "neon_ukernels",
+ hdrs = INTERNAL_HDRS,
+ aarch32_copts = [
+ "-marm",
+ "-mfpu=neon",
+ ],
+ aarch32_srcs = NEON_UKERNELS,
+ aarch64_srcs = NEON_UKERNELS,
+ copts = xnnpack_std_copts(),
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "neonfma_ukernels",
+ hdrs = INTERNAL_HDRS,
+ aarch32_copts = [
+ "-marm",
+ "-mfpu=neon-vfpv4",
+ ],
+ aarch32_srcs = NEONFMA_UKERNELS,
+ aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
+ copts = xnnpack_std_copts(),
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "neonfp16arith_ukernels",
+ hdrs = INTERNAL_HDRS,
+ aarch64_copts = ["-march=armv8.2-a+fp16"],
+ aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
+ copts = xnnpack_std_copts(),
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "sse2_ukernels",
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ x86_copts = ["-msse2"],
+ x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "avx_ukernels",
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ x86_copts = ["-mavx"],
+ x86_srcs = AVX_UKERNELS,
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "avx512f_ukernels",
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ x86_copts = ["-mavx512f"],
+ x86_srcs = AVX512F_UKERNELS,
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+ name = "asm_ukernels",
+ hdrs = ["src/xnnpack/assembly.h"],
+ aarch32_srcs = AARCH32_ASM_UKERNELS,
+ aarch64_srcs = AARCH64_ASM_UKERNELS,
+)
+
+xnnpack_aggregate_library(
+ name = "ukernels",
+ aarch32_deps = [
+ ":psimd_ukernels",
+ ":neon_ukernels",
+ ":neonfma_ukernels",
+ ":asm_ukernels",
+ ],
+ aarch64_deps = [
+ ":psimd_ukernels",
+ ":neon_ukernels",
+ ":neonfma_ukernels",
+ ":neonfp16arith_ukernels",
+ ":asm_ukernels",
+ ],
+ generic_deps = [":scalar_ukernels"],
+ wasmsimd_deps = [
+ ":psimd_ukernels",
+ ],
+ x86_deps = [
+ ":psimd_ukernels",
+ ":sse2_ukernels",
+ ":avx_ukernels",
+ ":avx512f_ukernels",
+ ],
+)
+
+xnnpack_cc_library(
+ name = "im2col",
+ srcs = ["src/im2col.c"],
+ hdrs = [
+ "src/xnnpack/common.h",
+ "src/xnnpack/im2col.h",
+ ],
+ copts = xnnpack_std_copts(),
+)
+
+xnnpack_cc_library(
+ name = "indirection",
+ srcs = ["src/indirection.c"],
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ deps = [
+ "@FP16",
+ "@FXdiv",
+ "@pthreadpool",
+ ],
+)
+
+xnnpack_cc_library(
+ name = "operator_run",
+ srcs = ["src/operator-run.c"],
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts() + [
+ # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
+ "-Wno-vla",
+ ],
+ deps = [
+ "@FP16",
+ "@FXdiv",
+ "@clog",
+ "@pthreadpool",
+ ],
+)
+
+cc_library(
+ name = "enable_assembly",
+ defines = select({
+ ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
+ ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
+ "//conditions:default": [],
+ }),
+)
+
+cc_library(
+ name = "operators",
+ srcs = OPERATOR_SRCS + [
+ "src/init.c",
+ "src/operator-delete.c",
+ ] + select({
+ ":emscripten_wasm": ["src/wasm-stubs.c"],
+ "//conditions:default": [],
+ }),
+ copts = xnnpack_std_copts() + [
+ "-Isrc",
+ "-Iinclude",
+ ] + select({
+ ":debug_build": [],
+ "//conditions:default": xnnpack_min_size_copts(),
+ }),
+ linkstatic = True,
+ textual_hdrs = INTERNAL_HDRS,
+ deps = [
+ ":enable_assembly",
+ ":indirection",
+ ":ukernels",
+ "@FP16",
+ "@FXdiv",
+ "@clog",
+ "@cpuinfo",
+ "@pthreadpool",
+ ],
+)
+
+cc_library(
+ name = "XNNPACK",
+ hdrs = ["include/xnnpack.h"],
+ includes = ["include"],
+ linkstatic = True,
+ # XNNPACK API is unstable and can break without notice.
+ # End users are encouraged to use this package through a TFLite delegate.
+ visibility = xnnpack_visibility(),
+ deps = [
+ ":operator_run",
+ ":operators",
+ "@pthreadpool",
+ ],
+)
+
+cc_library(
+ name = "bench_utils",
+ srcs = ["bench/utils.cc"],
+ hdrs = ["bench/utils.h"],
+ copts = ["-Wno-unused-result"],
+ linkstatic = True,
+ deps = ["@cpuinfo"],
+)
+
+######################### Benchmarks for micro-kernels #########################
+
+xnnpack_benchmark(
+ name = "q8_gemm_bench",
+ srcs = [
+ "bench/gemm.h",
+ "bench/q8-gemm.cc",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
+ deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
+)
+
+xnnpack_benchmark(
+ name = "f16_gemm_bench",
+ srcs = [
+ "bench/f16-gemm.cc",
+ "bench/gemm.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ copts = ["-Wno-unused-function"],
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "f32_igemm_bench",
+ srcs = [
+ "bench/f32-igemm.cc",
+ "bench/conv.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+ name = "f32_conv_hwc_bench",
+ srcs = [
+ "bench/f32-conv-hwc.cc",
+ "bench/dconv.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ copts = ["-Wno-unused-function"],
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "f32_dwconv_bench",
+ srcs = [
+ "bench/f32-dwconv.cc",
+ "bench/dwconv.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+ name = "f32_dwconv_spchw_bench",
+ srcs = [
+ "bench/f32-dwconv-spchw.cc",
+ "bench/dwconv.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+ name = "f32_gemm_bench",
+ srcs = [
+ "bench/f32-gemm.cc",
+ "bench/gemm.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
+ deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
+)
+
+xnnpack_benchmark(
+ name = "f32_rmax_bench",
+ srcs = [
+ "bench/f32-rmax.cc",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "f32_spmm_bench",
+ srcs = [
+ "bench/f32-spmm.cc",
+ "bench/gemm.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_BENCHMARK_HDRS,
+ copts = ["-Wno-unused-function"],
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "f32_im2col_gemm_bench",
+ srcs = [
+ "bench/f32-im2col-gemm.cc",
+ "bench/conv.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
+)
+
+########################### Benchmarks for operators ###########################
+
+xnnpack_benchmark(
+ name = "add_bench",
+ srcs = ["bench/add.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "average_pooling_bench",
+ srcs = ["bench/average-pooling.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "channel_shuffle_bench",
+ srcs = ["bench/channel-shuffle.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "convolution_bench",
+ srcs = ["bench/convolution.cc"],
+ copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
+ deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
+)
+
+xnnpack_benchmark(
+ name = "deconvolution_bench",
+ srcs = ["bench/deconvolution.cc"],
+ copts = xnnpack_optional_tflite_copts(),
+ deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
+)
+
+xnnpack_benchmark(
+ name = "global_average_pooling_bench",
+ srcs = ["bench/global-average-pooling.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "max_pooling_bench",
+ srcs = ["bench/max-pooling.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "sigmoid_bench",
+ srcs = ["bench/sigmoid.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+ name = "softargmax_bench",
+ srcs = ["bench/softargmax.cc"],
+ deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+######################### Unit tests for micro-kernels #########################
+
+xnnpack_unit_test(
+ name = "f16_gemm_test",
+ srcs = [
+ "test/f16-gemm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_argmaxpool_test",
+ srcs = [
+ "test/f32-argmaxpool.cc",
+ "test/argmaxpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_avgpool_test",
+ srcs = [
+ "test/f32-avgpool.cc",
+ "test/avgpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_clamp_test",
+ srcs = [
+ "test/f32-clamp.cc",
+ "test/clamp-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_igemm_test",
+ srcs = [
+ "test/f32-igemm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_conv_hwc_test",
+ srcs = [
+ "test/f32-conv-hwc.cc",
+ "test/conv-hwc-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_conv_hwc2spchw_test",
+ srcs = [
+ "test/f32-conv-hwc2spchw.cc",
+ "test/conv-hwc2spchw-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_dwconv_test",
+ srcs = [
+ "test/f32-dwconv.cc",
+ "test/dwconv-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_dwconv_spchw_test",
+ srcs = [
+ "test/f32-dwconv-spchw.cc",
+ "test/dwconv-spchw-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_gavgpool_test",
+ srcs = [
+ "test/f32-gavgpool.cc",
+ "test/gavgpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_gavgpool_spchw_test",
+ srcs = [
+ "test/f32-gavgpool-spchw.cc",
+ "test/gavgpool-spchw-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_gemm_test",
+ srcs = [
+ "test/f32-gemm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_gemminc_test",
+ srcs = [
+ "test/f32-gemminc.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_hswish_test",
+ srcs = [
+ "test/f32-hswish.cc",
+ "test/hswish-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_maxpool_test",
+ srcs = [
+ "test/f32-maxpool.cc",
+ "test/maxpool-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_pavgpool_test",
+ srcs = [
+ "test/f32-pavgpool.cc",
+ "test/avgpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_ppmm_test",
+ srcs = [
+ "test/f32-ppmm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_prelu_test",
+ srcs = [
+ "test/f32-prelu.cc",
+ "test/prelu-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_rmax_test",
+ srcs = [
+ "test/f32-rmax.cc",
+ "test/rmax-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_spmm_test",
+ srcs = [
+ "test/f32-spmm.cc",
+ "test/spmm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_vadd_test",
+ srcs = [
+ "test/f32-vadd.cc",
+ "test/vadd-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_vsub_test",
+ srcs = [
+ "test/f32-vsub.cc",
+ "test/vsub-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_vmul_test",
+ srcs = [
+ "test/f32-vmul.cc",
+ "test/vmul-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "f32_vmulcaddc_test",
+ srcs = [
+ "test/f32-vmulcaddc.cc",
+ "test/vmulcaddc-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_avgpool_test",
+ srcs = [
+ "test/q8-avgpool.cc",
+ "test/avgpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_igemm_test",
+ srcs = [
+ "test/q8-igemm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_dwconv_test",
+ srcs = [
+ "test/q8-dwconv.cc",
+ "test/dwconv-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_gavgpool_test",
+ srcs = [
+ "test/q8-gavgpool.cc",
+ "test/gavgpool-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_gemm_test",
+ srcs = [
+ "test/q8-gemm.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "q8_vadd_test",
+ srcs = [
+ "test/q8-vadd.cc",
+ "test/vadd-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "u8_clamp_test",
+ srcs = [
+ "test/u8-clamp.cc",
+ "test/clamp-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "u8_lut32norm_test",
+ srcs = [
+ "test/u8-lut32norm.cc",
+ "test/lut-norm-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "u8_maxpool_test",
+ srcs = [
+ "test/u8-maxpool.cc",
+ "test/maxpool-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "u8_rmax_test",
+ srcs = [
+ "test/u8-rmax.cc",
+ "test/rmax-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x32_packx_test",
+ srcs = [
+ "test/x32-packx.cc",
+ "test/pack-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x32_pad_test",
+ srcs = [
+ "test/x32-pad.cc",
+ "test/pad-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x32_unpool_test",
+ srcs = [
+ "test/x32-unpool.cc",
+ "test/unpool-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x32_zip_test",
+ srcs = [
+ "test/x32-zip.cc",
+ "test/zip-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x8_lut_test",
+ srcs = [
+ "test/x8-lut.cc",
+ "test/lut-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "x8_zip_test",
+ srcs = [
+ "test/x8-zip.cc",
+ "test/zip-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+########################### Size test for the library ##########################
+
+xnnpack_binary(
+ name = "size_test",
+ srcs = ["test/size.c"],
+ deps = [":XNNPACK"],
+)
+
+########################### Unit tests for operators ###########################
+
+xnnpack_unit_test(
+ name = "add_test",
+ srcs = [
+ "test/add.cc",
+ "test/add-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "argmax_pooling_test",
+ srcs = [
+ "test/argmax-pooling.cc",
+ "test/argmax-pooling-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "average_pooling_test",
+ srcs = [
+ "test/average-pooling.cc",
+ "test/average-pooling-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "channel_pad_test",
+ srcs = [
+ "test/channel-pad.cc",
+ "test/channel-pad-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "channel_shuffle_test",
+ srcs = [
+ "test/channel-shuffle.cc",
+ "test/channel-shuffle-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "clamp_test",
+ srcs = [
+ "test/clamp.cc",
+ "test/clamp-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "convolution_test",
+ srcs = [
+ "test/convolution.cc",
+ "test/convolution-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "convolution_spnchw_test",
+ srcs = [
+ "test/convolution-spnchw.cc",
+ "test/convolution-spnchw-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "deconvolution_test",
+ srcs = [
+ "test/deconvolution.cc",
+ "test/deconvolution-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "fully_connected_test",
+ srcs = [
+ "test/fully-connected.cc",
+ "test/fully-connected-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "global_average_pooling_test",
+ srcs = [
+ "test/global-average-pooling.cc",
+ "test/global-average-pooling-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "global_average_pooling_spnchw_test",
+ srcs = [
+ "test/global-average-pooling-spnchw.cc",
+ "test/global-average-pooling-spnchw-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "hardswish_test",
+ srcs = [
+ "test/hardswish.cc",
+ "test/hardswish-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "leaky_relu_test",
+ srcs = [
+ "test/leaky-relu.cc",
+ "test/leaky-relu-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "max_pooling_test",
+ srcs = [
+ "test/max-pooling.cc",
+ "test/max-pooling-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "prelu_test",
+ srcs = [
+ "test/prelu.cc",
+ "test/prelu-operator-tester.h",
+ ] + OPERATOR_TEST_PARAMS_HDRS,
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "sigmoid_test",
+ srcs = [
+ "test/sigmoid.cc",
+ "test/sigmoid-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "softargmax_test",
+ srcs = [
+ "test/softargmax.cc",
+ "test/softargmax-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+ name = "unpooling_test",
+ srcs = [
+ "test/unpooling.cc",
+ "test/unpooling-operator-tester.h",
+ ],
+ deps = OPERATOR_TEST_DEPS,
+)
+
+############################# Build configurations #############################
+
+config_setting(
+ name = "linux_k8",
+ values = {
+ "cpu": "k8",
+ },
+)
+
+config_setting(
+ name = "android",
+ values = {"crosstool_top": "//external:android/crosstool"},
+)
+
+config_setting(
+ name = "android_armv7",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "armeabi-v7a",
+ },
+)
+
+config_setting(
+ name = "android_arm64",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "arm64-v8a",
+ },
+)
+
+config_setting(
+ name = "android_x86",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "x86",
+ },
+)
+
+config_setting(
+ name = "android_x86_64",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "x86_64",
+ },
+)
+
+config_setting(
+ name = "emscripten",
+ values = {"crosstool_top": "//external:android/emscripten"},
+)
+
+config_setting(
+ name = "emscripten_wasm",
+ values = {
+ "crosstool_top": "//external:android/emscripten",
+ "cpu": "wasm",
+ },
+)
+
+config_setting(
+ name = "emscripten_wasmsimd",
+ values = {
+ "crosstool_top": "//external:android/emscripten",
+ "cpu": "wasm",
+ "features": "wasm_simd",  # same feature name as in third_party/cpuinfo.BUILD
+ },
+)
+
+config_setting(
+ name = "emscripten_asmjs",
+ values = {
+ "crosstool_top": "//external:android/emscripten",
+ "cpu": "asmjs",
+ },
+)
+
+# Builds with -c dbg
+config_setting(
+ name = "debug_build",
+ values = {
+ "compilation_mode": "dbg",
+ },
+ visibility = ["//visibility:public"],
+)
+
+# Builds with -c opt
+config_setting(
+ name = "optimized_build",
+ values = {
+ "compilation_mode": "opt",
+ },
+ visibility = ["//visibility:public"],
+)
+
+# Enables usage of assembly kernels.
+config_setting(
+ name = "xnn_enable_assembly_explicit_true",
+ define_values = {"xnn_enable_assembly": "true"},
+)
+
+# Disables usage of assembly kernels.
+config_setting(
+ name = "xnn_enable_assembly_explicit_false",
+ define_values = {"xnn_enable_assembly": "false"},
+)
diff --git a/README.md b/README.md
index 98434b9..149cc04 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,11 @@
## Supported Architectures
-- ARM on Android, Linux, and iOS
-- ARM64 on Android, Linux, and iOS
+- ARM64 on Android
+- ARM on Android
- WebAssembly MVP
- WebAssembly SIMD (experimental)
-- x86 and x86-64 (up to SSE2 only) on Android, Linux, and Mac
+- x86 and x86-64 (up to SSE2 only) on Android and Linux
## Operator Coverage
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..6b113ca
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,92 @@
+workspace(name = "xnnpack")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# Google Test framework, used by most unit-tests.
+http_archive(
+ name = "com_google_googletest",
+ urls = ["https://github.com/google/googletest/archive/master.zip"],
+ strip_prefix = "googletest-master",
+)
+
+# Google Benchmark library, used in micro-benchmarks.
+http_archive(
+ name = "com_google_benchmark",
+ urls = [
+ "https://github.com/google/benchmark/archive/master.zip"
+ ],
+ strip_prefix = "benchmark-master",
+ build_file = "@//third_party:benchmark.BUILD",
+)
+
+# FP16 library, used for half-precision conversions
+http_archive(
+ name = "FP16",
+ strip_prefix = "FP16-ba1d31f5eed2eb4a69e4dea3870a68c7c95f998f",
+ sha256 = "9764297a339ad73b0717331a2c3e9c42a52105cd04cab62cb160e2b4598d2ea6",
+ urls = [
+ "https://github.com/Maratyszcza/FP16/archive/ba1d31f5eed2eb4a69e4dea3870a68c7c95f998f.tar.gz",
+ ],
+ build_file = "@//third_party:FP16.BUILD",
+)
+
+# FXdiv library, used for repeated integer division by the same factor
+http_archive(
+ name = "FXdiv",
+ strip_prefix = "FXdiv-f8c5354679ec2597792bc70a9e06eff50c508b9a",
+ sha256 = "7d3215bea832fe77091ec5666200b91156df6724da1e348205078346325fc45e",
+ urls = [
+ "https://github.com/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz",
+ ],
+ build_file = "@//third_party:FXdiv.BUILD",
+)
+
+# pthreadpool library, used for parallelization
+http_archive(
+ name = "pthreadpool",
+ strip_prefix = "pthreadpool-0e275fe56094626349c55a524ea8b71a85daa64b",
+ sha256 = "c2328fdf9e48ac9b928953bcbc442eb14402d393e4cfae0541581a3d39efca9d",
+ urls = [
+ "https://github.com/Maratyszcza/pthreadpool/archive/0e275fe56094626349c55a524ea8b71a85daa64b.tar.gz",
+ ],
+ build_file = "@//third_party:pthreadpool.BUILD",
+)
+
+# clog library, used for logging
+http_archive(
+ name = "clog",
+ strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970",
+ sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025",
+ urls = [
+ "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
+ ],
+ build_file = "@//third_party:clog.BUILD",
+)
+
+# cpuinfo library, used for detecting processor characteristics
+http_archive(
+ name = "cpuinfo",
+ strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970",
+ sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025",
+ urls = [
+ "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
+ ],
+ build_file = "@//third_party:cpuinfo.BUILD",
+)
+
+# psimd library, used for fallback 128-bit SIMD micro-kernels
+http_archive(
+ name = "psimd",
+ strip_prefix = "psimd-4f2c53947184b56f58607b9e777416bb63ebbde1",
+ sha256 = "7d1795ebf289af26e404cff5877c284775e491414cf41d7d99ab850ceaced458",
+ urls = [
+ "https://github.com/Maratyszcza/psimd/archive/4f2c53947184b56f58607b9e777416bb63ebbde1.tar.gz",
+ ],
+ build_file = "@//third_party:psimd.BUILD",
+)
+
+# Android NDK location and version are auto-detected from the $ANDROID_NDK_HOME environment variable
+android_ndk_repository(name = "androidndk")
+
+# Android SDK location and API level are auto-detected from the $ANDROID_HOME environment variable
+android_sdk_repository(name = "androidsdk")
diff --git a/bench/convolution.cc b/bench/convolution.cc
index 0857b01..8f33103 100644
--- a/bench/convolution.cc
+++ b/bench/convolution.cc
@@ -31,7 +31,6 @@
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif // BENCHMARK_TENSORFLOW_LITE
diff --git a/build_defs.bzl b/build_defs.bzl
new file mode 100644
index 0000000..88d872b
--- /dev/null
+++ b/build_defs.bzl
@@ -0,0 +1,253 @@
+"""Build definitions and rules for XNNPACK."""
+
+load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts")
+
+def xnnpack_visibility():
+ """Visibility of :XNNPACK target.
+
+ All other targets have private visibility, and cannot be depended on by
+ targets outside this package.
+ """
+ return []
+
+def xnnpack_min_size_copts():
+ """Compiler flags for size-optimized builds."""
+ return ["-Os"]
+
+def xnnpack_std_copts():
+ """Compiler flags to specify language standard for C sources."""
+ return ["-std=c99"]
+
+def xnnpack_std_cxxopts():
+ """Compiler flags to specify language standard for C++ sources."""
+ return ["-std=gnu++11"]
+
+def xnnpack_optional_ruy_copts():
+ """Compiler flags to optionally enable Ruy benchmarks."""
+ return []
+
+def xnnpack_optional_gemmlowp_copts():
+ """Compiler flags to optionally enable Gemmlowp benchmarks."""
+ return []
+
+def xnnpack_optional_tflite_copts():
+ """Compiler flags to optionally enable TensorFlow Lite benchmarks."""
+ return []
+
+def xnnpack_optional_armcl_copts():
+ """Compiler flags to optionally enable ARM ComputeLibrary benchmarks."""
+ return []
+
+def xnnpack_optional_ruy_deps():
+ """Optional Ruy dependencies."""
+ return []
+
+def xnnpack_optional_gemmlowp_deps():
+ """Optional Gemmlowp dependencies."""
+ return []
+
+def xnnpack_optional_tflite_deps():
+ """Optional TensorFlow Lite dependencies."""
+ return []
+
+def xnnpack_optional_armcl_deps():
+ """Optional ARM ComputeLibrary dependencies."""
+ return []
+
+def xnnpack_cc_library(
+ name,
+ srcs = [],
+ x86_srcs = [],
+ aarch32_srcs = [],
+ aarch64_srcs = [],
+ copts = [],
+ x86_copts = [],
+ aarch32_copts = [],
+ aarch64_copts = [],
+ optimized_copts = ["-O2"],
+ hdrs = [],
+ deps = []):
+ """C/C++/assembly library with architecture-specific sources.
+
+ Define a static library with architecture- and instruction-specific
+ source files.
+
+ Args:
+ name: The name of the library target to define.
+ srcs: The list of architecture-independent source files.
+ x86_srcs: The list of x86-specific source files.
+ aarch32_srcs: The list of AArch32-specific source files.
+ aarch64_srcs: The list of AArch64-specific source files.
+ copts: The list of compiler flags to use in all builds. -I flags for
+ include/ and src/ directories of XNNPACK are always prepended
+ before these user-specified flags.
+ x86_copts: The list of compiler flags to use in x86 builds.
+ aarch32_copts: The list of compiler flags to use in AArch32 builds.
+ aarch64_copts: The list of compiler flags to use in AArch64 builds.
+ optimized_copts: The list of compiler flags to use in optimized builds.
+ Defaults to -O2.
+ hdrs: The list of header files published by this library to be textually
+ included by sources in dependent rules.
+ deps: The list of other libraries to be linked.
+ """
+ native.cc_library(
+ name = name,
+ srcs = srcs + select({
+ ":linux_k8": x86_srcs,
+ ":android_armv7": aarch32_srcs,
+ ":android_arm64": aarch64_srcs,
+ ":android_x86": x86_srcs,
+ ":android_x86_64": x86_srcs,
+ "//conditions:default": [],
+ }),
+ copts = [
+ "-Iinclude",
+ "-Isrc",
+ ] + copts + select({
+ ":linux_k8": x86_copts,
+ ":android_armv7": aarch32_copts,
+ ":android_arm64": aarch64_copts,
+ ":android_x86": x86_copts,
+ ":android_x86_64": x86_copts,
+ "//conditions:default": [],
+ }) + select({
+ ":optimized_build": optimized_copts,
+ "//conditions:default": [],
+ }),
+ linkstatic = True,
+ linkopts = select({
+ ":linux_k8": ["-lpthread"],
+ ":android": ["-lm"],
+ "//conditions:default": [],
+ }),
+ textual_hdrs = hdrs,
+ deps = deps,
+ )
+
+def xnnpack_aggregate_library(
+ name,
+ generic_deps = [],
+ x86_deps = [],
+ aarch32_deps = [],
+ aarch64_deps = [],
+ wasm_deps = [],
+ wasmsimd_deps = []):
+ """Static library that aggregates architecture-specific dependencies.
+
+ Args:
+ name: The name of the library target to define.
+ generic_deps: The list of libraries to link on all architectures.
+ x86_deps: The list of libraries to link in x86 and x86-64 builds.
+ aarch32_deps: The list of libraries to link in AArch32 builds.
+ aarch64_deps: The list of libraries to link in AArch64 builds.
+ wasm_deps: The list of libraries to link in WebAssembly (MVP) builds.
+ wasmsimd_deps: The list of libraries to link in WebAssembly SIMD builds.
+ """
+
+ native.cc_library(
+ name = name,
+ linkstatic = True,
+ deps = generic_deps + select({
+ ":linux_k8": x86_deps,
+ ":android_armv7": aarch32_deps,
+ ":android_arm64": aarch64_deps,
+ ":android_x86": x86_deps,
+ ":android_x86_64": x86_deps,
+ ":emscripten_wasm": wasm_deps,
+ ":emscripten_wasmsimd": wasmsimd_deps,
+ ":emscripten_asmjs": [],
+ }),  # NOTE: no "//conditions:default" -- select() errors out on configurations not listed here
+ )
+
+def xnnpack_unit_test(name, srcs, copts = [], deps = []):
+ """Unit test binary based on Google Test.
+
+ Args:
+ name: The name of the test target to define.
+ srcs: The list of source and header files.
+ copts: The list of additional compiler flags for the target. -I flags
+ for include/ and src/ directories of XNNPACK are always prepended
+ before these user-specified flags.
+ deps: The list of additional libraries to be linked. Google Test library
+ (with main() function) is always added as a dependency and does not
+ need to be explicitly specified.
+ """
+
+ native.cc_test(
+ name = name,
+ srcs = srcs,
+ copts = xnnpack_std_cxxopts() + [
+ "-Iinclude",
+ "-Isrc",
+ ] + copts,
+ linkopts = select({
+ ":emscripten": xnnpack_emscripten_test_linkopts(),
+ "//conditions:default": [],
+ }),
+ linkstatic = True,
+ deps = [
+ "@com_google_googletest//:gtest_main",
+ ] + deps + select({
+ ":emscripten": xnnpack_emscripten_deps(),
+ "//conditions:default": [],
+ }),
+ )
+
+def xnnpack_binary(name, srcs, copts = [], deps = []):
+ """Minimal binary
+
+ Args:
+ name: The name of the binary target to define.
+ srcs: The list of source and header files.
+ copts: The list of additional compiler flags for the target. -I flags
+ for include/ and src/ directories of XNNPACK are always prepended
+ before these user-specified flags.
+ deps: The list of libraries to be linked.
+ """
+ native.cc_binary(
+ name = name,
+ srcs = srcs,
+ copts = [
+ "-Iinclude",
+ "-Isrc",
+ ] + copts,
+ linkopts = select({
+ ":emscripten": xnnpack_emscripten_minimal_linkopts(),
+ "//conditions:default": [],
+ }),
+ linkstatic = True,
+ deps = deps,
+ )
+
+def xnnpack_benchmark(name, srcs, copts = [], deps = []):
+ """Microbenchmark binary based on Google Benchmark
+
+ Args:
+ name: The name of the binary target to define.
+ srcs: The list of source and header files.
+ copts: The list of additional compiler flags for the target. -I flags
+ for include/ and src/ directories of XNNPACK are always prepended
+ before these user-specified flags.
+ deps: The list of additional libraries to be linked. Google Benchmark
+ library is always added as a dependency and does not need to be
+ explicitly specified.
+ """
+ native.cc_binary(
+ name = name,
+ srcs = srcs,
+ copts = xnnpack_std_cxxopts() + [
+ "-Iinclude",
+ "-Isrc",
+ ] + copts,
+ linkopts = select({
+ ":emscripten": xnnpack_emscripten_benchmark_linkopts(),
+ "//conditions:default": [],
+ }),
+ linkstatic = True,
+ deps = [
+ "@com_google_benchmark//:benchmark",
+ ] + deps + select({
+ ":emscripten": xnnpack_emscripten_deps(),
+ "//conditions:default": [],
+ }),
+ )
diff --git a/emscripten.bzl b/emscripten.bzl
new file mode 100644
index 0000000..9cb10f2
--- /dev/null
+++ b/emscripten.bzl
@@ -0,0 +1,36 @@
+"""Emscripten-specific build definitions for XNNPACK."""
+
+def xnnpack_emscripten_minimal_linkopts():
+ """Minimal Emscripten-specific linkopts for binaries."""
+ return [
+ "-s ASSERTIONS=0",
+ "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+ "-s EXIT_RUNTIME=1",
+ ]
+
+def xnnpack_emscripten_test_linkopts():
+ """Emscripten-specific linkopts for unit tests."""
+ return [
+ "-s ASSERTIONS=2",
+ "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+ "-s DEMANGLE_SUPPORT=1",
+ "-s EXIT_RUNTIME=1",
+ "-s ALLOW_MEMORY_GROWTH=1",
+ "--pre-js $(location :preamble.js.lds)",
+ ]
+
+def xnnpack_emscripten_benchmark_linkopts():
+ """Emscripten-specific linkopts for benchmarks."""
+ return [
+ "-s ASSERTIONS=1",
+ "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+ "-s EXIT_RUNTIME=1",
+ "-s ALLOW_MEMORY_GROWTH=1",
+ "--pre-js $(location :preamble.js.lds)",
+ ]
+
+def xnnpack_emscripten_deps():
+ """Emscripten-specific dependencies for unit tests and benchmarks."""
+ return [
+ ":preamble.js.lds",
+ ]
diff --git a/third_party/BUILD b/third_party/BUILD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/third_party/BUILD
diff --git a/third_party/FP16.BUILD b/third_party/FP16.BUILD
new file mode 100644
index 0000000..e1018be
--- /dev/null
+++ b/third_party/FP16.BUILD
@@ -0,0 +1,15 @@
+# Description:
+# C/C++ library for conversion to/from half-precision floating-point formats
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "FP16",
+ hdrs = glob(["include/**/*.h"]),
+ includes = ["include"],
+ strip_include_prefix = "include",
+)
diff --git a/third_party/FXdiv.BUILD b/third_party/FXdiv.BUILD
new file mode 100644
index 0000000..ef2853b
--- /dev/null
+++ b/third_party/FXdiv.BUILD
@@ -0,0 +1,15 @@
+# Description:
+# C99/C++ library for division via fixed-point multiplication by inverse
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "FXdiv",
+ hdrs = glob(["include/fxdiv.h"]),
+ includes = ["include"],
+ strip_include_prefix = "include",
+)
diff --git a/third_party/benchmark.BUILD b/third_party/benchmark.BUILD
new file mode 100644
index 0000000..3d439d6
--- /dev/null
+++ b/third_party/benchmark.BUILD
@@ -0,0 +1,41 @@
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "benchmark",
+ srcs = glob(["src/*.h", "src/*.cc",]),
+ hdrs = glob(["include/benchmark/*.h"]),
+ copts = [
+ "-DHAVE_POSIX_REGEX",
+ "-Wno-deprecated-declarations",
+ ],
+ linkopts = select({
+ ":linux_x86_64": ["-lm"],
+ ":linux_arm64": ["-lm"],
+ ":android": ["-lm"],
+ "//conditions:default": [],
+ }),
+ includes = ["include"],
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "linux_x86_64",
+ values = {"cpu": "k8"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "linux_arm64",
+ values = {"cpu": "aarch64"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "android",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ },
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/clog.BUILD b/third_party/clog.BUILD
new file mode 100644
index 0000000..68153fb
--- /dev/null
+++ b/third_party/clog.BUILD
@@ -0,0 +1,35 @@
+# Description:
+# C-style (a-la printf) logging library
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "clog",
+ srcs = [
+ "deps/clog/src/clog.c",
+ ],
+ copts = [
+ "-Wno-unused-result",
+ ],
+ hdrs = [
+ "deps/clog/include/clog.h",
+ ],
+ linkopts = select({
+ ":android": [
+ "-llog",
+ ],
+ "//conditions:default": [
+ ],
+ }),
+ strip_include_prefix = "deps/clog/include",
+)
+
+config_setting(
+ name = "android",
+ values = {"crosstool_top": "//external:android/crosstool"},
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
new file mode 100644
index 0000000..af7ca65
--- /dev/null
+++ b/third_party/cpuinfo.BUILD
@@ -0,0 +1,213 @@
+# cpuinfo, a library to detect information about the host CPU
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+C99OPTS = [
+ "-std=gnu99", # gnu99, not c99, because dprintf is used
+ "-Wno-vla",
+ "-D_GNU_SOURCE=1", # to use CPU_SETSIZE
+ "-DCPUINFO_INTERNAL=",
+ "-DCPUINFO_PRIVATE=",
+]
+
+# Source code common to all platforms.
+COMMON_SRCS = [
+ "src/api.c",
+ "src/init.c",
+]
+
+# Architecture-specific sources and headers.
+X86_SRCS = [
+ "src/x86/cache/descriptor.c",
+ "src/x86/cache/deterministic.c",
+ "src/x86/cache/init.c",
+ "src/x86/info.c",
+ "src/x86/init.c",
+ "src/x86/isa.c",
+ "src/x86/name.c",
+ "src/x86/topology.c",
+ "src/x86/uarch.c",
+ "src/x86/vendor.c",
+]
+
+ARM_SRCS = [
+ "src/arm/cache.c",
+ "src/arm/uarch.c",
+]
+
+# Platform-specific sources and headers
+LINUX_SRCS = [
+ "src/linux/cpulist.c",
+ "src/linux/current.c",
+ "src/linux/multiline.c",
+ "src/linux/processors.c",
+ "src/linux/smallfile.c",
+]
+
+MOCK_LINUX_SRCS = [
+ "src/linux/mockfile.c",
+]
+
+MACH_SRCS = [
+ "src/mach/topology.c",
+]
+
+EMSCRIPTEN_SRCS = [
+ "src/emscripten/init.c",
+]
+
+PNACL_SRCS = [
+ "src/pnacl/init.c",
+]
+
+LINUX_X86_SRCS = [
+ "src/x86/linux/cpuinfo.c",
+ "src/x86/linux/init.c",
+]
+
+LINUX_ARM_SRCS = [
+ "src/arm/linux/chipset.c",
+ "src/arm/linux/clusters.c",
+ "src/arm/linux/cpuinfo.c",
+ "src/arm/linux/hwcap.c",
+ "src/arm/linux/init.c",
+ "src/arm/linux/midr.c",
+]
+
+LINUX_ARM32_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch32-isa.c"]
+
+LINUX_ARM64_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch64-isa.c"]
+
+ANDROID_ARM_SRCS = [
+ "src/arm/android/properties.c",
+]
+
+WINDOWS_X86_SRCS = [
+ "src/x86/windows/init.c",
+]
+
+MACH_X86_SRCS = [
+ "src/x86/mach/init.c",
+]
+
+MACH_ARM_SRCS = [
+ "src/arm/mach/init.c",
+]
+
+cc_library(
+ name = "cpuinfo_impl",
+ srcs = select({
+ ":linux_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+ ":android_armv7": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM32_SRCS + ANDROID_ARM_SRCS,
+ ":android_arm64": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM64_SRCS + ANDROID_ARM_SRCS,
+ ":android_x86": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+ ":android_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+ ":emscripten_wasm": COMMON_SRCS + EMSCRIPTEN_SRCS,
+ }),
+ copts = C99OPTS + [
+ "-Iexternal/cpuinfo/include",
+ "-Iexternal/cpuinfo/src",
+ ],
+ linkstatic = True,
+ # Headers must be in textual_hdrs to allow us to set the standard to C99
+ textual_hdrs = [
+ "include/cpuinfo.h",
+ "src/linux/api.h",
+ "src/mach/api.h",
+ "src/cpuinfo/common.h",
+ "src/cpuinfo/internal-api.h",
+ "src/cpuinfo/log.h",
+ "src/cpuinfo/utils.h",
+ "src/x86/api.h",
+ "src/x86/cpuid.h",
+ "src/x86/linux/api.h",
+ "src/arm/android/api.h",
+ "src/arm/linux/api.h",
+ "src/arm/linux/cp.h",
+ "src/arm/api.h",
+ "src/arm/midr.h",
+ ],
+ deps = [
+ "@clog",
+ ],
+)
+
+cc_library(
+ name = "cpuinfo",
+ hdrs = [
+ "include/cpuinfo.h",
+ ],
+ strip_include_prefix = "include",
+ deps = [
+ ":cpuinfo_impl",
+ ],
+)
+
+############################# Build configurations #############################
+
+config_setting(
+ name = "linux_x86_64",
+ values = {"cpu": "k8"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "android_armv7",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "armeabi-v7a",
+ },
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "android_arm64",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "arm64-v8a",
+ },
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "android_x86",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "x86",
+ },
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "android_x86_64",
+ values = {
+ "crosstool_top": "//external:android/crosstool",
+ "cpu": "x86_64",
+ },
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "emscripten_wasm",
+ values = {
+ "cpu": "wasm",
+ },
+)
+
+config_setting(
+ name = "emscripten_wasmsimd",
+ values = {
+ "cpu": "wasm",
+ "features": "wasm_simd",
+ },
+)
+
+config_setting(
+ name = "emscripten_asmjs",
+ values = {
+ "cpu": "asmjs",
+ },
+)
diff --git a/third_party/psimd.BUILD b/third_party/psimd.BUILD
new file mode 100644
index 0000000..fe10181
--- /dev/null
+++ b/third_party/psimd.BUILD
@@ -0,0 +1,15 @@
+# Description:
+# Portable 128-bit SIMD intrinsics
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "psimd",
+ hdrs = glob(["include/psimd.h"]),
+ includes = ["include"],
+ strip_include_prefix = "include",
+)
diff --git a/third_party/pthreadpool.BUILD b/third_party/pthreadpool.BUILD
new file mode 100644
index 0000000..1267e4f
--- /dev/null
+++ b/third_party/pthreadpool.BUILD
@@ -0,0 +1,32 @@
+# Description:
+# Portable pthread-based thread pool for C and C++
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "pthreadpool",
+ srcs = [
+ "src/threadpool-pthreads.c",
+ "src/threadpool-utils.h",
+ ],
+ hdrs = [
+ "include/pthreadpool.h",
+ ],
+ copts = [
+ "-O2",
+ ],
+ defines = [
+ "PTHREADPOOL_NO_DEPRECATED_API",
+ ],
+ includes = [
+ "include",
+ ],
+ strip_include_prefix = "include",
+ deps = [
+ "@FXdiv",
+ ],
+)