Bazel BUILD file for XNNPACK

- Support Linux (x86-64)
- Support Android (ARMv7, ARM64, x86, x86-64)

PiperOrigin-RevId: 272677580
diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 0000000..ea28201
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,46 @@
+# Basic build settings: allow up to 128 concurrent build jobs (hard-coded here; a --jobs flag on the command line takes precedence).
+build --jobs 128
+
+# Sets the default Apple platform to macOS; the "ios" config in this file switches it to ios.
+build --apple_platform_type=macos
+
+# Android configs: "android" sets the Android crosstool for target code, the default C++ toolchain for host tools, links libdl and passes --gc-sections to the linker; "android_arm" / "android_arm64" layer the target ABI on top. No x86 / x86-64 Android config is defined in this file.
+build:android --crosstool_top=//external:android/crosstool
+build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:android --linkopt=-ldl
+build:android --linkopt=-Wl,--gc-sections
+
+build:android_arm --config=android
+build:android_arm --cpu=armeabi-v7a
+build:android_arm --fat_apk_cpu=armeabi-v7a
+
+build:android_arm64 --config=android
+build:android_arm64 --cpu=arm64-v8a
+build:android_arm64 --fat_apk_cpu=arm64-v8a
+
+# iOS configs: "ios" switches the Apple platform type to ios; each "ios_<cpu>" config layers --cpu and --watchos_cpus on top of it, and "ios_fat" sets --ios_multi_cpus=armv7,arm64 instead of a single --cpu.
+build:ios --apple_platform_type=ios
+
+build:ios_i386 --config=ios
+build:ios_i386 --cpu=ios_i386
+build:ios_i386 --watchos_cpus=i386
+
+build:ios_x86_64 --config=ios
+build:ios_x86_64 --cpu=ios_x86_64
+build:ios_x86_64 --watchos_cpus=i386
+
+build:ios_armv7 --config=ios
+build:ios_armv7 --cpu=ios_armv7
+build:ios_armv7 --watchos_cpus=armv7k
+
+build:ios_arm64 --config=ios
+build:ios_arm64 --cpu=ios_arm64
+build:ios_arm64 --watchos_cpus=armv7k
+
+build:ios_arm64e --config=ios
+build:ios_arm64e --cpu=ios_arm64e
+build:ios_arm64e --watchos_cpus=armv7k
+
+build:ios_fat --config=ios
+build:ios_fat --ios_multi_cpus=armv7,arm64
+build:ios_fat --watchos_cpus=armv7k
diff --git a/BUILD b/BUILD
new file mode 100644
index 0000000..7bbec91
--- /dev/null
+++ b/BUILD
@@ -0,0 +1,1580 @@
+# Copyright 2019 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Description:
+#   XNNPACK - optimized floating-point neural network operators library
+
+load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_benchmark", "xnnpack_binary", "xnnpack_cc_library", "xnnpack_min_size_copts", "xnnpack_optional_armcl_copts", "xnnpack_optional_armcl_deps", "xnnpack_optional_gemmlowp_copts", "xnnpack_optional_gemmlowp_deps", "xnnpack_optional_ruy_copts", "xnnpack_optional_ruy_deps", "xnnpack_optional_tflite_copts", "xnnpack_optional_tflite_deps", "xnnpack_std_copts", "xnnpack_unit_test", "xnnpack_visibility")
+
+licenses(["notice"])  # Package-level license declaration.
+
+exports_files(["LICENSE"])  # Lets other packages reference the LICENSE file by label.
+
+OPERATOR_BENCHMARK_DEPS = [  # Common deps of the operator-level benchmarks (add_bench, convolution_bench, ...).
+    ":XNNPACK",
+    ":bench_utils",
+    "@cpuinfo",
+    "@pthreadpool",
+]
+
+MICROKERNEL_BENCHMARK_DEPS = [  # Common deps of the micro-kernel benchmarks; they link :ukernels directly rather than :XNNPACK.
+    ":ukernels",
+    ":bench_utils",
+    "@cpuinfo",
+    "@FP16",
+    "@pthreadpool",
+]
+
+MICROKERNEL_TEST_DEPS = [  # Common deps of the micro-kernel unit tests; same as MICROKERNEL_BENCHMARK_DEPS minus :bench_utils.
+    ":ukernels",
+    "@cpuinfo",
+    "@FP16",
+    "@pthreadpool",
+]
+
+OPERATOR_TEST_DEPS = [  # NOTE(review): not referenced in the part of this file reviewed here; presumably the deps of operator-level unit tests - confirm.
+    ":XNNPACK",
+    "@pthreadpool",
+    "@FP16",
+]
+
+OPERATOR_SRCS = [
+    "src/add.c",
+    "src/argmax-pooling.c",
+    "src/average-pooling.c",
+    "src/channel-pad.c",
+    "src/channel-shuffle.c",
+    "src/clamp.c",
+    "src/convolution-spnchw.c",
+    "src/convolution.c",
+    "src/deconvolution.c",
+    "src/fully-connected.c",
+    "src/global-average-pooling-spnchw.c",
+    "src/global-average-pooling.c",
+    "src/hardswish.c",
+    "src/leaky-relu.c",
+    "src/max-pooling.c",
+    "src/prelu.c",
+    "src/sigmoid.c",
+    "src/softargmax.c",
+    "src/unpooling.c",
+]
+
+SCALAR_UKERNELS = [
+    "src/f32-argmaxpool/mp9p8q-scalar.c",
+    "src/f32-argmaxpool/up4-scalar.c",
+    "src/f32-argmaxpool/up9-scalar.c",
+    "src/f32-avgpool/mp9p8q-scalar.c",
+    "src/f32-avgpool/up9-scalar.c",
+    "src/f32-clamp/scalar.c",
+    "src/f32-igemm/1x4-scalar.c",
+    "src/f32-igemm/2x4-scalar.c",
+    "src/f32-igemm/4x2-scalar.c",
+    "src/f32-igemm/4x4-scalar.c",
+    "src/f32-dwconv/up1x25-scalar.c",
+    "src/f32-dwconv/up1x4-scalar.c",
+    "src/f32-dwconv/up1x9-scalar.c",
+    "src/f32-gavgpool/mp7p7q-scalar.c",
+    "src/f32-gavgpool/up7-scalar.c",
+    "src/f32-gemm/1x4-scalar.c",
+    "src/f32-gemm/2x4-scalar.c",
+    "src/f32-gemm/4x2-scalar.c",
+    "src/f32-gemm/4x4-scalar.c",
+    "src/f32-gemminc/1x4-scalar.c",
+    "src/f32-gemminc/2x4-scalar.c",
+    "src/f32-gemminc/4x4-scalar.c",
+    "src/f32-hswish/scalar.c",
+    "src/f32-maxpool/9p8q-scalar.c",
+    "src/f32-pavgpool/mp9p8q-scalar.c",
+    "src/f32-pavgpool/up9-scalar.c",
+    "src/f32-ppmm/2x4-scalar.c",
+    "src/f32-ppmm/3x3-scalar.c",
+    "src/f32-ppmm/4x2-scalar.c",
+    "src/f32-ppmm/4x4-scalar.c",
+    "src/f32-prelu/x4-scalar.c",
+    "src/f32-rmax/scalar.c",
+    "src/f32-spmm/1x1-scalar-pipelined.c",
+    "src/f32-spmm/1x1-scalar-unroll2.c",
+    "src/f32-spmm/1x1-scalar.c",
+    "src/f32-spmm/2x1-scalar-pipelined.c",
+    "src/f32-spmm/2x1-scalar-unroll2.c",
+    "src/f32-spmm/2x1-scalar.c",
+    "src/f32-spmm/4x1-scalar-pipelined.c",
+    "src/f32-spmm/4x1-scalar-unroll2.c",
+    "src/f32-spmm/4x1-scalar.c",
+    "src/f32-spmm/8x1-scalar-pipelined.c",
+    "src/f32-spmm/8x1-scalar-unroll2.c",
+    "src/f32-spmm/8x1-scalar.c",
+    "src/f32-vadd/scalar.c",
+    "src/f32-vmul/scalar.c",
+    "src/f32-vmulcaddc/c1-scalar-x2.c",
+    "src/f32-vsub/scalar.c",
+    "src/q8-avgpool/mp9p8q-scalar.c",
+    "src/q8-avgpool/up9-scalar.c",
+    "src/q8-igemm/2x2-scalar.c",
+    "src/q8-dwconv/up1x9-scalar.c",
+    "src/q8-gavgpool/mp7p7q-scalar.c",
+    "src/q8-gavgpool/up7-scalar.c",
+    "src/q8-gemm/2x2-scalar.c",
+    "src/q8-vadd/scalar.c",
+    "src/u8-clamp/scalar.c",
+    "src/u8-lut32norm/scalar.c",
+    "src/u8-maxpool/9p8q-scalar.c",
+    "src/u8-rmax/scalar.c",
+    "src/x32-packx/x2-scalar.c",
+    "src/x32-packx/x3-scalar.c",
+    "src/x32-packx/x4-scalar.c",
+    "src/x32-pad/x2-scalar.c",
+    "src/x32-unpool/scalar.c",
+    "src/x32-zip/x2-scalar.c",
+    "src/x32-zip/x3-scalar.c",
+    "src/x32-zip/x4-scalar.c",
+    "src/x32-zip/xm-scalar.c",
+    "src/x8-lut/scalar.c",
+    "src/x8-zip/x2-scalar.c",
+    "src/x8-zip/x3-scalar.c",
+    "src/x8-zip/x4-scalar.c",
+    "src/x8-zip/xm-scalar.c",
+]
+
+PSIMD_UKERNELS = [
+    "src/f32-argmaxpool/mp9p8q-psimd.c",
+    "src/f32-argmaxpool/up4-psimd.c",
+    "src/f32-argmaxpool/up9-psimd.c",
+    "src/f32-avgpool/mp9p8q-psimd.c",
+    "src/f32-avgpool/up9-psimd.c",
+    "src/f32-clamp/psimd.c",
+    "src/f32-igemm/1x8-psimd-loadsplat.c",
+    "src/f32-igemm/1x8-psimd-splat.c",
+    "src/f32-igemm/1x8s4-psimd.c",
+    "src/f32-igemm/4x2c4-psimd.c",
+    "src/f32-igemm/4x8-psimd-loadsplat.c",
+    "src/f32-igemm/4x8-psimd-splat.c",
+    "src/f32-igemm/4x8s4-psimd.c",
+    "src/f32-igemm/6x8-psimd-loadsplat.c",
+    "src/f32-igemm/6x8-psimd-splat.c",
+    "src/f32-igemm/6x8s4-psimd.c",
+    "src/f32-dwconv/up4x25-psimd.c",
+    "src/f32-dwconv/up4x4-psimd.c",
+    "src/f32-dwconv/up4x9-psimd.c",
+    "src/f32-gavgpool/mp7p7q-psimd.c",
+    "src/f32-gavgpool/up7-psimd.c",
+    "src/f32-gemm/1x8-psimd-loadsplat.c",
+    "src/f32-gemm/1x8-psimd-splat.c",
+    "src/f32-gemm/1x8s4-psimd.c",
+    "src/f32-gemm/4x8-psimd-loadsplat.c",
+    "src/f32-gemm/4x8-psimd-splat.c",
+    "src/f32-gemm/4x8s4-psimd.c",
+    "src/f32-gemm/6x8-psimd-loadsplat.c",
+    "src/f32-gemm/6x8-psimd-splat.c",
+    "src/f32-gemm/6x8s4-psimd.c",
+    "src/f32-gemminc/1x8-psimd-loadsplat.c",
+    "src/f32-gemminc/1x8-psimd-splat.c",
+    "src/f32-gemminc/1x8s4-psimd.c",
+    "src/f32-gemminc/4x8-psimd-loadsplat.c",
+    "src/f32-gemminc/4x8-psimd-splat.c",
+    "src/f32-gemminc/4x8s4-psimd.c",
+    "src/f32-gemminc/6x8-psimd-loadsplat.c",
+    "src/f32-gemminc/6x8-psimd-splat.c",
+    "src/f32-gemminc/6x8s4-psimd.c",
+    "src/f32-hswish/psimd.c",
+    "src/f32-maxpool/9p8q-psimd.c",
+    "src/f32-pavgpool/mp9p8q-psimd.c",
+    "src/f32-pavgpool/up9-psimd.c",
+    "src/f32-ppmm/4x8-psimd.c",
+    "src/f32-prelu/x4-psimd.c",
+    "src/f32-vadd/psimd.c",
+    "src/f32-vmul/psimd.c",
+    "src/f32-vmulcaddc/c4-psimd-x2.c",
+    "src/f32-vsub/psimd.c",
+    "src/x32-packx/x4-psimd.c",
+    "src/x32-pad/x2-psimd.c",
+    "src/x32-unpool/psimd.c",
+    "src/x32-zip/x2-psimd.c",
+    "src/x32-zip/x3-psimd.c",
+    "src/x32-zip/x4-psimd.c",
+    "src/x32-zip/xm-psimd.c",
+]
+
+# ISA-specific micro-kernels
+NEON_UKERNELS = [
+    "src/f32-avgpool/mp9p8q-neon.c",
+    "src/f32-avgpool/up9-neon.c",
+    "src/f32-clamp/neon.c",
+    "src/f32-igemm/1x8-neon-ld64.c",
+    "src/f32-igemm/4x12-neon-ld64.c",
+    "src/f32-igemm/4x2-neon-ld64.c",
+    "src/f32-igemm/4x4-neon-ld64.c",
+    "src/f32-igemm/4x8-neon-ld128.c",
+    "src/f32-igemm/4x8-neon-ld64.c",
+    "src/f32-igemm/6x8-neon-ld64.c",
+    "src/f32-dwconv/up4x9-neon.c",
+    "src/f32-gavgpool-spchw/neon-x4.c",
+    "src/f32-gavgpool/mp7p7q-neon.c",
+    "src/f32-gavgpool/up7-neon.c",
+    "src/f32-gemm/1x8-neon-ld64.c",
+    "src/f32-gemm/4x12-neon-ld64.c",
+    "src/f32-gemm/4x2-neon-ld64.c",
+    "src/f32-gemm/4x8-neon-ld128.c",
+    "src/f32-gemm/4x8-neon-ld64.c",
+    "src/f32-gemm/5x8-neon-ld64.c",
+    "src/f32-gemm/6x8-neon-ld64.c",
+    "src/f32-gemminc/1x8-neon-ld64.c",
+    "src/f32-gemminc/4x12-neon-ld64.c",
+    "src/f32-gemminc/4x8-neon-ld128.c",
+    "src/f32-gemminc/4x8-neon-ld64.c",
+    "src/f32-gemminc/5x8-neon-ld64.c",
+    "src/f32-gemminc/6x8-neon-ld64.c",
+    "src/f32-hswish/neon.c",
+    "src/f32-pavgpool/mp9p8q-neon.c",
+    "src/f32-pavgpool/up9-neon.c",
+    "src/f32-ppmm/4x8-neon.c",
+    "src/f32-ppmm/8x8-neon.c",
+    "src/f32-rmax/neon.c",
+    "src/f32-vmulcaddc/c4-neon-x2.c",
+    "src/q8-avgpool/mp9p8q-neon.c",
+    "src/q8-avgpool/up9-neon.c",
+    "src/q8-igemm/4x8-neon.c",
+    "src/q8-igemm/8x8-neon.c",
+    "src/q8-dwconv/up8x9-neon.c",
+    "src/q8-gavgpool/mp7p7q-neon.c",
+    "src/q8-gavgpool/up7-neon.c",
+    "src/q8-gemm/4x8-neon.c",
+    "src/q8-gemm/8x8-neon.c",
+    "src/q8-vadd/neon.c",
+    "src/u8-clamp/neon.c",
+    "src/u8-maxpool/9p8q-neon.c",
+    "src/u8-rmax/neon.c",
+    "src/x32-packx/x4-neon-st4.c",
+    "src/x32-pad/x2-neon.c",
+    "src/x32-zip/x2-neon.c",
+    "src/x32-zip/x3-neon.c",
+    "src/x32-zip/x4-neon.c",
+    "src/x32-zip/xm-neon.c",
+    "src/x8-zip/x2-neon.c",
+    "src/x8-zip/x3-neon.c",
+    "src/x8-zip/x4-neon.c",
+    "src/x8-zip/xm-neon.c",
+]
+
+NEONFMA_UKERNELS = [
+    "src/f32-igemm/4x12-neonfma-ld64.c",
+    "src/f32-igemm/4x2-neonfma-ld64.c",
+    "src/f32-igemm/4x4-neonfma-ld64.c",
+    "src/f32-igemm/4x8-neonfma-ld128.c",
+    "src/f32-igemm/4x8-neonfma-ld64.c",
+    "src/f32-igemm/6x8-neonfma-ld64.c",
+    "src/f32-dwconv/up4x9-neonfma.c",
+    "src/f32-dwconv/up8x9-neonfma.c",
+    "src/f32-gemm/1x8-neonfma-ld64.c",
+    "src/f32-gemm/4x12-neonfma-ld64.c",
+    "src/f32-gemm/4x2-neonfma-ld64.c",
+    "src/f32-gemm/4x8-neonfma-ld128.c",
+    "src/f32-gemm/4x8-neonfma-ld64.c",
+    "src/f32-gemm/5x8-neonfma-ld64.c",
+    "src/f32-gemm/6x8-neonfma-ld64.c",
+    "src/f32-gemminc/1x8-neonfma-ld64.c",
+    "src/f32-gemminc/4x12-neonfma-ld64.c",
+    "src/f32-gemminc/4x8-neonfma-ld128.c",
+    "src/f32-gemminc/4x8-neonfma-ld64.c",
+    "src/f32-gemminc/5x8-neonfma-ld64.c",
+    "src/f32-gemminc/6x8-neonfma-ld64.c",
+    "src/f32-hswish/neonfma.c",
+    "src/f32-ppmm/4x8-neonfma.c",
+    "src/f32-ppmm/8x8-neonfma.c",
+    "src/f32-vmulcaddc/c4-neonfma-x2.c",
+]
+
+AARCH64_NEONFMA_UKERNELS = [
+    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
+    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
+    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
+    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
+    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
+    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
+    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
+    "src/f32-spmm/12x1-neonfma.c",
+    "src/f32-spmm/12x2-neonfma.c",
+    "src/f32-spmm/12x4-neonfma.c",
+    "src/f32-spmm/16x1-neonfma-pipelined.c",
+    "src/f32-spmm/16x1-neonfma-unroll2.c",
+    "src/f32-spmm/16x1-neonfma.c",
+    "src/f32-spmm/16x2-neonfma.c",
+    "src/f32-spmm/16x4-neonfma.c",
+    "src/f32-spmm/4x1-neonfma-pipelined.c",
+    "src/f32-spmm/4x1-neonfma-unroll2.c",
+    "src/f32-spmm/4x1-neonfma.c",
+    "src/f32-spmm/4x2-neonfma.c",
+    "src/f32-spmm/4x4-neonfma.c",
+    "src/f32-spmm/8x1-neonfma-pipelined.c",
+    "src/f32-spmm/8x1-neonfma-unroll2.c",
+    "src/f32-spmm/8x1-neonfma.c",
+    "src/f32-spmm/8x2-neonfma.c",
+    "src/f32-spmm/8x4-neonfma.c",
+]
+
+AARCH64_NEONFP16ARITH_UKERNELS = [
+    "src/f16-gemm/4x8-neonfp16arith-ld64.c",
+    "src/f16-gemm/6x8-neonfp16arith-ld64.c",
+    "src/f16-gemm/8x8-neonfp16arith-ld64.c",
+]
+
+SSE_UKERNELS = [
+    "src/f32-avgpool/mp9p8q-sse.c",
+    "src/f32-avgpool/up9-sse.c",
+    "src/f32-clamp/sse.c",
+    "src/f32-igemm/1x8-sse-dup.c",
+    "src/f32-igemm/1x8-sse-load1.c",
+    "src/f32-igemm/1x8s4-sse.c",
+    "src/f32-igemm/4x2c4-sse.c",
+    "src/f32-igemm/4x8-sse-dup.c",
+    "src/f32-igemm/4x8-sse-load1.c",
+    "src/f32-igemm/4x8s4-sse.c",
+    "src/f32-dwconv/up4x25-sse.c",
+    "src/f32-dwconv/up4x4-sse.c",
+    "src/f32-dwconv/up4x9-sse.c",
+    "src/f32-gavgpool-spchw/sse-x4.c",
+    "src/f32-gavgpool/mp7p7q-sse.c",
+    "src/f32-gavgpool/up7-sse.c",
+    "src/f32-gemm/1x8-sse-dup.c",
+    "src/f32-gemm/1x8-sse-load1.c",
+    "src/f32-gemm/1x8s4-sse.c",
+    "src/f32-gemm/4x8-sse-dup.c",
+    "src/f32-gemm/4x8-sse-load1.c",
+    "src/f32-gemm/4x8s4-sse.c",
+    "src/f32-gemminc/1x8-sse-dup.c",
+    "src/f32-gemminc/1x8-sse-load1.c",
+    "src/f32-gemminc/1x8s4-sse.c",
+    "src/f32-gemminc/4x8-sse-dup.c",
+    "src/f32-gemminc/4x8-sse-load1.c",
+    "src/f32-gemminc/4x8s4-sse.c",
+    "src/f32-hswish/sse.c",
+    "src/f32-maxpool/9p8q-sse.c",
+    "src/f32-pavgpool/mp9p8q-sse.c",
+    "src/f32-pavgpool/up9-sse.c",
+    "src/f32-dwconv-spchw/3x3p1-sse.c",
+    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
+    "src/f32-ppmm/4x8-sse.c",
+    "src/f32-prelu/x4-sse.c",
+    "src/f32-rmax/sse.c",
+    "src/f32-spmm/4x1-sse.c",
+    "src/f32-spmm/8x1-sse.c",
+    "src/f32-vadd/sse.c",
+    "src/f32-vmul/sse.c",
+    "src/f32-vmulcaddc/c4-sse-x2.c",
+    "src/f32-vsub/sse.c",
+    "src/x32-packx/x4-sse.c",
+]
+
+SSE2_UKERNELS = [
+    "src/f32-argmaxpool/mp9p8q-sse2.c",
+    "src/f32-argmaxpool/up4-sse2.c",
+    "src/f32-argmaxpool/up9-sse2.c",
+    "src/q8-avgpool/mp9p8q-sse2.c",
+    "src/q8-avgpool/up9-sse2.c",
+    "src/q8-igemm/4x4c2-sse2.c",
+    "src/q8-dwconv/up8x9-sse2.c",
+    "src/q8-gavgpool/mp7p7q-sse2.c",
+    "src/q8-gavgpool/up7-sse2.c",
+    "src/q8-gemm/2x4c8-sse2.c",
+    "src/q8-gemm/4x4c2-sse2.c",
+    "src/q8-vadd/sse2.c",
+    "src/u8-clamp/sse2.c",
+    "src/u8-maxpool/9p8q-sse2.c",
+    "src/u8-rmax/sse2.c",
+    "src/x32-pad/x2-sse2.c",
+    "src/x32-zip/x2-sse2.c",
+    "src/x32-zip/x3-sse2.c",
+    "src/x32-zip/x4-sse2.c",
+    "src/x32-zip/xm-sse2.c",
+    "src/x8-zip/x2-sse2.c",
+    "src/x8-zip/x3-sse2.c",
+    "src/x8-zip/x4-sse2.c",
+    "src/x8-zip/xm-sse2.c",
+]
+
+AVX_UKERNELS = [
+    "src/f32-rmax/avx.c",
+]
+
+AVX512F_UKERNELS = [
+    "src/f32-rmax/avx512f.c",
+]
+
+AARCH32_ASM_UKERNELS = [
+    "src/q8-dwconv/up8x9-aarch32-neon.S",
+]
+
+AARCH64_ASM_UKERNELS = [
+    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
+    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
+    "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
+    "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
+    "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
+    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
+    "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
+    "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
+    "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
+    "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
+    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
+    "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
+    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
+    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
+    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
+    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
+    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
+]
+
+INTERNAL_MICROKERNEL_HDRS = [
+    "src/xnnpack/argmaxpool.h",
+    "src/xnnpack/avgpool.h",
+    "src/xnnpack/clamp.h",
+    "src/xnnpack/common.h",
+    "src/xnnpack/conv.h",
+    "src/xnnpack/dwconv.h",
+    "src/xnnpack/gavgpool.h",
+    "src/xnnpack/gemm.h",
+    "src/xnnpack/hswish.h",
+    "src/xnnpack/igemm.h",
+    "src/xnnpack/lut.h",
+    "src/xnnpack/math.h",
+    "src/xnnpack/maxpool.h",
+    "src/xnnpack/packx.h",
+    "src/xnnpack/pad.h",
+    "src/xnnpack/params.h",
+    "src/xnnpack/pavgpool.h",
+    "src/xnnpack/ppmm.h",
+    "src/xnnpack/prelu.h",
+    "src/xnnpack/rmax.h",
+    "src/xnnpack/scalar-utils.h",
+    "src/xnnpack/spmm.h",
+    "src/xnnpack/unpool.h",
+    "src/xnnpack/vadd.h",
+    "src/xnnpack/vmul.h",
+    "src/xnnpack/vmulcaddc.h",
+    "src/xnnpack/vsub.h",
+    "src/xnnpack/zip.h",
+]
+
+INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [  # Micro-kernel headers plus the public API header and operator-level internal headers; used as hdrs of the micro-kernel libraries and as textual_hdrs of ":operators".
+    "include/xnnpack.h",
+    "src/xnnpack/allocator.h",
+    "src/xnnpack/compute.h",
+    "src/xnnpack/im2col.h",
+    "src/xnnpack/indirection.h",
+    "src/xnnpack/log.h",
+    "src/xnnpack/operator.h",
+    "src/xnnpack/pack.h",
+    "src/xnnpack/requantization.h",
+    "src/xnnpack/requantization-stubs.h",
+]
+
+MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [  # Headers appended to srcs of the micro-kernel benchmark targets.
+    "src/xnnpack/requantization.h",
+    "include/xnnpack.h",
+]
+
+MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [  # Headers appended to srcs of the micro-kernel unit tests; unlike the benchmark set it also carries isa-checks.h.
+    "src/xnnpack/isa-checks.h",
+    "src/xnnpack/requantization.h",
+    "include/xnnpack.h",
+]
+
+OPERATOR_TEST_PARAMS_HDRS = [  # NOTE(review): not referenced in the part of this file reviewed here; presumably added to srcs of operator tests - confirm.
+    "src/xnnpack/params.h",
+    "src/xnnpack/common.h",
+]
+
+WEIGHTS_PACK_HDRS = [  # Extra headers appended to srcs of some benchmarks and tests (e.g. q8_gemm_bench, f32_gemm_test, f32_dwconv_test).
+    "src/xnnpack/pack.h",
+    "src/xnnpack/operator.h",
+    "src/xnnpack/compute.h",
+]
+
+xnnpack_cc_library(
+    name = "scalar_ukernels",  # Scalar micro-kernel sources; the only entry in generic_deps of the ":ukernels" aggregate.
+    srcs = SCALAR_UKERNELS,
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = ["-marm"],  # -marm: emit ARM-state rather than Thumb code; presumably applied only to 32-bit ARM builds (confirm in build_defs.bzl).
+    copts = xnnpack_std_copts(),
+    deps = [
+        "@FP16",
+        "@FXdiv",
+    ],
+)
+
+xnnpack_cc_library(
+    name = "psimd_ukernels",  # Micro-kernels built against @psimd; listed in the aarch32, aarch64, wasmsimd and x86 dep sets of ":ukernels".
+    srcs = PSIMD_UKERNELS,
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = [
+        "-marm",
+        "-mfpu=neon",
+    ],
+    copts = xnnpack_std_copts(),
+    optimized_copts = [  # NOTE(review): presumably replaces the macro's default flags for optimized builds - confirm in build_defs.bzl. -ffast-math relaxes strict IEEE floating-point semantics.
+        "-O3",
+        "-ffast-math",
+    ],
+    deps = [
+        "@FP16",
+        "@psimd",
+    ],
+)
+
+xnnpack_cc_library(
+    name = "neon_ukernels",  # NEON micro-kernels; the same source list is given for both aarch32 and aarch64, with no generic srcs.
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = [
+        "-marm",
+        "-mfpu=neon",
+    ],
+    aarch32_srcs = NEON_UKERNELS,
+    aarch64_srcs = NEON_UKERNELS,
+    copts = xnnpack_std_copts(),
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "neonfma_ukernels",  # NEON micro-kernels that use fused multiply-add.
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = [
+        "-marm",
+        "-mfpu=neon-vfpv4",  # NEON with VFPv4, the 32-bit ARM FPU level that provides fused multiply-add.
+    ],
+    aarch32_srcs = NEONFMA_UKERNELS,
+    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,  # aarch64 additionally builds the AArch64-only kernel list.
+    copts = xnnpack_std_copts(),
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "neonfp16arith_ukernels",  # Half-precision (f16) GEMM micro-kernels; sources are listed for aarch64 only.
+    hdrs = INTERNAL_HDRS,
+    aarch64_copts = ["-march=armv8.2-a+fp16"],  # Enables the ARMv8.2-A half-precision arithmetic extension for these sources.
+    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
+    copts = xnnpack_std_copts(),
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "sse2_ukernels",  # Despite the name, this builds both the SSE and the SSE2 kernel lists.
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts(),
+    x86_copts = ["-msse2"],  # Applied to the SSE sources as well, since they share this target.
+    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "avx_ukernels",  # AVX micro-kernels, kept in their own target so that only these sources get -mavx.
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts(),
+    x86_copts = ["-mavx"],
+    x86_srcs = AVX_UKERNELS,
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "avx512f_ukernels",  # AVX-512F micro-kernels, kept in their own target so that only these sources get -mavx512f.
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts(),
+    x86_copts = ["-mavx512f"],
+    x86_srcs = AVX512F_UKERNELS,
+    deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
+    name = "asm_ukernels",  # Assembly (.S) micro-kernels for 32-bit and 64-bit ARM; no copts or deps are passed.
+    hdrs = ["src/xnnpack/assembly.h"],
+    aarch32_srcs = AARCH32_ASM_UKERNELS,
+    aarch64_srcs = AARCH64_ASM_UKERNELS,
+)
+
+xnnpack_aggregate_library(
+    name = "ukernels",  # Single target bundling the per-ISA micro-kernel libraries; NOTE(review): presumably one *_deps list is picked per target architecture - confirm in build_defs.bzl.
+    aarch32_deps = [
+        ":psimd_ukernels",
+        ":neon_ukernels",
+        ":neonfma_ukernels",
+        ":asm_ukernels",
+    ],
+    aarch64_deps = [  # Same as aarch32 plus the FP16-arithmetic kernels.
+        ":psimd_ukernels",
+        ":neon_ukernels",
+        ":neonfma_ukernels",
+        ":neonfp16arith_ukernels",
+        ":asm_ukernels",
+    ],
+    generic_deps = [":scalar_ukernels"],
+    wasmsimd_deps = [
+        ":psimd_ukernels",
+    ],
+    x86_deps = [
+        ":psimd_ukernels",
+        ":sse2_ukernels",
+        ":avx_ukernels",
+        ":avx512f_ukernels",
+    ],
+)
+
+xnnpack_cc_library(
+    name = "im2col",  # Standalone im2col helper; in the part of this file reviewed here only f32_im2col_gemm_bench depends on it.
+    srcs = ["src/im2col.c"],
+    hdrs = [
+        "src/xnnpack/common.h",
+        "src/xnnpack/im2col.h",
+    ],
+    copts = xnnpack_std_copts(),
+)
+
+xnnpack_cc_library(
+    name = "indirection",  # Builds src/indirection.c; a dep of ":operators" and of the igemm / dwconv benchmarks.
+    srcs = ["src/indirection.c"],
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts(),
+    deps = [
+        "@FP16",
+        "@FXdiv",
+        "@pthreadpool",
+    ],
+)
+
+xnnpack_cc_library(
+    name = "operator_run",  # Builds src/operator-run.c; linked into the public ":XNNPACK" target alongside ":operators".
+    srcs = ["src/operator-run.c"],
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts() + [
+        # Wrappers for multi-pass micro-kernels use variable-length arrays (VLAs) for temporary buffers, so the VLA warning is disabled.
+        "-Wno-vla",
+    ],
+    deps = [
+        "@FP16",
+        "@FXdiv",
+        "@clog",
+        "@pthreadpool",
+    ],
+)
+
+cc_library(
+    name = "enable_assembly",  # Source-less library that only propagates an XNN_ENABLE_ASSEMBLY define to targets depending on it.
+    defines = select({  # The two config_settings named below are not declared in the part of this file reviewed here.
+        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
+        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
+        "//conditions:default": [],  # Neither setting matched: leave the macro undefined.
+    }),
+)
+
+cc_library(
+    name = "operators",  # Operator implementations plus library init and operator deletion; wrapped by the public ":XNNPACK" target.
+    srcs = OPERATOR_SRCS + [
+        "src/init.c",
+        "src/operator-delete.c",
+    ] + select({
+        ":emscripten_wasm": ["src/wasm-stubs.c"],  # Extra stubs source is compiled only when the emscripten_wasm setting matches.
+        "//conditions:default": [],
+    }),
+    copts = xnnpack_std_copts() + [
+        "-Isrc",
+        "-Iinclude",
+    ] + select({
+        ":debug_build": [],  # Debug builds skip the min-size flags.
+        "//conditions:default": xnnpack_min_size_copts(),
+    }),
+    linkstatic = True,
+    textual_hdrs = INTERNAL_HDRS,  # Listed as textual_hdrs, so Bazel does not compile these headers on their own.
+    deps = [
+        ":enable_assembly",
+        ":indirection",
+        ":ukernels",
+        "@FP16",
+        "@FXdiv",
+        "@clog",
+        "@cpuinfo",
+        "@pthreadpool",
+    ],
+)
+
+cc_library(
+    name = "XNNPACK",  # Public library target: exposes include/xnnpack.h and pulls in ":operators" and ":operator_run".
+    hdrs = ["include/xnnpack.h"],
+    includes = ["include"],  # Puts the include/ directory on the include path of dependents.
+    linkstatic = True,
+    # The XNNPACK API is unstable and can break without notice, so visibility comes from xnnpack_visibility().
+    # End users are encouraged to use this package through a TFLite delegate.
+    visibility = xnnpack_visibility(),
+    deps = [
+        ":operator_run",
+        ":operators",
+        "@pthreadpool",
+    ],
+)
+
+cc_library(
+    name = "bench_utils",  # Benchmark helper library; listed in both OPERATOR_BENCHMARK_DEPS and MICROKERNEL_BENCHMARK_DEPS.
+    srcs = ["bench/utils.cc"],
+    hdrs = ["bench/utils.h"],
+    copts = ["-Wno-unused-result"],  # Silences warnings about ignored return values in bench/utils.cc.
+    linkstatic = True,
+    deps = ["@cpuinfo"],
+)
+
+######################### Benchmarks for micro-kernels #########################
+
+xnnpack_benchmark(
+    name = "q8_gemm_bench",
+    srcs = [
+        "bench/gemm.h",
+        "bench/q8-gemm.cc",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
+    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
+)
+
+xnnpack_benchmark(
+    name = "f16_gemm_bench",
+    srcs = [
+        "bench/f16-gemm.cc",
+        "bench/gemm.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    copts = ["-Wno-unused-function"],
+    deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "f32_igemm_bench",
+    srcs = [
+        "bench/f32-igemm.cc",
+        "bench/conv.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+    name = "f32_conv_hwc_bench",
+    srcs = [
+        "bench/f32-conv-hwc.cc",
+        "bench/dconv.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    copts = ["-Wno-unused-function"],
+    deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "f32_dwconv_bench",
+    srcs = [
+        "bench/f32-dwconv.cc",
+        "bench/dwconv.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+    name = "f32_dwconv_spchw_bench",
+    srcs = [
+        "bench/f32-dwconv-spchw.cc",
+        "bench/dwconv.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
+)
+
+xnnpack_benchmark(
+    name = "f32_gemm_bench",
+    srcs = [
+        "bench/f32-gemm.cc",
+        "bench/gemm.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
+    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
+)
+
+xnnpack_benchmark(
+    name = "f32_rmax_bench",
+    srcs = [
+        "bench/f32-rmax.cc",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "f32_spmm_bench",
+    srcs = [
+        "bench/f32-spmm.cc",
+        "bench/gemm.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_BENCHMARK_HDRS,
+    copts = ["-Wno-unused-function"],
+    deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "f32_im2col_gemm_bench",
+    srcs = [
+        "bench/f32-im2col-gemm.cc",
+        "bench/conv.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
+)
+
+########################### Benchmarks for operators ###########################
+
+xnnpack_benchmark(
+    name = "add_bench",
+    srcs = ["bench/add.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "average_pooling_bench",
+    srcs = ["bench/average-pooling.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "channel_shuffle_bench",
+    srcs = ["bench/channel-shuffle.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "convolution_bench",
+    srcs = ["bench/convolution.cc"],
+    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
+    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
+)
+
+xnnpack_benchmark(
+    name = "deconvolution_bench",
+    srcs = ["bench/deconvolution.cc"],
+    copts = xnnpack_optional_tflite_copts(),
+    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
+)
+
+xnnpack_benchmark(
+    name = "global_average_pooling_bench",
+    srcs = ["bench/global-average-pooling.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "max_pooling_bench",
+    srcs = ["bench/max-pooling.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "sigmoid_bench",
+    srcs = ["bench/sigmoid.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
+    name = "softargmax_bench",
+    srcs = ["bench/softargmax.cc"],
+    deps = OPERATOR_BENCHMARK_DEPS,
+)
+
+######################### Unit tests for micro-kernels #########################
+
+xnnpack_unit_test(
+    name = "f16_gemm_test",
+    srcs = [
+        "test/f16-gemm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_argmaxpool_test",
+    srcs = [
+        "test/f32-argmaxpool.cc",
+        "test/argmaxpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_avgpool_test",
+    srcs = [
+        "test/f32-avgpool.cc",
+        "test/avgpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_clamp_test",
+    srcs = [
+        "test/f32-clamp.cc",
+        "test/clamp-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_igemm_test",
+    srcs = [
+        "test/f32-igemm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_conv_hwc_test",
+    srcs = [
+        "test/f32-conv-hwc.cc",
+        "test/conv-hwc-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_conv_hwc2spchw_test",
+    srcs = [
+        "test/f32-conv-hwc2spchw.cc",
+        "test/conv-hwc2spchw-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_dwconv_test",
+    srcs = [
+        "test/f32-dwconv.cc",
+        "test/dwconv-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_dwconv_spchw_test",
+    srcs = [
+        "test/f32-dwconv-spchw.cc",
+        "test/dwconv-spchw-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_gavgpool_test",
+    srcs = [
+        "test/f32-gavgpool.cc",
+        "test/gavgpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_gavgpool_spchw_test",
+    srcs = [
+        "test/f32-gavgpool-spchw.cc",
+        "test/gavgpool-spchw-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_gemm_test",
+    srcs = [
+        "test/f32-gemm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_gemminc_test",
+    srcs = [
+        "test/f32-gemminc.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_hswish_test",
+    srcs = [
+        "test/f32-hswish.cc",
+        "test/hswish-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_maxpool_test",
+    srcs = [
+        "test/f32-maxpool.cc",
+        "test/maxpool-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_pavgpool_test",
+    srcs = [
+        "test/f32-pavgpool.cc",
+        "test/avgpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_ppmm_test",
+    srcs = [
+        "test/f32-ppmm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_prelu_test",
+    srcs = [
+        "test/f32-prelu.cc",
+        "test/prelu-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_rmax_test",
+    srcs = [
+        "test/f32-rmax.cc",
+        "test/rmax-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_spmm_test",
+    srcs = [
+        "test/f32-spmm.cc",
+        "test/spmm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_vadd_test",
+    srcs = [
+        "test/f32-vadd.cc",
+        "test/vadd-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_vsub_test",
+    srcs = [
+        "test/f32-vsub.cc",
+        "test/vsub-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_vmul_test",
+    srcs = [
+        "test/f32-vmul.cc",
+        "test/vmul-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "f32_vmulcaddc_test",
+    srcs = [
+        "test/f32-vmulcaddc.cc",
+        "test/vmulcaddc-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_avgpool_test",
+    srcs = [
+        "test/q8-avgpool.cc",
+        "test/avgpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_igemm_test",
+    srcs = [
+        "test/q8-igemm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_dwconv_test",
+    srcs = [
+        "test/q8-dwconv.cc",
+        "test/dwconv-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_gavgpool_test",
+    srcs = [
+        "test/q8-gavgpool.cc",
+        "test/gavgpool-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_gemm_test",
+    srcs = [
+        "test/q8-gemm.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "q8_vadd_test",
+    srcs = [
+        "test/q8-vadd.cc",
+        "test/vadd-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "u8_clamp_test",
+    srcs = [
+        "test/u8-clamp.cc",
+        "test/clamp-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "u8_lut32norm_test",
+    srcs = [
+        "test/u8-lut32norm.cc",
+        "test/lut-norm-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "u8_maxpool_test",
+    srcs = [
+        "test/u8-maxpool.cc",
+        "test/maxpool-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "u8_rmax_test",
+    srcs = [
+        "test/u8-rmax.cc",
+        "test/rmax-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x32_packx_test",
+    srcs = [
+        "test/x32-packx.cc",
+        "test/pack-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x32_pad_test",
+    srcs = [
+        "test/x32-pad.cc",
+        "test/pad-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x32_unpool_test",
+    srcs = [
+        "test/x32-unpool.cc",
+        "test/unpool-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x32_zip_test",
+    srcs = [
+        "test/x32-zip.cc",
+        "test/zip-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x8_lut_test",
+    srcs = [
+        "test/x8-lut.cc",
+        "test/lut-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "x8_zip_test",
+    srcs = [
+        "test/x8-zip.cc",
+        "test/zip-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+########################### Size test for the library ##########################
+
+xnnpack_binary(
+    name = "size_test",
+    srcs = ["test/size.c"],
+    deps = [":XNNPACK"],
+)
+
+########################### Unit tests for operators ###########################
+
+xnnpack_unit_test(
+    name = "add_test",
+    srcs = [
+        "test/add.cc",
+        "test/add-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "argmax_pooling_test",
+    srcs = [
+        "test/argmax-pooling.cc",
+        "test/argmax-pooling-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "average_pooling_test",
+    srcs = [
+        "test/average-pooling.cc",
+        "test/average-pooling-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "channel_pad_test",
+    srcs = [
+        "test/channel-pad.cc",
+        "test/channel-pad-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "channel_shuffle_test",
+    srcs = [
+        "test/channel-shuffle.cc",
+        "test/channel-shuffle-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "clamp_test",
+    srcs = [
+        "test/clamp.cc",
+        "test/clamp-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "convolution_test",
+    srcs = [
+        "test/convolution.cc",
+        "test/convolution-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "convolution_spnchw_test",
+    srcs = [
+        "test/convolution-spnchw.cc",
+        "test/convolution-spnchw-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "deconvolution_test",
+    srcs = [
+        "test/deconvolution.cc",
+        "test/deconvolution-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "fully_connected_test",
+    srcs = [
+        "test/fully-connected.cc",
+        "test/fully-connected-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "global_average_pooling_test",
+    srcs = [
+        "test/global-average-pooling.cc",
+        "test/global-average-pooling-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "global_average_pooling_spnchw_test",
+    srcs = [
+        "test/global-average-pooling-spnchw.cc",
+        "test/global-average-pooling-spnchw-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "hardswish_test",
+    srcs = [
+        "test/hardswish.cc",
+        "test/hardswish-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "leaky_relu_test",
+    srcs = [
+        "test/leaky-relu.cc",
+        "test/leaky-relu-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "max_pooling_test",
+    srcs = [
+        "test/max-pooling.cc",
+        "test/max-pooling-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "prelu_test",
+    srcs = [
+        "test/prelu.cc",
+        "test/prelu-operator-tester.h",
+    ] + OPERATOR_TEST_PARAMS_HDRS,
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "sigmoid_test",
+    srcs = [
+        "test/sigmoid.cc",
+        "test/sigmoid-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "softargmax_test",
+    srcs = [
+        "test/softargmax.cc",
+        "test/softargmax-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
+    name = "unpooling_test",
+    srcs = [
+        "test/unpooling.cc",
+        "test/unpooling-operator-tester.h",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+############################# Build configurations #############################
+
+config_setting(
+    name = "linux_k8",
+    values = {
+        "cpu": "k8",
+    },
+)
+
+config_setting(
+    name = "android",
+    values = {"crosstool_top": "//external:android/crosstool"},
+)
+
+config_setting(
+    name = "android_armv7",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+        "cpu": "armeabi-v7a",
+    },
+)
+
+config_setting(
+    name = "android_arm64",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+        "cpu": "arm64-v8a",
+    },
+)
+
+config_setting(
+    name = "android_x86",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+        "cpu": "x86",
+    },
+)
+
+config_setting(
+    name = "android_x86_64",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+        "cpu": "x86_64",
+    },
+)
+
+config_setting(
+    name = "emscripten",
+    values = {"crosstool_top": "//external:android/emscripten"},
+)
+
+config_setting(
+    name = "emscripten_wasm",
+    values = {
+        "crosstool_top": "//external:android/emscripten",
+        "cpu": "wasm",
+    },
+)
+
+config_setting(
+    name = "emscripten_wasmsimd",
+    values = {
+        "crosstool_top": "//external:android/emscripten",
+        "cpu": "wasm",
+        "features": "wasmsimd",
+    },
+)
+
+config_setting(
+    name = "emscripten_asmjs",
+    values = {
+        "crosstool_top": "//external:android/emscripten",
+        "cpu": "asmjs",
+    },
+)
+
+# Builds with -c dbg
+config_setting(
+    name = "debug_build",
+    values = {
+        "compilation_mode": "dbg",
+    },
+    visibility = ["//visibility:public"],
+)
+
+# Builds with -c opt
+config_setting(
+    name = "optimized_build",
+    values = {
+        "compilation_mode": "opt",
+    },
+    visibility = ["//visibility:public"],
+)
+
+# Enables usage of assembly kernels.
+config_setting(
+    name = "xnn_enable_assembly_explicit_true",
+    define_values = {"xnn_enable_assembly": "true"},
+)
+
+# Disables usage of assembly kernels.
+config_setting(
+    name = "xnn_enable_assembly_explicit_false",
+    define_values = {"xnn_enable_assembly": "false"},
+)
diff --git a/README.md b/README.md
index 98434b9..149cc04 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,11 @@
 
 ## Supported Architectures
 
-- ARM on Android, Linux, and iOS
-- ARM64 on Android, Linux, and iOS
+- ARM64 on Android
+- ARM on Android
 - WebAssembly MVP
 - WebAssembly SIMD (experimental)
-- x86 and x86-64 (up to SSE2 only) on Android, Linux, and Mac
+- x86 and x86-64 (up to SSE2 only) on Android and Linux
 
 ## Operator Coverage
 
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..6b113ca
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,92 @@
+workspace(name = "xnnpack")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# Google Test framework, used by most unit-tests.
+http_archive(
+    name = "com_google_googletest",
+    strip_prefix = "googletest-master",
+    urls = ["https://github.com/google/googletest/archive/master.zip"],
+)
+
+# Google Benchmark library, used in micro-benchmarks.
+http_archive(
+    name = "com_google_benchmark",
+    strip_prefix = "benchmark-master",
+    urls = [
+        "https://github.com/google/benchmark/archive/master.zip",
+    ],
+    build_file = "@//third_party:benchmark.BUILD",
+)
+
+# FP16 library, used for half-precision conversions
+http_archive(
+    name = "FP16",
+    strip_prefix = "FP16-ba1d31f5eed2eb4a69e4dea3870a68c7c95f998f",
+    sha256 = "9764297a339ad73b0717331a2c3e9c42a52105cd04cab62cb160e2b4598d2ea6",
+    urls = [
+        "https://github.com/Maratyszcza/FP16/archive/ba1d31f5eed2eb4a69e4dea3870a68c7c95f998f.tar.gz",
+    ],
+    build_file = "@//third_party:FP16.BUILD",
+)
+
+# FXdiv library, used for repeated integer division by the same factor
+http_archive(
+    name = "FXdiv",
+    strip_prefix = "FXdiv-f8c5354679ec2597792bc70a9e06eff50c508b9a",
+    sha256 = "7d3215bea832fe77091ec5666200b91156df6724da1e348205078346325fc45e",
+    urls = [
+        "https://github.com/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz",
+    ],
+    build_file = "@//third_party:FXdiv.BUILD",
+)
+
+# pthreadpool library, used for parallelization
+http_archive(
+    name = "pthreadpool",
+    strip_prefix = "pthreadpool-0e275fe56094626349c55a524ea8b71a85daa64b",
+    sha256 = "c2328fdf9e48ac9b928953bcbc442eb14402d393e4cfae0541581a3d39efca9d",
+    urls = [
+        "https://github.com/Maratyszcza/pthreadpool/archive/0e275fe56094626349c55a524ea8b71a85daa64b.tar.gz",
+    ],
+    build_file = "@//third_party:pthreadpool.BUILD",
+)
+
+# clog library (shipped inside the cpuinfo repository), used for logging
+http_archive(
+    name = "clog",
+    strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970",
+    sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025",
+    urls = [
+        "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
+    ],
+    build_file = "@//third_party:clog.BUILD",
+)
+
+# cpuinfo library, used for detecting processor characteristics
+http_archive(
+    name = "cpuinfo",
+    strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970",
+    sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025",
+    urls = [
+        "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
+    ],
+    build_file = "@//third_party:cpuinfo.BUILD",
+)
+
+# psimd library, used for fallback 128-bit SIMD micro-kernels
+http_archive(
+    name = "psimd",
+    strip_prefix = "psimd-4f2c53947184b56f58607b9e777416bb63ebbde1",
+    sha256 = "7d1795ebf289af26e404cff5877c284775e491414cf41d7d99ab850ceaced458",
+    urls = [
+        "https://github.com/Maratyszcza/psimd/archive/4f2c53947184b56f58607b9e777416bb63ebbde1.tar.gz",
+    ],
+    build_file = "@//third_party:psimd.BUILD",
+)
+
+# Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
+android_ndk_repository(name = "androidndk")
+
+# Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
+android_sdk_repository(name = "androidsdk")
diff --git a/bench/convolution.cc b/bench/convolution.cc
index 0857b01..8f33103 100644
--- a/bench/convolution.cc
+++ b/bench/convolution.cc
@@ -31,7 +31,6 @@
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
-#include "tensorflow/lite/optional_debug_tools.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/version.h"
 #endif  // BENCHMARK_TENSORFLOW_LITE
diff --git a/build_defs.bzl b/build_defs.bzl
new file mode 100644
index 0000000..88d872b
--- /dev/null
+++ b/build_defs.bzl
@@ -0,0 +1,253 @@
+"""Build definitions and rules for XNNPACK."""
+
+load(":emscripten.bzl", "xnnpack_emscripten_benchmark_linkopts", "xnnpack_emscripten_deps", "xnnpack_emscripten_minimal_linkopts", "xnnpack_emscripten_test_linkopts")
+
+def xnnpack_visibility():
+    """Returns the visibility list for the :XNNPACK target (empty here).
+
+    All other targets have private visibility, and cannot have external
+    dependencies.
+    """
+    return []
+
+def xnnpack_min_size_copts():
+    """Returns compiler flags for size-optimized builds (currently -Os)."""
+    return ["-Os"]
+
+def xnnpack_std_copts():
+    """Returns the compiler flag selecting the C language standard (C99)."""
+    return ["-std=c99"]
+
+def xnnpack_std_cxxopts():
+    """Returns the compiler flag selecting the C++ language standard (GNU++11)."""
+    return ["-std=gnu++11"]
+
+def xnnpack_optional_ruy_copts():
+    """Compiler flags to optionally enable Ruy benchmarks (none are added here)."""
+    return []
+
+def xnnpack_optional_gemmlowp_copts():
+    """Compiler flags to optionally enable Gemmlowp benchmarks (none are added here)."""
+    return []
+
+def xnnpack_optional_tflite_copts():
+    """Compiler flags to optionally enable TensorFlow Lite benchmarks (none are added here)."""
+    return []
+
+def xnnpack_optional_armcl_copts():
+    """Compiler flags to optionally enable ARM ComputeLibrary benchmarks (none are added here)."""
+    return []
+
+def xnnpack_optional_ruy_deps():
+    """Optional Ruy dependencies (an empty list here)."""
+    return []
+
+def xnnpack_optional_gemmlowp_deps():
+    """Optional Gemmlowp dependencies (an empty list here)."""
+    return []
+
+def xnnpack_optional_tflite_deps():
+    """Optional TensorFlow Lite dependencies (an empty list here)."""
+    return []
+
+def xnnpack_optional_armcl_deps():
+    """Optional ARM ComputeLibrary dependencies (an empty list here)."""
+    return []
+
+def xnnpack_cc_library(
+        name,
+        srcs = [],
+        x86_srcs = [],
+        aarch32_srcs = [],
+        aarch64_srcs = [],
+        copts = [],
+        x86_copts = [],
+        aarch32_copts = [],
+        aarch64_copts = [],
+        optimized_copts = ["-O2"],
+        hdrs = [],
+        deps = []):
+    """C/C++/assembly library with architecture-specific sources.
+
+    Defines a statically-linked cc_library whose sources and flags are picked
+    per platform via select(); adds -lpthread on Linux and -lm on Android.
+
+    Args:
+      name: The name of the library target to define.
+      srcs: The list of architecture-independent source files.
+      x86_srcs: Sources added for Linux x86-64 and Android x86/x86-64 builds.
+      aarch32_srcs: Sources added for Android ARMv7 (:android_armv7) builds.
+      aarch64_srcs: Sources added for Android ARM64 (:android_arm64) builds.
+      copts: The list of compiler flags to use in all builds. -I flags for
+             include/ and src/ directories of XNNPACK are always prepended
+             before these user-specified flags.
+      x86_copts: Extra compiler flags for the same platforms as x86_srcs.
+      aarch32_copts: Extra compiler flags for Android ARMv7 builds.
+      aarch64_copts: Extra compiler flags for Android ARM64 builds.
+      optimized_copts: Compiler flags appended when :optimized_build matches.
+                       Defaults to -O2.
+      hdrs: Header files passed to cc_library as textual_hdrs, i.e. included
+            textually by sources in dependent rules.
+      deps: The list of other libraries to be linked.
+    """
+    native.cc_library(
+        name = name,
+        srcs = srcs + select({
+            ":linux_k8": x86_srcs,
+            ":android_armv7": aarch32_srcs,
+            ":android_arm64": aarch64_srcs,
+            ":android_x86": x86_srcs,
+            ":android_x86_64": x86_srcs,
+            "//conditions:default": [],
+        }),
+        copts = [
+            "-Iinclude",
+            "-Isrc",
+        ] + copts + select({
+            ":linux_k8": x86_copts,
+            ":android_armv7": aarch32_copts,
+            ":android_arm64": aarch64_copts,
+            ":android_x86": x86_copts,
+            ":android_x86_64": x86_copts,
+            "//conditions:default": [],
+        }) + select({
+            ":optimized_build": optimized_copts,
+            "//conditions:default": [],
+        }),
+        linkstatic = True,
+        linkopts = select({
+            ":linux_k8": ["-lpthread"],
+            ":android": ["-lm"],
+            "//conditions:default": [],
+        }),
+        textual_hdrs = hdrs,
+        deps = deps,
+    )
+
+def xnnpack_aggregate_library(
+        name,
+        generic_deps = [],
+        x86_deps = [],
+        aarch32_deps = [],
+        aarch64_deps = [],
+        wasm_deps = [],
+        wasmsimd_deps = []):
+    """Static library that aggregates architecture-specific dependencies.
+
+    Args:
+      name: The name of the library target to define.
+      generic_deps: The list of libraries to link on all architectures.
+      x86_deps: The list of libraries to link in x86 and x86-64 builds.
+      aarch32_deps: The list of libraries to link in AArch32 builds.
+      aarch64_deps: The list of libraries to link in AArch64 builds.
+      wasm_deps: The list of libraries to link in WebAssembly (MVP) builds.
+      wasmsimd_deps: The list of libraries to link in WebAssembly SIMD builds.
+    """
+    # No "//conditions:default" entry: only the platforms listed below are handled.
+    native.cc_library(
+        name = name,
+        linkstatic = True,
+        deps = generic_deps + select({
+            ":linux_k8": x86_deps,
+            ":android_armv7": aarch32_deps,
+            ":android_arm64": aarch64_deps,
+            ":android_x86": x86_deps,
+            ":android_x86_64": x86_deps,
+            ":emscripten_wasm": wasm_deps,
+            ":emscripten_wasmsimd": wasmsimd_deps,
+            ":emscripten_asmjs": [],
+        }),
+    )
+
+def xnnpack_unit_test(name, srcs, copts = [], deps = []):
+    """Unit test binary based on Google Test.
+
+    Args:
+      name: The name of the test target to define.
+      srcs: The list of source and header files.
+      copts: The list of additional compiler flags for the target. The C++
+             standard flag and -I flags for include/ and src/ directories of
+             XNNPACK are always prepended before these user-specified flags.
+      deps: The list of additional libraries to be linked. Google Test library
+            (with main() function) is always added as a dependency and does not
+            need to be explicitly specified.
+    """
+
+    native.cc_test(
+        name = name,
+        srcs = srcs,
+        copts = xnnpack_std_cxxopts() + [
+            "-Iinclude",
+            "-Isrc",
+        ] + copts,
+        linkopts = select({
+            ":emscripten": xnnpack_emscripten_test_linkopts(),
+            "//conditions:default": [],
+        }),
+        linkstatic = True,
+        deps = [
+            "@com_google_googletest//:gtest_main",
+        ] + deps + select({
+            ":emscripten": xnnpack_emscripten_deps(),
+            "//conditions:default": [],
+        }),
+    )
+
+def xnnpack_binary(name, srcs, copts = [], deps = []):
+    """Minimal binary with no test or benchmark framework added to its deps.
+
+    Args:
+      name: The name of the binary target to define.
+      srcs: The list of source and header files.
+      copts: The list of additional compiler flags for the target. -I flags
+             for include/ and src/ directories of XNNPACK are always prepended
+             before these user-specified flags.
+      deps: The list of libraries to be linked.
+    """
+    native.cc_binary(
+        name = name,
+        srcs = srcs,
+        copts = [
+            "-Iinclude",
+            "-Isrc",
+        ] + copts,
+        linkopts = select({
+            ":emscripten": xnnpack_emscripten_minimal_linkopts(),
+            "//conditions:default": [],
+        }),
+        linkstatic = True,
+        deps = deps,
+    )
+
+def xnnpack_benchmark(name, srcs, copts = [], deps = []):
+    """Microbenchmark binary based on Google Benchmark.
+
+    Args:
+      name: The name of the binary target to define.
+      srcs: The list of source and header files.
+      copts: The list of additional compiler flags for the target. The C++
+             standard flag and -I flags for include/ and src/ directories of
+             XNNPACK are always prepended before these user-specified flags.
+      deps: The list of additional libraries to be linked. Google Benchmark
+            library is always added as a dependency and does not need to be
+            explicitly specified.
+    """
+    native.cc_binary(
+        name = name,
+        srcs = srcs,
+        copts = xnnpack_std_cxxopts() + [
+            "-Iinclude",
+            "-Isrc",
+        ] + copts,
+        linkopts = select({
+            ":emscripten": xnnpack_emscripten_benchmark_linkopts(),
+            "//conditions:default": [],
+        }),
+        linkstatic = True,
+        deps = [
+            "@com_google_benchmark//:benchmark",
+        ] + deps + select({
+            ":emscripten": xnnpack_emscripten_deps(),
+            "//conditions:default": [],
+        }),
+    )
diff --git a/emscripten.bzl b/emscripten.bzl
new file mode 100644
index 0000000..9cb10f2
--- /dev/null
+++ b/emscripten.bzl
@@ -0,0 +1,36 @@
+"""Emscripten-specific build definitions for XNNPACK."""
+
+def xnnpack_emscripten_minimal_linkopts():
+    """Minimal Emscripten-specific linkopts for binaries (ASSERTIONS=0, no --pre-js)."""
+    return [
+        "-s ASSERTIONS=0",
+        "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+        "-s EXIT_RUNTIME=1",
+    ]
+
+def xnnpack_emscripten_test_linkopts():
+    """Emscripten-specific linkopts for unit tests; passes :preamble.js.lds via --pre-js."""
+    return [
+        "-s ASSERTIONS=2",
+        "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+        "-s DEMANGLE_SUPPORT=1",
+        "-s EXIT_RUNTIME=1",
+        "-s ALLOW_MEMORY_GROWTH=1",
+        "--pre-js $(location :preamble.js.lds)",
+    ]
+
+def xnnpack_emscripten_benchmark_linkopts():
+    """Emscripten-specific linkopts for benchmarks; passes :preamble.js.lds via --pre-js."""
+    return [
+        "-s ASSERTIONS=1",
+        "-s ERROR_ON_UNDEFINED_SYMBOLS=1",
+        "-s EXIT_RUNTIME=1",
+        "-s ALLOW_MEMORY_GROWTH=1",
+        "--pre-js $(location :preamble.js.lds)",
+    ]
+
+def xnnpack_emscripten_deps():
+    """Emscripten-specific deps for unit tests and benchmarks: the :preamble.js.lds label."""
+    return [
+        ":preamble.js.lds",
+    ]
diff --git a/third_party/BUILD b/third_party/BUILD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/third_party/BUILD
diff --git a/third_party/FP16.BUILD b/third_party/FP16.BUILD
new file mode 100644
index 0000000..e1018be
--- /dev/null
+++ b/third_party/FP16.BUILD
@@ -0,0 +1,15 @@
+# Description:
+#   C/C++ library for conversion to/from half-precision floating-point formats
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "FP16",
+    hdrs = glob(["include/**/*.h"]),
+    includes = ["include"],
+    strip_include_prefix = "include",
+)
diff --git a/third_party/FXdiv.BUILD b/third_party/FXdiv.BUILD
new file mode 100644
index 0000000..ef2853b
--- /dev/null
+++ b/third_party/FXdiv.BUILD
@@ -0,0 +1,15 @@
+# Description:
+#   C99/C++ library for division via fixed-point multiplication by inverse
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "FXdiv",
+    hdrs = glob(["include/fxdiv.h"]),
+    includes = ["include"],
+    strip_include_prefix = "include",
+)
diff --git a/third_party/benchmark.BUILD b/third_party/benchmark.BUILD
new file mode 100644
index 0000000..3d439d6
--- /dev/null
+++ b/third_party/benchmark.BUILD
@@ -0,0 +1,41 @@
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "benchmark",
+    srcs = glob(["src/*.h", "src/*.cc",]),
+    hdrs = glob(["include/benchmark/*.h"]),
+    copts = [
+        "-DHAVE_POSIX_REGEX",
+        "-Wno-deprecated-declarations",
+    ],
+    linkopts = select({
+        ":linux_x86_64": ["-lm"],
+        ":linux_arm64": ["-lm"],
+        ":android": ["-lm"],
+        "//conditions:default": [],
+    }),
+    includes = ["include"],
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "linux_x86_64",
+    values = {"cpu": "k8"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "linux_arm64",
+    values = {"cpu": "aarch64"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "android",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+    },
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/clog.BUILD b/third_party/clog.BUILD
new file mode 100644
index 0000000..68153fb
--- /dev/null
+++ b/third_party/clog.BUILD
@@ -0,0 +1,35 @@
+# Description:
+#   C-style (a-la printf) logging library
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "clog",
+    srcs = [
+        "deps/clog/src/clog.c",
+    ],
+    copts = [
+        "-Wno-unused-result",
+    ],
+    hdrs = [
+        "deps/clog/include/clog.h",
+    ],
+    linkopts = select({
+        ":android": [
+            "-llog",
+        ],
+        "//conditions:default": [
+        ],
+    }),
+    strip_include_prefix = "deps/clog/include",
+)
+
+config_setting(
+    name = "android",
+    values = {"crosstool_top": "//external:android/crosstool"},
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
new file mode 100644
index 0000000..af7ca65
--- /dev/null
+++ b/third_party/cpuinfo.BUILD
@@ -0,0 +1,213 @@
+# cpuinfo, a library to detect information about the host CPU
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+# Compiler flags for the cpuinfo sources; used in the copts of :cpuinfo_impl.
+C99OPTS = [
+    # gnu99, not c99, because dprintf is used
+    "-std=gnu99",
+    "-Wno-vla",
+    # to use CPU_SETSIZE
+    "-D_GNU_SOURCE=1",
+    "-DCPUINFO_INTERNAL=",
+    "-DCPUINFO_PRIVATE=",
+]
+
+# Source code common to all platforms.
+COMMON_SRCS = ["src/api.c", "src/init.c"]
+
+# Architecture-specific sources.
+
+# x86 sources; :cpuinfo_impl uses them for its 32-bit and 64-bit x86 targets.
+X86_SRCS = [
+    "src/x86/cache/descriptor.c",
+    "src/x86/cache/deterministic.c",
+    "src/x86/cache/init.c",
+    "src/x86/info.c",
+    "src/x86/init.c",
+    "src/x86/isa.c",
+    "src/x86/name.c",
+    "src/x86/topology.c",
+    "src/x86/uarch.c",
+    "src/x86/vendor.c",
+]
+
+# ARM sources; :cpuinfo_impl uses them for armeabi-v7a and arm64-v8a.
+ARM_SRCS = ["src/arm/cache.c", "src/arm/uarch.c"]
+
+# Platform-specific sources. A list annotated as unused below is not
+# referenced by any target in this BUILD file; it is only declared here.
+# The other lists are consumed by the srcs select() of :cpuinfo_impl.
+
+# Added by the Linux branch and by every Android branch of :cpuinfo_impl.
+LINUX_SRCS = [
+    "src/linux/cpulist.c",
+    "src/linux/current.c",
+    "src/linux/multiline.c",
+    "src/linux/processors.c",
+    "src/linux/smallfile.c",
+]
+
+# Unused in this file.
+MOCK_LINUX_SRCS = ["src/linux/mockfile.c"]
+
+# Unused in this file.
+MACH_SRCS = ["src/mach/topology.c"]
+
+# Added by the :emscripten_wasm branch of :cpuinfo_impl.
+EMSCRIPTEN_SRCS = ["src/emscripten/init.c"]
+
+# Unused in this file.
+PNACL_SRCS = ["src/pnacl/init.c"]
+
+# Sources specific to one platform/architecture combination.
+
+# Added by every x86 branch (Linux and Android) of :cpuinfo_impl.
+LINUX_X86_SRCS = [
+    "src/x86/linux/cpuinfo.c",
+    "src/x86/linux/init.c",
+]
+
+# Shared base of the 32-bit and 64-bit ARM Linux lists defined right after it.
+LINUX_ARM_SRCS = [
+    "src/arm/linux/chipset.c",
+    "src/arm/linux/clusters.c",
+    "src/arm/linux/cpuinfo.c",
+    "src/arm/linux/hwcap.c",
+    "src/arm/linux/init.c",
+    "src/arm/linux/midr.c",
+]
+
+# Each variant appends its own *-isa.c file to the shared ARM Linux list.
+LINUX_ARM32_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch32-isa.c"]
+
+LINUX_ARM64_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch64-isa.c"]
+
+# Added on top of the Linux ARM lists by the Android ARM branches of
+# :cpuinfo_impl.
+ANDROID_ARM_SRCS = ["src/arm/android/properties.c"]
+
+# The three lists below are not referenced by any target in this file.
+WINDOWS_X86_SRCS = ["src/x86/windows/init.c"]
+
+MACH_X86_SRCS = ["src/x86/mach/init.c"]
+
+MACH_ARM_SRCS = ["src/arm/mach/init.c"]
+
+# Implementation library for cpuinfo; its sources depend on the target platform.
+cc_library(
+    name = "cpuinfo_impl",
+    srcs = select(
+        {
+            ":linux_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+            ":android_armv7": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM32_SRCS + ANDROID_ARM_SRCS,
+            ":android_arm64": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM64_SRCS + ANDROID_ARM_SRCS,
+            ":android_x86": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+            ":android_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
+            ":emscripten_wasm": COMMON_SRCS + EMSCRIPTEN_SRCS,
+        },
+        # No default branch exists, so spell out what is supported on a mismatch.
+        no_match_error = "cpuinfo: unsupported target platform; this BUILD file only covers Linux x86-64 (cpu=k8), Android (armeabi-v7a, arm64-v8a, x86, x86_64) and Emscripten (cpu=wasm)",
+    ),
+    copts = C99OPTS + ["-Iexternal/cpuinfo/include", "-Iexternal/cpuinfo/src"],
+    linkstatic = True,
+    # Headers must be in textual_hdrs to allow us to set the standard to C99
+    textual_hdrs = [
+        "include/cpuinfo.h",
+        "src/linux/api.h",
+        "src/mach/api.h",
+        "src/cpuinfo/common.h",
+        "src/cpuinfo/internal-api.h",
+        "src/cpuinfo/log.h",
+        "src/cpuinfo/utils.h",
+        "src/x86/api.h",
+        "src/x86/cpuid.h",
+        "src/x86/linux/api.h",
+        "src/arm/android/api.h",
+        "src/arm/linux/api.h",
+        "src/arm/linux/cp.h",
+        "src/arm/api.h",
+        "src/arm/midr.h",
+    ],
+    deps = ["@clog"],
+)
+
+# Public target: it has no sources of its own, exports only
+# include/cpuinfo.h, and pulls in the implementation through its dependency
+# on :cpuinfo_impl. Because "include" is stripped from the header path,
+# dependents include the header as "cpuinfo.h".
+cc_library(
+    name = "cpuinfo",
+    hdrs = ["include/cpuinfo.h"],
+    strip_include_prefix = "include",
+    deps = [":cpuinfo_impl"],
+)
+
+############################# Build configurations #############################
+
+# Matches any configuration whose cpu is "k8", whatever the crosstool.
+config_setting(
+    name = "linux_x86_64",
+    values = {"cpu": "k8"},
+    visibility = ["//visibility:public"],
+)
+
+# Android variants: each requires the Android crosstool plus one cpu value.
+config_setting(
+    name = "android_armv7",
+    values = {
+        "cpu": "armeabi-v7a",
+        "crosstool_top": "//external:android/crosstool",
+    },
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "android_arm64",
+    values = {
+        "cpu": "arm64-v8a",
+        "crosstool_top": "//external:android/crosstool",
+    },
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "android_x86",
+    values = {
+        "cpu": "x86",
+        "crosstool_top": "//external:android/crosstool",
+    },
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "android_x86_64",
+    values = {
+        "cpu": "x86_64",
+        "crosstool_top": "//external:android/crosstool",
+    },
+    visibility = ["//visibility:public"],
+)
+
+# Emscripten variants; none of them sets visibility explicitly.
+config_setting(
+    name = "emscripten_wasm",
+    values = {"cpu": "wasm"},
+)
+
+# Same cpu as :emscripten_wasm, narrowed by an additional features value.
+config_setting(
+    name = "emscripten_wasmsimd",
+    values = {
+        "cpu": "wasm",
+        "features": "wasm_simd",
+    },
+)
+
+config_setting(
+    name = "emscripten_asmjs",
+    values = {"cpu": "asmjs"},
+)
diff --git a/third_party/psimd.BUILD b/third_party/psimd.BUILD
new file mode 100644
index 0000000..fe10181
--- /dev/null
+++ b/third_party/psimd.BUILD
@@ -0,0 +1,15 @@
+# Description:
+#   Portable 128-bit SIMD intrinsics
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "psimd",  # header-only target: it declares no srcs
+    hdrs = ["include/psimd.h"],  # literal label instead of glob(): a missing header is an error
+    includes = ["include"],
+    strip_include_prefix = "include",
+)
diff --git a/third_party/pthreadpool.BUILD b/third_party/pthreadpool.BUILD
new file mode 100644
index 0000000..1267e4f
--- /dev/null
+++ b/third_party/pthreadpool.BUILD
@@ -0,0 +1,32 @@
+# Description:
+#   Portable pthread-based thread pool for C and C++
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+# Thread pool library; the implementation comes from the pthreads-based
+# source file listed in srcs.
+cc_library(
+    name = "pthreadpool",
+    # threadpool-utils.h sits in srcs, which keeps it private to this target.
+    srcs = [
+        "src/threadpool-pthreads.c",
+        "src/threadpool-utils.h",
+    ],
+    hdrs = ["include/pthreadpool.h"],
+    # Compile this target's sources with -O2.
+    copts = ["-O2"],
+    # Unlike copts, defines also apply to every target that depends on this
+    # one.
+    defines = ["PTHREADPOOL_NO_DEPRECATED_API"],
+    # "include" is both put on the include path and stripped from the hdrs
+    # paths, so the public header is reachable as "pthreadpool.h".
+    includes = ["include"],
+    strip_include_prefix = "include",
+    deps = [
+        "@FXdiv",
+    ],
+)