Bazel build of pytorch with gating CI (#36011)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/36011
Differential Revision: D20873430
Pulled By: malfet
fbshipit-source-id: 8ffffd10ca0ff8bdab578a70a9b2b777aed985d0
diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 0000000..bb94f9a
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,6 @@
+# Build PyTorch as C++14. --cxxopt is applied to C++ compilations only, so C sources are not handed a C++-only -std flag.
+build --cxxopt=--std=c++14
+# Add the directory the compiler runs from to the include search path.
+build --copt=-I.
+# Add the Bazel output tree as a system include directory. NOTE(review): the path is hard-coded to the k8-fastbuild configuration -- confirm before building with another compilation mode or platform.
+build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
diff --git a/.bazelversion b/.bazelversion
new file mode 100644
index 0000000..ccbccc3
--- /dev/null
+++ b/.bazelversion
@@ -0,0 +1 @@
+2.2.0
diff --git a/.circleci/cimodel/data/pytorch_build_definitions.py b/.circleci/cimodel/data/pytorch_build_definitions.py
index 2d90341..b042d8d 100644
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@@ -187,9 +187,9 @@
root = get_root()
found_configs = conf_tree.dfs(root)
- restrict_phases = None
for fc in found_configs:
+ restrict_phases = None
distro_name = fc.find_prop("distro_name")
compiler_name = fc.find_prop("compiler_name")
compiler_version = fc.find_prop("compiler_version")
diff --git a/.circleci/config.yml b/.circleci/config.yml
index c9aeb2e..be2a2d1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1557,6 +1557,77 @@
cd ${PROJ_ROOT}/ios/TestApp
instruments -s -devices
fastlane scan
+ pytorch_linux_bazel_build: # Bazel build job: runs build.sh inside ${DOCKER_IMAGE}, then commits and pushes the container as ${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} unless BUILD_ONLY is set
+ <<: *pytorch_params
+ machine:
+ image: ubuntu-1604:201903-01
+ steps:
+ # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+ - attach_scripts
+ - setup_linux_system_environment
+ - checkout
+ - setup_ci_environment
+ - run:
+ name: Bazel Build
+ no_output_timeout: "1h"
+ command: |
+ set -e
+ # Pull Docker image and run build
+ echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
+ time docker pull ${DOCKER_IMAGE} >/dev/null
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) # detached container; $id is reused by docker cp/exec/commit below
+
+ echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+
+ git submodule sync && git submodule update -q --init --recursive # submodules are fetched on the host before the checkout is copied into the container
+
+ docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace # copy the host checkout into the container workspace
+
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+
+ echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts # COMMAND is single-quoted, so its variables are expanded when command.sh runs, not when it is written
+
+ # Push intermediate Docker image for next phase to use
+ if [ -z "${BUILD_ONLY}" ]; then
+ # Augment our output image name with bazel to avoid collisions
+ output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+ export COMMIT_DOCKER_IMAGE=$output_image
+ docker commit "$id" ${COMMIT_DOCKER_IMAGE}
+ time docker push ${COMMIT_DOCKER_IMAGE}
+ fi
+
+ pytorch_linux_bazel_test: # Bazel test job: pulls the ${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} image and runs the test script inside it; there is no checkout step here
+ <<: *pytorch_params
+ machine:
+ image: ubuntu-1604:201903-01
+ steps:
+ # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+ - attach_scripts
+ - setup_linux_system_environment
+ - setup_ci_environment
+ - run:
+ name: Test
+ no_output_timeout: "90m"
+ command: |
+ set -e
+ output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} # same name that pytorch_linux_bazel_build commits and pushes
+ export COMMIT_DOCKER_IMAGE=$output_image
+ echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
+
+ time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
+
+ if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then # a non-empty USE_CUDA_DOCKER_RUNTIME starts the container with --runtime=nvidia
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+ else
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+ fi
+
+ if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then # environments whose name contains "multigpu" run multigpu-test.sh; all others run test.sh and also export CIRCLE_PULL_REQUEST
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+ else
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+ fi
+ echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts # COMMAND is single-quoted, so its variables are expanded when command.sh runs, not when it is written
promote_s3:
<<: *promote_common
@@ -2438,6 +2509,20 @@
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:e43973a9-9d5a-4138-9181-a08a0fc55e2f"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
+ - pytorch_linux_bazel_build: # workflow instance of the Bazel build job
+ name: pytorch_bazel_build
+ requires:
+ - setup
+ build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build" # the "-bazel-" substring is what .jenkins/pytorch/build.sh matches to take the Bazel path
+ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
+ resource_class: large
+ - pytorch_linux_bazel_test: # workflow instance of the Bazel test job
+ name: pytorch_bazel_test
+ requires:
+ - setup
+ - pytorch_bazel_build # the build job pushes the image that the test job pulls
+ build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test" # the "-bazel-" substring is what .jenkins/pytorch/test.sh matches to run test_bazel
+ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
- caffe2_linux_build:
name: caffe2_onnx_main_py3_6_clang7_ubuntu16_04_build
requires:
diff --git a/.circleci/generate_config_yml.py b/.circleci/generate_config_yml.py
index d0cd80f..a6f2d4b 100755
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@@ -101,6 +101,7 @@
File("workflows-pytorch-ios-builds.yml"),
File("workflows-pytorch-mobile-builds.yml"),
File("workflows-pytorch-ge-config-tests.yml"),
+ File("workflows-pytorch-bazel-builds.yml"),
Listgen(caffe2_build_definitions.get_workflow_jobs, 3),
File("workflows-binary-builds-smoke-subset.yml"),
Listgen(binary_build_definitions.get_binary_smoke_test_jobs, 3),
diff --git a/.circleci/verbatim-sources/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs-custom.yml
index 2e167b3..dfda514 100644
--- a/.circleci/verbatim-sources/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs-custom.yml
@@ -440,3 +440,74 @@
cd ${PROJ_ROOT}/ios/TestApp
instruments -s -devices
fastlane scan
+ pytorch_linux_bazel_build: # Bazel build job: runs build.sh inside ${DOCKER_IMAGE}, then commits and pushes the container as ${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} unless BUILD_ONLY is set
+ <<: *pytorch_params
+ machine:
+ image: ubuntu-1604:201903-01
+ steps:
+ # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+ - attach_scripts
+ - setup_linux_system_environment
+ - checkout
+ - setup_ci_environment
+ - run:
+ name: Bazel Build
+ no_output_timeout: "1h"
+ command: |
+ set -e
+ # Pull Docker image and run build
+ echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
+ time docker pull ${DOCKER_IMAGE} >/dev/null
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) # detached container; $id is reused by docker cp/exec/commit below
+
+ echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+
+ git submodule sync && git submodule update -q --init --recursive # submodules are fetched on the host before the checkout is copied into the container
+
+ docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace # copy the host checkout into the container workspace
+
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+
+ echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts # COMMAND is single-quoted, so its variables are expanded when command.sh runs, not when it is written
+
+ # Push intermediate Docker image for next phase to use
+ if [ -z "${BUILD_ONLY}" ]; then
+ # Augment our output image name with bazel to avoid collisions
+ output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+ export COMMIT_DOCKER_IMAGE=$output_image
+ docker commit "$id" ${COMMIT_DOCKER_IMAGE}
+ time docker push ${COMMIT_DOCKER_IMAGE}
+ fi
+
+ pytorch_linux_bazel_test: # Bazel test job: pulls the ${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} image and runs the test script inside it; there is no checkout step here
+ <<: *pytorch_params
+ machine:
+ image: ubuntu-1604:201903-01
+ steps:
+ # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+ - attach_scripts
+ - setup_linux_system_environment
+ - setup_ci_environment
+ - run:
+ name: Test
+ no_output_timeout: "90m"
+ command: |
+ set -e
+ output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} # same name that pytorch_linux_bazel_build commits and pushes
+ export COMMIT_DOCKER_IMAGE=$output_image
+ echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
+
+ time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
+
+ if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then # a non-empty USE_CUDA_DOCKER_RUNTIME starts the container with --runtime=nvidia
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+ else
+ export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+ fi
+
+ if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then # environments whose name contains "multigpu" run multigpu-test.sh; all others run test.sh and also export CIRCLE_PULL_REQUEST
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+ else
+ export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+ fi
+ echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts # COMMAND is single-quoted, so its variables are expanded when command.sh runs, not when it is written
diff --git a/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml b/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml
new file mode 100644
index 0000000..825ea50
--- /dev/null
+++ b/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml
@@ -0,0 +1,14 @@
+ - pytorch_linux_bazel_build: # workflow instance of the Bazel build job
+ name: pytorch_bazel_build
+ requires:
+ - setup
+ build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build" # the "-bazel-" substring is what .jenkins/pytorch/build.sh matches to take the Bazel path
+ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
+ resource_class: large
+ - pytorch_linux_bazel_test: # workflow instance of the Bazel test job
+ name: pytorch_bazel_test
+ requires:
+ - setup
+ - pytorch_bazel_build # the build job pushes the image that the test job pulls
+ build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test" # the "-bazel-" substring is what .jenkins/pytorch/test.sh matches to run test_bazel
+ docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
diff --git a/.gitignore b/.gitignore
index c27e2b8..380ff11 100644
--- a/.gitignore
+++ b/.gitignore
@@ -255,3 +255,6 @@
# clangd background index
.clangd/
+
+# bazel symlinks
+bazel-*
diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
index 91dd0e1..1de48d9 100755
--- a/.jenkins/pytorch/build.sh
+++ b/.jenkins/pytorch/build.sh
@@ -180,65 +180,72 @@
export CXX=clang++
fi
+if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
+ set -e
-# check that setup.py would fail with bad arguments
-echo "The next three invocations are expected to fail with invalid command error messages."
-( ! get_exit_code python setup.py bad_argument )
-( ! get_exit_code python setup.py clean] )
-( ! get_exit_code python setup.py clean bad_argument )
+ get_bazel
-if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
-
- # ppc64le build fails when WERROR=1
- # set only when building other architectures
- # only use for "python setup.py install" line
- if [[ "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
- WERROR=1 python setup.py install
- else
- python setup.py install
- fi
-
- # TODO: I'm not sure why, but somehow we lose verbose commands
- set -x
-
- if which sccache > /dev/null; then
- echo 'PyTorch Build Statistics'
- sccache --show-stats
- fi
-
- assert_git_not_dirty
-
- # Build custom operator tests.
- CUSTOM_OP_BUILD="$PWD/../custom-op-build"
- CUSTOM_OP_TEST="$PWD/test/custom_operator"
- python --version
- SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
- mkdir "$CUSTOM_OP_BUILD"
- pushd "$CUSTOM_OP_BUILD"
- cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
- make VERBOSE=1
- popd
- assert_git_not_dirty
+ tools/bazel build :torch
else
- # Test standalone c10 build
- if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda10.1-cudnn7-py3* ]]; then
- mkdir -p c10/build
- pushd c10/build
- cmake ..
- make -j
+ # check that setup.py would fail with bad arguments
+ echo "The next three invocations are expected to fail with invalid command error messages."
+ ( ! get_exit_code python setup.py bad_argument )
+ ( ! get_exit_code python setup.py clean] )
+ ( ! get_exit_code python setup.py clean bad_argument )
+
+ if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
+
+ # ppc64le build fails when WERROR=1
+ # set only when building other architectures
+ # only use for "python setup.py install" line
+ if [[ "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
+ WERROR=1 python setup.py install
+ else
+ python setup.py install
+ fi
+
+ # TODO: I'm not sure why, but somehow we lose verbose commands
+ set -x
+
+ if which sccache > /dev/null; then
+ echo 'PyTorch Build Statistics'
+ sccache --show-stats
+ fi
+
+ assert_git_not_dirty
+
+ # Build custom operator tests.
+ CUSTOM_OP_BUILD="$PWD/../custom-op-build"
+ CUSTOM_OP_TEST="$PWD/test/custom_operator"
+ python --version
+ SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+ mkdir "$CUSTOM_OP_BUILD"
+ pushd "$CUSTOM_OP_BUILD"
+ cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
+ make VERBOSE=1
popd
assert_git_not_dirty
- fi
+ else
+ # Test standalone c10 build
+ if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda10.1-cudnn7-py3* ]]; then
+ mkdir -p c10/build
+ pushd c10/build
+ cmake ..
+ make -j
+ popd
+ assert_git_not_dirty
+ fi
- # Test no-Python build
- echo "Building libtorch"
- # NB: Install outside of source directory (at the same level as the root
- # pytorch folder) so that it doesn't get cleaned away prior to docker push.
- BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
- mkdir -p ../cpp-build/caffe2
- pushd ../cpp-build/caffe2
- WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY
- popd
+ # Test no-Python build
+ echo "Building libtorch"
+ # NB: Install outside of source directory (at the same level as the root
+ # pytorch folder) so that it doesn't get cleaned away prior to docker push.
+ BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
+ mkdir -p ../cpp-build/caffe2
+ pushd ../cpp-build/caffe2
+ WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY
+ popd
+ fi
fi
# Test XLA build
diff --git a/.jenkins/pytorch/common.sh b/.jenkins/pytorch/common.sh
index 60cc647..38519c1 100644
--- a/.jenkins/pytorch/common.sh
+++ b/.jenkins/pytorch/common.sh
@@ -187,3 +187,12 @@
set -e
git diff --name-only "$(git merge-base origin master HEAD)" > "$1"
}
+
+function get_bazel() {
+ # Download the bazel 2.2.0 release binary to tools/bazel; return non-zero on a failed download even when the caller has not enabled `set -e`
+ wget https://github.com/bazelbuild/bazel/releases/download/2.2.0/bazel-2.2.0-linux-x86_64 -O tools/bazel || return 1
+ # Verify the SHA-256 of the download; on mismatch delete it and fail, so an unverified binary is never made executable
+ echo 'b2f002ea0e6194a181af6ac84cd94bd8dc797722eb2354690bebac92dda233ff tools/bazel' | sha256sum --quiet -c || { rm -f tools/bazel; return 1; }
+
+ chmod +x tools/bazel
+}
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 9119880..145041e 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -42,7 +42,7 @@
fi
# --user breaks ppc64le builds and these packages are already in ppc64le docker
-if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]] && [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
# JIT C++ extensions require ninja.
pip_install --user ninja
# ninja is installed in /var/lib/jenkins/.local/bin
@@ -252,7 +252,15 @@
assert_git_not_dirty
}
-if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
+test_bazel() { # Runs the :all_tests Bazel target with GPU-only tests filtered out
+ set -e
+
+ get_bazel # download and checksum-verify tools/bazel (defined in .jenkins/pytorch/common.sh)
+
+ tools/bazel test --test_tag_filters=-gpu-required "--test_filter=-*_CUDA" :all_tests # the filter is quoted so the shell can never glob-expand the * before bazel sees it
+}
+
+if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
(cd test && python -c "import torch; print(torch.__config__.show())")
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
@@ -278,6 +286,8 @@
test_aten
test_libtorch
test_custom_script_ops
+elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
+ test_bazel
else
test_torchvision
test_python_nn
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000..7fa034b
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,2499 @@
+load("@rules_proto//proto:defs.bzl", "proto_library")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_proto_library", "cc_test")
+load("//third_party:substitution.bzl", "template_rule")
+load("//tools/rules:cu.bzl", "cu_library")
+load("//tools/config:defs.bzl", "if_cuda")
+load("//:aten.bzl", "intern_build_aten_ops")
+
+COMMON_COPTS = [  # Preprocessor defines gathered into one list (its consumers are outside this excerpt)
+ "-DHAVE_MALLOC_USABLE_SIZE=1",
+ "-DHAVE_MMAP=1",
+ "-DHAVE_SHM_OPEN=1",
+ "-DHAVE_SHM_UNLINK=1",
+ "-D_FILE_OFFSET_BITS=64",
+ "-DHAVE_GCC_GET_CPUID",
+ "-DUSE_GCC_GET_CPUID",
+ "-DTH_HAVE_THREAD",
+ "-DUSE_FBGEMM",
+ "-DUSE_DISTRIBUTED",
+ "-DAT_PARALLEL_NATIVE=1",
+ "-DATEN_THREADING=NATIVE",
+ "-DNO_CUDNN_DESTROY_HANDLE",
+] + if_cuda([  # extra defines routed through if_cuda(); presumably added only for CUDA-enabled builds -- confirm in //tools/config:defs.bzl
+ "-DUSE_CUDA",
+ "-DUSE_CUDNN",
+])
+
+# c10
+template_rule(  # Produces c10/macros/cmake_macros.h from the CMake .in template via the substitutions below
+ name = "cmake_macros_h",
+ src = "c10/macros/cmake_macros.h.in",
+ out = "c10/macros/cmake_macros.h",
+ substitutions = {
+ "cmakedefine": "define",  # "#cmakedefine X" becomes "#define X"
+ "#define FEATURE_TORCH_MOBILE": "/* #undef FEATURE_TORCH_MOBILE */",  # this macro is left undefined
+ "#define USE_STATIC_DISPATCH": "/* #undef USE_STATIC_DISPATCH */",  # this macro is left undefined
+ "#define C10_USE_NUMA": "/* #undef C10_USE_NUMA */",  # this macro is left undefined
+ },
+)
+
+template_rule(  # Produces c10/cuda/impl/cuda_cmake_macros.h from its CMake .in template
+ name = "cuda_cmake_macros_h",
+ src = "c10/cuda/impl/cuda_cmake_macros.h.in",
+ out = "c10/cuda/impl/cuda_cmake_macros.h",
+ substitutions = {
+ "cmakedefine": "define",  # "#cmakedefine X" becomes "#define X"
+ },
+)
+
+cc_library(  # c10 headers (checked-in ones via glob, plus the two generated *cmake_macros.h files) and their gflags/glog deps
+ name = "c10_headers",
+ hdrs = glob([
+ "c10/core/*.h",
+ "c10/core/impl/*.h",
+ "c10/cuda/*.h",
+ "c10/cuda/impl/*.h",
+ "c10/macros/*.h",
+ "c10/util/*.h",
+ ]) + [
+ "c10/macros/cmake_macros.h",  # output of :cmake_macros_h
+ "c10/cuda/impl/cuda_cmake_macros.h",  # output of :cuda_cmake_macros_h
+ ],
+ deps = [
+ "@com_github_gflags_gflags//:gflags",
+ "@com_github_glog//:glog",
+ ],
+)
+
+cc_library(  # c10 library sources; the c10/cuda sources and the @cuda dependency are supplied through if_cuda(...)
+ name = "c10",
+ srcs = glob([
+ "c10/core/*.cpp",
+ "c10/core/impl/*.cpp",
+ "c10/util/*.cpp",
+ ]) + if_cuda(
+ glob([
+ "c10/cuda/*.cpp",
+ "c10/cuda/impl/*.cpp",
+ ]),
+ [],
+ ),
+ copts = ["-DCAFFE2_BUILD_MAIN_LIB"],
+ deps = [
+ ":c10_headers",
+ ] + if_cuda(
+ ["@cuda"],
+ [],
+ ),
+ alwayslink = True,  # link every object file of this library into dependents, even ones with no referenced symbol
+)
+
+cc_test(  # Single test binary built from the c10/test sources, linked against gtest_main
+ name = "c10_tests",
+ size = "small",
+ srcs = glob([
+ "c10/test/util/*.cpp",
+ "c10/test/util/*.h",
+ "c10/test/core/*.cpp",
+ "c10/test/core/impl/*.cpp",
+ ]),
+ copts = ["-Wno-deprecated-declarations"],
+ deps = [
+ ":c10",
+ ":c10_headers",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+py_binary(  # Executable wrapper for aten/src/ATen/gen.py; used as the tool of the :generated_cpp genrule
+ name = "gen",
+ srcs = ["aten/src/ATen/gen.py"],
+)
+
+genrule(  # Runs :gen over the ATen declaration inputs to produce the ATen headers and sources listed in outs
+ name = "generated_cpp",
+ srcs = [
+ "aten/src/ATen/Declarations.cwrap",
+ "aten/src/THCUNN/generic/THCUNN.h",
+ "aten/src/ATen/nn.yaml",
+ "aten/src/ATen/native/native_functions.yaml",
+ ] + glob(["aten/src/ATen/templates/**"]),
+ outs = [
+ "aten/src/ATen/Declarations.yaml",
+ "aten/src/ATen/CPUType.h",
+ "aten/src/ATen/CPUType.cpp",
+ "aten/src/ATen/Functions.h",
+ "aten/src/ATen/LegacyTHFunctionsCPU.h",
+ "aten/src/ATen/LegacyTHFunctionsCPU.cpp",
+ "aten/src/ATen/NativeFunctions.h",
+ "aten/src/ATen/MkldnnCPUType.h",
+ "aten/src/ATen/MkldnnCPUType.cpp",
+ "aten/src/ATen/QuantizedCPUType.h",
+ "aten/src/ATen/QuantizedCPUType.cpp",
+ "aten/src/ATen/SparseCPUType.h",
+ "aten/src/ATen/SparseCPUType.cpp",
+ "aten/src/ATen/TypeDefault.h",
+ "aten/src/ATen/TypeDefault.cpp",
+ "aten/src/ATen/core/TensorBody.h",
+ "aten/src/ATen/core/TensorMethods.h",
+ "aten/src/ATen/core/OpsAlreadyMovedToC10.cpp",
+ ],
+ cmd = "$(location :gen) --source-path aten/src/ATen --install_dir `dirname $(location aten/src/ATen/Declarations.yaml)` aten/src/ATen/Declarations.cwrap aten/src/THCUNN/generic/THCUNN.h aten/src/ATen/nn.yaml aten/src/ATen/native/native_functions.yaml",  # --install_dir is the output directory that holds Declarations.yaml
+ tools = [":gen"],
+)
+
+py_library(  # aten/src/ATen/code_template.py packaged as a Python library
+ name = "code_template",
+ srcs = ["aten/src/ATen/code_template.py"],
+ imports = ["aten"],  # adds the aten/ directory to the Python import path of dependents
+)
+
+py_library(  # tools/autograd Python sources together with their YAML and template data files
+ name = "tools_autograd",
+ srcs = glob(["tools/autograd/*.py"]),
+ data = glob([
+ "tools/autograd/*.yaml",
+ "tools/autograd/templates/*",
+ ]),
+ deps = [":code_template"],
+)
+
+py_library(  # tools/jit Python sources together with their template data files
+ name = "tools_jit",
+ srcs = glob(["tools/jit/*.py"]),
+ data = glob(["tools/jit/templates/*"]),
+)
+
+py_binary(  # Executable wrapper for tools/setup_helpers/generate_code.py; used as the tool of the :generated_code genrule
+ name = "generate_code",
+ srcs = ["tools/setup_helpers/generate_code.py"],
+ deps = [
+ ":tools_autograd",
+ ":tools_jit",
+ ],
+)
+
+genrule(  # Runs :generate_code on Declarations.yaml to produce the autograd and JIT sources listed in outs
+ name = "generated_code",
+ srcs = [
+ "aten/src/ATen/Declarations.yaml",  # itself an output of :generated_cpp
+ ],
+ outs = [
+ "torch/csrc/autograd/generated/python_functions.h",
+ "torch/csrc/autograd/generated/python_functions.cpp",
+ "torch/csrc/autograd/generated/python_variable_methods.cpp",
+ "torch/csrc/autograd/generated/python_torch_functions.cpp",
+ "torch/csrc/autograd/generated/python_nn_functions.cpp",
+ "torch/csrc/autograd/generated/VariableType.h",
+ "torch/csrc/autograd/generated/VariableType_0.cpp",
+ "torch/csrc/autograd/generated/VariableType_1.cpp",
+ "torch/csrc/autograd/generated/VariableType_2.cpp",
+ "torch/csrc/autograd/generated/VariableType_3.cpp",
+ "torch/csrc/autograd/generated/VariableType_4.cpp",
+ # "torch/csrc/autograd/generated/VariableTypeEverything.cpp",
+ "torch/csrc/autograd/generated/RegistrationDeclarations.h",
+ "torch/csrc/autograd/generated/Functions.h",
+ "torch/csrc/autograd/generated/Functions.cpp",
+ "torch/csrc/autograd/generated/variable_factories.h",
+ "torch/csrc/jit/generated/register_aten_ops_0.cpp",
+ "torch/csrc/jit/generated/register_aten_ops_1.cpp",
+ "torch/csrc/jit/generated/register_aten_ops_2.cpp",
+ ],
+ cmd = "$(location :generate_code) --install_dir `dirname $(location torch/csrc/autograd/generated/variable_factories.h)`/../.. --declarations-path $(location aten/src/ATen/Declarations.yaml) --nn-path aten/src",  # --install_dir resolves to the output torch/csrc directory (two levels above autograd/generated)
+ tools = [":generate_code"],
+)
+
+exports_files(  # Lets other packages reference this template file by label
+ srcs = ["aten/src/ATen/cpu/tbb/extra/version_string.ver.in"],
+)
+
+# ATen
+filegroup(  # C++ sources directly under aten/src/ATen and its detail/ and cpu/ subdirectories
+ name = "aten_base_cpp",
+ srcs = glob([
+ "aten/src/ATen/*.cpp",
+ "aten/src/ATen/detail/*.cpp",
+ "aten/src/ATen/cpu/*.cpp",
+ ]),
+)
+
+filegroup(  # All C++ sources under aten/src/ATen/core (recursive), excluding *_test.cpp files
+ name = "ATen_CORE_SRCS",
+ srcs = glob(
+ [
+ "aten/src/ATen/core/**/*.cpp",
+ ],
+ exclude = [
+ "aten/src/ATen/core/**/*_test.cpp",
+ ],
+ ),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native (non-recursive)
+ name = "aten_native_cpp",
+ srcs = glob(["aten/src/ATen/native/*.cpp"]),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native/sparse
+ name = "aten_native_sparse_cpp",
+ srcs = glob(["aten/src/ATen/native/sparse/*.cpp"]),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native/quantized and its cpu/ subdirectory
+ name = "aten_native_quantized_cpp",
+ srcs = glob(
+ [
+ "aten/src/ATen/native/quantized/*.cpp",
+ "aten/src/ATen/native/quantized/cpu/*.cpp",
+ ],
+ ),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native/mkl
+ name = "aten_native_mkl_cpp",
+ srcs = glob(["aten/src/ATen/native/mkl/*.cpp"]),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native/mkldnn
+ name = "aten_native_mkldnn_cpp",
+ srcs = glob(["aten/src/ATen/native/mkldnn/*.cpp"]),
+)
+
+filegroup(  # C++ sources directly under aten/src/ATen/native/xnnpack
+ name = "aten_native_xnnpack",
+ srcs = glob(["aten/src/ATen/native/xnnpack/*.cpp"]),
+)
+
+filegroup(  # All C++ sources under aten/src/ATen/quantized (recursive), excluding *_test.cpp files
+ name = "ATen_QUANTIZED_SRCS",
+ srcs = glob(
+ [
+ "aten/src/ATen/quantized/**/*.cpp",
+ ],
+ exclude = [
+ "aten/src/ATen/quantized/**/*_test.cpp",
+ ],
+ ),
+)
+
+filegroup(  # Explicit (non-glob) list of TH C++ sources
+ name = "th_srcs",
+ srcs = [
+ "aten/src/TH/THAllocator.cpp",
+ "aten/src/TH/THBlas.cpp",
+ "aten/src/TH/THDiskFile.cpp",
+ "aten/src/TH/THFile.cpp",
+ "aten/src/TH/THGeneral.cpp",
+ "aten/src/TH/THLapack.cpp",
+ "aten/src/TH/THMemoryFile.cpp",
+ "aten/src/TH/THStorageFunctions.cpp",
+ "aten/src/TH/THTensor.cpp",
+ "aten/src/TH/THTensorEvenMoreMath.cpp",
+ "aten/src/TH/THTensorFill.cpp",
+ "aten/src/TH/THTensorLapack.cpp",
+ "aten/src/TH/THTensorMath.cpp",
+ "aten/src/TH/THTensorMoreMath.cpp",
+ "aten/src/TH/THTensorRandom.cpp",
+ "aten/src/TH/THVector.cpp",
+ "aten/src/TH/vector/AVX.cpp",
+ ],
+)
+
+filegroup(  # Explicit list of .cpp sources from the ATen cuda/cudnn/miopen directories and from THC
+ name = "aten_cuda_srcs",
+ srcs = [
+ "aten/src/ATen/cuda/CUDABlas.cpp",
+ "aten/src/ATen/cuda/CUDAContext.cpp",
+ "aten/src/ATen/cuda/CUDAGenerator.cpp",
+ "aten/src/ATen/cuda/CuSparseHandlePool.cpp",
+ "aten/src/ATen/cuda/CublasHandlePool.cpp",
+ "aten/src/ATen/cuda/PinnedMemoryAllocator.cpp",
+ "aten/src/ATen/cuda/detail/CUDAHooks.cpp",
+ "aten/src/ATen/cudnn/Descriptors.cpp",
+ "aten/src/ATen/cudnn/Handle.cpp",
+ "aten/src/ATen/cudnn/Types.cpp",
+ "aten/src/ATen/native/cuda/CUDAUnaryOps.cpp",
+ "aten/src/ATen/native/cuda/LegacyDefinitions.cpp",
+ "aten/src/ATen/native/cuda/TensorShapeCUDA.cpp",
+ "aten/src/ATen/native/cudnn/AffineGridGenerator.cpp",
+ "aten/src/ATen/native/cudnn/BatchNorm.cpp",
+ "aten/src/ATen/native/cudnn/Conv.cpp",
+ "aten/src/ATen/native/cudnn/GridSampler.cpp",
+ "aten/src/ATen/native/cudnn/LossCTC.cpp",
+ "aten/src/ATen/native/cudnn/RNN.cpp",
+ "aten/src/ATen/native/miopen/BatchNorm_miopen.cpp",
+ "aten/src/ATen/native/miopen/Conv_miopen.cpp",
+ "aten/src/ATen/native/miopen/RNN_miopen.cpp",
+ "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp",
+ "aten/src/THC/THCCachingHostAllocator.cpp",
+ "aten/src/THC/THCGeneral.cpp",
+ "aten/src/THC/THCStorageCopy.cpp",
+ "aten/src/THC/THCTensor.cpp",
+ ],
+)
+
+filegroup(  # Explicit list of THC sources named with a .cu.cc suffix (NOTE(review): presumably produced from the .cu files by tooling outside this excerpt -- confirm)
+ name = "thc_srcs_cu",
+ srcs = [
+ "aten/src/THC/THCBlas.cu.cc",
+ "aten/src/THC/THCReduceApplyUtils.cu.cc",
+ "aten/src/THC/THCSleep.cu.cc",
+ "aten/src/THC/THCSortUtils.cu.cc",
+ "aten/src/THC/THCStorage.cu.cc",
+ "aten/src/THC/THCStorageCopy.cu.cc",
+ "aten/src/THC/THCTensor.cu.cc",
+ "aten/src/THC/THCTensorCopy.cu.cc",
+ "aten/src/THC/THCTensorIndex.cu.cc",
+ "aten/src/THC/THCTensorMath.cu.cc",
+ "aten/src/THC/THCTensorMathBlas.cu.cc",
+ "aten/src/THC/THCTensorMathMagma.cu.cc",
+ "aten/src/THC/THCTensorMathPairwise.cu.cc",
+ "aten/src/THC/THCTensorMathReduce.cu.cc",
+ "aten/src/THC/THCTensorMathScan.cu.cc",
+ "aten/src/THC/THCTensorMode.cu.cc",
+ "aten/src/THC/THCTensorRandom.cu.cc",
+ "aten/src/THC/THCTensorScatterGather.cu.cc",
+ "aten/src/THC/THCTensorSort.cu.cc",
+ "aten/src/THC/THCTensorTopK.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedBFloat16.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedBool.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedByte.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedChar.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedDouble.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedFloat.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedHalf.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedInt.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedLong.cu.cc",
+ "aten/src/THC/generated/THCTensorMaskedShort.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseBool.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseByte.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseChar.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseDouble.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseFloat.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseHalf.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseInt.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseLong.cu.cc",
+ "aten/src/THC/generated/THCTensorMathPointwiseShort.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceBFloat16.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceBool.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceByte.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceChar.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceDouble.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceFloat.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceHalf.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceInt.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceLong.cu.cc",
+ "aten/src/THC/generated/THCTensorMathReduceShort.cu.cc",
+ "aten/src/THC/generated/THCTensorSortByte.cu.cc",
+ "aten/src/THC/generated/THCTensorSortChar.cu.cc",
+ "aten/src/THC/generated/THCTensorSortDouble.cu.cc",
+ "aten/src/THC/generated/THCTensorSortFloat.cu.cc",
+ "aten/src/THC/generated/THCTensorSortHalf.cu.cc",
+ "aten/src/THC/generated/THCTensorSortInt.cu.cc",
+ "aten/src/THC/generated/THCTensorSortLong.cu.cc",
+ "aten/src/THC/generated/THCTensorSortShort.cu.cc",
+ ],
+)
+
+filegroup(  # Explicit list of THCUNN sources named with a .cu.cc suffix (NOTE(review): presumably produced from the .cu files by tooling outside this excerpt -- confirm)
+ name = "thcunn_srcs_cu",
+ srcs = [
+ "aten/src/THCUNN/BCECriterion.cu.cc",
+ "aten/src/THCUNN/ClassNLLCriterion.cu.cc",
+ "aten/src/THCUNN/ELU.cu.cc",
+ "aten/src/THCUNN/GatedLinearUnit.cu.cc",
+ "aten/src/THCUNN/HardTanh.cu.cc",
+ "aten/src/THCUNN/LeakyReLU.cu.cc",
+ "aten/src/THCUNN/LogSigmoid.cu.cc",
+ "aten/src/THCUNN/MultiLabelMarginCriterion.cu.cc",
+ "aten/src/THCUNN/MultiMarginCriterion.cu.cc",
+ "aten/src/THCUNN/RReLU.cu.cc",
+ "aten/src/THCUNN/SoftMarginCriterion.cu.cc",
+ "aten/src/THCUNN/SoftPlus.cu.cc",
+ "aten/src/THCUNN/SoftShrink.cu.cc",
+ "aten/src/THCUNN/SpatialClassNLLCriterion.cu.cc",
+ "aten/src/THCUNN/SpatialConvolutionMM.cu.cc",
+ "aten/src/THCUNN/SpatialDepthwiseConvolution.cu.cc",
+ "aten/src/THCUNN/Tanh.cu.cc",
+ ],
+)
+
+filegroup(  # Explicit list of ATen CUDA sources named with a .cu.cc suffix (NOTE(review): presumably produced from the .cu files by tooling outside this excerpt -- confirm)
+ name = "aten_srcs_cu",
+ srcs = [
+ "aten/src/ATen/cuda/detail/IndexUtils.cu.cc",
+ "aten/src/ATen/native/cuda/Activation.cu.cc",
+ "aten/src/ATen/native/cuda/AdaptiveAveragePooling.cu.cc",
+ "aten/src/ATen/native/cuda/AdaptiveAveragePooling3d.cu.cc",
+ "aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu.cc",
+ "aten/src/ATen/native/cuda/AdaptiveMaxPooling3d.cu.cc",
+ "aten/src/ATen/native/cuda/AveragePool2d.cu.cc",
+ "aten/src/ATen/native/cuda/AveragePool3d.cu.cc",
+ "aten/src/ATen/native/cuda/BatchLinearAlgebra.cu.cc",
+ "aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu.cc",
+ "aten/src/ATen/native/cuda/BinaryCompareKernel.cu.cc",
+ "aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu.cc",
+ "aten/src/ATen/native/cuda/CUDAScalar.cu.cc",
+ "aten/src/ATen/native/cuda/Col2Im.cu.cc",
+ "aten/src/ATen/native/cuda/Copy.cu.cc",
+ "aten/src/ATen/native/cuda/CrossKernel.cu.cc",
+ "aten/src/ATen/native/cuda/DilatedMaxPool2d.cu.cc",
+ "aten/src/ATen/native/cuda/DilatedMaxPool3d.cu.cc",
+ "aten/src/ATen/native/cuda/DistanceKernel.cu.cc",
+ "aten/src/ATen/native/cuda/Distributions.cu.cc",
+ "aten/src/ATen/native/cuda/Dropout.cu.cc",
+ "aten/src/ATen/native/cuda/Embedding.cu.cc",
+ "aten/src/ATen/native/cuda/EmbeddingBackwardKernel.cu.cc",
+ "aten/src/ATen/native/cuda/EmbeddingBag.cu.cc",
+ "aten/src/ATen/native/cuda/FillKernel.cu.cc",
+ "aten/src/ATen/native/cuda/FractionalMaxPool2d.cu.cc",
+ "aten/src/ATen/native/cuda/FractionalMaxPool3d.cu.cc",
+ "aten/src/ATen/native/cuda/GridSampler.cu.cc",
+ "aten/src/ATen/native/cuda/Im2Col.cu.cc",
+ "aten/src/ATen/native/cuda/IndexKernel.cu.cc",
+ "aten/src/ATen/native/cuda/Indexing.cu.cc",
+ "aten/src/ATen/native/cuda/Lerp.cu.cc",
+ "aten/src/ATen/native/cuda/LinearAlgebra.cu.cc",
+ "aten/src/ATen/native/cuda/Loss.cu.cc",
+ "aten/src/ATen/native/cuda/LossCTC.cu.cc",
+ "aten/src/ATen/native/cuda/MaxUnpooling.cu.cc",
+ "aten/src/ATen/native/cuda/MultinomialKernel.cu.cc",
+ "aten/src/ATen/native/cuda/NaiveConvolutionTranspose2d.cu.cc",
+ "aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu.cc",
+ "aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu.cc",
+ "aten/src/ATen/native/cuda/Normalization.cu.cc",
+ "aten/src/ATen/native/cuda/PointwiseOpsKernel.cu.cc",
+ "aten/src/ATen/native/cuda/PowKernel.cu.cc",
+ "aten/src/ATen/native/cuda/RNN.cu.cc",
+ "aten/src/ATen/native/cuda/RangeFactories.cu.cc",
+ "aten/src/ATen/native/cuda/Reduce.cu.cc",
+ "aten/src/ATen/native/cuda/ReduceOpsKernel.cu.cc",
+ "aten/src/ATen/native/cuda/ReflectionPad.cu.cc",
+ "aten/src/ATen/native/cuda/Repeat.cu.cc",
+ "aten/src/ATen/native/cuda/ReplicationPadding.cu.cc",
+ "aten/src/ATen/native/cuda/Resize.cu.cc",
+ "aten/src/ATen/native/cuda/SoftMax.cu.cc",
+ "aten/src/ATen/native/cuda/SortingKthValue.cu.cc",
+ "aten/src/ATen/native/cuda/SparseMM.cu.cc",
+ "aten/src/ATen/native/cuda/SpectralOps.cu.cc",
+ "aten/src/ATen/native/cuda/SummaryOps.cu.cc",
+ "aten/src/ATen/native/cuda/TensorCompare.cu.cc",
+ "aten/src/ATen/native/cuda/TensorFactories.cu.cc",
+ "aten/src/ATen/native/cuda/TensorTransformations.cu.cc",
+ "aten/src/ATen/native/cuda/TriangularOps.cu.cc",
+ "aten/src/ATen/native/cuda/UnaryOpsKernel.cu.cc",
+ "aten/src/ATen/native/cuda/Unique.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleBicubic2d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleBilinear2d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleLinear1d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleNearest1d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleNearest2d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleNearest3d.cu.cc",
+ "aten/src/ATen/native/cuda/UpSampleTrilinear3d.cu.cc",
+ "aten/src/ATen/native/cuda/WeightNorm.cu.cc",
+ "aten/src/ATen/native/cuda/layer_norm_kernel.cu.cc",
+ "aten/src/ATen/native/quantized/cuda/fake_quantize_core.cu.cc",
+ "aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu.cc",
+ "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu.cc",
+ "aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu.cc",
+ ],
+)
+
+template_rule(
+ name = "aten_src_ATen_config",  # Renders Config.h.in to Config.h with the fixed switches below (template_rule is defined outside this chunk).
+ src = "aten/src/ATen/Config.h.in",
+ out = "aten/src/ATen/Config.h",
+ substitutions = {
+ "@AT_MKLDNN_ENABLED@": "1",
+ "@AT_MKL_ENABLED@": "0",  # NOTE(review): MKL is off here although :aten lists :aten_native_mkl_cpp; presumably those sources are guarded by this flag - confirm.
+ "@AT_NNPACK_ENABLED@": "0",
+ "@CAFFE2_STATIC_LINK_CUDA_INT@": "0",
+ },
+)
+
+template_rule(
+ name = "aten_src_ATen_cuda_config",  # Renders CUDAConfig.h.in to CUDAConfig.h: cuDNN on, ROCm off, no extra NVCC flags.
+ src = "aten/src/ATen/cuda/CUDAConfig.h.in",
+ out = "aten/src/ATen/cuda/CUDAConfig.h",
+ substitutions = {
+ "@AT_CUDNN_ENABLED@": "1",
+ "@AT_ROCM_ENABLED@": "0",
+ "@NVCC_FLAGS_EXTRA@": "",
+ },
+)
+
+template_rule(
+ name = "aten_src_TH_THGeneral",  # Renders THGeneral.h.in to THGeneral.h.
+ src = "aten/src/TH/THGeneral.h.in",
+ out = "aten/src/TH/THGeneral.h",
+ substitutions = {
+ "cmakedefine": "define",  # Presumably a plain text replacement, so every cmakedefine line becomes a define - confirm against template_rule.
+ },
+)
+
+template_rule(
+ name = "aten_src_THC_THCGeneral",  # Renders THCGeneral.h.in to THCGeneral.h.
+ src = "aten/src/THC/THCGeneral.h.in",
+ out = "aten/src/THC/THCGeneral.h",
+ substitutions = {  # The single entry below replaces the USE_MAGMA cmakedefine line with nothing, leaving USE_MAGMA undefined.
+ "#cmakedefine USE_MAGMA": "",
+ },
+)
+
+cc_library(
+ name = "aten_headers",  # Header-only target (no srcs): explicit headers, globbed ATen/TH/THC/THCUNN files, and generated outputs.
+ hdrs = [
+ "aten/src/TH/THGeneral.h",  # Same path as the out of :aten_src_TH_THGeneral.
+ "aten/src/THC/THCGeneral.h",  # Same path as the out of :aten_src_THC_THCGeneral.
+ "torch/csrc/WindowsTorchApiMacro.h",
+ "torch/csrc/jit/frontend/function_schema_parser.h",
+ "aten/src/ATen/templates/TensorBody.h",
+ "aten/src/ATen/templates/TensorMethods.h",
+ ] + glob([
+ "aten/src/ATen/*.h",
+ "aten/src/ATen/core/*.h",
+ "aten/src/ATen/core/boxing/*.h",
+ "aten/src/ATen/core/boxing/impl/*.h",
+ "aten/src/ATen/core/dispatch/*.h",
+ "aten/src/ATen/core/op_registration/*.h",
+ "aten/src/ATen/cpu/*.h",
+ "aten/src/ATen/cpu/vec256/*.h",
+ "aten/src/ATen/cuda/*.cuh",
+ "aten/src/ATen/cuda/*.h",
+ "aten/src/ATen/cuda/detail/*.cuh",
+ "aten/src/ATen/cuda/detail/*.h",
+ "aten/src/ATen/cuda/nvrtc_stub/*.h",
+ "aten/src/ATen/cudnn/*.h",
+ "aten/src/ATen/detail/*.h",
+ "aten/src/ATen/mkl/*.h",
+ "aten/src/ATen/mkldnn/*.h",
+ "aten/src/ATen/native/*.h",
+ "aten/src/ATen/native/cpu/*.h",
+ "aten/src/ATen/native/cuda/*.cuh",
+ "aten/src/ATen/native/cuda/*.h",
+ "aten/src/ATen/native/mkldnn/*.h",
+ "aten/src/ATen/native/quantized/*.h",
+ "aten/src/ATen/native/quantized/cuda/*.h",
+ "aten/src/ATen/native/quantized/cpu/*.h",
+ "aten/src/ATen/native/sparse/*.h",
+ "aten/src/ATen/native/sparse/cuda/*.cuh",
+ "aten/src/ATen/native/utils/*.h",
+ "aten/src/ATen/native/xnnpack/*.h",
+ "aten/src/ATen/quantized/*.h",
+ "aten/src/TH/*.hpp",
+ "aten/src/TH/*.h",
+ "aten/src/TH/vector/*.h",
+ "aten/src/TH/generic/*.hpp",
+ "aten/src/TH/generic/*.h",
+ "aten/src/TH/generic/*.cpp",  # NOTE(review): source files exported as headers; presumably they are textually included by other files - confirm.
+ "aten/src/TH/generic/*.c",
+ "aten/src/THC/*.cpp",
+ "aten/src/THC/*.cuh",
+ "aten/src/THC/*.h",
+ "aten/src/THC/*.hpp",
+ "aten/src/THC/generic/*.cpp",
+ "aten/src/THC/generic/*.cu.cc",
+ "aten/src/THC/generic/*.h",
+ "aten/src/THC/generic/*.hpp",
+ "aten/src/THCUNN/*.h",
+ "aten/src/THCUNN/*.cuh",
+ "aten/src/THCUNN/generic/*.h",
+ "aten/src/THCUNN/generic/*.cu.cc",
+ ]) + [
+ ":generated_cpp",
+ ":aten_src_ATen_config",  # Generated Config.h from the template_rule of the same name.
+ ],
+ includes = [  # Directories added to the include search path of this target and its dependents.
+ "aten/src",
+ "aten/src/TH",
+ ],
+ deps = [
+ ":c10_headers",
+ ],
+)
+
+ATEN_COPTS = COMMON_COPTS + [  # Flags shared by the ATen targets in this file (th, aten, aten_nvrtc, aten_cuda_cpp, aten_cuda, intern_build_aten_ops).
+ "-DUSE_AVX",
+ "-DUSE_AVX2",
+ "-DCAFFE2_BUILD_MAIN_LIBS",  # NOTE(review): CAFFE2_COPTS spells this -DCAFFE2_BUILD_MAIN_LIB (no trailing S); confirm which macro the sources actually test.
+ "-DHAVE_AVX_CPU_DEFINITION",
+ "-DHAVE_AVX2_CPU_DEFINITION",
+ "-fvisibility-inlines-hidden",
+ "-fno-math-errno",
+ "-fno-trapping-math",
+]
+
+intern_build_aten_ops(
+ copts = ATEN_COPTS,  # intern_build_aten_ops is defined outside this chunk; it receives the shared ATen flags and the deps below.
+ deps = [
+ ":aten_headers",
+ "@fbgemm",
+ ],
+)
+
+cc_library(
+ name = "th",  # Compiles the :th_srcs group against :aten_headers and @fbgemm.
+ srcs = [
+ ":th_srcs",
+ ],
+ copts = ATEN_COPTS + [
+ "-mavx",  # Only this target adds -mavx on top of the shared ATen flags.
+ ],
+ deps = [
+ ":aten_headers",
+ "@fbgemm",
+ ],
+)
+
+cc_library(
+ name = "aten",  # Public ATen library built from the source groups listed in srcs.
+ srcs = [
+ ":ATen_CORE_SRCS",
+ ":ATen_QUANTIZED_SRCS",
+ ":aten_base_cpp",
+ ":aten_native_cpp",
+ ":aten_native_mkl_cpp",
+ ":aten_native_mkldnn_cpp",
+ ":aten_native_quantized_cpp",
+ ":aten_native_sparse_cpp",
+ ":aten_native_xnnpack",
+ ":aten_src_ATen_config",
+ ":generated_cpp",
+ ],
+ copts = ATEN_COPTS,
+ data = if_cuda(  # if_cuda is defined outside this chunk; presumably yields the first list in CUDA builds and the second otherwise - confirm.
+ [":libcaffe2_nvrtc.so"],
+ [],
+ ),
+ visibility = ["//visibility:public"],
+ deps = [
+ ":ATen_CPU",
+ ":aten_headers",
+ ":caffe2_for_aten_headers",
+ ":th",
+ ":torch_headers",
+ "@fbgemm",
+ "@ideep",
+ ],
+ alwayslink = True,  # Forces every object file into binaries that depend on this target, even if no symbol is referenced.
+)
+
+cc_library(
+ name = "aten_nvrtc",  # Compiles the nvrtc_stub .cpp files against the CUDA, CUDA driver and NVRTC targets.
+ srcs = glob([
+ "aten/src/ATen/cuda/nvrtc_stub/*.cpp",
+ ]),
+ copts = ATEN_COPTS,
+ linkstatic = True,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten_headers",
+ ":c10_headers",
+ "@cuda",
+ "@cuda//:cuda_driver",
+ "@cuda//:nvrtc",
+ ],
+ alwayslink = True,  # Keeps all objects when linked; the only dependent in this chunk is libcaffe2_nvrtc.so.
+)
+
+cc_binary(
+ name = "libcaffe2_nvrtc.so",  # Shared library made from :aten_nvrtc; :aten references it as data under if_cuda.
+ linkshared = True,  # Produce a shared library rather than an executable.
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten_nvrtc",
+ ],
+)
+
+cc_library(
+ name = "aten_cuda_cpp",  # Compiles the :aten_cuda_srcs group on top of :aten with the CUDA, NVRTC and cuDNN targets.
+ srcs = [":aten_cuda_srcs"],
+ copts = ATEN_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten",
+ "@cuda",
+ "@cuda//:nvrtc",
+ "@cudnn",
+ ],
+ alwayslink = True,  # Forces every object file into dependents' links even if unreferenced.
+)
+
+torch_cuda_half_options = [  # Extra defines appended to ATEN_COPTS by the :aten_cuda target.
+ "-DCUDA_HAS_FP16=1",
+ "-D__CUDA_NO_HALF_OPERATORS__",
+ "-D__CUDA_NO_HALF_CONVERSIONS__",
+ "-D__CUDA_NO_HALF2_OPERATORS__",
+]
+
+cu_library(
+ name = "aten_cuda",  # cu_library is defined outside this chunk; this target builds the three *_srcs_cu groups below.
+ srcs = [
+ ":aten_srcs_cu",
+ ":thc_srcs_cu",
+ ":thcunn_srcs_cu",
+ ],
+ copts = ATEN_COPTS + torch_cuda_half_options,  # Shared ATen flags plus the half-precision defines.
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten_cuda_cpp",
+ "@cuda//:cublas",
+ "@cuda//:cufft",
+ "@cuda//:cusparse",
+ ],
+ alwayslink = True,
+)
+
+# caffe2
+CAFFE2_COPTS = COMMON_COPTS + [  # Flags layered on COMMON_COPTS; the targets that use them are not visible in this chunk.
+ "-Dcaffe2_EXPORTS",
+ "-DCAFFE2_USE_GLOO",
+ "-DCAFFE2_USE_CUDNN",
+ "-DCAFFE2_BUILD_MAIN_LIB",  # NOTE(review): ATEN_COPTS uses -DCAFFE2_BUILD_MAIN_LIBS (trailing S); confirm the two are meant to differ.
+ "-fvisibility-inlines-hidden",
+ "-fno-math-errno",
+ "-fno-trapping-math",
+]
+
+proto_library(
+ name = "caffe2_proto_source",  # Collects every .proto file directly under caffe2/proto.
+ srcs = glob([
+ "caffe2/proto/*.proto",
+ ]),
+ visibility = ["//visibility:public"],
+)
+
+cc_proto_library(
+ name = "caffe2_protos",  # C++ protobuf code generated from :caffe2_proto_source.
+ deps = [":caffe2_proto_source"],
+)
+
+template_rule(
+ name = "caffe2_core_macros_h",  # Renders caffe2/core/macros.h.in to macros.h with a hard-coded 1.3.0 version.
+ src = "caffe2/core/macros.h.in",
+ out = "caffe2/core/macros.h",
+ substitutions = {
+ "@CAFFE2_VERSION_MAJOR@": "1",
+ "@CAFFE2_VERSION_MINOR@": "3",
+ "@CAFFE2_VERSION_PATCH@": "0",
+ "cmakedefine": "define",  # NOTE(review): the "#define X" keys below can only match after this rewrite; presumably entries are applied in listed order - confirm.
+ "#define CAFFE2_FORCE_FALLBACK_CUDA_MPI": "/* #undef CAFFE2_FORCE_FALLBACK_CUDA_MPI */",
+ "#define CAFFE2_HAS_MKL_DNN": "/* #undef CAFFE2_HAS_MKL_DNN */",
+ "#define CAFFE2_HAS_MKL_SGEMM_PACK": "/* #undef CAFFE2_HAS_MKL_SGEMM_PACK */",
+ "#define CAFFE2_THREADPOOL_MAIN_IMBALANCE": "/* #undef CAFFE2_THREADPOOL_MAIN_IMBALANCE */",
+ "#define CAFFE2_THREADPOOL_STATS": "/* #undef CAFFE2_THREADPOOL_STATS */",
+ "#define CAFFE2_USE_ACCELERATE": "/* #undef CAFFE2_USE_ACCELERATE */",
+ "#define CAFFE2_USE_EIGEN_FOR_BLAS": "/* #undef CAFFE2_USE_EIGEN_FOR_BLAS */",
+ "#define CAFFE2_USE_FBCODE": "/* #undef CAFFE2_USE_FBCODE */",
+ "#define CAFFE2_USE_GOOGLE_GLOG": "/* #undef CAFFE2_USE_GOOGLE_GLOG */",
+ "#define CAFFE2_USE_LITE_PROTO": "/* #undef CAFFE2_USE_LITE_PROTO */",
+ "#define CAFFE2_USE_MKL\n": "/* #undef CAFFE2_USE_MKL */\n",  # The trailing \n presumably stops this key matching longer CAFFE2_USE_MKL* names - confirm.
+ "#define CAFFE2_USE_NVTX": "/* #undef CAFFE2_USE_NVTX */",
+ "#define CAFFE2_USE_TRT": "/* #undef CAFFE2_USE_TRT */",
+ },
+)
+
+filegroup(
+ name = "caffe2_contrib_srcs",  # Explicit list of the caffe2/contrib/gloo .cc sources.
+ srcs = [
+ "caffe2/contrib/gloo/allgather_ops.cc",
+ "caffe2/contrib/gloo/allreduce_ops.cc",
+ "caffe2/contrib/gloo/barrier_ops.cc",
+ "caffe2/contrib/gloo/broadcast_ops.cc",
+ "caffe2/contrib/gloo/common.cc",
+ "caffe2/contrib/gloo/common_world_ops.cc",
+ "caffe2/contrib/gloo/context.cc",
+ "caffe2/contrib/gloo/reduce_scatter_ops.cc",
+ "caffe2/contrib/gloo/store_handler.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_core_srcs",  # Explicit list of caffe2/core .cc sources, including two nomnigraph files.
+ srcs = [
+ "caffe2/core/allocator.cc",
+ "caffe2/core/blob_serialization.cc",
+ "caffe2/core/blob_stats.cc",
+ "caffe2/core/common.cc",
+ "caffe2/core/context.cc",
+ "caffe2/core/context_base.cc",
+ "caffe2/core/db.cc",
+ "caffe2/core/event.cc",
+ "caffe2/core/export_c10_op_to_caffe2.cc",
+ "caffe2/core/graph.cc",
+ "caffe2/core/init.cc",
+ "caffe2/core/init_denormals.cc",
+ "caffe2/core/init_intrinsics_check.cc",
+ "caffe2/core/init_omp.cc",
+ "caffe2/core/int8_serialization.cc",
+ "caffe2/core/memonger.cc",
+ "caffe2/core/module.cc",
+ "caffe2/core/net.cc",
+ "caffe2/core/net_async_base.cc",
+ "caffe2/core/net_async_scheduling.cc",
+ "caffe2/core/net_async_task.cc",
+ "caffe2/core/net_async_task_future.cc",
+ "caffe2/core/net_async_task_graph.cc",
+ "caffe2/core/net_async_tracing.cc",
+ "caffe2/core/net_dag_utils.cc",
+ "caffe2/core/net_parallel.cc",
+ "caffe2/core/net_simple.cc",
+ "caffe2/core/net_simple_refcount.cc",
+ "caffe2/core/nomnigraph/Representations/NeuralNet.cc",
+ "caffe2/core/nomnigraph/tests/test_util.cc",  # NOTE(review): lives under tests/ yet is grouped with library sources - confirm this is intended.
+ "caffe2/core/numa.cc",
+ "caffe2/core/operator.cc",
+ "caffe2/core/operator_schema.cc",
+ "caffe2/core/plan_executor.cc",
+ "caffe2/core/prof_dag_counters.cc",
+ "caffe2/core/qtensor.cc",
+ "caffe2/core/qtensor_serialization.cc",
+ "caffe2/core/stats.cc",
+ "caffe2/core/tensor.cc",
+ "caffe2/core/tensor_int8.cc",
+ "caffe2/core/test_utils.cc",  # NOTE(review): test helper grouped with library sources - confirm this is intended.
+ "caffe2/core/transform.cc",
+ "caffe2/core/types.cc",
+ "caffe2/core/workspace.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_distributed_srcs",  # Explicit list of the caffe2/distributed .cc sources.
+ srcs = [
+ "caffe2/distributed/file_store_handler.cc",
+ "caffe2/distributed/file_store_handler_op.cc",
+ "caffe2/distributed/store_handler.cc",
+ "caffe2/distributed/store_ops.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_ideep_srcs",  # Explicit list of caffe2/ideep operator, quantization and utils .cc sources.
+ srcs = [
+ "caffe2/ideep/operators/adam_op.cc",
+ "caffe2/ideep/operators/channel_shuffle_op.cc",
+ "caffe2/ideep/operators/concat_split_op.cc",
+ "caffe2/ideep/operators/conv_op.cc",
+ "caffe2/ideep/operators/conv_transpose_op.cc",
+ "caffe2/ideep/operators/dropout_op.cc",
+ "caffe2/ideep/operators/elementwise_sum_op.cc",
+ "caffe2/ideep/operators/expand_squeeze_dims_op.cc",
+ "caffe2/ideep/operators/fully_connected_op.cc",
+ "caffe2/ideep/operators/local_response_normalization_op.cc",
+ "caffe2/ideep/operators/momentum_sgd_op.cc",
+ "caffe2/ideep/operators/operator_fallback_ideep.cc",
+ "caffe2/ideep/operators/order_switch_ops.cc",
+ "caffe2/ideep/operators/pool_op.cc",
+ "caffe2/ideep/operators/quantization/int8_add_op.cc",
+ "caffe2/ideep/operators/quantization/int8_conv_op.cc",
+ "caffe2/ideep/operators/quantization/int8_dequantize_op.cc",
+ "caffe2/ideep/operators/quantization/int8_fully_connected_op.cc",
+ "caffe2/ideep/operators/quantization/int8_given_tensor_fill_op.cc",
+ "caffe2/ideep/operators/quantization/int8_pool_op.cc",
+ "caffe2/ideep/operators/quantization/int8_quantize_op.cc",
+ "caffe2/ideep/operators/quantization/int8_relu_op.cc",
+ "caffe2/ideep/operators/queue_ops.cc",
+ "caffe2/ideep/operators/relu_op.cc",
+ "caffe2/ideep/operators/reshape_op.cc",
+ "caffe2/ideep/operators/shape_op.cc",
+ "caffe2/ideep/operators/sigmoid_op.cc",
+ "caffe2/ideep/operators/spatial_batch_norm_op.cc",
+ "caffe2/ideep/operators/transpose_op.cc",
+ "caffe2/ideep/operators/utility_ops.cc",
+ "caffe2/ideep/utils/ideep_register.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_onnx_srcs",  # Explicit list of the caffe2/onnx .cc sources.
+ srcs = [
+ "caffe2/onnx/backend.cc",
+ "caffe2/onnx/backend_rep.cc",
+ "caffe2/onnx/device.cc",
+ "caffe2/onnx/helper.cc",
+ "caffe2/onnx/offline_tensor.cc",
+ "caffe2/onnx/onnx_exporter.cc",
+ "caffe2/onnx/onnxifi_graph_info.cc",
+ "caffe2/onnx/onnxifi_init.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_operators_srcs",  # Explicit list of caffe2/operators .cc sources (including the rnn/ subdirectory); no .cu.cc, *_gpu.cc or *_cudnn.cc files appear here.
+ srcs = [
+ "caffe2/operators/abs_op.cc",
+ "caffe2/operators/accumulate_op.cc",
+ "caffe2/operators/accuracy_op.cc",
+ "caffe2/operators/acos_op.cc",
+ "caffe2/operators/affine_channel_op.cc",
+ "caffe2/operators/alias_with_name.cc",
+ "caffe2/operators/apmeter_op.cc",
+ "caffe2/operators/arg_ops.cc",
+ "caffe2/operators/asin_op.cc",
+ "caffe2/operators/assert_op.cc",
+ "caffe2/operators/atan_op.cc",
+ "caffe2/operators/atomic_ops.cc",
+ "caffe2/operators/batch_box_cox_op.cc",
+ "caffe2/operators/batch_bucketize_op.cc",
+ "caffe2/operators/batch_gather_ops.cc",
+ "caffe2/operators/batch_matmul_op.cc",
+ "caffe2/operators/batch_moments_op.cc",
+ "caffe2/operators/batch_permutation_op.cc",
+ "caffe2/operators/batch_sparse_to_dense_op.cc",
+ "caffe2/operators/bbox_transform_op.cc",
+ "caffe2/operators/bisect_percentile_op.cc",
+ "caffe2/operators/boolean_mask_ops.cc",
+ "caffe2/operators/boolean_unmask_ops.cc",
+ "caffe2/operators/box_with_nms_limit_op.cc",
+ "caffe2/operators/bucketize_op.cc",
+ "caffe2/operators/byte_weight_dequant_op.cc",
+ "caffe2/operators/cast_op.cc",
+ "caffe2/operators/cbrt_op.cc",
+ "caffe2/operators/cc_bmm_bg_op.cc",
+ "caffe2/operators/ceil_op.cc",
+ "caffe2/operators/channel_backprop_stats_op.cc",
+ "caffe2/operators/channel_shuffle_op.cc",
+ "caffe2/operators/channel_stats_op.cc",
+ "caffe2/operators/clip_op.cc",
+ "caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.cc",
+ "caffe2/operators/communicator_op.cc",
+ "caffe2/operators/concat_split_op.cc",
+ "caffe2/operators/conditional_op.cc",
+ "caffe2/operators/conv_gradient_op.cc",
+ "caffe2/operators/conv_op.cc",
+ "caffe2/operators/conv_op_eigen.cc",
+ "caffe2/operators/conv_op_shared.cc",
+ "caffe2/operators/conv_transpose_gradient_op.cc",
+ "caffe2/operators/conv_transpose_op_mobile.cc",
+ "caffe2/operators/copy_op.cc",
+ "caffe2/operators/copy_rows_to_tensor_op.cc",
+ "caffe2/operators/cos_op.cc",
+ "caffe2/operators/cosh_op.cc",
+ "caffe2/operators/cosine_embedding_criterion_op.cc",
+ "caffe2/operators/counter_ops.cc",
+ "caffe2/operators/crash_op.cc",
+ "caffe2/operators/create_scope_op.cc",
+ "caffe2/operators/crf_viterbi_op.cc",
+ "caffe2/operators/cross_entropy_op.cc",
+ "caffe2/operators/ctc_beam_search_decoder_op.cc",
+ "caffe2/operators/ctc_greedy_decoder_op.cc",
+ "caffe2/operators/cube_op.cc",
+ "caffe2/operators/data_couple.cc",
+ "caffe2/operators/dataset_ops.cc",
+ "caffe2/operators/deform_conv_gradient_op.cc",
+ "caffe2/operators/deform_conv_op.cc",
+ "caffe2/operators/dense_vector_to_id_list_op.cc",
+ "caffe2/operators/distance_op.cc",
+ "caffe2/operators/do_op.cc",
+ "caffe2/operators/dropout_op.cc",
+ "caffe2/operators/elementwise_add_gradient_op.cc",
+ "caffe2/operators/elementwise_add_op.cc",
+ "caffe2/operators/elementwise_div_gradient_op.cc",
+ "caffe2/operators/elementwise_div_op.cc",
+ "caffe2/operators/elementwise_linear_op.cc",
+ "caffe2/operators/elementwise_logical_ops.cc",
+ "caffe2/operators/elementwise_mul_gradient_op.cc",
+ "caffe2/operators/elementwise_mul_op.cc",
+ "caffe2/operators/elementwise_ops.cc",
+ "caffe2/operators/elementwise_ops_schema.cc",
+ "caffe2/operators/elementwise_ops_utils.cc",
+ "caffe2/operators/elementwise_sub_gradient_op.cc",
+ "caffe2/operators/elementwise_sub_op.cc",
+ "caffe2/operators/elementwise_sum_op.cc",
+ "caffe2/operators/elu_op.cc",
+ "caffe2/operators/enforce_finite_op.cc",
+ "caffe2/operators/ensure_clipped_op.cc",
+ "caffe2/operators/ensure_cpu_output_op.cc",
+ "caffe2/operators/erf_op.cc",
+ "caffe2/operators/exp_op.cc",
+ "caffe2/operators/expand_op.cc",
+ "caffe2/operators/expand_squeeze_dims_op.cc",
+ "caffe2/operators/fc_inference.cc",
+ "caffe2/operators/feature_maps_ops.cc",
+ "caffe2/operators/feed_blob_op.cc",
+ "caffe2/operators/filler_op.cc",
+ "caffe2/operators/find_duplicate_elements_op.cc",
+ "caffe2/operators/find_op.cc",
+ "caffe2/operators/flatten_op.cc",
+ "caffe2/operators/flexible_top_k.cc",
+ "caffe2/operators/floor_op.cc",
+ "caffe2/operators/free_op.cc",
+ "caffe2/operators/fully_connected_op.cc",
+ "caffe2/operators/fused_rowwise_8bit_conversion_ops.cc",
+ "caffe2/operators/fused_rowwise_random_quantization_ops.cc",
+ "caffe2/operators/gather_fused_8bit_rowwise_op.cc",
+ "caffe2/operators/gather_op.cc",
+ "caffe2/operators/gather_ranges_to_dense_op.cc",
+ "caffe2/operators/gelu_op.cc",
+ "caffe2/operators/generate_proposals_op.cc",
+ "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cc",
+ "caffe2/operators/given_tensor_fill_op.cc",
+ "caffe2/operators/glu_op.cc",
+ "caffe2/operators/group_norm_op.cc",
+ "caffe2/operators/gru_unit_op.cc",
+ "caffe2/operators/h_softmax_op.cc",
+ "caffe2/operators/half_float_ops.cc",
+ "caffe2/operators/hard_sigmoid_op.cc",
+ "caffe2/operators/heatmap_max_keypoint_op.cc",
+ "caffe2/operators/if_op.cc",
+ "caffe2/operators/im2col_op.cc",
+ "caffe2/operators/index_hash_ops.cc",
+ "caffe2/operators/index_ops.cc",
+ "caffe2/operators/inference_lstm_op.cc",
+ "caffe2/operators/instance_norm_gradient_op.cc",
+ "caffe2/operators/instance_norm_op.cc",
+ "caffe2/operators/integral_image_op.cc",
+ "caffe2/operators/is_empty_op.cc",
+ "caffe2/operators/jsd_op.cc",
+ "caffe2/operators/key_split_ops.cc",
+ "caffe2/operators/last_n_window_collector.cc",
+ "caffe2/operators/layer_norm_op.cc",
+ "caffe2/operators/leaky_relu_op.cc",
+ "caffe2/operators/length_split_op.cc",
+ "caffe2/operators/lengths_pad_op.cc",
+ "caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.cc",
+ "caffe2/operators/lengths_reducer_ops.cc",
+ "caffe2/operators/lengths_reducer_rowwise_8bit_ops.cc",
+ "caffe2/operators/lengths_tile_op.cc",
+ "caffe2/operators/lengths_top_k_op.cc",
+ "caffe2/operators/listwise_l2r_op.cc",
+ "caffe2/operators/load_save_op.cc",
+ "caffe2/operators/load_save_op_util.cc",
+ "caffe2/operators/local_response_normalization_op.cc",
+ "caffe2/operators/locally_connected_op.cc",
+ "caffe2/operators/locally_connected_op_util.cc",
+ "caffe2/operators/log_op.cc",
+ "caffe2/operators/logit_op.cc",
+ "caffe2/operators/loss_op.cc",
+ "caffe2/operators/lp_pool_op.cc",
+ "caffe2/operators/lpnorm_op.cc",
+ "caffe2/operators/lstm_unit_op.cc",
+ "caffe2/operators/map_ops.cc",
+ "caffe2/operators/margin_ranking_criterion_op.cc",
+ "caffe2/operators/matmul_op.cc",
+ "caffe2/operators/mean_op.cc",
+ "caffe2/operators/merge_id_lists_op.cc",
+ "caffe2/operators/minmax_gradient_ops.cc",
+ "caffe2/operators/minmax_ops.cc",
+ "caffe2/operators/mod_op.cc",
+ "caffe2/operators/moments_op.cc",
+ "caffe2/operators/multi_class_accuracy_op.cc",
+ "caffe2/operators/negate_gradient_op.cc",
+ "caffe2/operators/negative_op.cc",
+ "caffe2/operators/ngram_ops.cc",
+ "caffe2/operators/norm_planar_yuv_op.cc",
+ "caffe2/operators/normalize_l1_op.cc",
+ "caffe2/operators/normalize_op.cc",
+ "caffe2/operators/numpy_tile_op.cc",
+ "caffe2/operators/one_hot_ops.cc",
+ "caffe2/operators/onnx_while_op.cc",
+ "caffe2/operators/order_switch_ops.cc",
+ "caffe2/operators/pack_rnn_sequence_op.cc",
+ "caffe2/operators/pack_segments.cc",
+ "caffe2/operators/pad_op.cc",
+ "caffe2/operators/partition_ops.cc",
+ "caffe2/operators/percentile_op.cc",
+ "caffe2/operators/perplexity_op.cc",
+ "caffe2/operators/piecewise_linear_transform_op.cc",
+ "caffe2/operators/pool_gradient_op.cc",
+ "caffe2/operators/pool_op.cc",
+ "caffe2/operators/pool_op_util.cc",
+ "caffe2/operators/pow_op.cc",
+ "caffe2/operators/prelu_op.cc",
+ "caffe2/operators/prepend_dim_op.cc",
+ "caffe2/operators/quant_decode_op.cc",
+ "caffe2/operators/rank_loss_op.cc",
+ "caffe2/operators/reciprocal_gradient_op.cc",
+ "caffe2/operators/reciprocal_op.cc",
+ "caffe2/operators/reduce_front_back_max_ops.cc",
+ "caffe2/operators/reduce_front_back_mean_ops.cc",
+ "caffe2/operators/reduce_front_back_sum_ops.cc",
+ "caffe2/operators/reduce_ops.cc",
+ "caffe2/operators/reduction_ops.cc",
+ "caffe2/operators/relu_n_op.cc",
+ "caffe2/operators/relu_op.cc",
+ "caffe2/operators/remove_data_blocks_op.cc",
+ "caffe2/operators/replace_nan_op.cc",
+ "caffe2/operators/reservoir_sampling.cc",
+ "caffe2/operators/reshape_op.cc",
+ "caffe2/operators/resize_3d_op.cc",
+ "caffe2/operators/resize_op.cc",
+ "caffe2/operators/reverse_packed_segs_op.cc",
+ "caffe2/operators/rmac_regions_op.cc",
+ "caffe2/operators/rnn/recurrent_network_blob_fetcher_op.cc",
+ "caffe2/operators/rnn/recurrent_network_executor.cc",
+ "caffe2/operators/rnn/recurrent_network_op.cc",
+ "caffe2/operators/roi_align_gradient_op.cc",
+ "caffe2/operators/roi_align_op.cc",
+ "caffe2/operators/roi_align_rotated_gradient_op.cc",
+ "caffe2/operators/roi_align_rotated_op.cc",
+ "caffe2/operators/roi_pool_op.cc",
+ "caffe2/operators/rowmul_op.cc",
+ "caffe2/operators/rsqrt_op.cc",
+ "caffe2/operators/scale_blobs_op.cc",
+ "caffe2/operators/scale_op.cc",
+ "caffe2/operators/segment_reduction_op.cc",
+ "caffe2/operators/selu_op.cc",
+ "caffe2/operators/sequence_ops.cc",
+ "caffe2/operators/shape_op.cc",
+ "caffe2/operators/sigmoid_gradient_op.cc",
+ "caffe2/operators/sigmoid_op.cc",
+ "caffe2/operators/sin_op.cc",
+ "caffe2/operators/sinh_op.cc",
+ "caffe2/operators/sinusoid_position_encoding_op.cc",
+ "caffe2/operators/slice_op.cc",
+ "caffe2/operators/softmax_op.cc",
+ "caffe2/operators/softmax_utils.cc",
+ "caffe2/operators/softmax_with_loss_op.cc",
+ "caffe2/operators/softplus_op.cc",
+ "caffe2/operators/softsign_op.cc",
+ "caffe2/operators/space_batch_op.cc",
+ "caffe2/operators/sparse_dropout_with_replacement_op.cc",
+ "caffe2/operators/sparse_normalize_op.cc",
+ "caffe2/operators/sparse_to_dense_mask_op.cc",
+ "caffe2/operators/sparse_to_dense_op.cc",
+ "caffe2/operators/spatial_batch_norm_gradient_op.cc",
+ "caffe2/operators/spatial_batch_norm_op.cc",
+ "caffe2/operators/spatial_softmax_with_loss_op.cc",
+ "caffe2/operators/sqr_op.cc",
+ "caffe2/operators/sqrt_op.cc",
+ "caffe2/operators/square_root_divide_op.cc",
+ "caffe2/operators/stats_ops.cc",
+ "caffe2/operators/stats_put_ops.cc",
+ "caffe2/operators/stop_gradient.cc",
+ "caffe2/operators/string_ops.cc",
+ "caffe2/operators/stump_func_op.cc",
+ "caffe2/operators/stylizer_ops.cc",
+ "caffe2/operators/summarize_op.cc",
+ "caffe2/operators/swish_op.cc",
+ "caffe2/operators/tan_op.cc",
+ "caffe2/operators/tanh_gradient_op.cc",
+ "caffe2/operators/tanh_op.cc",
+ "caffe2/operators/tensor_protos_db_input.cc",
+ "caffe2/operators/text_file_reader.cc",
+ "caffe2/operators/text_file_reader_utils.cc",
+ "caffe2/operators/thresholded_relu_op.cc",
+ "caffe2/operators/tile_op.cc",
+ "caffe2/operators/top_k.cc",
+ "caffe2/operators/transpose_op.cc",
+ "caffe2/operators/tt_linear_op.cc",
+ "caffe2/operators/unique_ops.cc",
+ "caffe2/operators/upsample_op.cc",
+ "caffe2/operators/utility_ops.cc",
+ "caffe2/operators/variable_length_sequence_padding.cc",
+ "caffe2/operators/weighted_multi_sampling_op.cc",
+ "caffe2/operators/weighted_sample_op.cc",
+ "caffe2/operators/while_op.cc",
+ "caffe2/operators/workspace_ops.cc",
+ "caffe2/operators/zero_gradient_op.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_opt_srcs",  # Explicit list of the caffe2/opt .cc sources.
+ srcs = [
+ "caffe2/opt/annotations.cc",
+ "caffe2/opt/backend_cutting.cc",
+ "caffe2/opt/backend_transformer_base.cc",
+ "caffe2/opt/bound_shape_inferencer.cc",
+ "caffe2/opt/converter.cc",
+ "caffe2/opt/dead_code_elim.cc",
+ "caffe2/opt/device.cc",
+ "caffe2/opt/distributed.cc",
+ "caffe2/opt/distributed_converter.cc",
+ "caffe2/opt/fusion.cc",
+ "caffe2/opt/mobile.cc",
+ "caffe2/opt/onnxifi_op.cc",
+ "caffe2/opt/onnxifi_transformer.cc",
+ "caffe2/opt/optimize_ideep.cc",
+ "caffe2/opt/optimizer.cc",
+ "caffe2/opt/passes.cc",
+ "caffe2/opt/shape_info.cc",
+ "caffe2/opt/tvm_transformer.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_perfkernels_srcs",  # Explicit list of the caffe2/perfkernels .cc sources.
+ srcs = [
+ "caffe2/perfkernels/adagrad.cc",
+ "caffe2/perfkernels/embedding_lookup.cc",
+ "caffe2/perfkernels/embedding_lookup_idx.cc",
+ "caffe2/perfkernels/fused_8bit_rowwise_conversion.cc",
+ "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc",
+ "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.cc",
+ "caffe2/perfkernels/lstm_unit_cpu_common.cc",
+ "caffe2/perfkernels/math_cpu_base.cc",
+ "caffe2/perfkernels/typed_axpy.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_predictor_srcs",  # Explicit list of the caffe2/predictor sources.
+ srcs = [
+ "caffe2/predictor/emulator/data_filler.cc",
+ "caffe2/predictor/emulator/data_filler.h",  # NOTE(review): the only header in the *_srcs groups visible here - confirm it is not already exported by a headers target.
+ "caffe2/predictor/predictor.cc",
+ "caffe2/predictor/predictor_config.cc",
+ "caffe2/predictor/predictor_utils.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_quantization_srcs",  # Explicit list of the caffe2/quantization/server .cc sources.
+ srcs = [
+ "caffe2/quantization/server/activation_distribution_observer.cc",
+ "caffe2/quantization/server/batch_matmul_dnnlowp_op.cc",
+ "caffe2/quantization/server/caffe2_dnnlowp_utils.cc",
+ "caffe2/quantization/server/channel_shuffle_dnnlowp_op.cc",
+ "caffe2/quantization/server/concat_dnnlowp_op.cc",
+ "caffe2/quantization/server/conv_dnnlowp_acc16_op.cc",
+ "caffe2/quantization/server/conv_dnnlowp_op.cc",
+ "caffe2/quantization/server/conv_relu_op.cc",
+ "caffe2/quantization/server/dequantize_dnnlowp_op.cc",
+ "caffe2/quantization/server/dnnlowp.cc",
+ "caffe2/quantization/server/dnnlowp_partition.cc",
+ "caffe2/quantization/server/dynamic_histogram.cc",
+ "caffe2/quantization/server/elementwise_add_dnnlowp_op.cc",
+ "caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc",
+ "caffe2/quantization/server/elementwise_mul_dnnlowp_op.cc",
+ "caffe2/quantization/server/elementwise_sum_dnnlowp_op.cc",
+ "caffe2/quantization/server/elementwise_sum_relu_op.cc",
+ "caffe2/quantization/server/fbgemm_pack_matrix_cache.cc",
+ "caffe2/quantization/server/fbgemm_pack_op.cc",
+ "caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc",
+ "caffe2/quantization/server/fully_connected_dnnlowp_op.cc",
+ "caffe2/quantization/server/fully_connected_fake_lowp_op.cc",
+ "caffe2/quantization/server/group_norm_dnnlowp_op.cc",
+ "caffe2/quantization/server/kl_minimization.cc",
+ "caffe2/quantization/server/lstm_unit_dnnlowp_op.cc",
+ "caffe2/quantization/server/norm_minimization.cc",
+ "caffe2/quantization/server/p99.cc",
+ "caffe2/quantization/server/pool_dnnlowp_op.cc",
+ "caffe2/quantization/server/quantize_dnnlowp_op.cc",
+ "caffe2/quantization/server/relu_dnnlowp_op.cc",
+ "caffe2/quantization/server/sigmoid.cc",
+ "caffe2/quantization/server/sigmoid_dnnlowp_op.cc",
+ "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc",
+ "caffe2/quantization/server/tanh.cc",
+ "caffe2/quantization/server/tanh_dnnlowp_op.cc",
+ "caffe2/quantization/server/utility_dnnlowp_ops.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_queue_srcs",  # Explicit list of the caffe2/queue .cc sources.
+ srcs = [
+ "caffe2/queue/blobs_queue.cc",
+ "caffe2/queue/blobs_queue_db.cc",
+ "caffe2/queue/queue_ops.cc",
+ "caffe2/queue/rebatching_queue.cc",
+ "caffe2/queue/rebatching_queue_ops.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_serialize_srcs",  # Explicit list of the caffe2/serialize .cc sources.
+ srcs = [
+ "caffe2/serialize/file_adapter.cc",
+ "caffe2/serialize/inline_container.cc",
+ "caffe2/serialize/istream_adapter.cc",
+ "caffe2/serialize/read_adapter_interface.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_sgd_srcs",  # Explicit list of the caffe2/sgd .cc sources.
+ srcs = [
+ "caffe2/sgd/adadelta_op.cc",
+ "caffe2/sgd/adagrad_op.cc",
+ "caffe2/sgd/adam_op.cc",
+ "caffe2/sgd/clip_tensor_op.cc",
+ "caffe2/sgd/ftrl_op.cc",
+ "caffe2/sgd/gftrl_op.cc",
+ "caffe2/sgd/iter_op.cc",
+ "caffe2/sgd/lars_op.cc",
+ "caffe2/sgd/learning_rate_adaption_op.cc",
+ "caffe2/sgd/learning_rate_op.cc",
+ "caffe2/sgd/momentum_sgd_op.cc",
+ "caffe2/sgd/rmsprop_op.cc",
+ "caffe2/sgd/wngrad_op.cc",
+ "caffe2/sgd/yellowfin_op.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_transforms_srcs",  # Explicit list of the caffe2/transforms .cc sources.
+ srcs = [
+ "caffe2/transforms/common_subexpression_elimination.cc",
+ "caffe2/transforms/conv_to_nnpack_transform.cc",
+ "caffe2/transforms/pattern_net_transform.cc",
+ "caffe2/transforms/single_op_transform.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_utils_srcs",  # Explicit list of caffe2/utils .cc sources, including the math/ and threadpool/ subdirectories.
+ srcs = [
+ "caffe2/utils/bench_utils.cc",
+ "caffe2/utils/cpuid.cc",
+ "caffe2/utils/math/broadcast.cc",
+ "caffe2/utils/math/elementwise.cc",
+ "caffe2/utils/math/reduce.cc",
+ "caffe2/utils/math/transpose.cc",
+ "caffe2/utils/math/utils.cc",
+ "caffe2/utils/math_cpu.cc",
+ "caffe2/utils/murmur_hash3.cc",
+ "caffe2/utils/proto_convert.cc",
+ "caffe2/utils/proto_utils.cc",
+ "caffe2/utils/proto_wrap.cc",
+ "caffe2/utils/signal_handler.cc",
+ "caffe2/utils/smart_tensor_printer.cc",
+ "caffe2/utils/string_utils.cc",
+ "caffe2/utils/threadpool/ThreadPool.cc",
+ "caffe2/utils/threadpool/ThreadPoolMobile.cc",
+ "caffe2/utils/threadpool/pthreadpool.cc",
+ "caffe2/utils/threadpool/pthreadpool_impl.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_cuda_srcs",  # Explicit list of caffe2 .cc sources with a _gpu or cudnn suffix; .cu.cc files are listed separately in caffe2_cu_srcs.
+ srcs = [
+ "caffe2/contrib/aten/aten_op_gpu.cc",
+ "caffe2/contrib/gloo/allreduce_ops_gpu.cc",
+ "caffe2/contrib/gloo/broadcast_ops_gpu.cc",
+ "caffe2/contrib/gloo/common_world_ops_gpu.cc",
+ "caffe2/core/blob_serialization_gpu.cc",
+ "caffe2/core/common_cudnn.cc",
+ "caffe2/core/common_gpu.cc",
+ "caffe2/core/event_gpu.cc",
+ "caffe2/db/create_db_op_gpu.cc",
+ "caffe2/distributed/file_store_handler_op_gpu.cc",
+ "caffe2/operators/communicator_op_gpu.cc",
+ "caffe2/operators/concat_split_op_gpu.cc",
+ "caffe2/operators/conv_op_cache_cudnn.cc",
+ "caffe2/operators/conv_op_cudnn.cc",
+ "caffe2/operators/conv_op_gpu.cc",
+ "caffe2/operators/conv_op_shared_gpu.cc",
+ "caffe2/operators/conv_transpose_op_cudnn.cc",
+ "caffe2/operators/conv_transpose_op_gpu.cc",
+ "caffe2/operators/counter_ops_gpu.cc",
+ "caffe2/operators/do_op_gpu.cc",
+ "caffe2/operators/dropout_op_cudnn.cc",
+ "caffe2/operators/elementwise_add_op_gpu.cc",
+ "caffe2/operators/elementwise_sub_op_gpu.cc",
+ "caffe2/operators/elu_op_cudnn.cc",
+ "caffe2/operators/exp_op_gpu.cc",
+ "caffe2/operators/expand_op_gpu.cc",
+ "caffe2/operators/expand_squeeze_dims_op_gpu.cc",
+ "caffe2/operators/free_op_gpu.cc",
+ "caffe2/operators/fully_connected_op_gpu.cc",
+ "caffe2/operators/if_op_gpu.cc",
+ "caffe2/operators/im2col_op_gpu.cc",
+ "caffe2/operators/load_save_op_gpu.cc",
+ "caffe2/operators/local_response_normalization_op_cudnn.cc",
+ "caffe2/operators/locally_connected_op_gpu.cc",
+ "caffe2/operators/log_op_gpu.cc",
+ "caffe2/operators/matmul_op_gpu.cc",
+ "caffe2/operators/negate_gradient_op_gpu.cc",
+ "caffe2/operators/negative_op_gpu.cc",
+ "caffe2/operators/order_switch_ops_cudnn.cc",
+ "caffe2/operators/order_switch_ops_gpu.cc",
+ "caffe2/operators/pool_op_cudnn.cc",
+ "caffe2/operators/prepend_dim_op_gpu.cc",
+ "caffe2/operators/reshape_op_gpu.cc",
+ "caffe2/operators/rnn/recurrent_network_blob_fetcher_op_gpu.cc",
+ "caffe2/operators/rnn/recurrent_network_executor_gpu.cc",
+ "caffe2/operators/rnn/recurrent_op_cudnn.cc",
+ "caffe2/operators/scale_op_gpu.cc",
+ "caffe2/operators/shape_op_gpu.cc",
+ "caffe2/operators/sigmoid_op_cudnn.cc",
+ "caffe2/operators/softmax_op_cudnn.cc",
+ "caffe2/operators/sqr_op_gpu.cc",
+ "caffe2/operators/sqrt_op_gpu.cc",
+ "caffe2/operators/stop_gradient_gpu.cc",
+ "caffe2/operators/tanh_op_cudnn.cc",
+ "caffe2/operators/tensor_protos_db_input_gpu.cc",
+ "caffe2/operators/transpose_op_cudnn.cc",
+ "caffe2/operators/while_op_gpu.cc",
+ "caffe2/operators/zero_gradient_op_gpu.cc",
+ "caffe2/queue/queue_ops_gpu.cc",
+ "caffe2/sgd/iter_op_gpu.cc",
+ "caffe2/sgd/learning_rate_op_gpu.cc",
+ ],
+)
+
+filegroup(
+ name = "caffe2_cu_srcs",
+ srcs = [
+ "caffe2/core/context_gpu.cu.cc",
+ "caffe2/operators/abs_op.cu.cc",
+ "caffe2/operators/accumulate_op.cu.cc",
+ "caffe2/operators/accuracy_op.cu.cc",
+ "caffe2/operators/acos_op.cu.cc",
+ "caffe2/operators/affine_channel_op.cu.cc",
+ "caffe2/operators/alias_with_name.cu.cc",
+ "caffe2/operators/arg_ops.cu.cc",
+ "caffe2/operators/asin_op.cu.cc",
+ "caffe2/operators/assert_op.cu.cc",
+ "caffe2/operators/atan_op.cu.cc",
+ "caffe2/operators/batch_gather_ops.cu.cc",
+ "caffe2/operators/batch_matmul_op.cu.cc",
+ "caffe2/operators/batch_moments_op.cu.cc",
+ "caffe2/operators/batch_permutation_op.cu.cc",
+ "caffe2/operators/batch_sparse_to_dense_op.cu.cc",
+ "caffe2/operators/boolean_mask_ops.cu.cc",
+ "caffe2/operators/boolean_unmask_ops.cu.cc",
+ "caffe2/operators/bucketize_op.cu.cc",
+ "caffe2/operators/cast_op.cu.cc",
+ "caffe2/operators/cbrt_op.cu.cc",
+ "caffe2/operators/ceil_op.cu.cc",
+ "caffe2/operators/channel_backprop_stats_op.cu.cc",
+ "caffe2/operators/channel_shuffle_op.cu.cc",
+ "caffe2/operators/channel_stats_op.cu.cc",
+ "caffe2/operators/channelwise_conv3d_op_cudnn.cu.cc",
+ "caffe2/operators/clip_op.cu.cc",
+ "caffe2/operators/copy_op.cu.cc",
+ "caffe2/operators/cos_op.cu.cc",
+ "caffe2/operators/cosh_op.cu.cc",
+ "caffe2/operators/cosine_embedding_criterion_op.cu.cc",
+ "caffe2/operators/cross_entropy_op.cu.cc",
+ "caffe2/operators/cube_op.cu.cc",
+ "caffe2/operators/data_couple_gpu.cu.cc",
+ "caffe2/operators/deform_conv_op.cu.cc",
+ "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu.cc",
+ "caffe2/operators/distance_op.cu.cc",
+ "caffe2/operators/dropout_op.cu.cc",
+ "caffe2/operators/elementwise_div_op.cu.cc",
+ "caffe2/operators/elementwise_linear_op.cu.cc",
+ "caffe2/operators/elementwise_mul_op.cu.cc",
+ "caffe2/operators/elementwise_ops.cu.cc",
+ "caffe2/operators/elu_op.cu.cc",
+ "caffe2/operators/enforce_finite_op.cu.cc",
+ "caffe2/operators/ensure_cpu_output_op.cu.cc",
+ "caffe2/operators/erf_op.cu.cc",
+ "caffe2/operators/filler_op.cu.cc",
+ "caffe2/operators/find_op.cu.cc",
+ "caffe2/operators/floor_op.cu.cc",
+ "caffe2/operators/gather_op.cu.cc",
+ "caffe2/operators/gelu_op.cu.cc",
+ "caffe2/operators/generate_proposals_op.cu.cc",
+ "caffe2/operators/generate_proposals_op_util_nms_gpu.cu.cc",
+ "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu.cc",
+ "caffe2/operators/given_tensor_fill_op.cu.cc",
+ "caffe2/operators/glu_op.cu.cc",
+ "caffe2/operators/group_norm_op.cu.cc",
+ "caffe2/operators/gru_unit_op_gpu.cu.cc",
+ "caffe2/operators/half_float_ops.cu.cc",
+ "caffe2/operators/hard_sigmoid_op.cu.cc",
+ "caffe2/operators/instance_norm_op.cu.cc",
+ "caffe2/operators/integral_image_op.cu.cc",
+ "caffe2/operators/layer_norm_op.cu.cc",
+ "caffe2/operators/leaky_relu_op.cu.cc",
+ "caffe2/operators/lengths_pad_op.cu.cc",
+ "caffe2/operators/lengths_tile_op.cu.cc",
+ "caffe2/operators/local_response_normalization_op.cu.cc",
+ "caffe2/operators/logit_op.cu.cc",
+ "caffe2/operators/loss_op.cu.cc",
+ "caffe2/operators/lp_pool_op.cu.cc",
+ "caffe2/operators/lstm_unit_op_gpu.cu.cc",
+ "caffe2/operators/margin_ranking_criterion_op.cu.cc",
+ "caffe2/operators/max_pool_with_index.cu.cc",
+ "caffe2/operators/mean_op.cu.cc",
+ "caffe2/operators/mem_query_op.cu.cc",
+ "caffe2/operators/minmax_ops.cu.cc",
+ "caffe2/operators/moments_op.cu.cc",
+ "caffe2/operators/multi_class_accuracy_op.cu.cc",
+ "caffe2/operators/normalize_ops.cu.cc",
+ "caffe2/operators/one_hot_ops.cu.cc",
+ "caffe2/operators/pack_segments.cu.cc",
+ "caffe2/operators/pad_op_gpu.cu.cc",
+ "caffe2/operators/perplexity_op.cu.cc",
+ "caffe2/operators/piecewise_linear_transform_op.cu.cc",
+ "caffe2/operators/pool_op.cu.cc",
+ "caffe2/operators/pow_op.cu.cc",
+ "caffe2/operators/prelu_op.cu.cc",
+ "caffe2/operators/reciprocal_op.cu.cc",
+ "caffe2/operators/reduce_front_back_max_ops.cu.cc",
+ "caffe2/operators/reduce_front_back_sum_mean_ops.cu.cc",
+ "caffe2/operators/reduce_ops.cu.cc",
+ "caffe2/operators/reduction_ops.cu.cc",
+ "caffe2/operators/relu_n_op.cu.cc",
+ "caffe2/operators/relu_op.cu.cc",
+ "caffe2/operators/replace_nan_op.cu.cc",
+ "caffe2/operators/resize_3d_op.cu.cc",
+ "caffe2/operators/resize_op.cu.cc",
+ "caffe2/operators/reverse_packed_segs_op.cu.cc",
+ "caffe2/operators/rmac_regions_op.cu.cc",
+ "caffe2/operators/rnn/recurrent_network_op_gpu.cu.cc",
+ "caffe2/operators/roi_align_gradient_op.cu.cc",
+ "caffe2/operators/roi_align_op.cu.cc",
+ "caffe2/operators/roi_align_rotated_gradient_op.cu.cc",
+ "caffe2/operators/roi_align_rotated_op.cu.cc",
+ "caffe2/operators/roi_pool_op.cu.cc",
+ "caffe2/operators/rsqrt_op.cu.cc",
+ "caffe2/operators/scale_blobs_op.cu.cc",
+ "caffe2/operators/segment_reduction_op_gpu.cu.cc",
+ "caffe2/operators/selu_op.cu.cc",
+ "caffe2/operators/sequence_ops.cu.cc",
+ "caffe2/operators/sigmoid_op.cu.cc",
+ "caffe2/operators/sin_op.cu.cc",
+ "caffe2/operators/sinh_op.cu.cc",
+ "caffe2/operators/slice_op.cu.cc",
+ "caffe2/operators/softmax_ops.cu.cc",
+ "caffe2/operators/softplus_op.cu.cc",
+ "caffe2/operators/softsign_op.cu.cc",
+ "caffe2/operators/space_batch_op_gpu.cu.cc",
+ "caffe2/operators/sparse_normalize_op_gpu.cu.cc",
+ "caffe2/operators/sparse_to_dense_op.cu.cc",
+ "caffe2/operators/spatial_batch_norm_op.cu.cc",
+ "caffe2/operators/spatial_batch_norm_op_cudnn.cu.cc",
+ "caffe2/operators/stump_func_op.cu.cc",
+ "caffe2/operators/summarize_op.cu.cc",
+ "caffe2/operators/swish_op.cu.cc",
+ "caffe2/operators/tan_op.cu.cc",
+ "caffe2/operators/tanh_op.cu.cc",
+ "caffe2/operators/thresholded_relu_op.cu.cc",
+ "caffe2/operators/tile_op.cu.cc",
+ "caffe2/operators/top_k.cu.cc",
+ "caffe2/operators/transpose_op.cu.cc",
+ "caffe2/operators/unique_ops.cu.cc",
+ "caffe2/operators/upsample_op.cu.cc",
+ "caffe2/operators/utility_ops.cu.cc",
+ "caffe2/operators/weighted_sample_op.cu.cc",
+ "caffe2/sgd/adadelta_op_gpu.cu.cc",
+ "caffe2/sgd/adagrad_op_gpu.cu.cc",
+ "caffe2/sgd/adam_op_gpu.cu.cc",
+ "caffe2/sgd/fp16_momentum_sgd_op.cu.cc",
+ "caffe2/sgd/fp32_momentum_sgd_op.cu.cc",
+ "caffe2/sgd/lars_op_gpu.cu.cc",
+ "caffe2/sgd/momentum_sgd_op_gpu.cu.cc",
+ "caffe2/sgd/rmsprop_op_gpu.cu.cc",
+ "caffe2/sgd/yellowfin_op_gpu.cu.cc",
+ "caffe2/utils/math/broadcast.cu.cc",
+ "caffe2/utils/math/elementwise.cu.cc",
+ "caffe2/utils/math/reduce.cu.cc",
+ "caffe2/utils/math/transpose.cu.cc",
+ "caffe2/utils/math_gpu.cu.cc",
+ ],
+)
+
+# To achieve finer granularity and make debugging easier, caffe2 is split into three libraries:
+# ATen, caffe2 and caffe2_for_aten_headers. The ATen lib groups the source code under the
+# aten/ directory, and caffe2 contains most files under the `caffe2/` directory. Since the
+# ATen lib and the caffe2 lib would depend on each other, `caffe2_for_aten_headers` is split
+# out from `caffe2` to avoid a dependency cycle.
+cc_library(
+ name = "caffe2_for_aten_headers",
+ hdrs = [
+ "caffe2/core/macros.h",
+ "caffe2/core/common.h",
+ "caffe2/core/logging.h",
+ "caffe2/core/types.h",
+ "caffe2/perfkernels/common.h",
+ "caffe2/perfkernels/embedding_lookup.h",
+ "caffe2/perfkernels/embedding_lookup_idx.h",
+ "caffe2/utils/fixed_divisor.h",
+ "caffe2/utils/cpuid.h",
+ ] + glob([
+ "caffe2/utils/threadpool/*.h",
+ "caffe2/proto/*.h",
+ ]),
+ copts = CAFFE2_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":c10_headers",
+ ":caffe2_protos",
+ ],
+)
+
+cc_library(
+ name = "caffe2_headers",
+ hdrs = glob([
+ "caffe2/contrib/aten/*.h",
+ "caffe2/contrib/gloo/*.h",
+ "caffe2/core/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Converters/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Generated/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Graph/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Representations/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Support/*.h",
+ "caffe2/core/nomnigraph/include/nomnigraph/Transformations/*.h",
+ "caffe2/core/nomnigraph/tests/*.h",
+ "caffe2/db/*.h",
+ "caffe2/distributed/*.h",
+ "caffe2/ideep/*.h",
+ "caffe2/ideep/operators/*.h",
+ "caffe2/ideep/operators/quantization/*.h",
+ "caffe2/ideep/utils/*.h",
+ "caffe2/onnx/*.h",
+ "caffe2/operators/*.h",
+ "caffe2/operators/experimental/c10/cpu/*.h",
+ "caffe2/operators/rnn/*.h",
+ "caffe2/opt/*.h",
+ "caffe2/perfkernels/*.h",
+ "caffe2/predictor/*.h",
+ "caffe2/predictor/emulator/*.h",
+ "caffe2/proto/*.h",
+ "caffe2/quantization/server/*.h",
+ "caffe2/queue/*.h",
+ "caffe2/serialize/*.h",
+ "caffe2/sgd/*.h",
+ "caffe2/share/contrib/depthwise/*.h",
+ "caffe2/transforms/*.h",
+ "caffe2/utils/*.h",
+ "caffe2/utils/math/*.h",
+ "caffe2/utils/threadpool/*.h",
+ "modules/**/*.h",
+ ]) + if_cuda(glob([
+ "caffe2/**/*.cuh",
+ "caffe2/image/*.h",
+ ])),
+ copts = CAFFE2_COPTS,
+ includes = [
+ "caffe2/contrib/aten",
+ "caffe2/core/nomnigraph/include",
+ "third_party/miniz-2.0.8",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_for_aten_headers",
+ ":caffe2_protos",
+ ],
+)
+
+cc_library(
+ name = "caffe2_dnnlowp_avx2_ops",
+ srcs = [
+ "caffe2/quantization/server/elementwise_sum_dnnlowp_op_avx2.cc",
+ "caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc",
+ "caffe2/quantization/server/group_norm_dnnlowp_op_avx2.cc",
+ "caffe2/quantization/server/norm_minimization_avx2.cc",
+ "caffe2/quantization/server/pool_dnnlowp_op_avx2.cc",
+ "caffe2/quantization/server/relu_dnnlowp_op_avx2.cc",
+ "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_avx2.cc",
+ "caffe2/quantization/server/transpose.cc",
+ ],
+ copts = CAFFE2_COPTS + [
+ "-mf16c",
+ "-mavx2",
+ "-mfma",
+ "-mxsave",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_headers",
+ "@fbgemm",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "caffe2",
+ srcs = [
+ "caffe2/db/create_db_op.cc",
+ "caffe2/db/protodb.cc",
+ "caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc",
+ ":caffe2_contrib_srcs",
+ ":caffe2_core_srcs",
+ ":caffe2_distributed_srcs",
+ ":caffe2_ideep_srcs",
+ ":caffe2_onnx_srcs",
+ ":caffe2_operators_srcs",
+ ":caffe2_opt_srcs",
+ ":caffe2_perfkernels_srcs",
+ ":caffe2_predictor_srcs",
+ ":caffe2_quantization_srcs",
+ ":caffe2_queue_srcs",
+ ":caffe2_serialize_srcs",
+ ":caffe2_sgd_srcs",
+ ":caffe2_transforms_srcs",
+ ":caffe2_utils_srcs",
+ ],
+ copts = CAFFE2_COPTS + ["-mf16c"],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_headers",
+ ":caffe2_dnnlowp_avx2_ops",
+ ":caffe2_perfkernels_avx",
+ ":caffe2_perfkernels_avx2",
+ ":caffe2_perfkernels_avx512",
+ ":caffe2_protos",
+ "//third_party/miniz-2.0.8:miniz",
+ "@com_google_protobuf//:protobuf",
+ "@eigen",
+ "@foxi",
+ "@gloo",
+ "@onnx",
+ ] + if_cuda(
+ [
+ ":caffe2_cpp_cuda",
+ ":aten_cuda",
+ ],
+ [":aten"],
+ ),
+ alwayslink = True,
+)
+
+cc_library(
+ name = "caffe2_cpp_cuda",
+ srcs = [":caffe2_cuda_srcs"],
+ copts = CAFFE2_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_cuda",
+ ":caffe2_headers",
+ ],
+ alwayslink = True,
+)
+
+cu_library(
+ name = "caffe2_cuda",
+ srcs = [":caffe2_cu_srcs"],
+ copts = CAFFE2_COPTS + torch_cuda_half_options,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten",
+ ":caffe2_headers",
+ "@cub",
+ "@cuda//:cublas",
+ "@cuda//:curand",
+ "@cudnn",
+ "@eigen",
+ "@gloo",
+ ],
+ alwayslink = True,
+)
+
+PERF_COPTS = [
+ "-DHAVE_GCC_GET_CPUID",
+ "-DUSE_AVX",
+ "-DUSE_AVX2",
+ "-DTH_HAVE_THREAD",
+ "-DHAVE_AVX_CPU_DEFINITION",
+ "-DHAVE_AVX2_CPU_DEFINITION",
+ "-DENABLE_ALIAS=1",
+ "-DHAVE_MALLOC_USABLE_SIZE=1",
+ "-DHAVE_MMAP=1",
+ "-DHAVE_SHM_OPEN=1",
+ "-DHAVE_SHM_UNLINK=1",
+ "-DSLEEF_STATIC_LIBS=1",
+ "-D_FILE_OFFSET_BITS=64",
+ "-DUSE_FBGEMM",
+ "-fvisibility-inlines-hidden",
+ "-Wunused-parameter",
+ "-fno-math-errno",
+ "-fno-trapping-math",
+ "-mf16c",
+]
+
+PERF_HEADERS = glob([
+ "caffe2/perfkernels/*.h",
+ "caffe2/core/*.h",
+])
+
+cc_library(
+ name = "caffe2_perfkernels_avx",
+ srcs = glob([
+ "caffe2/perfkernels/*_avx.cc",
+ ]),
+ hdrs = PERF_HEADERS,
+ copts = PERF_COPTS + [
+ "-mavx",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_headers",
+ ":c10",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "caffe2_perfkernels_avx2",
+ srcs = glob([
+ "caffe2/perfkernels/*_avx2.cc",
+ ]),
+ hdrs = PERF_HEADERS,
+ copts = PERF_COPTS + [
+ "-mavx2",
+ "-mfma",
+ "-mavx",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_headers",
+ ":c10",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "caffe2_perfkernels_avx512",
+ srcs = [
+ "caffe2/perfkernels/common_avx512.cc",
+ ],
+ hdrs = PERF_HEADERS,
+ copts = PERF_COPTS + [
+ "-mavx512f",
+ "-mavx512dq",
+ "-mavx512vl",
+ "-mavx2",
+ "-mfma",
+ "-mavx",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2_headers",
+ ":c10",
+ ],
+ alwayslink = True,
+)
+
+# torch
+cc_library(
+ name = "torch_headers",
+ hdrs = if_cuda(
+ glob(
+ [
+ "torch/csrc/cuda/*.h",
+ ],
+ ),
+ ) + glob(
+ [
+ "torch/*.h",
+ "torch/csrc/*.h",
+ "torch/csrc/api/include/torch/*.h",
+ "torch/csrc/api/include/torch/data/*.h",
+ "torch/csrc/api/include/torch/data/dataloader/*.h",
+ "torch/csrc/api/include/torch/data/datasets/*.h",
+ "torch/csrc/api/include/torch/data/detail/*.h",
+ "torch/csrc/api/include/torch/data/samplers/*.h",
+ "torch/csrc/api/include/torch/data/transforms/*.h",
+ "torch/csrc/api/include/torch/detail/*.h",
+ "torch/csrc/api/include/torch/nn/*.h",
+ "torch/csrc/api/include/torch/nn/functional/*.h",
+ "torch/csrc/api/include/torch/nn/modules/*.h",
+ "torch/csrc/api/include/torch/nn/modules/container/*.h",
+ "torch/csrc/api/include/torch/nn/options/*.h",
+ "torch/csrc/api/include/torch/nn/parallel/*.h",
+ "torch/csrc/api/include/torch/nn/utils/*.h",
+ "torch/csrc/api/include/torch/optim/*.h",
+ "torch/csrc/api/include/torch/python/*.h",
+ "torch/csrc/api/include/torch/serialize/*.h",
+ "torch/csrc/autograd/*.h",
+ "torch/csrc/autograd/functions/*.h",
+ "torch/csrc/autograd/utils/*.h",
+ "torch/csrc/distributed/autograd/functions/*.h",
+ "torch/csrc/distributed/autograd/context/*.h",
+ "torch/csrc/distributed/autograd/engine/*.h",
+ "torch/csrc/distributed/autograd/rpc_messages/*.h",
+ "torch/csrc/distributed/autograd/*.h",
+ "torch/csrc/distributed/c10d/*.h",
+ "torch/csrc/distributed/rpc/*.h",
+ "torch/csrc/generic/*.h",
+ "torch/csrc/generic/*.cpp",
+ "torch/csrc/jit/*.h",
+ "torch/csrc/jit/api/*.h",
+ "torch/csrc/jit/codegen/cuda/*.h",
+ "torch/csrc/jit/codegen/fuser/*.h",
+ "torch/csrc/jit/codegen/fuser/cpu/*.h",
+ "torch/csrc/jit/codegen/fuser/cuda/*.h",
+ "torch/csrc/jit/ir/*.h",
+ "torch/csrc/jit/fuser/*.h",
+ "torch/csrc/jit/fuser/cpu/*.h",
+ "torch/csrc/jit/fuser/cuda/*.h",
+ "torch/csrc/jit/passes/*.h",
+ "torch/csrc/jit/passes/onnx/*.h",
+ "torch/csrc/jit/passes/utils/*.h",
+ "torch/csrc/jit/python/*.h",
+ "torch/csrc/jit/runtime/*.h",
+ "torch/csrc/jit/frontend/*.h",
+ "torch/csrc/jit/mobile/*.h",
+ "torch/csrc/jit/serialization/*.h",
+ "torch/csrc/jit/tensorexpr/*.h",
+ "torch/csrc/jit/testing/*.h",
+ "torch/csrc/multiprocessing/*.h",
+ "torch/csrc/onnx/*.h",
+ "torch/csrc/tensor/*.h",
+ "torch/csrc/utils/*.h",
+ "torch/lib/libshm/*.h",
+ "torch/lib/c10d/*.hpp",
+ ],
+ exclude = [
+ "torch/lib/c10d/ProcessGroupMPI.hpp",
+ "torch/lib/c10d/ProcessGroupNCCL.hpp",
+ ],
+ ) + [":generated_code"],
+ includes = [
+ "torch/csrc",
+ "torch/csrc/api/include",
+ "torch/lib",
+ "torch/lib/libshm",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten_headers",
+ ":c10_headers",
+ ":caffe2_headers",
+ "@onnx",
+ ],
+ alwayslink = True,
+)
+
+TORCH_COPTS = COMMON_COPTS + [  # compile flags used by the :torch cc_library (see its copts)
+ "-Dtorch_EXPORTS",
+ "-DHAVE_AVX_CPU_DEFINITION",
+ "-DHAVE_AVX2_CPU_DEFINITION",
+ "-DCAFFE2_USE_GLOO",
+ "-fvisibility-inlines-hidden",
+ "-fno-math-errno",
+ "-fno-trapping-math",
+]
+
+filegroup(
+ name = "torch_srcs",
+ srcs = [
+ "torch/csrc/jit/tensorexpr/codegen.cpp",
+ "torch/csrc/jit/tensorexpr/eval.cpp",
+ "torch/csrc/jit/tensorexpr/expr.cpp",
+ "torch/csrc/jit/tensorexpr/function.cpp",
+ "torch/csrc/jit/tensorexpr/hash_provider.cpp",
+ "torch/csrc/jit/tensorexpr/ir.cpp",
+ "torch/csrc/jit/tensorexpr/ir_mutator.cpp",
+ "torch/csrc/jit/tensorexpr/ir_printer.cpp",
+ "torch/csrc/jit/tensorexpr/ir_simplifier.cpp",
+ "torch/csrc/jit/tensorexpr/ir_visitor.cpp",
+ "torch/csrc/jit/tensorexpr/kernel.cpp",
+ "torch/csrc/jit/tensorexpr/llvm_codegen.cpp",
+ "torch/csrc/jit/tensorexpr/llvm_jit.cpp",
+ "torch/csrc/jit/tensorexpr/loopnest.cpp",
+ "torch/csrc/jit/tensorexpr/mem_arena.cpp",
+ "torch/csrc/jit/tensorexpr/tensor.cpp",
+ "torch/csrc/jit/tensorexpr/types.cpp",
+ "torch/csrc/jit/tensorexpr/unique_name_manager.cpp",
+ ],
+)
+
+cc_library(
+ name = "torch",
+ srcs = if_cuda(glob(  # sources wrapped in if_cuda (presumably only compiled in CUDA builds — confirm against its definition)
+ [
+ "torch/csrc/cuda/*.cpp",
+ "torch/csrc/autograd/functions/comm.cpp",
+ "torch/csrc/jit/tensorexpr/*.cpp",  # NOTE(review): tensorexpr sources are also listed in :torch_srcs, which is added unconditionally below
+ ],
+ exclude = [
+ "torch/csrc/cuda/python_nccl.cpp",
+ "torch/csrc/cuda/nccl.cpp",
+ ],
+ )) + glob(
+ [
+ "torch/csrc/*.cpp",
+ "torch/csrc/api/src/*.cpp",
+ "torch/csrc/api/src/data/datasets/*.cpp",
+ "torch/csrc/api/src/data/samplers/*.cpp",
+ "torch/csrc/api/src/nn/*.cpp",
+ "torch/csrc/api/src/nn/modules/*.cpp",
+ "torch/csrc/api/src/nn/modules/container/*.cpp",
+ "torch/csrc/api/src/nn/options/*.cpp",
+ "torch/csrc/api/src/optim/*.cpp",
+ "torch/csrc/api/src/python/*.cpp",
+ "torch/csrc/api/src/serialize/*.cpp",
+ "torch/csrc/autograd/*.cpp",
+ "torch/csrc/autograd/functions/*.cpp",
+ "torch/csrc/autograd/generated/*.cpp",
+ "torch/csrc/distributed/autograd/*.cpp",
+ "torch/csrc/distributed/autograd/context/*.cpp",
+ "torch/csrc/distributed/autograd/functions/*.cpp",
+ "torch/csrc/distributed/autograd/engine/*.cpp",
+ "torch/csrc/distributed/autograd/rpc_messages/*.cpp",
+ "torch/csrc/distributed/rpc/*.cpp",
+ "torch/csrc/jit/*.cpp",
+ "torch/csrc/jit/api/*.cpp",
+ "torch/csrc/jit/codegen/fuser/*.cpp",
+ "torch/csrc/jit/frontend/*.cpp",
+ "torch/csrc/jit/fuser/*.cpp",
+ "torch/csrc/jit/fuser/cpu/*.cpp",
+ "torch/csrc/jit/ir/*.cpp",
+ "torch/csrc/jit/generated/*.cpp",
+ "torch/csrc/jit/passes/*.cpp",
+ "torch/csrc/jit/passes/onnx/*.cpp",
+ "torch/csrc/jit/passes/utils/*.cpp",
+ "torch/csrc/jit/mobile/*.cpp",
+ "torch/csrc/jit/python/*.cpp",
+ "torch/csrc/jit/runtime/*.cpp",
+ "torch/csrc/jit/serialization/*.cpp",
+ "torch/csrc/jit/testing/*.cpp",
+ "torch/csrc/multiprocessing/*.cpp",
+ "torch/csrc/onnx/*.cpp",
+ "torch/csrc/tensor/*.cpp",
+ "torch/csrc/utils/*.cpp",
+ "torch/lib/libshm/*.cpp",
+ "torch/lib/c10d/*.cpp",
+ ],
+ exclude = glob([
+ "torch/csrc/autograd/*_cuda.cpp",
+ ]) + [
+ "torch/csrc/autograd/functions/comm.cpp",  # removed from the common glob; it is listed in the if_cuda glob above instead
+ "torch/csrc/autograd/generated/VariableTypeEverything.cpp",
+ "torch/lib/libshm/manager.cpp",
+ "torch/lib/c10d/NCCLUtils.cpp",
+ "torch/lib/c10d/ProcessGroupMPI.cpp",
+ "torch/lib/c10d/ProcessGroupNCCL.cpp",
+ ],
+ ) + [
+ "torch/csrc/jit/codegen/cuda/interface.cpp",
+ ":torch_srcs",
+ ":generated_code",
+ ],
+ copts = TORCH_COPTS + if_cuda(["-DUSE_CUDA=1"]),
+ defines = [
+ "CAFFE2_NIGHTLY_VERSION=20200115",
+ ],
+ linkopts = [  # NOTE(review): hard-codes the /opt/conda/lib prefix and Python 3.6 — presumably matches the CI docker image; confirm before building elsewhere
+ "-Wl,--rpath",
+ "-Wl,/opt/conda/lib",
+ "-L/opt/conda/lib",
+ "-lpython3.6m",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":caffe2",
+ ":torch_headers",
+ "@local_config_python//:python_headers",
+ "@pybind11",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "libtorch_headers",
+ hdrs = glob([
+ "**/*.h",
+ "**/*.cuh",
+ ]) + [
+ ":generated_code",
+ ],
+ includes = [
+ ".",
+ "torch/csrc/api/include",
+ "torch/lib",
+ "torch/lib/libshm",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":aten_headers",
+ ":c10_headers",
+ ":caffe2_headers",
+ ],
+)
+
+# cpp api tests
+cc_library(
+ name = "test_support",
+ testonly = True,
+ srcs = [
+ "test/cpp/api/support.cpp",
+ ],
+ hdrs = [
+ "test/cpp/api/support.h",
+ "test/cpp/common/support.h",
+ ],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "any_test",
+ srcs = ["test/cpp/api/any.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "autograd_test",
+ srcs = ["test/cpp/api/autograd.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "dataloader_test",
+ srcs = ["test/cpp/api/dataloader.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "enum_test",
+ srcs = ["test/cpp/api/enum.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "expanding_array_test",
+ srcs = ["test/cpp/api/expanding-array.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "functional_test",
+ srcs = ["test/cpp/api/functional.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "init_test",
+ srcs = [
+ "test/cpp/api/init.cpp",
+ "test/cpp/api/init_baseline.h",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# Torch integration tests rely on a labeled data set from the MNIST database.
+# http://yann.lecun.com/exdb/mnist/
+cc_test(
+ name = "integration_test",
+ srcs = ["test/cpp/api/integration.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "jit_test",
+ srcs = ["test/cpp/api/jit.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "memory_test",
+ srcs = ["test/cpp/api/memory.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "misc_test",
+ srcs = ["test/cpp/api/misc.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "module_test",
+ srcs = ["test/cpp/api/module.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "modulelist_test",
+ srcs = ["test/cpp/api/modulelist.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "modules_test",
+ srcs = ["test/cpp/api/modules.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "nn_utils_test",
+ srcs = ["test/cpp/api/nn_utils.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "optim_test",
+ srcs = [
+ "test/cpp/api/optim.cpp",
+ "test/cpp/api/optim_baseline.h",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "ordered_dict_test",
+ srcs = ["test/cpp/api/ordered_dict.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "parallel_test",
+ srcs = ["test/cpp/api/parallel.cpp"],
+ copts = COMMON_COPTS,
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "rnn_test",
+ size = "small",
+ srcs = ["test/cpp/api/rnn.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "sequential_test",
+ size = "small",
+ srcs = ["test/cpp/api/sequential.cpp"],
+ copts = ["-Wno-deprecated-declarations"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "serialize_test",
+ size = "small",
+ srcs = ["test/cpp/api/serialize.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "static_test",
+ size = "small",
+ srcs = ["test/cpp/api/static.cpp"],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "tensor_test",
+ size = "small",
+ srcs = ["test/cpp/api/tensor.cpp"],
+ copts = ["-Wno-deprecated-declarations"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "tensor_cuda_test",
+ size = "small",
+ srcs = ["test/cpp/api/tensor_cuda.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "tensor_options_test",
+ size = "small",
+ srcs = ["test/cpp/api/tensor_options.cpp"],
+ deps = [
+ ":test_support",
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "tensor_options_cuda_test",
+ size = "small",
+ srcs = ["test/cpp/api/tensor_options_cuda.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "torch_include_test",
+ size = "small",
+ srcs = ["test/cpp/api/torch_include.cpp"],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+test_suite(
+ name = "api_tests",
+ tests = [  # NOTE(review): parallel_test, tensor_cuda_test and tensor_options_cuda_test are declared in this file but not listed here — confirm this is intentional
+ "any_test",
+ "autograd_test",
+ "dataloader_test",
+ "enum_test",
+ "expanding_array_test",
+ "functional_test",
+ "init_test",
+ "integration_test",
+ "jit_test",
+ "memory_test",
+ "misc_test",
+ "module_test",
+ "modulelist_test",
+ "modules_test",
+ "nn_utils_test",
+ "optim_test",
+ "ordered_dict_test",
+ "rnn_test",
+ "sequential_test",
+ "serialize_test",
+ "static_test",
+ "tensor_options_test",
+ "tensor_test",
+ "torch_include_test",
+ ],
+)
+
+# dist autograd tests
+cc_test(
+ name = "torch_dist_autograd_test",
+ size = "small",
+ srcs = ["test/cpp/dist_autograd/test_dist_autograd.cpp"],
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# jit tests
+# Because these individual unit tests require custom registering,
+# it is easier to mimic the cmake build by globbing together a single test.
+cc_test(
+ name = "jit_tests",
+ size = "small",
+ srcs = glob([
+ "test/cpp/jit/*.cpp",
+ "test/cpp/jit/*.h",
+ "test/cpp/tensorexpr/*.cpp",
+ "test/cpp/tensorexpr/*.h",
+ ]),
+ linkstatic = True,
+ tags = [
+ "exclusive",
+ "gpu-required",
+ ],
+ deps = [
+ ":torch",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# all tests
+test_suite(
+ name = "all_tests",
+ tests = [
+ "api_tests",
+ "c10_tests",
+ "jit_tests",
+ "torch_dist_autograd_test",
+ ],
+)
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..53696dc
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,160 @@
+workspace(name = "pytorch")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("//tools/rules:workspace.bzl", "new_patched_local_repository")
+
+http_archive(
+ name = "bazel_skylib",  # NOTE(review): no sha256 is given for this download, unlike the gflags, mkl and rules_python archives in this file — consider pinning one
+ urls = [
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz",
+ ],
+)
+
+http_archive(
+ name = "com_google_googletest",  # NOTE(review): no sha256 is given for this download, unlike the gflags, mkl and rules_python archives in this file — consider pinning one
+ strip_prefix = "googletest-cd6b9ae3243985d4dc725abd513a874ab4161f3e",
+ urls = [
+ "https://github.com/google/googletest/archive/cd6b9ae3243985d4dc725abd513a874ab4161f3e.tar.gz",
+ ],
+)
+
+http_archive(
+ name = "pybind11_bazel",
+ strip_prefix = "pybind11_bazel-7f397b5d2cc2434bbd651e096548f7b40c128044",
+ urls = ["https://github.com/pybind/pybind11_bazel/archive/7f397b5d2cc2434bbd651e096548f7b40c128044.zip"],
+ sha256 = "e4a9536f49d4a88e3c5a09954de49c4a18d6b1632c457a62d6ec4878c27f1b5b",
+)
+
+new_local_repository(
+ name = "pybind11",
+ build_file = "@pybind11_bazel//:pybind11.BUILD",
+ path = "third_party/pybind11",
+)
+
+http_archive(
+ name = "com_github_glog",  # NOTE(review): no sha256 is given for this download, unlike the gflags, mkl and rules_python archives in this file — consider pinning one
+ strip_prefix = "glog-0.4.0",
+ urls = [
+ "https://github.com/google/glog/archive/v0.4.0.tar.gz",
+ ],
+)
+
+http_archive(
+ name = "com_github_gflags_gflags",
+ strip_prefix = "gflags-2.2.2",
+ urls = [
+ "https://github.com/gflags/gflags/archive/v2.2.2.tar.gz",
+ ],
+ sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf",
+)
+
+new_local_repository(
+ name = "gloo",
+ build_file = "//third_party:gloo.BUILD",
+ path = "third_party/gloo",
+)
+
+new_local_repository(
+ name = "onnx",
+ build_file = "//third_party:onnx.BUILD",
+ path = "third_party/onnx",
+)
+
+new_local_repository(
+ name = "foxi",
+ build_file = "//third_party:foxi.BUILD",
+ path = "third_party/foxi",
+)
+
+local_repository(
+ name = "com_google_protobuf",
+ path = "third_party/protobuf",
+)
+
+new_local_repository(
+ name = "eigen",
+ build_file = "//third_party:eigen.BUILD",
+ path = "third_party/eigen",
+)
+
+new_local_repository(
+ name = "fbgemm",
+ build_file = "//third_party:fbgemm.BUILD",
+ path = "third_party/fbgemm",
+)
+
+new_local_repository(
+ name = "ideep",
+ build_file = "//third_party:ideep.BUILD",
+ path = "third_party/ideep",
+)
+
+new_local_repository(
+ name = "mkl_dnn",
+ build_file = "//third_party:mkl-dnn.BUILD",
+ path = "third_party/ideep/mkl-dnn",
+)
+
+new_local_repository(
+ name = "cpuinfo",
+ build_file = "//third_party:cpuinfo.BUILD",
+ path = "third_party/cpuinfo",
+)
+
+new_local_repository(
+ name = "asmjit",
+ build_file = "//third_party:asmjit.BUILD",
+ path = "third_party/fbgemm/third_party/asmjit",
+)
+
+new_local_repository(
+ name = "sleef",
+ build_file = "//third_party:sleef.BUILD",
+ path = "third_party/sleef",
+)
+
+new_patched_local_repository(
+ name = "tbb",
+ patches = [
+ "@//third_party:tbb.patch",
+ ],
+ patch_strip = 1,
+ build_file = "//third_party:tbb.BUILD",
+ path = "third_party/tbb",
+)
+
+http_archive(
+ name = "mkl",
+ build_file = "//third_party:mkl.BUILD",
+ strip_prefix = "lib",
+ sha256 = "59154b30dd74561e90d547f9a3af26c75b6f4546210888f09c9d4db8f4bf9d4c",
+ urls = [
+ "https://anaconda.org/anaconda/mkl/2020.0/download/linux-64/mkl-2020.0-166.tar.bz2",
+ ],
+)
+
+http_archive(
+ name = "mkl_headers",
+ build_file = "//third_party:mkl_headers.BUILD",
+ sha256 = "2af3494a4bebe5ddccfdc43bacc80fcd78d14c1954b81d2c8e3d73b55527af90",
+ urls = [
+ "https://anaconda.org/anaconda/mkl-include/2020.0/download/linux-64/mkl-include-2020.0-166.tar.bz2",
+ ],
+)
+
+http_archive(
+ name = "rules_python",
+ url = "https://github.com/bazelbuild/rules_python/releases/download/0.0.1/rules_python-0.0.1.tar.gz",
+ sha256 = "aa96a691d3a8177f3215b14b0edc9641787abaaa30363a080165d06ab65e1161",
+)
+
+load("@pybind11_bazel//:python_configure.bzl", "python_configure")
+python_configure(name = "local_config_python")
+
+load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
+
+protobuf_deps()
+
+load("@rules_python//python:repositories.bzl", "py_repositories")
+
+py_repositories()
diff --git a/aten.bzl b/aten.bzl
new file mode 100644
index 0000000..e04944e
--- /dev/null
+++ b/aten.bzl
@@ -0,0 +1,37 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+CPU_CAPABILITY_NAMES = ["DEFAULT", "AVX", "AVX2"]
+PREFIX = "aten/src/ATen/native/"
+
+def intern_build_aten_ops(copts, deps):  # declares one ATen_CPU_<capability> cc_library per entry of CPU_CAPABILITY_NAMES, plus an ATen_CPU library that lists them all
+ for cpu_capability in CPU_CAPABILITY_NAMES:
+ srcs = []  # copied sources collected for this capability
+ for impl in native.glob(
+ [
+ PREFIX + "cpu/*.cpp",
+ PREFIX + "quantized/cpu/kernels/*.cpp",
+ ]):
+ name = impl.replace(PREFIX, "")  # kernel source path with PREFIX removed
+ out = PREFIX + name + "." + cpu_capability + ".cpp"  # per-capability copy, i.e. <original path>.<capability>.cpp
+ native.genrule(
+ name = name + "_" + cpu_capability + "_cp",
+ srcs = [impl],
+ outs = [out],
+ cmd = "cp $< $@",  # plain copy of the single source to the single output
+ )
+ srcs.append(out)
+
+ cc_library(
+ name = "ATen_CPU_" + cpu_capability,
+ srcs = srcs,
+ copts = copts + [  # caller-supplied flags plus the macros naming this capability
+ "-DCPU_CAPABILITY=" + cpu_capability,
+ "-DCPU_CAPABILITY_" + cpu_capability,
+ ],
+ deps = deps,
+ )
+ cc_library(
+ name = "ATen_CPU",
+ srcs = ["ATen_CPU_" + cpu_capability for cpu_capability in CPU_CAPABILITY_NAMES],  # references every per-capability library declared above
+ linkstatic = 1,
+ )
diff --git a/third_party/BUILD b/third_party/BUILD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/third_party/BUILD
diff --git a/third_party/asmjit.BUILD b/third_party/asmjit.BUILD
new file mode 100644
index 0000000..13fadb2
--- /dev/null
+++ b/third_party/asmjit.BUILD
@@ -0,0 +1,28 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "asmjit",
+ srcs = glob([
+ "src/asmjit/core/*.cpp",
+ "src/asmjit/x86/*.cpp",
+ ]),
+ hdrs = glob([
+ "src/asmjit/x86/*.h",
+ "src/asmjit/core/*.h",
+ "src/asmjit/*.h",
+ ]),
+ copts = [
+ "-DASMJIT_STATIC",
+ "-fno-tree-vectorize",
+ "-std=c++17",  # NOTE(review): conflicts with "-std=gnu++11" two lines below; compilers normally honor the last -std flag — confirm which standard is intended
+ "-fmerge-all-constants",
+ "-std=gnu++11",
+ "-DTH_BLAS_MKL",
+ ],
+ includes = [
+ "asmjit/",
+ "src/",
+ ],
+ linkstatic = True,
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
new file mode 100644
index 0000000..c448950
--- /dev/null
+++ b/third_party/cpuinfo.BUILD
@@ -0,0 +1,56 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "clog",
+ srcs = [
+ "deps/clog/src/clog.c",
+ ],
+ hdrs = glob([
+ "deps/clog/include/*.h",
+ ]),
+ includes = [
+ "deps/clog/include/",
+ ],
+ linkstatic = True,
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "cpuinfo",
+ srcs = glob(
+ [
+ "src/*.c",
+ "src/linux/*.c",
+ "src/x86/*.c",
+ "src/x86/cache/*.c",
+ "src/x86/linux/*.c",
+ ],
+ exclude = [
+ "src/x86/mockcpuid.c",
+ "src/linux/mockfile.c",
+ ],
+ ),
+ hdrs = glob([
+ "include/*.h",
+ "src/*.h",
+ "src/cpuinfo/*.h",
+ "src/include/*.h",
+ "src/x86/*.h",
+ "src/x86/linux/*.h",
+ "src/linux/*.h",
+ ]),
+ copts = [
+ "-DCPUINFO_LOG_LEVEL=2",
+ "-DTH_BLAS_MKL",
+ "-D_GNU_SOURCE=1",
+ ],
+ includes = [
+ "include",
+ "src",
+ ],
+ linkstatic = True,
+ visibility = ["//visibility:public"],
+ deps = [
+ ":clog",
+ ],
+)
diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD
new file mode 100644
index 0000000..a6a7353
--- /dev/null
+++ b/third_party/eigen.BUILD
@@ -0,0 +1,91 @@
+# This BUILD file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/eigen.BUILD
+
+# Description:
+# Eigen is a C++ template library for linear algebra: vectors,
+# matrices, and related algorithms.
+
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+licenses([
+ # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code.
+ # We've taken special care to not reference any restricted code.
+ "reciprocal", # MPL2
+ "notice", # Portions BSD
+])
+
+exports_files(["COPYING.MPL2"])
+
+# License-restricted (i.e. not reciprocal or notice) files inside Eigen/...
+EIGEN_RESTRICTED_FILES = [
+ "Eigen/src/OrderingMethods/Amd.h",
+ "Eigen/src/SparseCholesky/**",
+]
+
+# Notable transitive dependencies of restricted files inside Eigen/...
+EIGEN_RESTRICTED_DEPS = [
+ "Eigen/Eigen",
+ "Eigen/IterativeLinearSolvers",
+ "Eigen/MetisSupport",
+ "Eigen/Sparse",
+ "Eigen/SparseCholesky",
+ "Eigen/SparseLU",
+]
+
+EIGEN_FILES = [
+ "Eigen/**",
+ "unsupported/Eigen/CXX11/**",
+ "unsupported/Eigen/FFT",
+ "unsupported/Eigen/KroneckerProduct",
+ "unsupported/Eigen/src/FFT/**",
+ "unsupported/Eigen/src/KroneckerProduct/**",
+ "unsupported/Eigen/MatrixFunctions",
+ "unsupported/Eigen/SpecialFunctions",
+ "unsupported/Eigen/Splines",
+ "unsupported/Eigen/src/MatrixFunctions/**",
+ "unsupported/Eigen/src/SpecialFunctions/**",
+ "unsupported/Eigen/src/Splines/**",
+ "unsupported/Eigen/NonLinearOptimization",
+ "unsupported/Eigen/NumericalDiff",
+ "unsupported/Eigen/src/**",
+ "unsupported/Eigen/Polynomials",
+]
+
+# List of files picked up by glob but actually part of another target.
+EIGEN_EXCLUDE_FILES = ["Eigen/src/Core/arch/AVX/PacketMathGoogleTest.cc"]
+
+# Disallowed eigen modules/files in rNA:
+# * Using the custom STL and memory support, it is not needed and should
+# not be used with c++17.
+# * We will only support the EulerAnglesZYX provided by //atg/geometry so
+# just don't allow people to access the unsupported eigen module.
+EIGEN_DISALLOW_FILES = [
+ "Eigen/StlSupport/*.h",
+ "unsupported/Eigen/EulerAngles",
+ "unsupported/Eigen/src/EulerAngles/**",
+]
+
+# Files known to be under MPL2 license.
+EIGEN_MPL2_HEADER_FILES = glob(
+ EIGEN_FILES,
+ exclude = EIGEN_EXCLUDE_FILES +
+ EIGEN_RESTRICTED_FILES +
+ EIGEN_DISALLOW_FILES +
+ EIGEN_RESTRICTED_DEPS + [
+ # Guarantees any file missed by excludes above will not compile.
+ "Eigen/src/Core/util/NonMPL2.h",
+ "Eigen/**/CMakeLists.txt",
+ ],
+)
+
+cc_library(
+ name = "eigen",
+ hdrs = EIGEN_MPL2_HEADER_FILES,
+ defines = [
+ # This define (mostly) guarantees we don't link any problematic
+ # code. We use it, but we do not rely on it, as evidenced above.
+ "EIGEN_MPL2_ONLY",
+ "EIGEN_MAX_ALIGN_BYTES=64",
+ ],
+ includes = ["."],
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/fbgemm.BUILD b/third_party/fbgemm.BUILD
new file mode 100644
index 0000000..1769179
--- /dev/null
+++ b/third_party/fbgemm.BUILD
@@ -0,0 +1,221 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "fbgemm_src_headers",
+ hdrs = [
+ "src/RefImplementations.h",
+ ],
+ include_prefix = "fbgemm",
+)
+
+cc_library(
+ name = "fbgemm_base",
+ srcs = [
+ "src/EmbeddingSpMDM.cc",
+ "src/EmbeddingSpMDMNBit.cc",
+ "src/ExecuteKernel.cc",
+ "src/ExecuteKernelU8S8.cc",
+ "src/Fbgemm.cc",
+ "src/FbgemmBfloat16Convert.cc",
+ "src/FbgemmConv.cc",
+ "src/FbgemmFP16.cc",
+ "src/FbgemmFloat16Convert.cc",
+ "src/FbgemmI64.cc",
+ "src/FbgemmI8Spmdm.cc",
+ "src/GenerateKernelU8S8S32ACC16.cc",
+ "src/GenerateKernelU8S8S32ACC16Avx512.cc",
+ "src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc",
+ "src/GenerateKernelU8S8S32ACC32.cc",
+ "src/GenerateKernelU8S8S32ACC32Avx512.cc",
+ "src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc",
+ "src/GroupwiseConvAcc32Avx2.cc",
+ "src/PackAMatrix.cc",
+ "src/PackAWithIm2Col.cc",
+ "src/PackBMatrix.cc",
+ "src/PackMatrix.cc",
+ "src/PackAWithQuantRowOffset.cc",
+ "src/PackAWithRowOffset.cc",
+ "src/PackWeightMatrixForGConv.cc",
+ "src/PackWeightsForConv.cc",
+ "src/QuantUtils.cc",
+ "src/RefImplementations.cc",
+ "src/RowWiseSparseAdagradFused.cc",
+ "src/SparseAdagrad.cc",
+ "src/Utils.cc",
+ # Private headers
+ "src/CodeCache.h",
+ "src/CodeGenHelpers.h",
+ "src/ExecuteKernel.h",
+ "src/ExecuteKernelGeneric.h",
+ "src/ExecuteKernelU8S8.h",
+ "src/FbgemmFP16Common.h",
+ "src/GenerateKernel.h",
+ "src/GroupwiseConv.h",
+ "src/RefImplementations.h",
+ "src/TransposeUtils.h",
+ ],
+ hdrs = [
+ "include/fbgemm/FbgemmConvert.h",
+ "include/fbgemm/FbgemmI64.h",
+ ],
+ includes = [
+ ".",
+ "src",
+ ],
+ deps = [
+ ":fbgemm_avx2",
+ ":fbgemm_avx512",
+ ":fbgemm_headers",
+ ":fbgemm_src_headers",
+ "@asmjit",
+ "@cpuinfo",
+ ],
+ linkstatic = 1,
+)
+
+# Compiles FbgemmFloat16ConvertAvx2.cc as its own target instead of as part of
+# :fbgemm_avx2 (where the file is commented out of srcs). This target depends
+# on :fbgemm_base, which in turn depends on :fbgemm_avx2.
+# NOTE(review): presumably the split exists to avoid a dependency cycle between
+# those two targets, as the "_circular" suffix hints - confirm.
+cc_library(
+ name = "fbgemm_avx2_circular",
+ srcs = [
+ "src/FbgemmFloat16ConvertAvx2.cc",
+ ],
+ copts = [
+ "-mavx2",
+ "-mf16c",
+ ],
+ deps = [
+ ":fbgemm_base",
+ ],
+ linkstatic = 1,
+)
+
+cc_library(
+ name = "fbgemm",
+ visibility = ["//visibility:public"],
+ deps = [
+ ":fbgemm_base",
+ ":fbgemm_avx2_circular",
+ ],
+ linkstatic = 1,
+)
+
+cc_library(
+ name = "fbgemm_avx2",
+ srcs = [
+ "src/EmbeddingSpMDMAvx2.cc",
+ "src/FbgemmBfloat16ConvertAvx2.cc",
+ # "src/FbgemmFloat16ConvertAvx2.cc",
+ "src/FbgemmI8Depthwise3DAvx2.cc",
+ "src/FbgemmI8Depthwise3x3Avx2.cc",
+ "src/FbgemmI8DepthwiseAvx2.cc",
+ "src/FbgemmI8DepthwisePerChannelQuantAvx2.cc",
+ "src/OptimizedKernelsAvx2.cc",
+ "src/PackDepthwiseConvMatrixAvx2.cc",
+ "src/QuantUtilsAvx2.cc",
+ "src/UtilsAvx2.cc",
+ # Inline Assembly sources
+ "src/FbgemmFP16UKernelsAvx2.cc",
+ # Private headers
+ "src/FbgemmFP16Common.h",
+ "src/FbgemmFP16UKernelsAvx2.h",
+ "src/FbgemmI8Depthwise2DAvx2-inl.h",
+ "src/FbgemmI8DepthwiseAvx2-inl.h",
+ "src/MaskAvx2.h",
+ "src/OptimizedKernelsAvx2.h",
+ "src/TransposeUtils.h",
+ "src/TransposeUtilsAvx2.h",
+ ],
+ copts = [
+ "-m64",
+ "-mavx2",
+ "-mfma",
+ "-mf16c",
+ "-masm=intel",
+ ],
+ deps = [
+ ":fbgemm_headers",
+ ],
+ linkstatic = 1,
+)
+
+cc_library(
+ name = "fbgemm_avx2_headers",
+ includes = [
+ "src",
+ ],
+ hdrs = [
+ "src/FbgemmFP16UKernelsAvx2.h",
+ "src/MaskAvx2.h",
+ "src/OptimizedKernelsAvx2.h",
+ ],
+)
+
+cc_library(
+ name = "fbgemm_avx512",
+ srcs = [
+ "src/FbgemmBfloat16ConvertAvx512.cc",
+ "src/FbgemmFloat16ConvertAvx512.cc",
+ "src/UtilsAvx512.cc",
+ # Inline Assembly sources
+ "src/FbgemmFP16UKernelsAvx512.cc",
+ "src/FbgemmFP16UKernelsAvx512_256.cc",
+ # Private headers
+ "src/FbgemmFP16UKernelsAvx512.h",
+ "src/FbgemmFP16Common.h",
+ "src/MaskAvx2.h",
+ "src/TransposeUtils.h",
+ "src/TransposeUtilsAvx2.h",
+ ],
+ hdrs = [
+ "src/FbgemmFP16UKernelsAvx512_256.h",
+ ],
+ copts = [
+ "-m64",
+ "-mfma",
+ "-mavx512f",
+ "-mavx512bw",
+ "-mavx512dq",
+ "-mavx512vl",
+ "-masm=intel",
+ ],
+ deps = [
+ ":fbgemm_headers",
+ ],
+ linkstatic = 1,
+)
+
+cc_library(
+ name = "fbgemm_avx512_headers",
+ includes = [
+ "src",
+ ],
+ hdrs = [
+ "src/FbgemmFP16UKernelsAvx512.h",
+ "src/FbgemmFP16UKernelsAvx512_256.h",
+ ],
+)
+
+cc_library(
+ name = "fbgemm_headers",
+ hdrs = [
+ "include/fbgemm/ConvUtils.h",
+ "include/fbgemm/Fbgemm.h",
+ "include/fbgemm/FbgemmBuild.h",
+ "include/fbgemm/FbgemmConvert.h",
+ "include/fbgemm/FbgemmEmbedding.h",
+ "include/fbgemm/FbgemmFP16.h",
+ "include/fbgemm/FbgemmI64.h",
+ "include/fbgemm/FbgemmI8DepthwiseAvx2.h",
+ "include/fbgemm/FbgemmI8Spmdm.h",
+ "include/fbgemm/OutputProcessing-inl.h",
+ "include/fbgemm/PackingTraits-inl.h",
+ "include/fbgemm/QuantUtils.h",
+ "include/fbgemm/QuantUtilsAvx2.h",
+ "include/fbgemm/Types.h",
+ "include/fbgemm/Utils.h",
+ "include/fbgemm/UtilsAvx2.h",
+ ],
+ includes = [
+ "include",
+ ],
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/foxi.BUILD b/third_party/foxi.BUILD
new file mode 100644
index 0000000..6e19fa0
--- /dev/null
+++ b/third_party/foxi.BUILD
@@ -0,0 +1,16 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "foxi",
+ srcs = [
+ "foxi/onnxifi_loader.c",
+ ],
+ hdrs = glob([
+ "foxi/*.h",
+ ]),
+ includes = [
+ ".",
+ ],
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/gloo.BUILD b/third_party/gloo.BUILD
new file mode 100644
index 0000000..e73b977
--- /dev/null
+++ b/third_party/gloo.BUILD
@@ -0,0 +1,85 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//tools/rules:cu.bzl", "cu_library")
+load("@//third_party:substitution.bzl", "template_rule")
+load("@//tools/config:defs.bzl", "if_cuda")
+
+template_rule(
+ name = "gloo_config_cmake_macros",
+ src = "gloo/config.h.in",
+ out = "gloo/config.h",
+ substitutions = {
+ "@GLOO_VERSION_MAJOR@": "0",
+ "@GLOO_VERSION_MINOR@": "5",
+ "@GLOO_VERSION_PATCH@": "0",
+ "cmakedefine01 GLOO_USE_CUDA": "define GLOO_USE_CUDA 1",
+ "cmakedefine01 GLOO_USE_NCCL": "define GLOO_USE_NCCL 0",
+ "cmakedefine01 GLOO_USE_ROCM": "define GLOO_USE_ROCM 0",
+ "cmakedefine01 GLOO_USE_RCCL": "define GLOO_USE_RCCL 0",
+ "cmakedefine01 GLOO_USE_REDIS": "define GLOO_USE_REDIS 0",
+ "cmakedefine01 GLOO_USE_IBVERBS": "define GLOO_USE_IBVERBS 0",
+ "cmakedefine01 GLOO_USE_MPI": "define GLOO_USE_MPI 0",
+ "cmakedefine01 GLOO_USE_AVX": "define GLOO_USE_AVX 0",
+ "cmakedefine01 GLOO_USE_LIBUV": "define GLOO_USE_LIBUV 0",
+ "cmakedefine01 GLOO_HAVE_TRANSPORT_TCP": "define GLOO_HAVE_TRANSPORT_TCP 1",
+ "cmakedefine01 GLOO_HAVE_TRANSPORT_IBVERBS": "define GLOO_HAVE_TRANSPORT_IBVERBS 0",
+ "cmakedefine01 GLOO_HAVE_TRANSPORT_UV": "define GLOO_HAVE_TRANSPORT_UV 0",
+ },
+)
+
+cc_library(
+ name = "gloo_headers",
+ hdrs = glob(
+ [
+ "gloo/*.h",
+ "gloo/common/*.h",
+ "gloo/rendezvous/*.h",
+ "gloo/transport/*.h",
+ "gloo/transport/tcp/*.h",
+ ],
+ exclude = [
+ "gloo/rendezvous/redis_store.h",
+ ],
+ ) + ["gloo/config.h"],
+ includes = [
+ ".",
+ ],
+)
+
+cu_library(
+ name = "gloo_cuda",
+ srcs = [
+ "gloo/cuda.cu.cc",
+ "gloo/cuda_private.cu.cc",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":gloo_headers",
+ ],
+ alwayslink = True,
+)
+
+# CPU part of gloo (TCP transport only); the CUDA sources are built by
+# :gloo_cuda and are pulled in only when if_cuda() selects them.
+cc_library(
+    name = "gloo",
+    srcs = glob(
+        [
+            "gloo/*.cc",
+            "gloo/common/*.cc",
+            "gloo/rendezvous/*.cc",
+            "gloo/transport/*.cc",
+            "gloo/transport/tcp/*.cc",
+        ],
+        exclude = [
+            "gloo/cuda*.cc",
+            "gloo/rendezvous/redis_store.cc",
+        ],
+    ),
+    copts = [
+        # Only one -std= flag is kept. The list previously also carried
+        # "-std=gnu++11" ahead of this one; the later flag takes precedence,
+        # so c++11 was already the effective standard.
+        "-std=c++11",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [":gloo_headers"] + if_cuda(
+        [":gloo_cuda"],
+        [],
+    ),
+)
diff --git a/third_party/ideep.BUILD b/third_party/ideep.BUILD
new file mode 100644
index 0000000..882d5cb
--- /dev/null
+++ b/third_party/ideep.BUILD
@@ -0,0 +1,17 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "ideep",
+ hdrs = glob([
+ "include/**/*.hpp",
+ "include/**/*.h",
+ ]),
+ defines = [
+ "IDEEP_USE_MKL",
+ ],
+ includes = [
+ "include/",
+ ],
+ visibility = ["//visibility:public"],
+ deps = ["@mkl_dnn//:mkl-dnn"],
+)
diff --git a/third_party/miniz-2.0.8/BUILD.bazel b/third_party/miniz-2.0.8/BUILD.bazel
new file mode 100644
index 0000000..c105e73
--- /dev/null
+++ b/third_party/miniz-2.0.8/BUILD.bazel
@@ -0,0 +1,10 @@
+cc_library(
+ name = "miniz",
+ srcs = [
+ "miniz.c",
+ ],
+ hdrs = [
+ "miniz.h",
+ ],
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/mkl-dnn.BUILD b/third_party/mkl-dnn.BUILD
new file mode 100644
index 0000000..fdb887c
--- /dev/null
+++ b/third_party/mkl-dnn.BUILD
@@ -0,0 +1,89 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//third_party:substitution.bzl", "template_rule")
+
+template_rule(
+ name = "include_dnnl_version",
+ src = "include/dnnl_version.h.in",
+ out = "include/dnnl_version.h",
+ substitutions = {
+ "@DNNL_VERSION_MAJOR@": "1",
+ "@DNNL_VERSION_MINOR@": "2",
+ "@DNNL_VERSION_PATCH@": "0",
+ "@DNNL_VERSION_HASH@": "70f8b879ea7a0c38caedb3320b7c85e8497ff50d",
+ },
+)
+
+template_rule(
+ name = "include_dnnl_config",
+ src = "include/dnnl_config.h.in",
+ out = "include/dnnl_config.h",
+ substitutions = {
+ "cmakedefine": "define",
+ "${DNNL_CPU_THREADING_RUNTIME}": "OMP",
+ "${DNNL_CPU_RUNTIME}": "OMP",
+ "${DNNL_GPU_RUNTIME}": "NONE",
+ },
+)
+
+cc_library(
+ name = "mkl-dnn",
+ srcs = glob([
+ "src/common/*.cpp",
+ "src/cpu/*.cpp",
+ "src/cpu/binary/*.cpp",
+ "src/cpu/gemm/*.cpp",
+ "src/cpu/gemm/bf16/*.cpp",
+ "src/cpu/gemm/f32/*.cpp",
+ "src/cpu/gemm/s8x8s32/*.cpp",
+ "src/cpu/jit_utils/*.cpp",
+ "src/cpu/jit_utils/jitprofiling/*.c",
+ "src/cpu/jit_utils/linux_perf/*.cpp",
+ "src/cpu/matmul/*.cpp",
+ "src/cpu/resampling/*.cpp",
+ "src/cpu/rnn/*.cpp",
+ ]),
+ hdrs = glob([
+ "include/*.h",
+ "include/*.hpp",
+ "src/*.hpp",
+ "src/cpu/**/*.hpp",
+ "src/cpu/**/*.h",
+ "src/common/*.hpp",
+ "src/cpu/rnn/*.hpp",
+ ]) + [
+ "include/dnnl_version.h",
+ "include/dnnl_config.h",
+ ],
+ copts = [
+ "-DUSE_AVX",
+ "-DUSE_AVX2",
+ "-DDNNL_DLL",
+ "-DDNNL_DLL_EXPORTS",
+ "-DDNNL_ENABLE_CONCURRENT_EXEC",
+ "-DTH_BLAS_MKL",
+ "-D__STDC_CONSTANT_MACROS",
+ "-D__STDC_LIMIT_MACROS",
+ "-fno-strict-overflow",
+ "-fopenmp",
+ ] + select({
+ "@//tools/config:thread_sanitizer": ["-DMKLDNN_THR=0"],
+ "//conditions:default": ["-DMKLDNN_THR=2"],
+ }),
+ includes = [
+ "include/",
+ "src/",
+ "src/common/",
+ "src/cpu/",
+ "src/cpu/xbyak/",
+ ],
+ visibility = ["//visibility:public"],
+ linkopts = [
+ "-lgomp",
+ ],
+ deps = [
+ "@mkl",
+ ] + select({
+ "@//tools/config:thread_sanitizer": [],
+ "//conditions:default": ["@tbb"],
+ }),
+)
diff --git a/third_party/mkl.BUILD b/third_party/mkl.BUILD
new file mode 100644
index 0000000..bc868b2
--- /dev/null
+++ b/third_party/mkl.BUILD
@@ -0,0 +1,21 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "mkl",
+ srcs = [
+ "libmkl_avx2.so",
+ "libmkl_core.so",
+ "libmkl_def.so",
+ "libmkl_intel_lp64.so",
+ "libmkl_rt.so",
+ "libmkl_sequential.so",
+ "libmkl_vml_avx2.so",
+ "libmkl_vml_avx512.so",
+ "libmkl_vml_def.so",
+ ] + select({
+ "@//tools/config:thread_sanitizer": [],
+ "//conditions:default": ["libmkl_tbb_thread.so"],
+ }),
+ visibility = ["//visibility:public"],
+ deps = ["@mkl_headers"],
+)
diff --git a/third_party/mkl_headers.BUILD b/third_party/mkl_headers.BUILD
new file mode 100644
index 0000000..965801c
--- /dev/null
+++ b/third_party/mkl_headers.BUILD
@@ -0,0 +1,8 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+ name = "mkl_headers",
+ hdrs = glob(["include/*.h"]),
+ includes = ["include/"],
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/onnx.BUILD b/third_party/onnx.BUILD
new file mode 100644
index 0000000..b8809de
--- /dev/null
+++ b/third_party/onnx.BUILD
@@ -0,0 +1,113 @@
+load("@rules_proto//proto:defs.bzl", "proto_library")
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_proto_library")
+load("@rules_python//python:defs.bzl", "py_binary")
+
+py_binary(
+ name = "gen_proto",
+ srcs = ["onnx/gen_proto.py"],
+ data = [
+ "onnx/onnx.in.proto",
+ "onnx/onnx-operators.in.proto",
+ ],
+)
+
+genrule(
+ name = "generate_onnx_proto",
+ outs = [
+ "onnx/onnx_onnx_torch-ml.proto",
+ "onnx/onnx-ml.pb.h",
+ ],
+ cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx -m >/dev/null && sed -i 's/onnx_onnx_torch-ml.pb.h/onnx\\/onnx_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-ml.pb.h",
+ tools = [":gen_proto"],
+)
+
+genrule(
+ name = "generate_onnx_operators_proto",
+ outs = [
+ "onnx/onnx-operators_onnx_torch-ml.proto",
+ "onnx/onnx-operators-ml.pb.h",
+ ],
+ cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx-operators -m >/dev/null && sed -i 's/onnx-operators_onnx_torch-ml.pb.h/onnx\\/onnx-operators_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-operators-ml.pb.h",
+ tools = [":gen_proto"],
+)
+
+# ONNX C++ library built with ONNX_ML enabled and the onnx_torch namespace.
+# The two *-ml.pb.h headers appended to hdrs are outputs of the
+# generate_onnx_proto / generate_onnx_operators_proto genrules in this file.
+cc_library(
+    name = "onnx",
+    srcs = glob(
+        [
+            "onnx/*.cc",
+            "onnx/common/*.cc",
+            "onnx/defs/*.cc",
+            "onnx/defs/controlflow/*.cc",
+            "onnx/defs/experiments/*.cc",
+            "onnx/defs/generator/*.cc",
+            "onnx/defs/logical/*.cc",
+            "onnx/defs/math/*.cc",
+            "onnx/defs/nn/*.cc",
+            "onnx/defs/object_detection/*.cc",
+            "onnx/defs/quantization/*.cc",
+            "onnx/defs/reduction/*.cc",
+            "onnx/defs/rnn/*.cc",
+            "onnx/defs/sequence/*.cc",
+            "onnx/defs/tensor/*.cc",
+            "onnx/defs/traditionalml/*.cc",
+            "onnx/optimizer/*.cc",
+            "onnx/shape_inference/*.cc",
+            "onnx/version_converter/*.cc",
+        ],
+        exclude = [
+            "onnx/cpp2py_export.cc",
+        ],
+    ),
+    hdrs = glob([
+        "onnx/*.h",
+        "onnx/version_converter/*.h",
+        "onnx/common/*.h",
+        "onnx/defs/*.h",
+        "onnx/defs/tensor/*.h",
+        "onnx/shape_inference/*.h",
+        "onnx/optimizer/*.h",
+        "onnx/optimizer/passes/*.h",
+        "onnx/version_converter/adapters/*.h",
+    ]) + [
+        "onnx/onnx-ml.pb.h",
+        "onnx/onnx-operators-ml.pb.h",
+    ],
+    defines = [
+        "ONNX_ML=1",
+        "ONNX_NAMESPACE=onnx_torch",
+    ],
+    includes = [
+        ".",
+        "onnx/",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":onnx_proto_lib",
+    ],
+)
+
+cc_library(
+ name = "onnx_proto_headers",
+ hdrs = glob([
+ "onnx/*_pb.h",
+ ]),
+ visibility = ["//visibility:public"],
+ deps = [
+ ":onnx_proto_lib",
+ ],
+)
+
+proto_library(
+ name = "onnx_proto",
+ srcs = [
+ "onnx/onnx-operators_onnx_torch-ml.proto",
+ "onnx/onnx_onnx_torch-ml.proto",
+ ],
+)
+
+cc_proto_library(
+ name = "onnx_proto_lib",
+ deps = [":onnx_proto"],
+)
diff --git a/third_party/sleef.BUILD b/third_party/sleef.BUILD
new file mode 100644
index 0000000..6b7349e
--- /dev/null
+++ b/third_party/sleef.BUILD
@@ -0,0 +1,494 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+load("@//third_party:sleef.bzl", "sleef_cc_library")
+
+SLEEF_COPTS = [
+ "-DHAVE_MALLOC_USABLE_SIZE=1",
+ "-DHAVE_MMAP=1",
+ "-DHAVE_SHM_OPEN=1",
+ "-DHAVE_SHM_UNLINK=1",
+ "-DIDEEP_USE_MKL",
+ "-DMKLDNN_THR=MKLDNN_THR_TBB",
+ "-DONNX_ML=1",
+ "-DONNX_NAMESPACE=onnx",
+ "-DTH_BLAS_MKL",
+ "-D_FILE_OFFSET_BITS=64",
+ "-ffp-contract=off",
+ "-fno-math-errno",
+ "-fno-trapping-math",
+ "-DCAFFE2_USE_GLOO",
+ "-DCUDA_HAS_FP16=1",
+ "-DHAVE_GCC_GET_CPUID",
+ "-DUSE_AVX",
+ "-DUSE_AVX2",
+ "-DTH_HAVE_THREAD",
+ "-std=gnu99",
+]
+
+SLEEF_COMMON_TARGET_COPTS = [
+ "-DSLEEF_STATIC_LIBS=1",
+ "-DENABLE_ALIAS=1",
+]
+
+SLEEF_PRIVATE_HEADERS = glob([
+ "build/include/*.h",
+ "src/arch/*.h",
+ "src/common/*.h",
+ "src/libm/*.h",
+ "src/libm/include/*.h",
+])
+
+SLEEF_PUBLIC_HEADERS = [
+ ":sleef_h",
+]
+
+SLEEF_PRIVATE_INCLUDES = [
+ "-Iexternal/sleef/src/arch",
+ "-Iexternal/sleef/src/common",
+]
+
+SLEEF_PUBLIC_INCLUDES = [
+ "build/include",
+]
+
+SLEEF_VISIBILITY = [
+ "@pytorch//:__subpackages__",
+]
+
+cc_binary(
+ name = "mkalias",
+ srcs = [
+ "src/libm/funcproto.h",
+ "src/libm/mkalias.c",
+ ],
+)
+
+genrule(
+ name = "alias_avx512f_h",
+ outs = ["alias_avx512f.h"],
+ cmd = "{ " + "; ".join([
+ "$(location :mkalias) -16 __m512 __m512i e avx512f",
+ "$(location :mkalias) 8 __m512d __m256i e avx512f",
+ ]) + "; } > $@",
+ tools = [":mkalias"],
+)
+
+cc_binary(
+ name = "mkdisp",
+ srcs = [
+ "src/libm/funcproto.h",
+ "src/libm/mkdisp.c",
+ ],
+ copts = SLEEF_COPTS,
+)
+
+genrule(
+ name = "dispavx_c",
+ srcs = ["src/libm/dispavx.c.org"],
+ outs = ["dispavx.c"],
+ cmd = "{ cat $(location src/libm/dispavx.c.org); $(location :mkdisp) 4 8 __m256d __m256 __m128i avx fma4 avx2; } > $@",
+ tools = [":mkdisp"],
+)
+
+genrule(
+ name = "dispsse_c",
+ srcs = ["src/libm/dispsse.c.org"],
+ outs = ["dispsse.c"],
+ cmd = "{ cat $(location src/libm/dispsse.c.org); $(location :mkdisp) 2 4 __m128d __m128 __m128i sse2 sse4 avx2128; } > $@",
+ tools = [":mkdisp"],
+)
+
+cc_binary(
+ name = "mkrename",
+ srcs = [
+ "src/libm/funcproto.h",
+ "src/libm/mkrename.c",
+ ],
+)
+
+genrule(
+ name = "renameavx_h",
+ outs = ["renameavx.h"],
+ cmd = "$(location :mkrename) cinz_ 4 8 avx > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renameavx2_h",
+ outs = ["renameavx2.h"],
+ cmd = "$(location :mkrename) finz_ 4 8 avx2 > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renameavx2128_h",
+ outs = ["renameavx2128.h"],
+ cmd = "$(location :mkrename) finz_ 2 4 avx2128 > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renameavx512f_h",
+ outs = ["renameavx512f.h"],
+ cmd = "$(location :mkrename) finz_ 8 16 avx512f > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renameavx512fnofma_h",
+ outs = ["renameavx512fnofma.h"],
+ cmd = "$(location :mkrename) cinz_ 8 16 avx512fnofma > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renamefma4_h",
+ outs = ["renamefma4.h"],
+ cmd = "$(location :mkrename) finz_ 4 8 fma4 > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renamepurec_scalar_h",
+ outs = ["renamepurec_scalar.h"],
+ cmd = "$(location :mkrename) cinz_ 1 1 purec > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renamepurecfma_scalar_h",
+ outs = ["renamepurecfma_scalar.h"],
+ cmd = "$(location :mkrename) finz_ 1 1 purecfma > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renamesse2_h",
+ outs = ["renamesse2.h"],
+ cmd = "$(location :mkrename) cinz_ 2 4 sse2 > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "renamesse4_h",
+ outs = ["renamesse4.h"],
+ cmd = "$(location :mkrename) cinz_ 2 4 sse4 > $@",
+ tools = [":mkrename"],
+)
+
+genrule(
+ name = "sleef_h",
+ srcs = [
+ "src/libm/sleeflibm_header.h.org",
+ "src/libm/sleeflibm_footer.h.org",
+ ],
+ outs = ["build/include/sleef.h"],
+ cmd = "{ " + "; ".join([
+ "cat $(location src/libm/sleeflibm_header.h.org)",
+ "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__",
+ "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2",
+ "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4",
+ "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__",
+ "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ avx",
+ "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ fma4",
+ "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2",
+ "$(location :mkrename) finz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128",
+ "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__",
+ "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f",
+ "$(location :mkrename) cinz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512fnofma",
+ "$(location :mkrename) cinz_ 1 1 double float int32_t int32_t __STDC__ purec",
+ "$(location :mkrename) finz_ 1 1 double float int32_t int32_t FP_FAST_FMA purecfma",
+ "cat $(location src/libm/sleeflibm_footer.h.org)",
+ ]) + "; } > $@",
+ tools = [":mkrename"],
+)
+
+cc_library(
+ name = "sleef",
+ srcs = [
+ "src/libm/rempitab.c",
+ "src/libm/sleefdp.c",
+ "src/libm/sleefld.c",
+ "src/libm/sleefqp.c",
+ "src/libm/sleefsp.c",
+ ],
+ hdrs = SLEEF_PUBLIC_HEADERS,
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLEFLOAT128=1",
+ "-Wno-unused-result",
+ ],
+ includes = SLEEF_PUBLIC_INCLUDES,
+ # -lgcc resolves
+ # U __addtf3
+ # U __eqtf2
+ # U __fixtfdi
+ # U __floatditf
+ # U __gttf2
+ # U __lttf2
+ # U __multf3
+ # U __subtf3
+ # in bazel-bin/external/sleef/_objs/sleef/sleefqp.pic.o
+ linkopts = [
+ "-lgcc",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ # The purpose of the lists in deps is to keep related pairs of
+ # libraries together. In particular, each pair that contains a *det*
+ # library originates with a sleef_cc_library().
+ deps = [
+ ":common",
+ ":dispavx",
+ ":dispsse",
+ ] + [
+ ":sleefavx",
+ ":sleefdetavx",
+ ] + [
+ ":sleefavx2",
+ ":sleefdetavx2",
+ ] + [
+ ":sleefavx2128",
+ ":sleefdetavx2128",
+ ] + [
+ ":sleefavx512f",
+ ":sleefdetavx512f",
+ ] + [
+ ":sleefavx512fnofma",
+ ":sleefdetavx512fnofma",
+ ] + [
+ ":sleeffma4",
+ ":sleefdetfma4",
+ ] + [
+ ":sleefsse2",
+ ":sleefdetsse2",
+ ] + [
+ ":sleefsse4",
+ ":sleefdetsse4",
+ ] + [
+ ":sleefpurec_scalar",
+ ":sleefdetpurec_scalar",
+ ] + [
+ ":sleefpurecfma_scalar",
+ ":sleefdetpurecfma_scalar",
+ ],
+ alwayslink = True,
+)
+
+cc_library(
+ name = "common",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/common/common.c",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + [
+ "-Wno-unused-result",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+cc_library(
+ name = "dispavx",
+ srcs = SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [
+ ":dispavx_c",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DENABLE_AVX2=1",
+ "-DENABLE_FMA4=1",
+ "-mavx",
+ ],
+ includes = SLEEF_PUBLIC_INCLUDES,
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+cc_library(
+ name = "dispsse",
+ srcs = SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [
+ ":dispsse_c",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DENABLE_AVX2=1",
+ "-DENABLE_FMA4=1",
+ "-msse2",
+ ],
+ includes = SLEEF_PUBLIC_INCLUDES,
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefavx512f",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":alias_avx512f_h",
+ ":renameavx512f_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DALIAS_NO_EXT_SUFFIX=\\\"alias_avx512f.h\\\"",
+ "-DENABLE_AVX512F=1",
+ "-mavx512f",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefavx512fnofma",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renameavx512fnofma_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_AVX512FNOFMA=1",
+ "-mavx512f",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefavx",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renameavx_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_AVX=1",
+ "-mavx",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefavx2",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renameavx2_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_AVX2=1",
+ "-mavx2",
+ "-mfma",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefavx2128",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renameavx2128_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_AVX2128=1",
+ "-mavx2",
+ "-mfma",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleeffma4",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renamefma4_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_FMA4=1",
+ "-mfma4",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefsse2",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renamesse2_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_SSE2=1",
+ "-msse2",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefsse4",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renamesse4_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_SSE4=1",
+ "-msse4.1",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefpurec_scalar",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renamepurec_scalar_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_PUREC_SCALAR=1",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
+
+sleef_cc_library(
+ name = "sleefpurecfma_scalar",
+ srcs = SLEEF_PRIVATE_HEADERS + [
+ "src/libm/sleefsimddp.c",
+ "src/libm/sleefsimdsp.c",
+ ":renamepurecfma_scalar_h",
+ ],
+ copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+ "-DDORENAME=1",
+ "-DENABLE_PURECFMA_SCALAR=1",
+ "-mavx2",
+ "-mfma",
+ ],
+ linkstatic = True,
+ visibility = SLEEF_VISIBILITY,
+ alwayslink = True,
+)
diff --git a/third_party/sleef.bzl b/third_party/sleef.bzl
new file mode 100644
index 0000000..6fb0223
--- /dev/null
+++ b/third_party/sleef.bzl
@@ -0,0 +1,22 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# This macro provides for generating both "sleef<foo>" and
+# "sleefdet<foo>" libraries for a given set of code. The difference is
+# that the "det" libraries get compiled with "-DDETERMINISTIC=1".
+
+def sleef_cc_library(name, copts, **kwargs):
+    """Declares a "sleef<foo>" library together with its "sleefdet<foo>" twin.
+
+    Both libraries receive the same kwargs; the "det" twin is additionally
+    compiled with -DDETERMINISTIC=1.
+
+    Args:
+      name: name of the plain library; must start with "sleef". The twin's
+        name is formed by replacing that leading "sleef" with "sleefdet".
+      copts: compiler options for the plain library.
+      **kwargs: forwarded unchanged to both cc_library() calls.
+    """
+    prefix = "sleef"
+
+    # Validate the name before declaring anything, so a bad name is rejected
+    # up front rather than after the first target has been created.
+    if not name.startswith(prefix):
+        fail("name {} does not start with {}".format(repr(name), repr(prefix)))
+
+    cc_library(
+        name = name,
+        copts = copts,
+        **kwargs
+    )
+
+    cc_library(
+        name = name.replace(prefix, prefix + "det", 1),
+        copts = copts + ["-DDETERMINISTIC=1"],
+        **kwargs
+    )
diff --git a/third_party/substitution.bzl b/third_party/substitution.bzl
new file mode 100644
index 0000000..bcc24ca
--- /dev/null
+++ b/third_party/substitution.bzl
@@ -0,0 +1,44 @@
+# This Bazel rules file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/common.bzl
+
+# Rule for simple expansion of template files. This performs a simple
+# search over the template file for the keys in substitutions,
+# and replaces them with the corresponding values.
+#
+# Typical usage:
+# load("@//third_party:substitution.bzl", "template_rule")
+# template_rule(
+# name = "ExpandMyTemplate",
+# src = "my.template",
+# out = "my.txt",
+# substitutions = {
+# "$VAR1": "foo",
+# "$VAR2": "bar",
+# }
+# )
+#
+# Args:
+# name: The name of the rule.
+# src: The template file to expand
+# out: The destination of the expanded file
+# substitutions: A dictionary mapping strings to their substitutions
+
+def template_rule_impl(ctx):
+    """Expands the rule's `src` template into `out` using `substitutions`."""
+    template_file = ctx.file.src
+    expanded_file = ctx.outputs.out
+    replacements = ctx.attr.substitutions
+
+    ctx.actions.expand_template(
+        template = template_file,
+        output = expanded_file,
+        substitutions = replacements,
+    )
+
+# Rule that writes `out` by expanding the `src` template with the given
+# `substitutions`; all three attributes are mandatory.
+template_rule = rule(
+ attrs = {
+ # The template to expand; must resolve to exactly one file.
+ "src": attr.label(
+ mandatory = True,
+ allow_single_file = True,
+ ),
+ # The file the expanded template is written to.
+ "out": attr.output(mandatory = True),
+ # Maps each string to search for in the template to its replacement.
+ "substitutions": attr.string_dict(mandatory = True),
+ },
+ # output_to_genfiles is required for header files.
+ output_to_genfiles = True,
+ implementation = template_rule_impl,
+)
diff --git a/third_party/tbb.BUILD b/third_party/tbb.BUILD
new file mode 100644
index 0000000..b7e18c9
--- /dev/null
+++ b/third_party/tbb.BUILD
@@ -0,0 +1,75 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//third_party:substitution.bzl", "template_rule")
+
+licenses(["notice"]) # Apache 2.0
+
+template_rule(  # Expands the .ver.in template below into version_string.h.
+ name = "version_string",
+ src = "@//:aten/src/ATen/cpu/tbb/extra/version_string.ver.in",
+ out = "version_string.h",
+ substitutions = {  # every placeholder listed here is replaced with "Unknown"
+ "@CMAKE_SYSTEM_NAME@": "Unknown",
+ "@CMAKE_SYSTEM@": "Unknown",
+ "@CMAKE_SYSTEM_VERSION@": "Unknown",
+ "@CMAKE_CXX_COMPILER_ID@": "Unknown",
+ "@_configure_date@": "Unknown",
+ }
+)
+
+cc_library(
+ name = "tbb",
+ srcs = [":version_string"] + glob(  # generated version_string.h plus globbed TBB sources
+ [
+ "src/old/*.h",
+ "src/rml/client/*.h",
+ "src/rml/include/*.h",
+ "src/rml/server/*.h",
+ "src/tbb/*.h",
+ "src/tbb/tools_api/*.h",
+ "src/tbb/tools_api/legacy/*.h",
+ "src/old/*.cpp",
+ "src/tbb/*.cpp",
+ ],
+ exclude = ["src/old/test_*.cpp"],  # leave out the test_* sources under src/old
+ ) + ["src/rml/client/rml_tbb.cpp"],  # the only src/rml/client .cpp that is listed
+ hdrs = glob(
+ [
+ "include/tbb/*",
+ "include/tbb/compat/*",
+ "include/tbb/internal/*",
+ "include/tbb/machine/*",
+ ],
+ exclude = ["include/tbb/scalable_allocator.h"],  # NOTE(review): reason not visible here - confirm
+ ),
+ copts = [
+ "-Iexternal/tbb/src/rml/include",
+ "-Iexternal/tbb/src",
+ "-pthread",
+ "-DDO_ITT_NOTIFY=1",
+ "-DUSE_PTHREAD=1",
+ "-D__TBB_BUILD=1",
+ "-D__TBB_DYNAMIC_LOAD_ENABLED=0",
+ "-D__TBB_SOURCE_DIRECTLY_INCLUDED=1",
+ "-fno-sanitize=vptr",
+ "-fno-sanitize=thread",
+ ],
+ defines = [
+ # TBB cannot detect the standard library version when using clang with libstdc++,
+ # so the version is supplied explicitly. See https://github.com/01org/tbb/issues/22
+ "TBB_USE_GLIBCXX_VERSION=(_GLIBCXX_RELEASE*10000)",
+ "TBB_PREVIEW_GLOBAL_CONTROL=1",
+ "TBB_PREVIEW_LOCAL_OBSERVER=1",
+ "__TBB_ALLOW_MUTABLE_FUNCTORS=1",
+ ],
+ includes = [
+ "include",
+ "src/tbb/tools_api",
+ ],
+ linkopts = [
+ "-ldl",
+ "-lpthread",
+ "-lrt",
+ ],
+ textual_hdrs = ["src/tbb/tools_api/ittnotify_static.c"],  # textual header: not compiled on its own
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/tbb.patch b/third_party/tbb.patch
new file mode 100644
index 0000000..4a1f684
--- /dev/null
+++ b/third_party/tbb.patch
@@ -0,0 +1,34 @@
+diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp
+index 2508465..1e22ad2 100644
+--- a/src/rml/server/rml_server.cpp
++++ b/src/rml/server/rml_server.cpp
+@@ -3279,10 +3279,10 @@ extern "C" void __KMP_call_with_my_server_info( ::rml::server_info_callback_t cb
+ /*
+ * RML server info
+ */
+-#include "version_string.ver"
++#include "version_string.h"
+
+ #ifndef __TBB_VERSION_STRINGS
+-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
+ #endif
+
+ // We use the build time as the RML server info. TBB is required to build RML, so we make it the same as the TBB build time.
+diff --git a/src/tbb/tbb_version.h b/src/tbb/tbb_version.h
+index dcaa55b..4981a8a 100644
+--- a/src/tbb/tbb_version.h
++++ b/src/tbb/tbb_version.h
+@@ -25,10 +25,10 @@
+ #ifndef ENDL
+ #define ENDL "\n"
+ #endif
+-#include "version_string.ver"
++#include "version_string.h"
+
+ #ifndef __TBB_VERSION_STRINGS
+-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
+ // here is an example of macros value:
+ #define __TBB_VERSION_STRINGS \
+ "TBB: BUILD_HOST\tUnknown\n" \
diff --git a/tools/config/BUILD b/tools/config/BUILD
new file mode 100644
index 0000000..a8f9d04
--- /dev/null
+++ b/tools/config/BUILD
@@ -0,0 +1,42 @@
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+config_setting(
+ name = "cuda",  # matches builds run with --define cuda=true
+ define_values = {
+ "cuda": "true",
+ },
+)
+
+# Even when building with --config=cuda, host targets should be built with cuda disabled
+# as these targets will run on CI machines that have no GPUs.
+selects.config_setting_group(
+ name = "cuda_enabled_and_capable",
+ match_all = [  # both settings must match
+ ":cuda",
+ "//tools/toolchain:is_cuda_capable",
+ ],
+)
+
+# Configures the system to build with cuda using clang.
+config_setting(
+ name = "cuda_clang",  # matches builds run with --define cuda_clang=true
+ define_values = {
+ "cuda_clang": "true",
+ },
+)
+
+# Indicates that cuda code should be compiled with nvcc.
+# Mostly exists to support _analysis_ of tensorflow; more work is needed to actually make this
+# setting work.
+config_setting(
+ name = "cuda_nvcc",  # matches builds run with --define cuda_nvcc=true
+ define_values = {
+ "cuda_nvcc": "true",
+ },
+)
+
+config_setting(
+ name = "thread_sanitizer",  # matches builds run with --define thread_sanitizer=1
+ define_values = {"thread_sanitizer": "1"},
+ visibility = ["//visibility:public"],
+)
diff --git a/tools/config/defs.bzl b/tools/config/defs.bzl
new file mode 100644
index 0000000..6ddd0e9
--- /dev/null
+++ b/tools/config/defs.bzl
@@ -0,0 +1,65 @@
+"""
+ Macros for selecting with / without various GPU libraries. Most of these are meant to be used
+ directly by tensorflow in place of their build's own configure.py + bazel-gen system.
+"""
+
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+def if_cuda(if_true, if_false = []):
+    """Returns a select(): if_true when cuda_enabled_and_capable matches, else if_false."""
+    cuda_key = "@//tools/config:cuda_enabled_and_capable"
+    return selects.with_or(
+        {cuda_key: if_true, "//conditions:default": if_false},
+    )
+
+def if_tensorrt(if_true, if_false = []):
+    """TensorRT selector with only a default branch: if_true is unused, if_false is chosen."""
+    return select(
+        {"//conditions:default": if_false},
+    )
+
+def if_rocm(if_true, if_false = []):
+    """ROCM selector with only a default branch: if_true is unused, if_false is chosen."""
+    return select(
+        {"//conditions:default": if_false},
+    )
+
+def if_sycl(if_true, if_false = []):
+    """SYCL/ComputeCPP selector with only a default branch: if_false is always chosen."""
+
+    # NOTE: Tensorflow expects some strange behavior (see their if_sycl) if we
+    # actually plan on supporting this at some point.
+    return select(
+        {"//conditions:default": if_false},
+    )
+
+def if_ccpp(if_true, if_false = []):
+    """ComputeCPP selector with only a default branch: if_true is unused, if_false is chosen."""
+    return select(
+        {"//conditions:default": if_false},
+    )
+
+def cuda_default_copts():  # copts added via if_cuda; its if_false default is []
+    return if_cuda(if_true = ["-DGOOGLE_CUDA=1"])
+
+def cuda_default_features():  # feature list chosen via if_cuda; its if_false default is []
+    return if_cuda(if_true = ["-per_object_debug_info", "-use_header_modules", "cuda_clang"])
+
+def rocm_default_copts():  # passes ["-x", "rocm"] as the if_true branch of if_rocm
+    return if_rocm(if_true = ["-x", "rocm"])
+
+def rocm_copts(opts = []):  # default ROCM copts followed by the caller's opts
+    return rocm_default_copts() + if_rocm(if_true = opts)
+
+def cuda_is_configured():
+ # FIXME(dcollins): always returns False. Currently only used by tensorflow's xla stuff, which we aren't
+ # building; however, bazel query hits it, so this needs to be defined. Because bazel doesn't actually resolve
+ # config at macro expansion time, `select` can't be used here (xla expects lists of strings, not lists of
+ # select objects). Instead, the xla build rules must be rewritten to use `if_cuda_is_configured`.
+ return False
+
+def if_cuda_is_configured(x):  # x under if_cuda's CUDA branch, [] otherwise
+    return if_cuda(if_true = x, if_false = [])
+
+def if_rocm_is_configured(x):  # forwards x as if_true and [] as if_false to if_rocm
+    return if_rocm(if_true = x, if_false = [])
diff --git a/tools/rules/BUILD b/tools/rules/BUILD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/rules/BUILD
diff --git a/tools/rules/cu.bzl b/tools/rules/cu.bzl
new file mode 100644
index 0000000..fa4b80d
--- /dev/null
+++ b/tools/rules/cu.bzl
@@ -0,0 +1,3 @@
+# GPU support is not available: cu_library accepts any attributes and declares nothing.
+def cu_library(**kwargs):
+    return None
diff --git a/tools/rules/workspace.bzl b/tools/rules/workspace.bzl
new file mode 100644
index 0000000..b519ae9
--- /dev/null
+++ b/tools/rules/workspace.bzl
@@ -0,0 +1,29 @@
+def _impl(repository_ctx):
+    # Tars the directory holding @<name>_unpatched//:README, unpacks it here, patches, adds BUILD.bazel.
+    repo_name = repository_ctx.attr.name
+    unpatched_root = repository_ctx.path(Label("@%s_unpatched//:README" % repo_name)).dirname
+    tarball = repo_name + ".tar"
+    repository_ctx.execute(["tar", "hcf", tarball, "-C", unpatched_root, "."])
+    repository_ctx.extract(tarball)
+    for patch_label in repository_ctx.attr.patches:
+        repository_ctx.patch(repository_ctx.path(patch_label), repository_ctx.attr.patch_strip)
+    repository_ctx.execute(["cp", repository_ctx.path(repository_ctx.attr.build_file), "BUILD.bazel"])
+
+_patched_rule = repository_rule(
+    attrs = {
+        "build_file": attr.label(),  # copied into the repository as BUILD.bazel
+        "patch_strip": attr.int(),  # strip count passed to repository_ctx.patch
+        "patches": attr.label_list(),  # patch files, applied in list order
+    },
+    implementation = _impl,
+)
+
+def new_patched_local_repository(name, path, **kwargs):
+    # Declares @<name>_unpatched for `path`, then @<name> via _patched_rule with the given kwargs.
+    unpatched_build = """
+pkg_tar(name = "content", srcs = glob(["**"]))
+"""
+    native.new_local_repository(
+        name = name + "_unpatched", path = path, build_file_content = unpatched_build,
+    )
+    _patched_rule(name = name, **kwargs)