| """Repository rule for CUDA autoconfiguration. |
| |
| `cuda_configure` depends on the following environment variables: |
| |
| * `TF_NEED_CUDA`: Whether to enable building with CUDA. |
| * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path |
| * `TF_CUDA_CLANG`: Whether to use clang as a cuda compiler. |
| * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for |
| both host and device code compilation if TF_CUDA_CLANG is 1. |
| * `TF_SYSROOT`: The sysroot to use when compiling. |
| * `TF_DOWNLOAD_CLANG`: Whether to download a recent release of clang |
| compiler and use it to build tensorflow. When this option is set |
| CLANG_CUDA_COMPILER_PATH is ignored. |
| * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is |
| `/usr/local/cuda,/usr/`. |
| * `CUDA_TOOLKIT_PATH` (deprecated): The path to the CUDA toolkit. Default is |
| `/usr/local/cuda`. |
| * `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then |
| use the system default. |
| * `TF_CUDNN_VERSION`: The version of the cuDNN library. |
| * `CUDNN_INSTALL_PATH` (deprecated): The path to the cuDNN library. Default is |
| `/usr/local/cuda`. |
| * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities, as a |
| comma-separated list. Default is `compute_35,compute_52`. |
| * `PYTHON_BIN_PATH`: The python binary path |
| """ |
| |
| load("//third_party/clang_toolchain:download_clang.bzl", "download_clang") |
| load( |
| "@bazel_tools//tools/cpp:lib_cc_configure.bzl", |
| "escape_string", |
| "get_env_var", |
| ) |
| load( |
| "@bazel_tools//tools/cpp:windows_cc_configure.bzl", |
| "find_msvc_tool", |
| "find_vc_path", |
| "setup_vc_env_vars", |
| ) |
| load( |
| "//third_party/remote_config:common.bzl", |
| "config_repo_label", |
| "err_out", |
| "execute", |
| "get_bash_bin", |
| "get_cpu_value", |
| "get_host_environ", |
| "get_python_bin", |
| "is_windows", |
| "raw_exec", |
| "read_dir", |
| "realpath", |
| "which", |
| ) |
| |
# Names of the environment variables consulted by this repository rule. The
# constants hold the variable *names*, not their values.

# Host compiler selection: find_cc() reads the gcc path variable by default
# and the clang one when TF_CUDA_CLANG is enabled.
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"

# Read by _tf_sysroot(); an empty string is used when it is unset.
_TF_SYSROOT = "TF_SYSROOT"
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH"

# Read by compute_capabilities() as a comma-separated list.
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO"

# When set to "1" together with TF_CUDA_CLANG, find_cc() returns the path of
# the downloaded clang instead of searching the host.
_TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
| |
def to_list_of_strings(elements):
    """Renders a list of strings as the inside of a Starlark list literal.

    For example ["a", "b", "c"] becomes the single string '"a", "b", "c"'.
    The result is meant to be substituted into bzl file templates, where it
    is then parsed as the elements of a list of strings.

    Args:
      elements: list of string elements

    Returns:
      One string holding every element wrapped in double quotes, with the
      elements separated by ", ".
    """
    quoted = []
    for element in elements:
        quoted.append("\"" + element + "\"")
    return ", ".join(quoted)
| |
def verify_build_defines(params):
    """Checks that every variable crosstool/BUILD.tpl expects has a value.

    Fails the configuration (via auto_configure_fail) when at least one
    expected "%{name}" key is absent from params.

    Args:
      params: dict of variables that will be passed to the BUILD.tpl template.
    """
    expected_variables = [
        "cxx_builtin_include_directories",
        "extra_no_canonical_prefixes_flags",
        "host_compiler_path",
        "host_compiler_prefix",
        "host_compiler_warnings",
        "linker_bin_path",
        "compiler_deps",
        "msvc_cl_path",
        "msvc_env_include",
        "msvc_env_lib",
        "msvc_env_path",
        "msvc_env_tmp",
        "msvc_lib_path",
        "msvc_link_path",
        "msvc_ml_path",
        "unfiltered_compile_flags",
        "win_compiler_deps",
    ]

    # Template keys are spelled "%{name}" in the substitution dict.
    missing = [
        variable
        for variable in expected_variables
        if ("%{" + variable + "}") not in params
    ]
    if not missing:
        return

    auto_configure_fail(
        "BUILD.tpl template is missing these variables: " +
        str(missing) +
        ".\nWe only got: " +
        str(params) +
        ".",
    )
| |
def _get_nvcc_tmp_dir_for_windows(repository_ctx):
    """Returns the escaped Windows directory where nvcc puts intermediate files.

    The base directory comes from the TMP environment variable, falling back
    to C:\\Windows\\Temp; a fixed "nvcc_inter_files_tmp_dir" child is appended.
    """
    tmp_dir = get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp")

    # Double every backslash before handing the path to escape_string.
    doubled_backslashes = tmp_dir.replace("\\", "\\\\")
    return escape_string(doubled_backslashes) + "\\\\nvcc_inter_files_tmp_dir"
| |
def _get_msvc_compiler(repository_ctx):
    """Returns the path of MSVC's cl.exe, written with forward slashes."""
    cl_path = find_msvc_tool(
        repository_ctx,
        find_vc_path(repository_ctx),
        "cl.exe",
    )
    return cl_path.replace("\\", "/")
| |
def _get_win_cuda_defines(repository_ctx):
    """Returns the CROSSTOOL template substitutions for the MSVC fields.

    Args:
      repository_ctx: The repository context.

    Returns:
      A dict keyed by "%{...}" template variables. On non-Windows hosts every
      msvc_* key maps to the placeholder "msvc_not_used". On Windows the keys
      carry the escaped MSVC environment (PATH, INCLUDE, LIB, TMP), the tool
      paths, and additionally "%{cxx_builtin_include_directories}".
    """

    # If we are not on Windows, return fake values for Windows specific fields.
    # This ensures the CROSSTOOL file parser is happy.
    if not is_windows(repository_ctx):
        return {
            "%{msvc_env_tmp}": "msvc_not_used",
            "%{msvc_env_path}": "msvc_not_used",
            "%{msvc_env_include}": "msvc_not_used",
            "%{msvc_env_lib}": "msvc_not_used",
            "%{msvc_cl_path}": "msvc_not_used",
            "%{msvc_ml_path}": "msvc_not_used",
            "%{msvc_link_path}": "msvc_not_used",
            "%{msvc_lib_path}": "msvc_not_used",
        }

    vc_path = find_vc_path(repository_ctx)
    if not vc_path:
        # auto_configure_fail() calls fail(), which aborts evaluation, so no
        # fallback return value is needed after it.
        auto_configure_fail(
            "Visual C++ build tools not found on your machine. " +
            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using",
        )

    env = setup_vc_env_vars(repository_ctx, vc_path)
    escaped_paths = escape_string(env["PATH"])
    escaped_include_paths = escape_string(env["INCLUDE"])
    escaped_lib_paths = escape_string(env["LIB"])
    escaped_tmp_dir = escape_string(
        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
            "\\",
            "\\\\",
        ),
    )

    # NOTE(review): the cl path slot is filled with the Python binary rather
    # than cl.exe; presumably the template runs a Python wrapper - confirm
    # against crosstool/BUILD.tpl.
    msvc_cl_path = get_python_bin(repository_ctx)
    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
        "\\",
        "/",
    )
    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
        "\\",
        "/",
    )
    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
        "\\",
        "/",
    )

    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
    # The generated files are guaranteed to have unique name, so they can share
    # the same tmp directory
    escaped_cxx_include_directories = [
        _get_nvcc_tmp_dir_for_windows(repository_ctx),
        "C:\\\\botcode\\\\w",
    ]

    # INCLUDE is a ';'-separated list; skip the empty entries produced by
    # leading, trailing or doubled separators.
    for path in escaped_include_paths.split(";"):
        if path:
            escaped_cxx_include_directories.append(path)

    return {
        "%{msvc_env_tmp}": escaped_tmp_dir,
        "%{msvc_env_path}": escaped_paths,
        "%{msvc_env_include}": escaped_include_paths,
        "%{msvc_env_lib}": escaped_lib_paths,
        "%{msvc_cl_path}": msvc_cl_path,
        "%{msvc_ml_path}": msvc_ml_path,
        "%{msvc_link_path}": msvc_link_path,
        "%{msvc_lib_path}": msvc_lib_path,
        "%{cxx_builtin_include_directories}": to_list_of_strings(
            escaped_cxx_include_directories,
        ),
    }
| |
| # TODO(dzc): Once these functions have been factored out of Bazel's |
| # cc_configure.bzl, load them from @bazel_tools instead. |
| # BEGIN cc_configure common functions. |
def find_cc(repository_ctx):
    """Locates the host C++ compiler.

    On Windows this is MSVC's cl.exe. Otherwise it is clang when
    TF_CUDA_CLANG is enabled (the downloaded one if TF_DOWNLOAD_CLANG is also
    enabled) and gcc when it is not. The matching *_COMPILER_PATH environment
    variable overrides the default name.

    Args:
      repository_ctx: The repository context.

    Returns:
      The compiler path. Fails the build if the compiler cannot be found.
    """
    if is_windows(repository_ctx):
        return _get_msvc_compiler(repository_ctx)

    use_clang = _use_cuda_clang(repository_ctx)
    if use_clang and _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
        return "extra_tools/bin/clang"

    default_name = "clang" if use_clang else "gcc"
    path_envvar = _CLANG_CUDA_COMPILER_PATH if use_clang else _GCC_HOST_COMPILER_PATH

    # An empty or unset environment variable falls back to the default name.
    requested = get_host_environ(repository_ctx, path_envvar) or default_name
    if requested.startswith("/"):
        # Absolute path, maybe we should make this supported by our which function.
        return requested

    resolved = which(repository_ctx, requested)
    if resolved == None:
        fail("Cannot find {}, either correct your path or set the {} environment variable".format(
            default_name,
            path_envvar,
        ))
    return resolved
| |
# Text searched for in the compiler's verbose stderr output; the include
# directories are read from the lines that follow the line containing it.
_INC_DIR_MARKER_BEGIN = "#include <...>"
| |
# On OSX some include lines end with " (framework directory)"; that suffix is
# not part of the path and has to be removed.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)

def _cxx_inc_convert(path):
    """Cleans one include line printed by `cc -E -xc++ -v` into a plain path.

    Surrounding whitespace is removed, and so is a trailing OSX
    " (framework directory)" annotation when present.
    """
    trimmed = path.strip()
    if not trimmed.endswith(_OSX_FRAMEWORK_SUFFIX):
        return trimmed
    return trimmed[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
| |
def _normalize_include_path(repository_ctx, path):
    """Prepares an include path for being written to the crosstool.

    Paths that start with the repository's 'crosstool' folder are returned
    relative to that folder; every other path is returned in the form
    produced by repository_ctx.path().
    """
    resolved = str(repository_ctx.path(path))
    crosstool_root = str(repository_ctx.path(".").get_child("crosstool"))

    if not resolved.startswith(crosstool_root):
        return resolved

    # Cut off "$REPO/crosstool" plus the path separator that follows it.
    return resolved[len(crosstool_root) + 1:]
| |
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
    """Computes the default include directories for either C or C++.

    Runs `cc -E -x<lang> - -v` (plus `--sysroot` when tf_sysroot is set) and
    parses the include search list out of its stderr output.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.
      lang_is_cpp: True to query C++ directories, False for C.
      tf_sysroot: Sysroot passed to the compiler; skipped when empty.

    Returns:
      The list of normalized include directories, or [] when the expected
      markers are not found in the compiler output.
    """
    lang = "c++" if lang_is_cpp else "c"
    sysroot_args = ["--sysroot", tf_sysroot] if tf_sysroot else []
    compiler_output = err_out(raw_exec(
        repository_ctx,
        [cc, "-E", "-x" + lang, "-", "-v"] + sysroot_args,
    ))

    # The listing starts on the line after the one holding the marker.
    marker_pos = compiler_output.find(_INC_DIR_MARKER_BEGIN)
    if marker_pos == -1:
        return []
    listing_start = compiler_output.find("\n", marker_pos)
    if listing_start == -1:
        return []

    # Directory lines are indented by one space, so the last "\n " marks the
    # final entry of the listing.
    last_entry = compiler_output.rfind("\n ")
    if last_entry == -1 or last_entry < listing_start:
        return []
    listing_end = compiler_output.find("\n", last_entry + 1)
    if listing_end == -1:
        listing = compiler_output[listing_start + 1:]
    else:
        listing = compiler_output[listing_start + 1:listing_end].strip()

    return [
        _normalize_include_path(repository_ctx, _cxx_inc_convert(entry))
        for entry in listing.split("\n")
    ]
| |
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
    """Computes the combined list of default C and C++ include directories.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.
      tf_sysroot: Sysroot passed to the compiler; skipped when empty.

    Returns:
      The C++ directories followed by every C directory that is not already
      among the C++ ones.
    """

    # `clang -xc` sometimes reports include paths that differ from the ones
    # of `clang -xc++` (a symlink versus a directory), so the compiler is
    # queried for both languages and the two lists are merged.
    cpp_dirs = _get_cxx_inc_directories_impl(repository_ctx, cc, True, tf_sysroot)
    c_dirs = _get_cxx_inc_directories_impl(repository_ctx, cc, False, tf_sysroot)

    merged = list(cpp_dirs)
    for directory in c_dirs:
        if directory not in cpp_dirs:
            merged.append(directory)
    return merged
| |
def auto_configure_fail(msg):
    """Aborts with msg, prefixed by a red "Cuda Configuration Error:" label.

    Args:
      msg: The description of what went wrong.
    """

    # ANSI escape sequences: switch to red, then reset the color.
    message = "\n%sCuda Configuration Error:%s %s\n" % ("\033[0;31m", "\033[0m", msg)
    fail(message)
| |
| # END cc_configure common functions (see TODO above). |
| |
def _cuda_include_path(repository_ctx, cuda_config):
    """Collects the include directories of the CUDA toolkit.

    Args:
      repository_ctx: The repository context.
      cuda_config: The CUDA config; its cuda_toolkit_path and cpu_value
        fields are read.

    Returns:
      A list of resolved include directories: the target-specific one when
      nvcc reports a _TARGET_DIR_, followed by <cuda_toolkit_path>/include.
    """
    exe_suffix = ".exe" if cuda_config.cpu_value == "Windows" else ""
    nvcc_path = repository_ctx.path(
        "%s/bin/nvcc%s" % (cuda_config.cuda_toolkit_path, exe_suffix),
    )

    # This nvcc invocation is expected to exit with a non-zero code. Bazel
    # remote execution only caches commands that exit with zero, so the
    # trailing test turns the expected exit code 1 into 0.
    cmd = "%s -v /dev/null -o /dev/null ; [ $? -eq 1 ]" % str(nvcc_path)
    result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd])

    target_dir_marker = "#$ _TARGET_DIR_="
    target_dir = ""
    for line in err_out(result).splitlines():
        # When several lines match, the last one wins.
        if line.startswith(target_dir_marker):
            relative_dir = line.replace(target_dir_marker, "")
            target_dir = cuda_config.cuda_toolkit_path + "/" + relative_dir + "/include"

    include_dirs = []
    if target_dir:
        include_dirs.append(realpath(repository_ctx, target_dir))
    include_dirs.append(
        realpath(repository_ctx, cuda_config.cuda_toolkit_path + "/include"),
    )
    return include_dirs
| |
def enable_cuda(repository_ctx):
    """Returns TF_NEED_CUDA as an int, which is 0 when the variable is unset.

    A non-zero result means the build should use CUDA.
    """
    need_cuda = get_host_environ(repository_ctx, "TF_NEED_CUDA", False)
    return int(need_cuda)
| |
def matches_version(environ_version, detected_version):
    """Tells whether the user-specified version agrees with the detected one.

    The match is a prefix match on the dot-separated components: the user may
    give only the major, or the major and minor version, and the versions
    still match as long as every given component is equal. To illustrate:

        environ_version   detected_version    result
        -----------------------------------------
        5.1.3             5.1.3               True
        5.1               5.1.3               True
        5                 5.1                 True
        5.1.3             5.1                 False
        5.2.3             5.1.3               False

    Args:
      environ_version: The version specified by the user via environment
        variables.
      detected_version: The version autodetected from the CUDA installation on
        the system.
    Returns: True if user-specified version matches detected version and False
      otherwise.
    """
    requested = environ_version.split(".")
    detected = detected_version.split(".")

    # The user cannot ask for more components than were detected.
    if len(requested) > len(detected):
        return False
    return detected[:len(requested)] == requested
| |
# NOTE(review): presumably the text that precedes the release number in the
# output of `nvcc --version` - confirm against where this constant is used.
_NVCC_VERSION_PREFIX = "Cuda compilation tools, release "

# NOTE(review): presumably the line prefix looked for in the cuDNN header to
# read the major version - confirm against where this constant is used.
_DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
| |
def compute_capabilities(repository_ctx):
    """Reads and validates the CUDA compute capabilities to build for.

    The value comes from TF_CUDA_COMPUTE_CAPABILITIES and defaults to
    "compute_35,compute_52". Legacy 'x.y' entries are translated to the
    'compute_xy' / 'sm_xy' spelling. Any entry that is not one of those two
    prefixes followed by exactly two digits fails the configuration.

    Args:
      repository_ctx: the repo rule's context.
    Returns: list of cuda architectures to compile for. 'compute_xy' refers to
      both PTX and SASS, 'sm_xy' refers to SASS only.
    """
    raw_value = get_host_environ(
        repository_ctx,
        _TF_CUDA_COMPUTE_CAPABILITIES,
        "compute_35,compute_52",
    )
    capabilities = raw_value.split(",")

    # Map old 'x.y' capabilities to 'compute_xy'.
    all_legacy = all([len(entry.split(".")) == 2 for entry in capabilities])
    if len(capabilities) > 0 and all_legacy:
        # When every entry uses the 'x.y' format, PTX is only included for the
        # highest capability; the others become SASS-only.
        ordered = sorted([entry.replace(".", "") for entry in capabilities])
        capabilities = ["sm_%s" % digits for digits in ordered[:-1]]
        capabilities = capabilities + ["compute_%s" % ordered[-1]]
    for index, entry in enumerate(capabilities):
        pieces = entry.split(".")
        if len(pieces) == 2:
            capabilities[index] = "compute_%s%s" % (pieces[0], pieces[1])

    # Drop duplicates while keeping the first occurrence of each entry.
    capabilities = {entry: entry for entry in capabilities}.keys()

    # Validate capabilities.
    for entry in capabilities:
        matching_prefixes = [
            prefix
            for prefix in ["compute_", "sm_"]
            if entry.startswith(prefix)
        ]
        if not matching_prefixes:
            auto_configure_fail("Invalid compute capability: %s" % entry)
        for prefix in matching_prefixes:
            two_digits = len(entry) == len(prefix) + 2 and entry[-2:].isdigit()
            if not two_digits:
                auto_configure_fail("Invalid compute capability: %s" % entry)

    return capabilities
| |
def lib_name(base_name, cpu_value, version = None, static = False):
    """Builds the file name a library has on the given platform.

    Args:
      base_name: The name of the library, such as "cudart"
      cpu_value: The name of the host operating system.
      version: The version of the library. Only used for shared libraries on
        Linux, FreeBSD and Darwin.
      static: True the library is static or False if it is a shared object.
        Ignored on Windows.

    Returns:
      The platform-specific name of the library. An unknown cpu_value fails
      the configuration.
    """
    version_suffix = ("." + version) if version else ""

    if cpu_value == "Windows":
        return "%s.lib" % base_name

    if cpu_value in ("Linux", "FreeBSD", "Darwin"):
        if static:
            return "lib%s.a" % base_name
        if cpu_value == "Darwin":
            # On Darwin the version goes before the extension.
            return "lib%s%s.dylib" % (base_name, version_suffix)
        return "lib%s.so%s" % (base_name, version_suffix)

    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
| |
def _lib_path(lib, cpu_value, basedir, version, static):
    """Returns basedir joined with the platform-specific file name of lib."""
    return "%s/%s" % (basedir, lib_name(lib, cpu_value, version, static))
| |
def _should_check_soname(version, static):
    """Returns a truthy value only for versioned, non-static libraries.

    A falsy version is returned unchanged; otherwise the result is the
    negation of static.
    """
    if not version:
        return version
    return not static
| |
def _check_cuda_lib_params(lib, cpu_value, basedir, version, static = False):
    """Returns the (library path, whether to check the soname) pair for lib."""
    library_path = _lib_path(lib, cpu_value, basedir, version, static)
    check_soname = _should_check_soname(version, static)
    return (library_path, check_soname)
| |
def _check_cuda_libs(repository_ctx, script_path, libs):
    """Runs the library check script and fails unless it confirms every path.

    The script is not executed from script_path directly: its lines are
    embedded into a `python -c` command that first writes them to script.py
    and then runs that file with one `"<path>" <check>` pair per library.

    Args:
      repository_ctx: The repository context.
      script_path: The path of the check script to read.
      libs: Sequence of (library path, check soname) pairs.
    """
    python_bin = get_python_bin(repository_ctx)
    script_lines = repository_ctx.read(script_path).splitlines()

    write_statements = "".join([
        "f.write('%s' + linesep);" % line
        for line in script_lines
    ])
    script_args = " ".join([
        "\"" + path + "\" " + str(check)
        for path, check in libs
    ])
    cmd = (
        "from os import linesep;" +
        "f = open('script.py', 'w');" +
        write_statements +
        "f.close();" +
        "from os import system;" +
        "system('%s script.py %s');" % (python_bin, script_args)
    )

    expected_paths = [path for path, _ in libs]
    output_lines = execute(repository_ctx, [python_bin, "-c", cmd]).stdout.splitlines()

    # Filter out empty lines from splitting on '\r\n' on Windows
    reported_paths = [line for line in output_lines if len(line) > 0]
    if expected_paths != reported_paths:
        auto_configure_fail(
            "Error with installed CUDA libs. Expected '%s'. Actual '%s'." %
            (expected_paths, reported_paths),
        )
| |
def _find_libs(repository_ctx, check_cuda_libs_script, cuda_config):
    """Returns the paths of the CUDA and cuDNN libraries on the system.

    Also verifies, by running the check script, that the libraries exist.

    Args:
      repository_ctx: The repository context.
      check_cuda_libs_script: The path to a script verifying that the cuda
        libraries exist on the system.
      cuda_config: The CUDA config as returned by _get_cuda_config

    Returns:
      Map of library names to the path of the library file.
    """
    cpu_value = cuda_config.cpu_value
    dirs = cuda_config.config

    # Outside Windows the driver library is looked up in the stubs directory.
    stub_dir = "" if is_windows(repository_ctx) else "/stubs"

    # One row per library: (name, directory, version, static). The row order
    # is the order in which the check script is given the paths.
    library_specs = [
        ("cuda", dirs["cuda_library_dir"] + stub_dir, None, False),
        ("cudart", dirs["cuda_library_dir"], cuda_config.cudart_version, False),
        ("cudart_static", dirs["cuda_library_dir"], cuda_config.cudart_version, True),
        ("cublas", dirs["cublas_library_dir"], cuda_config.cublas_version, False),
        ("cublasLt", dirs["cublas_library_dir"], cuda_config.cublas_version, False),
        ("cusolver", dirs["cusolver_library_dir"], cuda_config.cusolver_version, False),
        ("curand", dirs["curand_library_dir"], cuda_config.curand_version, False),
        ("cufft", dirs["cufft_library_dir"], cuda_config.cufft_version, False),
        ("cudnn", dirs["cudnn_library_dir"], cuda_config.cudnn_version, False),
        ("cupti", dirs["cupti_library_dir"], cuda_config.cuda_version, False),
        ("cusparse", dirs["cusparse_library_dir"], cuda_config.cusparse_version, False),
    ]
    check_cuda_libs_params = {
        library: _check_cuda_lib_params(
            library,
            cpu_value,
            directory,
            version = version,
            static = static,
        )
        for library, directory, version, static in library_specs
    }

    # Verify that the libs actually exist at their locations.
    _check_cuda_libs(
        repository_ctx,
        check_cuda_libs_script,
        check_cuda_libs_params.values(),
    )

    # Keep only the path; the soname flag was needed for the check alone.
    return {
        library: params[0]
        for library, params in check_cuda_libs_params.items()
    }
| |
def _cudart_static_linkopt(cpu_value):
    """Returns the extra linkopt text for cudart: none on Darwin, -lrt elsewhere."""
    if cpu_value == "Darwin":
        return ""
    return "\"-lrt\","
| |
def _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries):
    """Runs the compressed find_cuda_config script for the given libraries.

    Args:
      repository_ctx: The repository context.
      script_path: The path of the compressed, base64-encoded script.
      cuda_libraries: The library names passed to the script as arguments.

    Returns:
      The result of execute() for the assembled `python -c` command.
    """
    python_bin = get_python_bin(repository_ctx)

    # With remote execution repository_ctx.execute() cannot access files from
    # the source tree. The usual workaround - reading the file in Starlark and
    # embedding it in the command - is not enough here: find_cuda_config.py
    # is longer than 8192 characters, the command length limit of cmd.exe on
    # Windows. The contents are therefore compressed locally and decompressed
    # as part of the executed command.
    compressed_contents = repository_ctx.read(script_path)
    statements = [
        "from zlib import decompress;",
        "from base64 import b64decode;",
        "from os import system;",
        "script = decompress(b64decode('%s'));" % compressed_contents,
        "f = open('script.py', 'wb');",
        "f.write(script);",
        "f.close();",
        "system('\"%s\" script.py %s');" % (python_bin, " ".join(cuda_libraries)),
    ]

    return execute(repository_ctx, [python_bin, "-c", "".join(statements)])
| |
| # TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl, |
| # and nccl_configure.bzl. |
def find_cuda_config(repository_ctx, script_path, cuda_libraries):
    """Returns CUDA config dictionary from running find_cuda_config.py

    Args:
      repository_ctx: The repository context.
      script_path: The path of the compressed find_cuda_config script.
      cuda_libraries: The library names to look up, e.g. ["cuda", "cudnn"].

    Returns:
      A dict built from the "key: value" lines the script prints to stdout.
      Fails the configuration when the script exits with a non-zero code or
      prints a line that is not in that format.
    """
    exec_result = _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries)
    if exec_result.return_code:
        auto_configure_fail("Failed to run find_cuda_config.py: %s" % err_out(exec_result))

    # Parse the dict from stdout.
    config = {}
    for line in exec_result.stdout.splitlines():
        # Blank lines carry no entry; skip them instead of failing on them.
        if not line.strip():
            continue

        # Split on the first separator only, so that a value which itself
        # contains ": " stays in one piece.
        entry = line.split(": ", 1)
        if len(entry) != 2:
            auto_configure_fail(
                "Unexpected output line from find_cuda_config.py: %s" % line,
            )
        config[entry[0]] = entry[1]
    return config
| |
def _get_cuda_config(repository_ctx, find_cuda_config_script):
    """Detects and returns information about the CUDA installation on the system.

    Args:
      repository_ctx: The repository context.
      find_cuda_config_script: The path of the find_cuda_config script.

    Returns:
      A struct containing the following fields:
        cuda_toolkit_path: The CUDA toolkit installation directory.
        cuda_version: The CUDA version as "<major>.<minor>", or
          "64_<major><minor>" on Windows.
        cuda_version_major: The major component of the CUDA version.
        cudart_version, cublas_version, cusolver_version, curand_version,
        cufft_version, cusparse_version: The version suffix used for the
          respective library.
        cudnn_version: The version of cuDNN, prefixed with "64_" on Windows.
        compute_capabilities: The CUDA compute capabilities to build for.
        cpu_value: The name of the host operating system.
        config: The raw dict returned by find_cuda_config.
    """
    config = find_cuda_config(repository_ctx, find_cuda_config_script, ["cuda", "cudnn"])
    cpu_value = get_cpu_value(repository_ctx)
    toolkit_path = config["cuda_toolkit_path"]

    on_windows = cpu_value == "Windows"

    # Windows library versions carry a "64_" prefix.
    version_format = "64_%s" if on_windows else "%s"

    version_parts = config["cuda_version"].split(".")
    cuda_major = version_parts[0]
    cuda_minor = version_parts[1]
    major_number = int(cuda_major)

    if on_windows:
        cuda_version = "64_%s%s" % (cuda_major, cuda_minor)
    else:
        cuda_version = "%s.%s" % (cuda_major, cuda_minor)
    cudnn_version = version_format % config["cudnn_version"]

    if major_number >= 11:
        # The libcudart soname in CUDA 11.x is versioned as 11.0 for backward
        # compatibility.
        if major_number == 11:
            cudart_version = "64_110" if on_windows else "11.0"
        else:
            cudart_version = version_format % cuda_major

        # Each of these libraries reports its own version; only the major
        # component is used.
        cublas_version, cusolver_version, curand_version, cufft_version, cusparse_version = [
            version_format % config[key].split(".")[0]
            for key in [
                "cublas_version",
                "cusolver_version",
                "curand_version",
                "cufft_version",
                "cusparse_version",
            ]
        ]
    else:
        cudart_version = cuda_version
        if (major_number, int(cuda_minor)) >= (10, 1):
            # The version of libraries like cuBLAS, cuFFT, cuSOLVER, etc.
            # changed from 'x.y' to just 'x' in CUDA 10.1.
            shared_lib_version = version_format % cuda_major
        else:
            shared_lib_version = cuda_version
        cublas_version = shared_lib_version
        cusolver_version = shared_lib_version
        curand_version = shared_lib_version
        cufft_version = shared_lib_version
        cusparse_version = shared_lib_version

    return struct(
        cuda_toolkit_path = toolkit_path,
        cuda_version = cuda_version,
        cuda_version_major = cuda_major,
        cudart_version = cudart_version,
        cublas_version = cublas_version,
        cusolver_version = cusolver_version,
        curand_version = curand_version,
        cufft_version = cufft_version,
        cusparse_version = cusparse_version,
        cudnn_version = cudnn_version,
        compute_capabilities = compute_capabilities(repository_ctx),
        cpu_value = cpu_value,
        config = config,
    )
| |
def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Expands the //third_party/gpus/<tpl>.tpl template into the repository.

    Args:
      repository_ctx: The repository context.
      tpl: The template name, e.g. "cuda:BUILD".
      substitutions: The substitutions to apply to the template.
      out: The output path; defaults to tpl with ':' replaced by '/'.
    """
    destination = out if out else tpl.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % tpl)
    repository_ctx.template(destination, template_label, substitutions)
| |
def _file(repository_ctx, label):
    """Copies //third_party/gpus/<label>.tpl into the repository unchanged.

    The output path is label with ':' replaced by '/'; no substitutions are
    applied.
    """
    destination = label.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % label)
    repository_ctx.template(destination, template_label, {})
| |
# Contents of crosstool/error_gpu_disabled.bzl in the dummy repository. It
# defines error_gpu_disabled(), which fails with instructions to re-run
# ./configure with GPU support.
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
  fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
       "at the prompt to build with GPU support.")

  native.genrule(
      name = "error_gen_crosstool",
      outs = ["CROSSTOOL"],
      cmd = "echo 'Should not be run.' && exit 1",
  )

  native.filegroup(
      name = "crosstool",
      srcs = [":CROSSTOOL"],
      output_licenses = ["unencumbered"],
  )
"""

# Contents of crosstool/BUILD in the dummy repository: it loads the file
# above and calls error_gpu_disabled().
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")

error_gpu_disabled()
"""
| |
def _create_dummy_repository(repository_ctx):
    """Creates the placeholder repository used when CUDA is not configured.

    It expands the cuda templates with empty or default values, creates empty
    stand-ins for the toolkit headers and libraries, and installs a crosstool
    package that fails with an actionable message.

    Args:
      repository_ctx: The repository context.
    """
    cpu_value = get_cpu_value(repository_ctx)

    # Set up BUILD file for cuda/.
    _tpl(
        repository_ctx,
        "cuda:build_defs.bzl",
        {
            "%{cuda_is_configured}": "False",
            "%{cuda_extra_copts}": "[]",
            "%{cuda_gpu_architectures}": "[]",
        },
    )

    build_substitutions = {
        "%{cuda_driver_lib}": lib_name("cuda", cpu_value),
        "%{cudart_static_lib}": lib_name(
            "cudart_static",
            cpu_value,
            static = True,
        ),
        "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
    }

    # The remaining libraries all follow the "%{<name>_lib}" key pattern.
    for library in [
        "cudart",
        "cublas",
        "cublasLt",
        "cusolver",
        "cudnn",
        "cufft",
        "curand",
        "cupti",
        "cusparse",
    ]:
        build_substitutions["%{" + library + "_lib}"] = lib_name(library, cpu_value)
    build_substitutions["%{cub_actual}"] = ":cuda_headers"
    build_substitutions["%{copy_rules}"] = """
filegroup(name="cuda-include")
filegroup(name="cublas-include")
filegroup(name="cusolver-include")
filegroup(name="cufft-include")
filegroup(name="cusparse-include")
filegroup(name="curand-include")
filegroup(name="cudnn-include")
"""
    _tpl(repository_ctx, "cuda:BUILD", build_substitutions)

    # Create dummy files for the CUDA toolkit since they are still required by
    # tensorflow/tsl/platform/default/build_config:cuda.
    for header in [
        "cuda/cuda/include/cuda.h",
        "cuda/cuda/include/cublas.h",
        "cuda/cuda/include/cudnn.h",
        "cuda/cuda/extras/CUPTI/include/cupti.h",
    ]:
        repository_ctx.file(header)
    for library in [
        "cuda",
        "cudart",
        "cudart_static",
        "cublas",
        "cublasLt",
        "cusolver",
        "cudnn",
        "curand",
        "cufft",
        "cupti",
        "cusparse",
    ]:
        repository_ctx.file("cuda/cuda/lib/%s" % lib_name(library, cpu_value))

    # Set up cuda_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc. Every field is left empty.
    _tpl(
        repository_ctx,
        "cuda:cuda_config.h",
        {
            "%{" + field + "}": ""
            for field in [
                "cuda_version",
                "cudart_version",
                "cublas_version",
                "cusolver_version",
                "curand_version",
                "cufft_version",
                "cusparse_version",
                "cudnn_version",
                "cuda_toolkit_path",
                "cuda_compute_capabilities",
            ]
        },
        "cuda/cuda/cuda_config.h",
    )

    # Set up cuda_config.py, which is used by gen_build_info to provide
    # static build environment info to the API
    _tpl(
        repository_ctx,
        "cuda:cuda_config.py",
        _py_tmpl_dict({}),
        "cuda/cuda/cuda_config.py",
    )

    # If cuda_configure is not configured to build with GPU support, and the
    # user attempts to build with --config=cuda, add a dummy build rule to
    # intercept this and fail with an actionable error message.
    repository_ctx.file(
        "crosstool/error_gpu_disabled.bzl",
        _DUMMY_CROSSTOOL_BZL_FILE,
    )
    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
| |
def _norm_path(path):
    """Returns path with '/' separators and without one trailing slash.

    Args:
      path: The path to normalize; may use '\\' or '/' as separator and may
        be empty.

    Returns:
      The path with every '\\' replaced by '/' and, if it then ends with '/',
      that single trailing '/' removed. An empty path is returned as is.
    """
    path = path.replace("\\", "/")

    # endswith() rather than path[-1]: indexing an empty string is an error.
    if path.endswith("/"):
        path = path[:-1]
    return path
| |
def make_copy_files_rule(repository_ctx, name, srcs, outs):
    """Returns the text of a genrule that copies a set of files.

    Args:
      repository_ctx: The repository context (not used).
      name: The name of the generated genrule.
      srcs: The files to copy.
      outs: The genrule outputs; outs[i] receives a copy of srcs[i].

    Returns:
      The genrule definition as a string.
    """

    # One `cp` per (source, output) pair, chained with '&&'.
    copy_commands = [
        'cp -f "%s" "$(location %s)"' % (src, out)
        for src, out in zip(srcs, outs)
    ]
    out_entries = [(' "%s",' % out) for out in outs]

    rule_lines = [
        "genrule(",
        'name = "%s",' % name,
        "outs = [",
        "\n".join(out_entries),
        "],",
        'cmd = """%s """,' % " && \\\n".join(copy_commands),
        ")",
    ]
    return "\n".join(rule_lines)
| |
def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir, exceptions = None):
    """Returns the text of a genrule that recursively copies a directory.

    Args:
      repository_ctx: The repository context.
      name: The name of the generated genrule.
      src_dir: The directory to copy from.
      out_dir: The directory, relative to the genrule output directory, to
        copy to.
      exceptions: If not None, a list of files or directories in src_dir;
        these are left out of the outputs and removed after copying.

    Returns:
      The genrule definition as a string.
    """
    src_dir = _norm_path(src_dir)
    out_dir = _norm_path(out_dir)
    files = read_dir(repository_ctx, src_dir)

    if exceptions != None:
        excluded_prefixes = [src_dir + "/" + entry for entry in exceptions]
        files = [
            path
            for path in files
            if not any([path.startswith(prefix) for prefix in excluded_prefixes])
        ]
    out_entries = [(' "%s",' % path.replace(src_dir, out_dir)) for path in files]

    # '@D' already contains the relative path for a single file, see
    # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables
    if len(out_entries) > 1:
        destination = "$(@D)/%s" % out_dir
    else:
        destination = "$(@D)"

    # The copy is recursive, so excluded entries are deleted afterwards.
    cleanup = ""
    if exceptions != None:
        cleanup = "".join([
            " ; rm -fR " + destination + "/" + entry
            for entry in exceptions
        ])

    rule_lines = [
        "genrule(",
        'name = "%s",' % name,
        "outs = [",
        "\n".join(out_entries),
        "],",
        'cmd = """cp -rLf "%s/." "%s/" %s""",' % (src_dir, destination, cleanup),
        ")",
    ]
    return "\n".join(rule_lines)
| |
def _flag_enabled(repository_ctx, flag_name):
    """Returns True iff get_host_environ reports exactly "1" for flag_name."""
    flag_value = get_host_environ(repository_ctx, flag_name)
    return flag_value == "1"
| |
def _use_cuda_clang(repository_ctx):
    """Returns whether the TF_CUDA_CLANG flag is enabled."""
    clang_flag = "TF_CUDA_CLANG"
    return _flag_enabled(repository_ctx, clang_flag)
| |
def _tf_sysroot(repository_ctx):
    """Returns the value looked up for _TF_SYSROOT, passing "" as the default."""
    default_sysroot = ""
    return get_host_environ(repository_ctx, _TF_SYSROOT, default_sysroot)
| |
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
    """Returns the stringified list of extra copts for the given capabilities.

    Every capability yields a '--cuda-gpu-arch=' option. A capability that
    starts with 'compute_' is first renamed to 'sm_' and additionally yields a
    '--cuda-include-ptx=' option, emitted before its gpu-arch option.

    Args:
      repository_ctx: The repository context (not referenced by this function).
      compute_capabilities: List of capability strings.

    Returns:
      str() of the resulting list of options.
    """
    flags = []
    for capability in compute_capabilities:
        gpu_arch = capability
        if capability.startswith("compute_"):
            gpu_arch = capability.replace("compute_", "sm_")
            flags.append("--cuda-include-ptx=%s" % gpu_arch)
        flags.append("--cuda-gpu-arch=%s" % gpu_arch)
    return str(flags)
| |
def _tpl_path(repository_ctx, filename):
    """Returns the path for the label "//third_party/gpus/<filename>.tpl"."""
    template_label = Label("//third_party/gpus/%s.tpl" % filename)
    return repository_ctx.path(template_label)
| |
def _basename(repository_ctx, path_str):
    """Returns the basename of a path of type string.

    This method is different from path.basename in that it also works if
    the host platform is different from the execution platform
    i.e. linux -> windows.

    Args:
      repository_ctx: The repository context, used for the Windows check.
      path_str: The path as a string.

    Returns:
      Everything after the last '/' (or, on Windows, the last '/' or '\\');
      the whole string when no separator is present.
    """
    is_win = is_windows(repository_ctx)

    # rfind() yields -1 when there is no separator, so the slice below then
    # starts at index 0 and returns the whole string.
    last_sep = path_str.rfind("/")
    if is_win:
        last_sep = max(last_sep, path_str.rfind("\\"))
    return path_str[last_sep + 1:]
| |
def _create_local_cuda_repository(repository_ctx):
    """Creates the repository containing files set up to build with CUDA.

    Writes the cuda/ package (build_defs.bzl, a BUILD file containing the
    copy genrules, cuda_config.h and cuda_config.py) and the crosstool/
    package (BUILD, cc_toolchain_config.bzl and the compiler wrapper scripts,
    which are written as empty files when clang is the CUDA compiler).

    Args:
      repository_ctx: The repository context.
    """

    # Resolve all labels before doing any real work. Resolving causes the
    # function to be restarted with all previous state being lost. This
    # can easily lead to a O(n^2) runtime in the number of labels.
    # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778
    tpl_paths = {filename: _tpl_path(repository_ctx, filename) for filename in [
        "cuda:build_defs.bzl",
        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
        "crosstool:windows/msvc_wrapper_for_nvcc.py",
        "crosstool:BUILD",
        "crosstool:cc_toolchain_config.bzl",
        "cuda:cuda_config.h",
        "cuda:cuda_config.py",
    ]}
    tpl_paths["cuda:BUILD"] = _tpl_path(repository_ctx, "cuda:BUILD.windows" if is_windows(repository_ctx) else "cuda:BUILD")
    find_cuda_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64"))

    cuda_config = _get_cuda_config(repository_ctx, find_cuda_config_script)

    cuda_include_path = cuda_config.config["cuda_include_dir"]
    cublas_include_path = cuda_config.config["cublas_include_dir"]
    cudnn_header_dir = cuda_config.config["cudnn_include_dir"]
    cupti_header_dir = cuda_config.config["cupti_include_dir"]
    nvvm_libdevice_dir = cuda_config.config["nvvm_library_dir"]

    # Create genrule to copy files from the installed CUDA toolkit into execroot.
    copy_rules = [
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-include",
            src_dir = cuda_include_path,
            out_dir = "cuda/include",
        ),
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-nvvm",
            src_dir = nvvm_libdevice_dir,
            out_dir = "cuda/nvvm/libdevice",
        ),
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-extras",
            src_dir = cupti_header_dir,
            out_dir = "cuda/extras/CUPTI/include",
        ),
    ]

    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cublas-include",
        srcs = [
            cublas_include_path + "/cublas.h",
            cublas_include_path + "/cublas_v2.h",
            cublas_include_path + "/cublas_api.h",
            cublas_include_path + "/cublasLt.h",
        ],
        outs = [
            "cublas/include/cublas.h",
            "cublas/include/cublas_v2.h",
            "cublas/include/cublas_api.h",
            "cublas/include/cublasLt.h",
        ],
    ))

    cusolver_include_path = cuda_config.config["cusolver_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cusolver-include",
        srcs = [
            cusolver_include_path + "/cusolver_common.h",
            cusolver_include_path + "/cusolverDn.h",
        ],
        outs = [
            "cusolver/include/cusolver_common.h",
            "cusolver/include/cusolverDn.h",
        ],
    ))

    cufft_include_path = cuda_config.config["cufft_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cufft-include",
        srcs = [
            cufft_include_path + "/cufft.h",
        ],
        outs = [
            "cufft/include/cufft.h",
        ],
    ))

    cusparse_include_path = cuda_config.config["cusparse_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cusparse-include",
        srcs = [
            cusparse_include_path + "/cusparse.h",
        ],
        outs = [
            "cusparse/include/cusparse.h",
        ],
    ))

    curand_include_path = cuda_config.config["curand_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "curand-include",
        srcs = [
            curand_include_path + "/curand.h",
        ],
        outs = [
            "curand/include/curand.h",
        ],
    ))

    check_cuda_libs_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:check_cuda_libs.py"))
    cuda_libs = _find_libs(repository_ctx, check_cuda_libs_script, cuda_config)
    cuda_lib_srcs = []
    cuda_lib_outs = []
    for path in cuda_libs.values():
        cuda_lib_srcs.append(path)
        cuda_lib_outs.append("cuda/lib/" + _basename(repository_ctx, path))
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cuda-lib",
        srcs = cuda_lib_srcs,
        outs = cuda_lib_outs,
    ))

    # copy files mentioned in third_party/nccl/build_defs.bzl.tpl
    file_ext = ".exe" if is_windows(repository_ctx) else ""
    bin_files = (
        ["crt/link.stub"] +
        [f + file_ext for f in ["bin2c", "fatbinary", "nvlink", "nvprune"]]
    )
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cuda-bin",
        srcs = [cuda_config.cuda_toolkit_path + "/bin/" + f for f in bin_files],
        outs = ["cuda/bin/" + f for f in bin_files],
    ))

    # Select the headers based on the cuDNN version (strip '64_' for Windows).
    # The version is compared numerically: as strings, "10" would sort below
    # "8" and the additional headers would be dropped.
    # NOTE(review): assumes the stripped value is a plain integer (the cuDNN
    # major version) - confirm against _get_cuda_config.
    cudnn_headers = ["cudnn.h"]
    cudnn_major = int(cuda_config.cudnn_version.rsplit("_", 1)[-1])
    if cudnn_major >= 8:
        cudnn_headers += [
            "cudnn_backend.h",
            "cudnn_adv_infer.h",
            "cudnn_adv_train.h",
            "cudnn_cnn_infer.h",
            "cudnn_cnn_train.h",
            "cudnn_ops_infer.h",
            "cudnn_ops_train.h",
            "cudnn_version.h",
        ]

    cudnn_srcs = []
    cudnn_outs = []
    for header in cudnn_headers:
        cudnn_srcs.append(cudnn_header_dir + "/" + header)
        cudnn_outs.append("cudnn/include/" + header)

    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cudnn-include",
        srcs = cudnn_srcs,
        outs = cudnn_outs,
    ))

    # Set up BUILD file for cuda/
    repository_ctx.template(
        "cuda/build_defs.bzl",
        tpl_paths["cuda:build_defs.bzl"],
        {
            "%{cuda_is_configured}": "True",
            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
                repository_ctx,
                cuda_config.compute_capabilities,
            ),
            "%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities),
        },
    )

    # From CUDA major version 11 on, point cub at the local ":cuda_headers"
    # target instead of the external cub archive.
    cub_actual = "@cub_archive//:cub"
    if int(cuda_config.cuda_version_major) >= 11:
        cub_actual = ":cuda_headers"

    repository_ctx.template(
        "cuda/BUILD",
        tpl_paths["cuda:BUILD"],
        {
            "%{cuda_driver_lib}": _basename(repository_ctx, cuda_libs["cuda"]),
            "%{cudart_static_lib}": _basename(repository_ctx, cuda_libs["cudart_static"]),
            "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value),
            "%{cudart_lib}": _basename(repository_ctx, cuda_libs["cudart"]),
            "%{cublas_lib}": _basename(repository_ctx, cuda_libs["cublas"]),
            "%{cublasLt_lib}": _basename(repository_ctx, cuda_libs["cublasLt"]),
            "%{cusolver_lib}": _basename(repository_ctx, cuda_libs["cusolver"]),
            "%{cudnn_lib}": _basename(repository_ctx, cuda_libs["cudnn"]),
            "%{cufft_lib}": _basename(repository_ctx, cuda_libs["cufft"]),
            "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]),
            "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]),
            "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]),
            "%{cub_actual}": cub_actual,
            "%{copy_rules}": "\n".join(copy_rules),
        },
    )

    is_cuda_clang = _use_cuda_clang(repository_ctx)
    tf_sysroot = _tf_sysroot(repository_ctx)

    should_download_clang = is_cuda_clang and _flag_enabled(
        repository_ctx,
        _TF_DOWNLOAD_CLANG,
    )
    if should_download_clang:
        download_clang(repository_ctx, "crosstool/extra_tools")

    # Set up crosstool/
    cc = find_cc(repository_ctx)
    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc

    host_compiler_includes = get_cxx_inc_directories(
        repository_ctx,
        cc_fullpath,
        tf_sysroot,
    )
    cuda_defines = {}
    cuda_defines["%{builtin_sysroot}"] = tf_sysroot
    cuda_defines["%{cuda_toolkit_path}"] = ""
    cuda_defines["%{compiler}"] = "unknown"
    if is_cuda_clang:
        cuda_defines["%{cuda_toolkit_path}"] = cuda_config.config["cuda_toolkit_path"]
        cuda_defines["%{compiler}"] = "clang"

    host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX)
    if not host_compiler_prefix:
        host_compiler_prefix = "/usr/bin"

    cuda_defines["%{host_compiler_prefix}"] = host_compiler_prefix

    # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
    # https://github.com/bazelbuild/bazel/issues/760).
    # However, this stops our custom clang toolchain from picking the provided
    # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
    # toolchain.
    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
    #       flag from the CROSSTOOL completely (see
    #       https://github.com/bazelbuild/bazel/issues/5634)
    if should_download_clang:
        cuda_defines["%{linker_bin_path}"] = ""
    else:
        cuda_defines["%{linker_bin_path}"] = host_compiler_prefix

    cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
    cuda_defines["%{unfiltered_compile_flags}"] = ""
    if is_cuda_clang:
        cuda_defines["%{host_compiler_path}"] = str(cc)
        cuda_defines["%{host_compiler_warnings}"] = """
        # Some parts of the codebase set -Werror and hit this warning, so
        # switch it off for now.
        "-Wno-invalid-partial-specialization"
    """
        cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(host_compiler_includes)
        cuda_defines["%{compiler_deps}"] = ":empty"
        cuda_defines["%{win_compiler_deps}"] = ":empty"

        # The wrapper scripts are written as empty files in the clang case.
        repository_ctx.file(
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            "",
        )
        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
    else:
        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
        cuda_defines["%{host_compiler_warnings}"] = ""

        # nvcc has the system include paths built in and will automatically
        # search them; we cannot work around that, so we add the relevant cuda
        # system paths to the allowed compiler specific include paths.
        cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(
            host_compiler_includes + _cuda_include_path(
                repository_ctx,
                cuda_config,
            ) + [cupti_header_dir, cudnn_header_dir],
        )

        # For gcc, do not canonicalize system header paths; some versions of gcc
        # pick the shortest possible path for system includes when creating the
        # .d file - given that includes that are prefixed with "../" multiple
        # time quickly grow longer than the root of the tree, this can lead to
        # bazel's header check failing.
        cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""

        file_ext = ".exe" if is_windows(repository_ctx) else ""
        nvcc_path = "%s/nvcc%s" % (cuda_config.config["cuda_binary_dir"], file_ext)
        cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc"
        cuda_defines["%{win_compiler_deps}"] = ":windows_msvc_wrapper_files"

        wrapper_defines = {
            "%{cpu_compiler}": str(cc),
            "%{cuda_version}": cuda_config.cuda_version,
            "%{nvcc_path}": nvcc_path,
            "%{gcc_host_compiler_path}": str(cc),
            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
        }
        repository_ctx.template(
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            tpl_paths["crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc"],
            wrapper_defines,
        )
        repository_ctx.template(
            "crosstool/windows/msvc_wrapper_for_nvcc.py",
            tpl_paths["crosstool:windows/msvc_wrapper_for_nvcc.py"],
            wrapper_defines,
        )

    cuda_defines.update(_get_win_cuda_defines(repository_ctx))

    verify_build_defines(cuda_defines)

    # Only expand template variables in the BUILD file
    repository_ctx.template(
        "crosstool/BUILD",
        tpl_paths["crosstool:BUILD"],
        cuda_defines,
    )

    # No templating of cc_toolchain_config - use attributes and templatize the
    # BUILD file.
    repository_ctx.template(
        "crosstool/cc_toolchain_config.bzl",
        tpl_paths["crosstool:cc_toolchain_config.bzl"],
        {},
    )

    # Set up cuda_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    repository_ctx.template(
        "cuda/cuda/cuda_config.h",
        tpl_paths["cuda:cuda_config.h"],
        {
            "%{cuda_version}": cuda_config.cuda_version,
            "%{cudart_version}": cuda_config.cudart_version,
            "%{cublas_version}": cuda_config.cublas_version,
            "%{cusolver_version}": cuda_config.cusolver_version,
            "%{curand_version}": cuda_config.curand_version,
            "%{cufft_version}": cuda_config.cufft_version,
            "%{cusparse_version}": cuda_config.cusparse_version,
            "%{cudnn_version}": cuda_config.cudnn_version,
            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
            # Keep only the part after the first '_' of each capability. The
            # loop variable is not named 'cc' so that it cannot be confused
            # with the host compiler path 'cc' used elsewhere in this function.
            "%{cuda_compute_capabilities}": ", ".join([
                capability.split("_")[1]
                for capability in cuda_config.compute_capabilities
            ]),
        },
    )

    # Set up cuda_config.py, which is used by gen_build_info to provide
    # static build environment info to the API
    repository_ctx.template(
        "cuda/cuda/cuda_config.py",
        tpl_paths["cuda:cuda_config.py"],
        _py_tmpl_dict({
            "cuda_version": cuda_config.cuda_version,
            "cudnn_version": cuda_config.cudnn_version,
            "cuda_compute_capabilities": cuda_config.compute_capabilities,
            "cpu_compiler": str(cc),
        }),
    )
| |
def _py_tmpl_dict(d):
    """Returns a substitution dict mapping "%{cuda_config}" to str(d)."""
    substitutions = {}
    substitutions["%{cuda_config}"] = str(d)
    return substitutions
| |
def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
    """Creates pointers to a remotely configured repo set up to build with CUDA.

    Args:
      repository_ctx: The repository context.
      remote_config_repo: The remote configuration repository; combined with
        each target name through config_repo_label.
    """
    extra_copts = _compute_cuda_extra_copts(
        repository_ctx,
        compute_capabilities(repository_ctx),
    )
    _tpl(
        repository_ctx,
        "cuda:build_defs.bzl",
        {
            "%{cuda_is_configured}": "True",
            "%{cuda_extra_copts}": extra_copts,
        },
    )

    # (output path, target in the remote config repo, substitutions), listed
    # in the order in which the files are written.
    remote_files = [
        ("cuda/BUILD", "cuda:BUILD", {}),
        ("cuda/build_defs.bzl", "cuda:build_defs.bzl", {}),
        ("cuda/cuda/cuda_config.h", "cuda:cuda/cuda_config.h", {}),
        ("cuda/cuda/cuda_config.py", "cuda:cuda/cuda_config.py", _py_tmpl_dict({})),
        ("crosstool/BUILD", "crosstool:BUILD", {}),
        ("crosstool/cc_toolchain_config.bzl", "crosstool:cc_toolchain_config.bzl", {}),
        (
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
            {},
        ),
    ]
    for out_path, target, substitutions in remote_files:
        repository_ctx.template(
            out_path,
            config_repo_label(remote_config_repo, target),
            substitutions,
        )
| |
def _cuda_autoconf_impl(repository_ctx):
    """Implementation of the cuda_autoconf repository rule.

    Creates a dummy repository when CUDA is not enabled, a remote-backed
    repository when _TF_CUDA_CONFIG_REPO is set, and a locally configured
    repository otherwise. In every case the BUILD file is symlinked last.

    Args:
      repository_ctx: The repository context.
    """
    build_file = Label("//third_party/gpus:local_config_cuda.BUILD")

    if enable_cuda(repository_ctx):
        remote_config_repo = get_host_environ(repository_ctx, _TF_CUDA_CONFIG_REPO)
        if remote_config_repo == None:
            _create_local_cuda_repository(repository_ctx)
        else:
            # A remote config repo requires both versions to be given too.
            has_cuda_version = get_host_environ(repository_ctx, _TF_CUDA_VERSION) != None
            has_cudnn_version = get_host_environ(repository_ctx, _TF_CUDNN_VERSION) != None
            if not (has_cuda_version and has_cudnn_version):
                auto_configure_fail("%s and %s must also be set if %s is specified" %
                                    (_TF_CUDA_VERSION, _TF_CUDNN_VERSION, _TF_CUDA_CONFIG_REPO))
            _create_remote_cuda_repository(repository_ctx, remote_config_repo)
    else:
        _create_dummy_repository(repository_ctx)

    repository_ctx.symlink(build_file, "BUILD")
| |
# For @bazel_tools//tools/cpp:windows_cc_configure.bzl
# These names are appended to _ENVIRONS, so both repository rules defined in
# this file list them in their `environ` argument.
_MSVC_ENVVARS = [
    "BAZEL_VC",
    "BAZEL_VC_FULL_VERSION",
    "BAZEL_VS",
    "BAZEL_WINSDK_FULL_VERSION",
    "VS90COMNTOOLS",
    "VS100COMNTOOLS",
    "VS110COMNTOOLS",
    "VS120COMNTOOLS",
    "VS140COMNTOOLS",
    "VS150COMNTOOLS",
    "VS160COMNTOOLS",
]
| |
# Environment variables passed as `environ` to the repository rules below.
_ENVIRONS = [
    _GCC_HOST_COMPILER_PATH,
    _GCC_HOST_COMPILER_PREFIX,
    _CLANG_CUDA_COMPILER_PATH,
    "TF_NEED_CUDA",
    "TF_CUDA_CLANG",
    _TF_DOWNLOAD_CLANG,
    # Read by _tf_sysroot; declared here so that a change of the sysroot is
    # seen as a change of the rule's inputs.
    _TF_SYSROOT,
    _CUDA_TOOLKIT_PATH,
    _CUDNN_INSTALL_PATH,
    _TF_CUDA_VERSION,
    _TF_CUDNN_VERSION,
    _TF_CUDA_COMPUTE_CAPABILITIES,
    "NVVMIR_LIBRARY_DIR",
    _PYTHON_BIN_PATH,
    "TMP",
    "TMPDIR",
    "TF_CUDA_PATHS",
] + _MSVC_ENVVARS
| |
# Repository rule that always runs _create_local_cuda_repository. It is marked
# remotable and accepts an `environ` string-dict attribute.
remote_cuda_configure = repository_rule(
    implementation = _create_local_cuda_repository,
    environ = _ENVIRONS,
    remotable = True,
    attrs = {
        "environ": attr.string_dict(),
    },
)
| |
# Repository rule implemented by _cuda_autoconf_impl. In addition to _ENVIRONS
# it declares _TF_CUDA_CONFIG_REPO, which that implementation reads.
cuda_configure = repository_rule(
    implementation = _cuda_autoconf_impl,
    environ = _ENVIRONS + [_TF_CUDA_CONFIG_REPO],
)
"""Detects and configures the local CUDA toolchain.

Add the following to your WORKSPACE FILE:

```python
cuda_configure(name = "local_config_cuda")
```

Args:
  name: A unique name for this workspace rule.
"""