# flang/cmake/modules/AddFlangOffloadRuntime.cmake - toolchain/llvm-project - Git at Google

 # Master switch for the CUDA build of the Fortran runtime.
 # enable_cuda_compilation() does nothing unless this is ON.
 option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
   "Compile Fortran runtime as CUDA sources (experimental)" OFF
   )

 # When ON, enable_cuda_compilation() adds the private definition
 # FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS to the PTX object library.
 option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
   "Do not compile global variables' definitions when producing PTX library" OFF
   )

 # Root of a libcu++ installation. enable_cuda_compilation() looks for an
 # 'include' subdirectory below this path.
 set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")

 # OpenMP target offload mode of the runtime build.
 # Note: enable_omp_offload_compilation() currently accepts only 'off' and
 # 'host_device'; any other value is a configure-time error.
 set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
   "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
 # Advertise the enumerated values so that cmake-gui/ccmake offer a selection
 # list instead of a free-form text field.
 set_property(CACHE FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD PROPERTY STRINGS
   off host_device nohost
   )

 # Semicolon-separated list of offload architectures, or the keyword 'all',
 # which enable_omp_offload_compilation() expands to its built-in list.
 set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
   "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")

 # enable_cuda_compilation(<name> <files>)
 #
 # When FLANG_EXPERIMENTAL_CUDA_RUNTIME is ON:
 #   * enables the CUDA language and separable compilation,
 #   * marks <files> as CUDA sources and attaches compiler-specific
 #     COMPILE_OPTIONS to them,
 #   * optionally switches the directory to libcu++ headers,
 #   * adds an OBJECT library <name>PTX built from <files> with
 #     CUDA_PTX_COMPILATION enabled.
 # When the option is OFF the macro does nothing.
 #
 # Arguments:
 #   name  - prefix of the PTX object library; the properties are applied to
 #           target obj.<name>PTX, which llvm_add_library() is expected to
 #           create.
 #   files - list of source files to be compiled as CUDA.
 #
 # Fails with FATAL_ERROR if BUILD_SHARED_LIBS is set.
 #
 # This has to stay a macro: enable_language() must run at file scope (not
 # inside a function), and CMAKE_CUDA_SEPARABLE_COMPILATION must be set in the
 # caller's scope. As a consequence CUDA_COMPILE_OPTIONS is also left defined
 # in the caller's scope.
 macro(enable_cuda_compilation name files)
   if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
     if (BUILD_SHARED_LIBS)
       message(FATAL_ERROR
         "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
         )
     endif()

     enable_language(CUDA)

     # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
     # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
     set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

     # Treat all supported sources as CUDA files.
     set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
     set(CUDA_COMPILE_OPTIONS)
     if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
       # Allow varargs.
       set(CUDA_COMPILE_OPTIONS
         -Xclang -fcuda-allow-variadic-functions
         )
     endif()
     if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
       set(CUDA_COMPILE_OPTIONS
         --expt-relaxed-constexpr
         # Disable these warnings:
         #   'long double' is treated as 'double' in device code
         -Xcudafe --diag_suppress=20208
         -Xcudafe --display_error_number
         )
     endif()
     set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
       "${CUDA_COMPILE_OPTIONS}"
       )

     # Require a non-empty path: with the default empty value the test would
     # otherwise degenerate to EXISTS "/include" and could enable libcu++
     # mode by accident on systems that have such a directory.
     if (NOT "${FLANG_LIBCUDACXX_PATH}" STREQUAL "" AND
         EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
       # When using libcudacxx headers files, we have to use them
       # for all files of F18 runtime, hence the directory-scoped commands.
       include_directories(AFTER "${FLANG_LIBCUDACXX_PATH}/include")
       add_compile_definitions(RT_USE_LIBCUDACXX=1)
     endif()

     # Add an OBJECT library consisting of CUDA PTX.
     llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
     set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
     if (FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS)
       target_compile_definitions(obj.${name}PTX
         PRIVATE FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
         )
     endif()
   endif()
 endmacro()

 # enable_omp_offload_compilation(<files>)
 #
 # When FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD is not 'off', attaches the OpenMP
 # offload COMPILE_OPTIONS and the OMP_OFFLOAD_BUILD definition to <files>.
 # When it is 'off' the macro does nothing.
 #
 # Arguments:
 #   files - list of source files to be compiled for OpenMP target offload.
 #
 # Fails with FATAL_ERROR if:
 #   * the mode is anything other than 'host_device',
 #   * BUILD_SHARED_LIBS is set,
 #   * the C or C++ compiler ID does not match "Clang",
 #   * FLANG_OMP_DEVICE_ARCHITECTURES is empty.
 #
 # Being a macro, this shares the caller's variable scope: it rewrites
 # FLANG_OMP_DEVICE_ARCHITECTURES (expanding 'all', dropping duplicates) and
 # leaves all_amdgpu_architectures, all_nvptx_architectures,
 # all_gpu_architectures, compile_for_architectures and OMP_COMPILE_OPTIONS
 # defined there.
 macro(enable_omp_offload_compilation files)
   if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
     # 'host_device' build only works with Clang compiler currently.
     # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
     # the in-tree built Clang. We may have a mode that would use the in-tree
     # built Clang.
     #
     # 'nohost' is supposed to produce an LLVM Bitcode library,
     # and it has to be done with a C/C++ compiler producing LLVM Bitcode
     # compatible with the LLVM toolchain version distributed with the Flang
     # compiler.
     # In general, the in-tree built Clang should be used for 'nohost' build.
     # Note that 'nohost' build does not produce the host version of Flang
     # runtime library, so there will be two separate distributable objects.
     # 'nohost' build is a TODO.

     if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
       # Report the offending value: 'nohost' is advertised as a valid option
       # but is not implemented yet, and typos are otherwise hard to spot.
       message(FATAL_ERROR
         "Unsupported OpenMP offload build of Flang runtime: "
         "'${FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD}' "
         "(only 'host_device' is currently supported)"
         )
     endif()
     if (BUILD_SHARED_LIBS)
       message(FATAL_ERROR
         "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
         )
     endif()

     if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
         "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")

       set(all_amdgpu_architectures
         "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
         "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
         "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
         "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
         "gfx1152;gfx1153"
         )
       set(all_nvptx_architectures
         "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
         "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
         )
       set(all_gpu_architectures
         "${all_amdgpu_architectures};${all_nvptx_architectures}"
         )
       # TODO: support auto detection on the build system.
       if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
         set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
       endif()
       list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)

       # An empty list would produce a bare '--offload-arch=' option and only
       # fail later, when the sources are compiled. Diagnose it right away.
       if ("${FLANG_OMP_DEVICE_ARCHITECTURES}" STREQUAL "")
         message(FATAL_ERROR
           "FLANG_OMP_DEVICE_ARCHITECTURES must be 'all' or name at least one device architecture"
           )
       endif()

       # --offload-arch takes a comma-separated list.
       string(REPLACE ";" "," compile_for_architectures
         "${FLANG_OMP_DEVICE_ARCHITECTURES}"
         )

       set(OMP_COMPILE_OPTIONS
         -fopenmp
         -fvisibility=hidden
         -fopenmp-cuda-mode
         --offload-arch=${compile_for_architectures}
         # Force LTO for the device part.
         -foffload-lto
         )
       set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
         "${OMP_COMPILE_OPTIONS}"
         )

       # Enable "declare target" in the source code.
       set_source_files_properties(${files}
         PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
         )
     else()
       # Only the OpenMP offload build is restricted to Clang, so say so.
       message(FATAL_ERROR
         "OpenMP offload build of Flang runtime is not supported for these compilers:\n"
         "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
         "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
     endif()
   endif()
 endmacro()
	# Master switch for the CUDA build of the Fortran runtime.
	# enable_cuda_compilation() does nothing unless this is ON.
	option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
	  "Compile Fortran runtime as CUDA sources (experimental)" OFF
	  )

	# When ON, enable_cuda_compilation() adds the private definition
	# FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS to the PTX object library.
	option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
	  "Do not compile global variables' definitions when producing PTX library" OFF
	  )

	# Root of a libcu++ installation. enable_cuda_compilation() looks for an
	# 'include' subdirectory below this path.
	set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")

	# OpenMP target offload mode of the runtime build.
	# Note: enable_omp_offload_compilation() currently accepts only 'off' and
	# 'host_device'; any other value is a configure-time error.
	set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
	  "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
	# Advertise the enumerated values so that cmake-gui/ccmake offer a selection
	# list instead of a free-form text field.
	set_property(CACHE FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD PROPERTY STRINGS
	  off host_device nohost
	  )

	# Semicolon-separated list of offload architectures, or the keyword 'all',
	# which enable_omp_offload_compilation() expands to its built-in list.
	set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
	  "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")

	# enable_cuda_compilation(<name> <files>)
	#
	# When FLANG_EXPERIMENTAL_CUDA_RUNTIME is ON:
	#   * enables the CUDA language and separable compilation,
	#   * marks <files> as CUDA sources and attaches compiler-specific
	#     COMPILE_OPTIONS to them,
	#   * optionally switches the directory to libcu++ headers,
	#   * adds an OBJECT library <name>PTX built from <files> with
	#     CUDA_PTX_COMPILATION enabled.
	# When the option is OFF the macro does nothing.
	#
	# Arguments:
	#   name  - prefix of the PTX object library; the properties are applied to
	#           target obj.<name>PTX, which llvm_add_library() is expected to
	#           create.
	#   files - list of source files to be compiled as CUDA.
	#
	# Fails with FATAL_ERROR if BUILD_SHARED_LIBS is set.
	#
	# This has to stay a macro: enable_language() must run at file scope (not
	# inside a function), and CMAKE_CUDA_SEPARABLE_COMPILATION must be set in the
	# caller's scope. As a consequence CUDA_COMPILE_OPTIONS is also left defined
	# in the caller's scope.
	macro(enable_cuda_compilation name files)
	  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
	    if (BUILD_SHARED_LIBS)
	      message(FATAL_ERROR
	        "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
	        )
	    endif()

	    enable_language(CUDA)

	    # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
	    # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
	    set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

	    # Treat all supported sources as CUDA files.
	    set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
	    set(CUDA_COMPILE_OPTIONS)
	    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
	      # Allow varargs.
	      set(CUDA_COMPILE_OPTIONS
	        -Xclang -fcuda-allow-variadic-functions
	        )
	    endif()
	    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
	      set(CUDA_COMPILE_OPTIONS
	        --expt-relaxed-constexpr
	        # Disable these warnings:
	        # 'long double' is treated as 'double' in device code
	        -Xcudafe --diag_suppress=20208
	        -Xcudafe --display_error_number
	        )
	    endif()
	    set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
	      "${CUDA_COMPILE_OPTIONS}"
	      )

	    # Require a non-empty path: with the default empty value the test would
	    # otherwise degenerate to EXISTS "/include" and could enable libcu++
	    # mode by accident on systems that have such a directory.
	    if (NOT "${FLANG_LIBCUDACXX_PATH}" STREQUAL "" AND
	        EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
	      # When using libcudacxx headers files, we have to use them
	      # for all files of F18 runtime, hence the directory-scoped commands.
	      include_directories(AFTER "${FLANG_LIBCUDACXX_PATH}/include")
	      add_compile_definitions(RT_USE_LIBCUDACXX=1)
	    endif()

	    # Add an OBJECT library consisting of CUDA PTX.
	    llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
	    set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
	    if (FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS)
	      target_compile_definitions(obj.${name}PTX
	        PRIVATE FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
	        )
	    endif()
	  endif()
	endmacro()

	# enable_omp_offload_compilation(<files>)
	#
	# When FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD is not 'off', attaches the OpenMP
	# offload COMPILE_OPTIONS and the OMP_OFFLOAD_BUILD definition to <files>.
	# When it is 'off' the macro does nothing.
	#
	# Arguments:
	#   files - list of source files to be compiled for OpenMP target offload.
	#
	# Fails with FATAL_ERROR if:
	#   * the mode is anything other than 'host_device',
	#   * BUILD_SHARED_LIBS is set,
	#   * the C or C++ compiler ID does not match "Clang",
	#   * FLANG_OMP_DEVICE_ARCHITECTURES is empty.
	#
	# Being a macro, this shares the caller's variable scope: it rewrites
	# FLANG_OMP_DEVICE_ARCHITECTURES (expanding 'all', dropping duplicates) and
	# leaves all_amdgpu_architectures, all_nvptx_architectures,
	# all_gpu_architectures, compile_for_architectures and OMP_COMPILE_OPTIONS
	# defined there.
	macro(enable_omp_offload_compilation files)
	  if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
	    # 'host_device' build only works with Clang compiler currently.
	    # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
	    # the in-tree built Clang. We may have a mode that would use the in-tree
	    # built Clang.
	    #
	    # 'nohost' is supposed to produce an LLVM Bitcode library,
	    # and it has to be done with a C/C++ compiler producing LLVM Bitcode
	    # compatible with the LLVM toolchain version distributed with the Flang
	    # compiler.
	    # In general, the in-tree built Clang should be used for 'nohost' build.
	    # Note that 'nohost' build does not produce the host version of Flang
	    # runtime library, so there will be two separate distributable objects.
	    # 'nohost' build is a TODO.

	    if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
	      # Report the offending value: 'nohost' is advertised as a valid option
	      # but is not implemented yet, and typos are otherwise hard to spot.
	      message(FATAL_ERROR
	        "Unsupported OpenMP offload build of Flang runtime: "
	        "'${FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD}' "
	        "(only 'host_device' is currently supported)"
	        )
	    endif()
	    if (BUILD_SHARED_LIBS)
	      message(FATAL_ERROR
	        "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
	        )
	    endif()

	    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
	        "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")

	      set(all_amdgpu_architectures
	        "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
	        "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
	        "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
	        "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
	        "gfx1152;gfx1153"
	        )
	      set(all_nvptx_architectures
	        "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
	        "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
	        )
	      set(all_gpu_architectures
	        "${all_amdgpu_architectures};${all_nvptx_architectures}"
	        )
	      # TODO: support auto detection on the build system.
	      if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
	        set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
	      endif()
	      list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)

	      # An empty list would produce a bare '--offload-arch=' option and only
	      # fail later, when the sources are compiled. Diagnose it right away.
	      if ("${FLANG_OMP_DEVICE_ARCHITECTURES}" STREQUAL "")
	        message(FATAL_ERROR
	          "FLANG_OMP_DEVICE_ARCHITECTURES must be 'all' or name at least one device architecture"
	          )
	      endif()

	      # --offload-arch takes a comma-separated list.
	      string(REPLACE ";" "," compile_for_architectures
	        "${FLANG_OMP_DEVICE_ARCHITECTURES}"
	        )

	      set(OMP_COMPILE_OPTIONS
	        -fopenmp
	        -fvisibility=hidden
	        -fopenmp-cuda-mode
	        --offload-arch=${compile_for_architectures}
	        # Force LTO for the device part.
	        -foffload-lto
	        )
	      set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
	        "${OMP_COMPILE_OPTIONS}"
	        )

	      # Enable "declare target" in the source code.
	      set_source_files_properties(${files}
	        PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
	        )
	    else()
	      # Only the OpenMP offload build is restricted to Clang, so say so.
	      message(FATAL_ERROR
	        "OpenMP offload build of Flang runtime is not supported for these compilers:\n"
	        "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
	        "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
	    endif()
	  endif()
	endmacro()