/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/LICM.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstring>
/* DO NOT REORDER THE HEADERS
 * The LLVM headers must all be included before any Mesa header,
 * as they use the `restrict` keyword in ways that are incompatible
 * with our #define in include/c99_compat.h
 */
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "util/macros.h"
using namespace llvm;
class RunAtExitForStaticDestructors : public SDNode
{
public:
/* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
{
}
};
void ac_llvm_run_atexit_for_destructors(void)
{
/* LLVM >= 16 registers static variable destructors on the first compile, which gcc
 * implements by calling atexit() at that point. Before that happens, u_queue has
 * already registered its own atexit handler, which kills all threads. Since exit()
 * runs atexit handlers in the reverse order of registration, the LLVM destructors
 * are called first while shader compiler threads may still be running, which
 * crashes inside LLVM in SelectionDAG.cpp.
 *
 * The solution is to run the code that declares the LLVM static variables first,
 * so that the atexit handler for LLVM is registered before the one for u_queue,
 * which ensures that all u_queue threads are terminated before the LLVM
 * destructors are called.
 *
 * This just executes the code that declares the static variables.
 */
RunAtExitForStaticDestructors();
}
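/* Usage sketch (illustrative, not part of this file): a driver is expected to call
 * this once before it creates any compiler threads / util_queue instances, e.g.:
 *
 *    void hypothetical_driver_init(void)
 *    {
 *       ac_llvm_run_atexit_for_destructors();
 *       // ... only now create u_queue shader compiler threads ...
 *    }
 *
 * The function name above is a placeholder; only the call ordering matters.
 */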
bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
{
TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
}
void ac_reset_llvm_all_options_occurrences()
{
cl::ResetAllOptionOccurrences();
}
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
Argument *A = unwrap<Argument>(val);
A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
}
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
Argument *A = unwrap<Argument>(val);
A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
}
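/* Usage sketch (illustrative; `fn` is a hypothetical function value): marking the
 * first argument as dereferenceable and aligned, e.g. for a descriptor pointer:
 *
 *    LLVMValueRef arg = LLVMGetParam(fn, 0);
 *    ac_add_attr_dereferenceable(arg, 16);
 *    ac_add_attr_alignment(arg, 16);
 */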
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
unwrap(module)->setDataLayout(TM->createDataLayout());
return module;
}
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
FastMathFlags flags;
switch (float_mode) {
case AC_FLOAT_MODE_DEFAULT:
case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
break;
case AC_FLOAT_MODE_DEFAULT_OPENGL:
/* Allow optimizations to treat the sign of a zero argument or
* result as insignificant.
*/
flags.setNoSignedZeros(); /* nsz */
/* Allow optimizations to use the reciprocal of an argument
* rather than perform division.
*/
flags.setAllowReciprocal(); /* arcp */
unwrap(builder)->setFastMathFlags(flags);
break;
}
return builder;
}
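/* Usage sketch (illustrative; `llvm_ctx` is a hypothetical LLVMContextRef):
 *
 *    LLVMBuilderRef b = ac_create_builder(llvm_ctx, AC_FLOAT_MODE_DEFAULT_OPENGL);
 *    // Float instructions emitted through `b` now carry the nsz and arcp
 *    // fast-math flags set above.
 */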
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
auto *b = unwrap(ctx->builder);
FastMathFlags flags = b->getFastMathFlags();
/* This disables the optimization of (x + 0), which is used
* to convert negative zero to positive zero.
*/
flags.setNoSignedZeros(false);
b->setFastMathFlags(flags);
}
}
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
auto *b = unwrap(ctx->builder);
FastMathFlags flags = b->getFastMathFlags();
flags.setNoSignedZeros();
b->setFastMathFlags(flags);
}
}
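/* Usage sketch (illustrative; `x` is a hypothetical f32 value, and ctx->f32_0 is
 * assumed to be the f32 0.0 constant from ac_llvm_context): temporarily honoring
 * signed zeros so that an explicit (x + 0.0) survives and flushes -0.0 to +0.0:
 *
 *    ac_enable_signed_zeros(ctx);
 *    LLVMValueRef canon = LLVMBuildFAdd(ctx->builder, x, ctx->f32_0, "");
 *    ac_disable_signed_zeros(ctx);
 */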
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
* better compatibility with C code. */
struct raw_memory_ostream : public raw_pwrite_stream {
char *buffer;
size_t written;
size_t bufsize;
raw_memory_ostream()
{
buffer = NULL;
written = 0;
bufsize = 0;
SetUnbuffered();
}
~raw_memory_ostream()
{
free(buffer);
}
void take(char *&out_buffer, size_t &out_size)
{
out_buffer = buffer;
out_size = written;
buffer = NULL;
written = 0;
bufsize = 0;
}
void flush() = delete;
void write_impl(const char *ptr, size_t size) override
{
if (unlikely(written + size < written)) /* unsigned overflow check */
abort();
if (written + size > bufsize) {
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
buffer = (char *)realloc(buffer, bufsize);
if (!buffer) {
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
abort();
}
}
memcpy(buffer + written, ptr, size);
written += size;
}
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
{
assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
memcpy(buffer + offset, ptr, size);
}
uint64_t current_pos() const override
{
return written;
}
};
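/* Usage sketch (illustrative, not part of this file): the stream accumulates into a
 * malloc()ed buffer that take() hands over to C code, which must free() it:
 *
 *    raw_memory_ostream os;
 *    os << "ELF bytes...";
 *    char *buf; size_t size;
 *    os.take(buf, size);
 *    // ... hand buf/size to C code, eventually free(buf) ...
 */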
/* The middle-end optimization passes are run using
 * LLVM's new pass manager infrastructure.
 */
struct ac_midend_optimizer
{
TargetMachine *target_machine;
PassBuilder pass_builder;
TargetLibraryInfoImpl target_library_info;
/* These must be declared in this order, so that they are
 * destroyed in the correct order due to inter-analysis-manager
 * references.
 */
LoopAnalysisManager loop_am;
FunctionAnalysisManager function_am;
CGSCCAnalysisManager cgscc_am;
ModuleAnalysisManager module_am;
/* Pass Managers */
LoopPassManager loop_pm;
FunctionPassManager function_pm;
ModulePassManager module_pm;
ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
: target_machine(arg_target_machine),
pass_builder(target_machine, PipelineTuningOptions(), {}),
target_library_info(Triple(target_machine->getTargetTriple()))
{
/* Build the pipeline and optimize.
* Any custom analyses should be registered
* before LLVM's default analysis sets.
*/
function_am.registerPass(
[&] { return TargetLibraryAnalysis(target_library_info); }
);
pass_builder.registerModuleAnalyses(module_am);
pass_builder.registerCGSCCAnalyses(cgscc_am);
pass_builder.registerFunctionAnalyses(function_am);
pass_builder.registerLoopAnalyses(loop_am);
pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);
if (arg_check_ir)
module_pm.addPass(VerifierPass());
/* Adding the inliner pass to the module pass manager directly ensures
 * that it is run on all functions first, so that the following passes
 * only run on the remaining non-inline functions. This avoids useless
 * work on dead inline functions.
 */
module_pm.addPass(AlwaysInlinerPass());
/* The following set of passes run on an individual function/loop first
* before proceeding to the next.
*/
#if LLVM_VERSION_MAJOR >= 16
function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
/* LLVM < 16: SROAPass takes no options argument. */
function_pm.addPass(SROAPass());
#endif
loop_pm.addPass(LICMPass(LICMOptions()));
function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
function_pm.addPass(SimplifyCFGPass());
function_pm.addPass(EarlyCSEPass(true));
module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
}
void run(Module &module)
{
module_pm.run(module, module_am);
/* After a run(), the results in the analysis managers
 * aren't useful for optimizing a subsequent LLVM module.
 * Reusing them can lead to unexpected crashes. Hence, the
 * results in the analysis managers need to be invalidated
 * and cleared before running optimizations on a new LLVM
 * module.
 */
module_am.invalidate(module, PreservedAnalyses::none());
module_am.clear();
cgscc_am.clear();
function_am.clear();
loop_am.clear();
}
};
/* The backend passes for optimization, instruction selection,
 * and code generation in LLVM still require the
 * legacy::PassManager. The use of the legacy PM will be dropped
 * once the new PM can handle backend passes.
 */
struct ac_backend_optimizer
{
raw_memory_ostream ostream; /* ELF shader binary stream */
legacy::PassManager backend_pass_manager; /* for codegen only */
ac_backend_optimizer(TargetMachine *arg_target_machine)
{
/* add backend passes */
if (arg_target_machine->addPassesToEmitFile(backend_pass_manager, ostream, nullptr,
#if LLVM_VERSION_MAJOR >= 18
CodeGenFileType::ObjectFile)) {
#else
CGFT_ObjectFile)) {
#endif
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
}
}
void run(Module &module, char *&out_buffer, size_t &out_size)
{
backend_pass_manager.run(module);
ostream.take(out_buffer, out_size);
}
};
ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
bool check_ir)
{
TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
return new ac_midend_optimizer(TM, check_ir);
}
void ac_destroy_midend_optimizer(ac_midend_optimizer *meo)
{
delete meo;
}
bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
{
if (!meo)
return false;
/* Runs all the middle-end optimizations, no code generation */
meo->run(*unwrap(module));
return true;
}
ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm)
{
TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
return new ac_backend_optimizer(TM);
}
void ac_destroy_backend_optimizer(ac_backend_optimizer *beo)
{
delete beo;
}
bool ac_compile_module_to_elf(ac_backend_optimizer *beo, LLVMModuleRef module,
char **pelf_buffer, size_t *pelf_size)
{
if (!beo)
return false;
/* Runs all backend optimizations and code generation */
beo->run(*unwrap(module), *pelf_buffer, *pelf_size);
return true;
}
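/* Usage sketch (illustrative; variable names are hypothetical) of the typical
 * compile flow through the wrappers above:
 *
 *    struct ac_midend_optimizer *meo = ac_create_midend_optimizer(tm, check_ir);
 *    struct ac_backend_optimizer *beo = ac_create_backend_optimizer(tm);
 *
 *    ac_llvm_optimize_module(meo, module);                     // middle-end only
 *    char *elf = NULL; size_t elf_size = 0;
 *    ac_compile_module_to_elf(beo, module, &elf, &elf_size);   // codegen to ELF
 *
 *    // ... use elf/elf_size, then free(elf) ...
 *    ac_destroy_midend_optimizer(meo);
 *    ac_destroy_backend_optimizer(beo);
 */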
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
{
AtomicRMWInst::BinOp binop;
switch (op) {
case LLVMAtomicRMWBinOpXchg:
binop = AtomicRMWInst::Xchg;
break;
case LLVMAtomicRMWBinOpAdd:
binop = AtomicRMWInst::Add;
break;
case LLVMAtomicRMWBinOpSub:
binop = AtomicRMWInst::Sub;
break;
case LLVMAtomicRMWBinOpAnd:
binop = AtomicRMWInst::And;
break;
case LLVMAtomicRMWBinOpNand:
binop = AtomicRMWInst::Nand;
break;
case LLVMAtomicRMWBinOpOr:
binop = AtomicRMWInst::Or;
break;
case LLVMAtomicRMWBinOpXor:
binop = AtomicRMWInst::Xor;
break;
case LLVMAtomicRMWBinOpMax:
binop = AtomicRMWInst::Max;
break;
case LLVMAtomicRMWBinOpMin:
binop = AtomicRMWInst::Min;
break;
case LLVMAtomicRMWBinOpUMax:
binop = AtomicRMWInst::UMax;
break;
case LLVMAtomicRMWBinOpUMin:
binop = AtomicRMWInst::UMin;
break;
case LLVMAtomicRMWBinOpFAdd:
binop = AtomicRMWInst::FAdd;
break;
default:
unreachable("invalid LLVMAtomicRMWBinOp");
break;
}
unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
return wrap(unwrap(ctx->builder)
->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
MaybeAlign(0),
AtomicOrdering::SequentiallyConsistent, SSID));
}
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
{
unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
return wrap(unwrap(ctx->builder)
->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
unwrap(val),
MaybeAlign(0),
AtomicOrdering::SequentiallyConsistent,
AtomicOrdering::SequentiallyConsistent, SSID));
}
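/* Usage sketch (illustrative; `ptr` and `val` are hypothetical LLVMValueRefs):
 *
 *    LLVMValueRef old =
 *       ac_build_atomic_rmw(ctx, LLVMAtomicRMWBinOpAdd, ptr, val, "workgroup");
 *
 * "workgroup" is an AMDGPU sync scope name; both helpers above hard-code
 * sequentially consistent ordering.
 */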