Handle global allocations for 64-bit targets

On 64-bit targets, global allocations are not promoted to registers; instead, they are copied with memcpy when accessed, so the analysis in the global allocation LLVM pass is extended to handle that case. A new LIT test case is added.

This CL also removes global allocation declarations from the LLVM IR to prevent them from appearing in the resulting SPIR-V code, where they may otherwise crash certain Vulkan drivers.

Bug: 30964317
Test: RSoVTest, RSoV lit tests
Change-Id: Id8531781599130c152816d7eb551999458c075e2
diff --git a/rsov/compiler/GlobalAllocPass.cpp b/rsov/compiler/GlobalAllocPass.cpp index 06e468c..ef0b020 100644 --- a/rsov/compiler/GlobalAllocPass.cpp +++ b/rsov/compiler/GlobalAllocPass.cpp
@@ -30,6 +30,19 @@ namespace rs2spirv { namespace { +bool collectGlobalAllocs(Module &M, + SmallVectorImpl<GlobalVariable *> &GlobalAllocs) { + for (auto &GV : M.globals()) { + if (!isRSAllocation(GV)) + continue; + + DEBUG(GV.dump()); + GlobalAllocs.push_back(&GV); + } + + return !GlobalAllocs.empty(); +} + // // This pass would enumerate used global rs_allocations (TBD) and // lowers calls to accessors of the following type: @@ -74,25 +87,43 @@ DEBUG(dbgs() << "RS2SPIRVGlobalAllocPass end\n"); return true; } +}; -private: - bool collectGlobalAllocs(Module &M, - SmallVectorImpl<GlobalVariable *> &GlobalAllocs) { - for (auto &GV : M.globals()) { - if (!isRSAllocation(GV)) - continue; +// A simple pass to remove all global allocations forcibly +class RemoveAllGlobalAllocPass : public ModulePass { +public: + static char ID; + RemoveAllGlobalAllocPass() : ModulePass(ID) {} + const char *getPassName() const override { + return "RemoveAllGlobalAllocPass"; + } - DEBUG(GV.dump()); - GlobalAllocs.push_back(&GV); + bool runOnModule(Module &M) override { + DEBUG(dbgs() << "RemoveAllGlobalAllocPass\n"); + DEBUG(M.dump()); + + SmallVector<GlobalVariable *, 8> GlobalAllocs; + const bool CollectRes = collectGlobalAllocs(M, GlobalAllocs); + if (!CollectRes) + return false; // Module not modified. + // Remove global allocations + for (auto *G : GlobalAllocs) { + G->eraseFromParent(); } - - return !GlobalAllocs.empty(); + DEBUG(dbgs() << "RemoveAllGlobalAllocPass end\n"); + DEBUG(M.dump()); + // Return true, as the pass modifies module. + return true; } }; + } // namespace - char GlobalAllocPass::ID = 0; +char RemoveAllGlobalAllocPass::ID = 0; +ModulePass *createRemoveAllGlobalAllocPass() { + return new RemoveAllGlobalAllocPass(); +} ModulePass *createGlobalAllocPass() { return new GlobalAllocPass(); } } // namespace rs2spirv
diff --git a/rsov/compiler/GlobalAllocPass.h b/rsov/compiler/GlobalAllocPass.h index dc39f13..9312c21 100644 --- a/rsov/compiler/GlobalAllocPass.h +++ b/rsov/compiler/GlobalAllocPass.h
@@ -24,6 +24,7 @@ namespace rs2spirv { llvm::ModulePass *createGlobalAllocPass(); +llvm::ModulePass *createRemoveAllGlobalAllocPass(); } // namespace rs2spirv
diff --git a/rsov/compiler/RSAllocationUtils.cpp b/rsov/compiler/RSAllocationUtils.cpp index 84fcfe8..2c6b891 100644 --- a/rsov/compiler/RSAllocationUtils.cpp +++ b/rsov/compiler/RSAllocationUtils.cpp
@@ -106,9 +106,23 @@ if (auto *F = FCall->getCalledFunction()) { const auto FName = F->getName(); DEBUG(dbgs() << "Discovered function call to : " << FName << '\n'); + // Treat memcpy as moves for the purpose of this analysis + if (FName.startswith("llvm.memcpy")) { + assert(FCall->getNumArgOperands() > 0); + Value *CopyDest = FCall->getArgOperand(0); + // We are interested in the users of the dest operand of + // memcpy here + Value *LocalCopy = CopyDest->stripPointerCasts(); + User *NewU = dyn_cast<User>(LocalCopy); + assert(NewU); + WorkList.push_back(NewU); + continue; + } char *demangled = __cxxabiv1::__cxa_demangle( FName.str().c_str(), nullptr, nullptr, nullptr); + if (!demangled) + continue; const StringRef DemangledNameRef(demangled); DEBUG(dbgs() << "Demangled name: " << DemangledNameRef << '\n');
diff --git a/rsov/compiler/RSSPIRVWriter.cpp b/rsov/compiler/RSSPIRVWriter.cpp index 4ad6c0f..200c388 100644 --- a/rsov/compiler/RSSPIRVWriter.cpp +++ b/rsov/compiler/RSSPIRVWriter.cpp
@@ -82,10 +82,11 @@ PassMgr.add(createGlobalMergePass()); // Transform global allocations and accessors (rs[GS]etElementAt) PassMgr.add(createGlobalAllocPass()); + // Removed dead MemCpys in 64-bit targets after global alloc pass + PassMgr.add(createDeadStoreEliminationPass()); PassMgr.add(createAggressiveDCEPass()); - // Delete unreachable globals. - PassMgr.add(createGlobalDCEPass()); - // Remove global allocations + // Delete unreachable globals (after removing global allocations) + PassMgr.add(createRemoveAllGlobalAllocPass()); PassMgr.add(createPromoteMemoryToRegisterPass()); PassMgr.add(createTransOCLMD()); // TODO: investigate removal of OCLTypeToSPIRV pass.
diff --git a/rsov/compiler/tests/rs_allocation/getdimx_64.ll b/rsov/compiler/tests/rs_allocation/getdimx_64.ll new file mode 100644 index 0000000..f5976fb --- /dev/null +++ b/rsov/compiler/tests/rs_allocation/getdimx_64.ll
@@ -0,0 +1,75 @@ +; RUN: rs2spirv_lit_driver.sh %s | FileCheck %s +; Source: +; rs_allocation g; +; int32_t RS_KERNEL getDim(int32_t dummy) { +; return rsAllocationGetDimX(g); +; } +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-none-linux-gnueabi" + +; CHECK: OpMemberDecorate [[MetadataS:%[a-zA-Z_0-9]*]] 0 Offset 4 +; CHECK: OpMemberDecorate [[MetadataS]] 1 Offset 8 +; CHECK: OpMemberDecorate [[MetadataS]] 2 Offset 12 +; CHECK: OpMemberDecorate [[MetadataS]] 3 Offset 16 +; CHECK: OpDecorate [[RuntimeArrS:%[a-zA-Z_0-9]*]] ArrayStride {{[0-9]*}} +; CHECK: OpDecorate [[MetadataSSBO:%[a-zA-Z_0-9]*]] BufferBlock +; CHECK: OpDecorate [[Metadata:%[a-zA-Z_0-9]*]] DescriptorSet 0 +; CHECK: OpDecorate [[Metadata]] Binding 0 + +%struct.rs_allocation.1 = type { i64*, i64*, i64*, i64* } + +@g = common global %struct.rs_allocation.1 zeroinitializer, align 8 +; CHECK-NOT: %g = OpVariable %{{.*}} Uniform +; CHECK-NOT: OpCopyMemorySized +; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX +; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]] +; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]] +; CHECK: OpReturnValue [[Res]] + +; Function Attrs: nounwind +define i32 @getDim(i32 %dummy) unnamed_addr #0 { +entry: + %tmp = alloca %struct.rs_allocation.1, align 8 + %0 = bitcast %struct.rs_allocation.1* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.rs_allocation.1* @g to i8*), i64 32, i32 8, i1 false), !tbaa.struct !8 + %call = call i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1* %tmp) #0 + ret i32 %call +} + +declare i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1*) unnamed_addr + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +; Function Attrs: nounwind +define void @.rs.dtor() unnamed_addr #0 { +entry: + tail call 
void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1* @g) #0 + ret void +} + +declare void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1*) unnamed_addr + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } + +!llvm.ident = !{!0} +!\23pragma = !{!1, !2} +!\23rs_export_var = !{!3} +!\23rs_object_slots = !{!4} +!\23rs_export_foreach_name = !{!5, !6} +!\23rs_export_foreach = !{!4, !7} + +!0 = !{!"Android clang version 3.8.275480 (based on LLVM 3.8.275480)"} +!1 = !{!"version", !"1"} +!2 = !{!"java_package_name", !"com.android.rs.rsov.test"} +!3 = !{!"g", !"20"} +!4 = !{!"0"} +!5 = !{!"root"} +!6 = !{!"getDim"} +!7 = !{!"35"} +!8 = !{i64 0, i64 8, !9, i64 8, i64 8, !9, i64 16, i64 8, !9, i64 24, i64 8, !9} +!9 = !{!10, !10, i64 0} +!10 = !{!"any pointer", !11, i64 0} +!11 = !{!"omnipotent char", !12, i64 0} +!12 = !{!"Simple C/C++ TBAA"}
diff --git a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll index 7133e40..9d252f2 100644 --- a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll +++ b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
@@ -21,6 +21,7 @@ @g = common global %struct.rs_allocation zeroinitializer, align 4 +; CHECK-NOT: %g = OpVariable %{{.*}} Uniform ; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX ; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]] ; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]]