Handle global allocations for 64-bit targets
On 64-bit targets, global allocations are not promoted to registers; they are
copied with memcpy when accessed instead. The analysis in the global allocation
LLVM pass is extended to handle that case, and a new LIT test case is added.
This CL also removes global allocation declarations from the LLVM IR to
prevent them from showing up in the resulting SPIR-V code, where they may
otherwise crash certain Vulkan drivers.
Bug: 30964317
Test: RSoVTest, RSoV lit tests
Change-Id: Id8531781599130c152816d7eb551999458c075e2
diff --git a/rsov/compiler/GlobalAllocPass.cpp b/rsov/compiler/GlobalAllocPass.cpp
index 06e468c..ef0b020 100644
--- a/rsov/compiler/GlobalAllocPass.cpp
+++ b/rsov/compiler/GlobalAllocPass.cpp
@@ -30,6 +30,19 @@
namespace rs2spirv {
namespace {
+bool collectGlobalAllocs(Module &M,
+ SmallVectorImpl<GlobalVariable *> &GlobalAllocs) {
+ for (auto &GV : M.globals()) {
+ if (!isRSAllocation(GV))
+ continue;
+
+ DEBUG(GV.dump());
+ GlobalAllocs.push_back(&GV);
+ }
+
+ return !GlobalAllocs.empty();
+}
+
//
// This pass would enumerate used global rs_allocations (TBD) and
// lowers calls to accessors of the following type:
@@ -74,25 +87,43 @@
DEBUG(dbgs() << "RS2SPIRVGlobalAllocPass end\n");
return true;
}
+};
-private:
- bool collectGlobalAllocs(Module &M,
- SmallVectorImpl<GlobalVariable *> &GlobalAllocs) {
- for (auto &GV : M.globals()) {
- if (!isRSAllocation(GV))
- continue;
+// A simple pass to remove all global allocations forcibly
+class RemoveAllGlobalAllocPass : public ModulePass {
+public:
+ static char ID;
+ RemoveAllGlobalAllocPass() : ModulePass(ID) {}
+ const char *getPassName() const override {
+ return "RemoveAllGlobalAllocPass";
+ }
- DEBUG(GV.dump());
- GlobalAllocs.push_back(&GV);
+ bool runOnModule(Module &M) override {
+ DEBUG(dbgs() << "RemoveAllGlobalAllocPass\n");
+ DEBUG(M.dump());
+
+ SmallVector<GlobalVariable *, 8> GlobalAllocs;
+ const bool CollectRes = collectGlobalAllocs(M, GlobalAllocs);
+ if (!CollectRes)
+ return false; // Module not modified.
+ // Remove global allocations
+ for (auto *G : GlobalAllocs) {
+ G->eraseFromParent();
}
-
- return !GlobalAllocs.empty();
+ DEBUG(dbgs() << "RemoveAllGlobalAllocPass end\n");
+ DEBUG(M.dump());
+ // Return true, as the pass modifies module.
+ return true;
}
};
+
} // namespace
-
char GlobalAllocPass::ID = 0;
+char RemoveAllGlobalAllocPass::ID = 0;
+ModulePass *createRemoveAllGlobalAllocPass() {
+ return new RemoveAllGlobalAllocPass();
+}
ModulePass *createGlobalAllocPass() { return new GlobalAllocPass(); }
} // namespace rs2spirv
diff --git a/rsov/compiler/GlobalAllocPass.h b/rsov/compiler/GlobalAllocPass.h
index dc39f13..9312c21 100644
--- a/rsov/compiler/GlobalAllocPass.h
+++ b/rsov/compiler/GlobalAllocPass.h
@@ -24,6 +24,7 @@
namespace rs2spirv {
llvm::ModulePass *createGlobalAllocPass();
+llvm::ModulePass *createRemoveAllGlobalAllocPass();
} // namespace rs2spirv
diff --git a/rsov/compiler/RSAllocationUtils.cpp b/rsov/compiler/RSAllocationUtils.cpp
index 84fcfe8..2c6b891 100644
--- a/rsov/compiler/RSAllocationUtils.cpp
+++ b/rsov/compiler/RSAllocationUtils.cpp
@@ -106,9 +106,23 @@
if (auto *F = FCall->getCalledFunction()) {
const auto FName = F->getName();
DEBUG(dbgs() << "Discovered function call to : " << FName << '\n');
+ // Treat a memcpy as a move for the purpose of this analysis
+ if (FName.startswith("llvm.memcpy")) {
+ assert(FCall->getNumArgOperands() > 0);
+ Value *CopyDest = FCall->getArgOperand(0);
+ // We are interested in the users of the dest operand of
+ // memcpy here
+ Value *LocalCopy = CopyDest->stripPointerCasts();
+ User *NewU = dyn_cast<User>(LocalCopy);
+ assert(NewU);
+ WorkList.push_back(NewU);
+ continue;
+ }
char *demangled = __cxxabiv1::__cxa_demangle(
FName.str().c_str(), nullptr, nullptr, nullptr);
+ if (!demangled)
+ continue;
const StringRef DemangledNameRef(demangled);
DEBUG(dbgs() << "Demangled name: " << DemangledNameRef << '\n');
diff --git a/rsov/compiler/RSSPIRVWriter.cpp b/rsov/compiler/RSSPIRVWriter.cpp
index 4ad6c0f..200c388 100644
--- a/rsov/compiler/RSSPIRVWriter.cpp
+++ b/rsov/compiler/RSSPIRVWriter.cpp
@@ -82,10 +82,11 @@
PassMgr.add(createGlobalMergePass());
// Transform global allocations and accessors (rs[GS]etElementAt)
PassMgr.add(createGlobalAllocPass());
+ // Remove dead memcpys on 64-bit targets after the global alloc pass
+ PassMgr.add(createDeadStoreEliminationPass());
PassMgr.add(createAggressiveDCEPass());
- // Delete unreachable globals.
- PassMgr.add(createGlobalDCEPass());
- // Remove global allocations
+ // Forcibly remove all global allocations so they do not appear in the SPIR-V
+ PassMgr.add(createRemoveAllGlobalAllocPass());
PassMgr.add(createPromoteMemoryToRegisterPass());
PassMgr.add(createTransOCLMD());
// TODO: investigate removal of OCLTypeToSPIRV pass.
diff --git a/rsov/compiler/tests/rs_allocation/getdimx_64.ll b/rsov/compiler/tests/rs_allocation/getdimx_64.ll
new file mode 100644
index 0000000..f5976fb
--- /dev/null
+++ b/rsov/compiler/tests/rs_allocation/getdimx_64.ll
@@ -0,0 +1,75 @@
+; RUN: rs2spirv_lit_driver.sh %s | FileCheck %s
+; Source:
+; rs_allocation g;
+; int32_t RS_KERNEL getDim(int32_t dummy) {
+; return rsAllocationGetDimX(g);
+; }
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnueabi"
+
+; CHECK: OpMemberDecorate [[MetadataS:%[a-zA-Z_0-9]*]] 0 Offset 4
+; CHECK: OpMemberDecorate [[MetadataS]] 1 Offset 8
+; CHECK: OpMemberDecorate [[MetadataS]] 2 Offset 12
+; CHECK: OpMemberDecorate [[MetadataS]] 3 Offset 16
+; CHECK: OpDecorate [[RuntimeArrS:%[a-zA-Z_0-9]*]] ArrayStride {{[0-9]*}}
+; CHECK: OpDecorate [[MetadataSSBO:%[a-zA-Z_0-9]*]] BufferBlock
+; CHECK: OpDecorate [[Metadata:%[a-zA-Z_0-9]*]] DescriptorSet 0
+; CHECK: OpDecorate [[Metadata]] Binding 0
+
+%struct.rs_allocation.1 = type { i64*, i64*, i64*, i64* }
+
+@g = common global %struct.rs_allocation.1 zeroinitializer, align 8
+; CHECK-NOT: %g = OpVariable %{{.*}} Uniform
+; CHECK-NOT: OpCopyMemorySized
+; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX
+; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]]
+; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]]
+; CHECK: OpReturnValue [[Res]]
+
+; Function Attrs: nounwind
+define i32 @getDim(i32 %dummy) unnamed_addr #0 {
+entry:
+ %tmp = alloca %struct.rs_allocation.1, align 8
+ %0 = bitcast %struct.rs_allocation.1* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.rs_allocation.1* @g to i8*), i64 32, i32 8, i1 false), !tbaa.struct !8
+ %call = call i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1* %tmp) #0
+ ret i32 %call
+}
+
+declare i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1*) unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+
+; Function Attrs: nounwind
+define void @.rs.dtor() unnamed_addr #0 {
+entry:
+ tail call void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1* @g) #0
+ ret void
+}
+
+declare void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1*) unnamed_addr
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.ident = !{!0}
+!\23pragma = !{!1, !2}
+!\23rs_export_var = !{!3}
+!\23rs_object_slots = !{!4}
+!\23rs_export_foreach_name = !{!5, !6}
+!\23rs_export_foreach = !{!4, !7}
+
+!0 = !{!"Android clang version 3.8.275480 (based on LLVM 3.8.275480)"}
+!1 = !{!"version", !"1"}
+!2 = !{!"java_package_name", !"com.android.rs.rsov.test"}
+!3 = !{!"g", !"20"}
+!4 = !{!"0"}
+!5 = !{!"root"}
+!6 = !{!"getDim"}
+!7 = !{!"35"}
+!8 = !{i64 0, i64 8, !9, i64 8, i64 8, !9, i64 16, i64 8, !9, i64 24, i64 8, !9}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"any pointer", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C/C++ TBAA"}
diff --git a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
index 7133e40..9d252f2 100644
--- a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
+++ b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
@@ -21,6 +21,7 @@
@g = common global %struct.rs_allocation zeroinitializer, align 4
+; CHECK-NOT: %g = OpVariable %{{.*}} Uniform
; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX
; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]]
; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]]