Handle global allocations for 64-bit targets

On 64-bit targets, global allocations are not promoted to registers; they
are copied with memcpy when accessed instead. The analysis in the global
allocation LLVM pass is extended to handle that case, and a new LIT test is added.

This CL also removes global allocation declarations from the LLVM IR to
prevent them from showing up in the resulting SPIR-V code, where they
may otherwise crash certain Vulkan drivers.

Bug: 30964317
Test: RSoVTest, RSoV lit tests
Change-Id: Id8531781599130c152816d7eb551999458c075e2
diff --git a/rsov/compiler/GlobalAllocPass.cpp b/rsov/compiler/GlobalAllocPass.cpp
index 06e468c..ef0b020 100644
--- a/rsov/compiler/GlobalAllocPass.cpp
+++ b/rsov/compiler/GlobalAllocPass.cpp
@@ -30,6 +30,19 @@
 namespace rs2spirv {
 
 namespace {
+bool collectGlobalAllocs(Module &M,
+                         SmallVectorImpl<GlobalVariable *> &GlobalAllocs) {
+  for (auto &GV : M.globals()) {
+    if (!isRSAllocation(GV))
+      continue;
+
+    DEBUG(GV.dump());
+    GlobalAllocs.push_back(&GV);
+  }
+
+  return !GlobalAllocs.empty();
+}
+
 //
 // This pass would enumerate used global rs_allocations (TBD) and
 // lowers calls to accessors of the following type:
@@ -74,25 +87,43 @@
     DEBUG(dbgs() << "RS2SPIRVGlobalAllocPass end\n");
     return true;
   }
+};
 
-private:
-  bool collectGlobalAllocs(Module &M,
-                           SmallVectorImpl<GlobalVariable *> &GlobalAllocs) {
-    for (auto &GV : M.globals()) {
-      if (!isRSAllocation(GV))
-        continue;
+// A simple pass to remove all global allocations forcibly
+class RemoveAllGlobalAllocPass : public ModulePass {
+public:
+  static char ID;
+  RemoveAllGlobalAllocPass() : ModulePass(ID) {}
+  const char *getPassName() const override {
+    return "RemoveAllGlobalAllocPass";
+  }
 
-      DEBUG(GV.dump());
-      GlobalAllocs.push_back(&GV);
+  bool runOnModule(Module &M) override {
+    DEBUG(dbgs() << "RemoveAllGlobalAllocPass\n");
+    DEBUG(M.dump());
+
+    SmallVector<GlobalVariable *, 8> GlobalAllocs;
+    const bool CollectRes = collectGlobalAllocs(M, GlobalAllocs);
+    if (!CollectRes)
+      return false; // Module not modified.
+    // Remove global allocations
+    for (auto *G : GlobalAllocs) {
+      G->eraseFromParent();
     }
-
-    return !GlobalAllocs.empty();
+    DEBUG(dbgs() << "RemoveAllGlobalAllocPass end\n");
+    DEBUG(M.dump());
+    // Return true, as the pass modifies module.
+    return true;
   }
 };
+
 } // namespace
-
 char GlobalAllocPass::ID = 0;
+char RemoveAllGlobalAllocPass::ID = 0;
 
+ModulePass *createRemoveAllGlobalAllocPass() {
+  return new RemoveAllGlobalAllocPass();
+}
 ModulePass *createGlobalAllocPass() { return new GlobalAllocPass(); }
 
 } // namespace rs2spirv
diff --git a/rsov/compiler/GlobalAllocPass.h b/rsov/compiler/GlobalAllocPass.h
index dc39f13..9312c21 100644
--- a/rsov/compiler/GlobalAllocPass.h
+++ b/rsov/compiler/GlobalAllocPass.h
@@ -24,6 +24,7 @@
 namespace rs2spirv {
 
 llvm::ModulePass *createGlobalAllocPass();
+llvm::ModulePass *createRemoveAllGlobalAllocPass();
 
 } // namespace rs2spirv
 
diff --git a/rsov/compiler/RSAllocationUtils.cpp b/rsov/compiler/RSAllocationUtils.cpp
index 84fcfe8..2c6b891 100644
--- a/rsov/compiler/RSAllocationUtils.cpp
+++ b/rsov/compiler/RSAllocationUtils.cpp
@@ -106,9 +106,23 @@
         if (auto *F = FCall->getCalledFunction()) {
           const auto FName = F->getName();
           DEBUG(dbgs() << "Discovered function call to : " << FName << '\n');
+          // Treat memcpy as moves for the purpose of this analysis
+          if (FName.startswith("llvm.memcpy")) {
+            assert(FCall->getNumArgOperands() > 0);
+            Value *CopyDest = FCall->getArgOperand(0);
+            // We are interested in the users of the dest operand of
+            // memcpy here
+            Value *LocalCopy = CopyDest->stripPointerCasts();
+            User *NewU = dyn_cast<User>(LocalCopy);
+            assert(NewU);
+            WorkList.push_back(NewU);
+            continue;
+          }
 
           char *demangled = __cxxabiv1::__cxa_demangle(
               FName.str().c_str(), nullptr, nullptr, nullptr);
+          if (!demangled)
+            continue;
           const StringRef DemangledNameRef(demangled);
           DEBUG(dbgs() << "Demangled name: " << DemangledNameRef << '\n');
 
diff --git a/rsov/compiler/RSSPIRVWriter.cpp b/rsov/compiler/RSSPIRVWriter.cpp
index 4ad6c0f..200c388 100644
--- a/rsov/compiler/RSSPIRVWriter.cpp
+++ b/rsov/compiler/RSSPIRVWriter.cpp
@@ -82,10 +82,11 @@
   PassMgr.add(createGlobalMergePass());
   // Transform global allocations and accessors (rs[GS]etElementAt)
   PassMgr.add(createGlobalAllocPass());
+  // Remove dead memcpys on 64-bit targets after the global alloc pass
+  PassMgr.add(createDeadStoreEliminationPass());
   PassMgr.add(createAggressiveDCEPass());
-  // Delete unreachable globals.
-  PassMgr.add(createGlobalDCEPass());
-  // Remove global allocations
+  // Remove all global allocations so they do not appear in the SPIR-V output
+  PassMgr.add(createRemoveAllGlobalAllocPass());
   PassMgr.add(createPromoteMemoryToRegisterPass());
   PassMgr.add(createTransOCLMD());
   // TODO: investigate removal of OCLTypeToSPIRV pass.
diff --git a/rsov/compiler/tests/rs_allocation/getdimx_64.ll b/rsov/compiler/tests/rs_allocation/getdimx_64.ll
new file mode 100644
index 0000000..f5976fb
--- /dev/null
+++ b/rsov/compiler/tests/rs_allocation/getdimx_64.ll
@@ -0,0 +1,75 @@
+; RUN: rs2spirv_lit_driver.sh %s | FileCheck %s
+; Source:
+; rs_allocation g;
+; int32_t RS_KERNEL getDim(int32_t dummy) {
+;    return rsAllocationGetDimX(g);
+; }
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnueabi"
+
+; CHECK: OpMemberDecorate [[MetadataS:%[a-zA-Z_0-9]*]] 0 Offset 4
+; CHECK: OpMemberDecorate [[MetadataS]] 1 Offset 8
+; CHECK: OpMemberDecorate [[MetadataS]] 2 Offset 12
+; CHECK: OpMemberDecorate [[MetadataS]] 3 Offset 16
+; CHECK: OpDecorate [[RuntimeArrS:%[a-zA-Z_0-9]*]] ArrayStride {{[0-9]*}}
+; CHECK: OpDecorate [[MetadataSSBO:%[a-zA-Z_0-9]*]] BufferBlock
+; CHECK: OpDecorate [[Metadata:%[a-zA-Z_0-9]*]] DescriptorSet 0
+; CHECK: OpDecorate [[Metadata]] Binding 0
+
+%struct.rs_allocation.1 = type { i64*, i64*, i64*, i64* }
+
+@g = common global %struct.rs_allocation.1 zeroinitializer, align 8
+; CHECK-NOT: %g = OpVariable %{{.*}} Uniform
+; CHECK-NOT: OpCopyMemorySized
+; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX
+; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]]
+; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]]
+; CHECK: OpReturnValue [[Res]]
+
+; Function Attrs: nounwind
+define i32 @getDim(i32 %dummy) unnamed_addr #0 {
+entry:
+  %tmp = alloca %struct.rs_allocation.1, align 8
+  %0 = bitcast %struct.rs_allocation.1* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.rs_allocation.1* @g to i8*), i64 32, i32 8, i1 false), !tbaa.struct !8
+  %call = call i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1* %tmp) #0
+  ret i32 %call
+}
+
+declare i32 @_Z19rsAllocationGetDimX13rs_allocation(%struct.rs_allocation.1*) unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+
+; Function Attrs: nounwind
+define void @.rs.dtor() unnamed_addr #0 {
+entry:
+  tail call void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1* @g) #0
+  ret void
+}
+
+declare void @_Z13rsClearObjectP13rs_allocation(%struct.rs_allocation.1*) unnamed_addr
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.ident = !{!0}
+!\23pragma = !{!1, !2}
+!\23rs_export_var = !{!3}
+!\23rs_object_slots = !{!4}
+!\23rs_export_foreach_name = !{!5, !6}
+!\23rs_export_foreach = !{!4, !7}
+
+!0 = !{!"Android clang version 3.8.275480  (based on LLVM 3.8.275480)"}
+!1 = !{!"version", !"1"}
+!2 = !{!"java_package_name", !"com.android.rs.rsov.test"}
+!3 = !{!"g", !"20"}
+!4 = !{!"0"}
+!5 = !{!"root"}
+!6 = !{!"getDim"}
+!7 = !{!"35"}
+!8 = !{i64 0, i64 8, !9, i64 8, i64 8, !9, i64 16, i64 8, !9, i64 24, i64 8, !9}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"any pointer", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C/C++ TBAA"}
diff --git a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
index 7133e40..9d252f2 100644
--- a/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
+++ b/rsov/compiler/tests/rs_allocation/rewrite_getdim.ll
@@ -21,6 +21,7 @@
 
 @g = common global %struct.rs_allocation zeroinitializer, align 4
 
+; CHECK-NOT: %g = OpVariable %{{.*}} Uniform
 ; CHECK-NOT: OpFunctionCall %uint %__rsov_rsAllocationGetDimX
 ; CHECK: [[DimX:%[a-zA-Z_0-9]*]] = OpAccessChain %_ptr_Uniform_uint [[Metadata]]
 ; CHECK: [[Res:%[a-zA-Z_0-9]*]] = OpLoad %uint [[DimX]]