Add a flag to the API to specify -O0 on the bcc command line.
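Add a context flag, RS_CONTEXT_OPT_LEVEL_0, which sets the field mOptLevel in
the Context to 0 (the default is 3). The value of mOptLevel is passed to bcc
as the -O argument when a script is compiled.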
Using this flag will result in an object file that is easier to debug.
Change-Id: I9673506710cc9f34c415b694fe5d6bc7e967f1db
Signed-off-by: verena beckham <[email protected]>
diff --git a/cpp/RenderScript.cpp b/cpp/RenderScript.cpp
index aa7dbd7..3d81d62 100644
--- a/cpp/RenderScript.cpp
+++ b/cpp/RenderScript.cpp
@@ -165,7 +165,8 @@
}
if (flags & ~(RS_CONTEXT_SYNCHRONOUS | RS_CONTEXT_LOW_LATENCY |
- RS_CONTEXT_LOW_POWER | RS_CONTEXT_WAIT_FOR_ATTACH)) {
+ RS_CONTEXT_LOW_POWER | RS_CONTEXT_WAIT_FOR_ATTACH |
+ RS_CONTEXT_OPT_LEVEL_0)) {
ALOGE("Invalid flags passed");
return false;
}
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index e7acbed..d834ef5 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -82,7 +82,8 @@
RS_INIT_LOW_LATENCY = 2, ///< Prefer low latency devices over potentially higher throughput devices.
// Bitflag 4 is reserved for the context flag low power
RS_INIT_WAIT_FOR_ATTACH = 8, ///< Kernel execution will hold to give time for a debugger to be attached
- RS_INIT_MAX = 16
+ RS_INIT_OPT_LEVEL_0 = 16, ///< Use the -O0 option to set the optimization level to zero when calling the bcc compiler.
+ RS_INIT_MAX = 32
};
/**
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 1909e13..09e7ab7 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -79,7 +79,7 @@
const char* cacheDir, const char* resName,
const char* core_lib, bool useRSDebugContext,
const char* bccPluginName, bool emitGlobalInfo,
- bool emitGlobalInfoSkipConstant) {
+ int optLevel, bool emitGlobalInfoSkipConstant) {
rsAssert(cacheDir && resName && core_lib);
args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
args->push_back("-unroll-runtime");
@@ -98,6 +98,20 @@
args->push_back(core_lib);
args->push_back("-mtriple");
args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
+ args->push_back("-O");
+
+ switch (optLevel) {
+ case (0):
+ args->push_back("0");
+ break;
+ case (3):
+ args->push_back("3");
+ break;
+ default:
+ ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
+ args->push_back("3");
+ break;
+ }
// Enable workaround for A53 codegen by default.
#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
@@ -317,6 +331,8 @@
useRSDebugContext = true;
}
+ int optLevel = mCtx->getContext()->getOptLevel();
+
std::string bcFileName(cacheDir);
bcFileName.append("/");
bcFileName.append(resName);
@@ -327,7 +343,7 @@
bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
useRSDebugContext, bccPluginName, emitGlobalInfo,
- emitGlobalInfoSkipConstant);
+ optLevel, emitGlobalInfoSkipConstant);
mChecksumNeeded = isChecksumNeeded(cacheDir);
if (mChecksumNeeded) {
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index bf01403..50b203d 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -252,7 +252,7 @@
const char* outputDir, const char* outputFileName,
const char* coreLibPath, const char* coreLibRelaxedPath,
const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
- vector<const char*>* args) {
+ int optLevel, vector<const char*>* args) {
args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
args->push_back("-fPIC");
args->push_back("-embedRSInfo");
@@ -282,6 +282,9 @@
args->push_back("-output_path");
args->push_back(outputDir);
+ args->push_back("-O");
+ args->push_back(std::to_string(optLevel).c_str());
+
// The output filename has to be the last, in case we need to pop it out and
// replace with a different name.
args->push_back("-o");
@@ -372,13 +375,15 @@
const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
&coreLibRelaxedPath);
+ int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
+
vector<const char*> arguments;
bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
emitGlobalInfo, emitGlobalInfoSkipConstant,
- &arguments);
+ optLevel, &arguments);
std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
arguments.data()));
diff --git a/rsContext.cpp b/rsContext.cpp
index c21542f..e018eb0 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -459,6 +459,7 @@
memset(&mHal, 0, sizeof(mHal));
mForceCpu = false;
mContextType = RS_CONTEXT_TYPE_NORMAL;
+ mOptLevel = 3;
mSynchronous = false;
mFatalErrorOccured = false;
@@ -496,6 +497,9 @@
if (flags & RS_CONTEXT_SYNCHRONOUS) {
rsc->mSynchronous = true;
}
+ if (flags & RS_CONTEXT_OPT_LEVEL_0) {
+ rsc->mOptLevel = 0;
+ }
rsc->mContextType = ct;
rsc->mHal.flags = flags;
diff --git a/rsContext.h b/rsContext.h
index 09b7483..890459d 100644
--- a/rsContext.h
+++ b/rsContext.h
@@ -258,6 +258,9 @@
// the process lives long enough to get the error to the developer
bool hadFatalError() {return mFatalErrorOccured;}
+ uint32_t getOptLevel() const { return mOptLevel; }
+ void setOptLevel(uint32_t optLevel) { mOptLevel = optLevel; }
+
Device *mDev;
#ifdef RS_COMPATIBILITY_LIB
@@ -311,6 +314,7 @@
bool mForceCpu;
RsContextType mContextType;
+ uint32_t mOptLevel;
bool mRunning;
bool mExit;
diff --git a/rsDefines.h b/rsDefines.h
index 6da672f..0c6f8df 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -251,7 +251,8 @@
RS_CONTEXT_SYNCHRONOUS = 0x0001,
RS_CONTEXT_LOW_LATENCY = 0x0002,
RS_CONTEXT_LOW_POWER = 0x0004,
- RS_CONTEXT_WAIT_FOR_ATTACH = 0x0008
+ RS_CONTEXT_WAIT_FOR_ATTACH = 0x0008,
+ RS_CONTEXT_OPT_LEVEL_0 = 0x0010
};
enum RsBlasTranspose {