NNAPI systrace for timing statistics
This change adds systrace tracing to NNAPI. The tracing
will be helpful for:
- getting numbers on where time is currently spent
- diagnosing and improving performance
- benchmarking
TODOs:
- Write analysis tools for traces
Change-Id: I9026f1043428cb715b577901bec3a2e1e39a82e3
Merged-In: I9026f1043428cb715b577901bec3a2e1e39a82e3
Bug: 78137932
Test: manually run systrace.py against unit tests
Test: manually run systrace.py against benchmarking app
(cherry picked from commit e9e637ab73b68b5982281a3f7c621f6a75d51743)
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index b5a472b..76d16d9 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -23,6 +23,7 @@
#include "HalInterfaces.h"
#include "Manager.h"
#include "ModelBuilder.h"
+#include "Tracing.h"
#include "Utils.h"
#include <mutex>
@@ -219,6 +220,7 @@
// Ensure that executionCallback->notify() is called.
static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
const sp<ExecutionCallback>& executionCallback) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
VLOG(EXECUTION) << "cpuFallbackFull";
StepExecutor executor(executionBuilder, executionBuilder->getModel(),
nullptr /* no VersionedIDevice, so CPU */,
@@ -244,6 +246,7 @@
const ExecutionPlan* plan,
std::shared_ptr<ExecutionPlan::Controller> controller,
const sp<ExecutionCallback>& executionCallback) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
VLOG(EXECUTION) << "cpuFallbackPartial";
std::shared_ptr<StepExecutor> executor;
int n = plan->fallback(controller, &executor);
@@ -578,6 +581,7 @@
}
}
+ NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "StepExecutor::startComputeOnDevice");
// We separate the input & output pools so that we reduce the copying done if we
// do an eventual remoting (hidl_memory->update()). We could also use it to set
// protection on read only memory but that's not currently done.
@@ -618,6 +622,9 @@
request.pools[i] = mMemories[i]->getHidlMemory();
}
+ NNTRACE_FULL_SWITCH(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
+ "StepExecutor::startComputeOnDevice::execute");
+
// Prepare the callback for asynchronous execution. sp<ExecutionCallback>
// object is returned when the execution has been successfully launched,
// otherwise a nullptr is returned. The executionCallback is abstracted in
@@ -649,6 +656,8 @@
// TODO: Remove this synchronization point when the block of code below is
// removed.
executionCallback->wait();
+ NNTRACE_FULL_SWITCH(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_EXECUTION,
+ "StepExecutor::startComputeOnDevice::waited");
Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
VLOG(EXECUTION) << "**Execute async failed**";
@@ -661,6 +670,7 @@
// TODO: Move this block of code somewhere else. It should not be in the
// startCompute function.
// TODO: outputMemory->update(); outputMemory->commit()
+ NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "StepExecutor::startComputeOnDevice");
for (auto& info : mOutputs) {
if (info.state == ModelArgumentInfo::POINTER) {
DataLocation& loc = info.locationAndLength;
@@ -682,6 +692,7 @@
const std::vector<RunTimePoolInfo>& modelPoolInfos,
const std::vector<RunTimePoolInfo>& requestPoolInfos,
const sp<IExecutionCallback>& executionCallback) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "asyncStartComputeOnCpu");
CpuExecutor executor;
int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
executionCallback->notify(convertResultCodeToErrorStatus(err));
@@ -689,6 +700,8 @@
int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
// TODO: use a thread pool
+ // TODO(mikie): this could have NNTRACE so we could measure the overhead of
+ // spinning up a new thread.
Model model;
mModel->setHidlModel(&model);
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index c8f7550..5fd370d 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -23,6 +23,7 @@
#include "ExecutionBuilder.h"
#include "Manager.h"
#include "ModelBuilder.h"
+#include "Tracing.h"
#include "Utils.h"
#include <functional>
@@ -46,6 +47,10 @@
model->setHidlModel(&hidlModel);
sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback();
+
+ // Note that some work within VersionedIDevice will be subtracted from the
+ // IPC layer
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
Return<ErrorStatus> prepareLaunchStatus = device->getInterface()->prepareModel(
hidlModel, static_cast<ExecutionPreference>(executionPreference), preparedModelCallback);
if (!prepareLaunchStatus.isOk()) {
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 6f1dcca..e384f3d 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -29,6 +29,7 @@
#include "Memory.h"
#include "NeuralNetworksOEM.h"
#include "ModelBuilder.h"
+#include "Tracing.h"
#include "Utils.h"
#include <memory>
@@ -251,6 +252,7 @@
int ANeuralNetworksMemory_createFromFd(size_t size, int prot, int fd, size_t offset,
ANeuralNetworksMemory** memory) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromFd");
*memory = nullptr;
std::unique_ptr<MemoryFd> m = std::make_unique<MemoryFd>();
if (m == nullptr) {
@@ -265,12 +267,14 @@
}
void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) {
+ NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksMemory_free");
// No validation. Free of nullptr is valid.
Memory* m = reinterpret_cast<Memory*>(memory);
delete m;
}
int ANeuralNetworksModel_create(ANeuralNetworksModel** model) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_create");
initVLogMask();
if (!model) {
LOG(ERROR) << "ANeuralNetworksModel_create passed a nullptr";
@@ -286,12 +290,14 @@
}
void ANeuralNetworksModel_free(ANeuralNetworksModel* model) {
+ NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksModel_free");
// No validation. Free of nullptr is valid.
ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model);
delete m;
}
int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_finish");
if (!model) {
LOG(ERROR) << "ANeuralNetworksModel_finish passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -302,6 +308,7 @@
int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
const ANeuralNetworksOperandType* type) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_addOperand");
if (!model || !type) {
LOG(ERROR) << "ANeuralNetworksModel_addOperand passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -312,6 +319,7 @@
int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index,
const void* buffer, size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_setOperandValue");
if (!model || (!buffer && length != 0)) {
LOG(ERROR) << "ANeuralNetworksModel_setOperandValue passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -323,6 +331,7 @@
int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index,
const ANeuralNetworksMemory* memory,
size_t offset, size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_setOperandValueFromMemory");
if (!model || !memory) {
LOG(ERROR) << "ANeuralNetworksModel_setOperandValue passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -336,6 +345,7 @@
ANeuralNetworksOperationType type, uint32_t inputCount,
const uint32_t* inputs, uint32_t outputCount,
const uint32_t* outputs) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_addOperation");
if (!model || !inputs || !outputs) {
LOG(ERROR) << "ANeuralNetworksModel_addOperation passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -347,6 +357,7 @@
int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount,
const uint32_t* inputs, uint32_t outputCount,
const uint32_t* outputs) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_identifyInputsAndOutputs");
if (!model || !inputs || !outputs) {
LOG(ERROR) << ("ANeuralNetworksModel_identifyInputsAndOutputs passed a nullptr");
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -357,6 +368,7 @@
int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model,
bool allow) {
+ NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksModel_relaxComputationFloat32toFloat16");
if (!model) {
LOG(ERROR) << ("ANeuralNetworksModel_relaxComputationFloat32toFloat16 passed a nullptr");
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -367,6 +379,7 @@
int ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
ANeuralNetworksCompilation** compilation) {
+ NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_create");
if (!model || !compilation) {
LOG(ERROR) << "ANeuralNetworksCompilation_create passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -380,6 +393,7 @@
}
void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation) {
+ NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksCompilation_free");
// No validation. Free of nullptr is valid.
// TODO specification says that a compilation-in-flight can be deleted
CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(compilation);
@@ -388,6 +402,7 @@
int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compilation,
int32_t preference) {
+ NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_setPreference");
if (!compilation) {
LOG(ERROR) << "ANeuralNetworksCompilation_setPreference passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -397,6 +412,7 @@
}
int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation) {
+ NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_finish");
if (!compilation) {
LOG(ERROR) << "ANeuralNetworksCompilation_finish passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -407,6 +423,7 @@
int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
ANeuralNetworksExecution** execution) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_create");
if (!compilation || !execution) {
LOG(ERROR) << "ANeuralNetworksExecution_create passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -420,6 +437,7 @@
}
void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) {
+ NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksExecution_free");
// TODO specification says that an execution-in-flight can be deleted
// No validation. Free of nullptr is valid.
ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
@@ -429,6 +447,7 @@
int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32_t index,
const ANeuralNetworksOperandType* type, const void* buffer,
size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "ANeuralNetworksExecution_setInput");
if (!execution || (!buffer && length != 0)) {
LOG(ERROR) << "ANeuralNetworksExecution_setInput passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -441,6 +460,7 @@
const ANeuralNetworksOperandType* type,
const ANeuralNetworksMemory* memory, size_t offset,
size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "ANeuralNetworksExecution_setInputFromMemory");
if (!execution || !memory) {
LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -454,6 +474,7 @@
int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int32_t index,
const ANeuralNetworksOperandType* type, void* buffer,
size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "ANeuralNetworksExecution_setOutput");
if (!execution || (!buffer && length != 0)) {
LOG(ERROR) << "ANeuralNetworksExecution_setOutput passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -466,6 +487,7 @@
const ANeuralNetworksOperandType* type,
const ANeuralNetworksMemory* memory, size_t offset,
size_t length) {
+ NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "ANeuralNetworksExecution_setOutputFromMemory");
if (!execution || !memory) {
LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -478,6 +500,7 @@
int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
ANeuralNetworksEvent** event) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_startCompute");
if (!execution || !event) {
LOG(ERROR) << "ANeuralNetworksExecution_startCompute passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -504,6 +527,7 @@
}
int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
+ NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksEvent_wait");
if (event == nullptr) {
LOG(ERROR) << "ANeuralNetworksEvent_wait passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
@@ -515,6 +539,7 @@
}
void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
+ NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksEvent_free");
// No validation. Free of nullptr is valid.
if (event) {
sp<ExecutionCallback>* e = reinterpret_cast<sp<ExecutionCallback>*>(event);
diff --git a/runtime/VersionedIDevice.cpp b/runtime/VersionedIDevice.cpp
index 110eecc..1226552 100644
--- a/runtime/VersionedIDevice.cpp
+++ b/runtime/VersionedIDevice.cpp
@@ -16,6 +16,7 @@
#include "VersionedIDevice.h"
+#include "Tracing.h"
#include "Utils.h"
#include <android-base/logging.h>
@@ -32,6 +33,7 @@
std::pair<ErrorStatus, Capabilities> result;
if (mDeviceV1_1 != nullptr) {
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_INITIALIZATION, "getCapabilities_1_1");
Return<void> ret = mDeviceV1_1->getCapabilities_1_1(
[&result](ErrorStatus error, const Capabilities& capabilities) {
result = std::make_pair(error, capabilities);
@@ -41,8 +43,10 @@
return {ErrorStatus::GENERAL_FAILURE, {}};
}
} else if (mDeviceV1_0 != nullptr) {
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_INITIALIZATION, "getCapabilities");
Return<void> ret = mDeviceV1_0->getCapabilities(
[&result](ErrorStatus error, const V1_0::Capabilities& capabilities) {
+ // Time taken to convert capabilities is trivial
result = std::make_pair(error, convertToV1_1(capabilities));
});
if (!ret.isOk()) {
@@ -62,6 +66,7 @@
std::pair<ErrorStatus, hidl_vec<bool>> result;
if (mDeviceV1_1 != nullptr) {
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "getSupportedOperations_1_1");
Return<void> ret = mDeviceV1_1->getSupportedOperations_1_1(
model, [&result](ErrorStatus error, const hidl_vec<bool>& supported) {
result = std::make_pair(error, supported);
@@ -71,8 +76,10 @@
return {ErrorStatus::GENERAL_FAILURE, {}};
}
} else if (mDeviceV1_0 != nullptr && compliantWithV1_0(model)) {
+ V1_0::Model model10 = convertToV1_0(model);
+ NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "getSupportedOperations_1_0");
Return<void> ret = mDeviceV1_0->getSupportedOperations(
- convertToV1_0(model), [&result](ErrorStatus error, const hidl_vec<bool>& supported) {
+ model10, [&result](ErrorStatus error, const hidl_vec<bool>& supported) {
result = std::make_pair(error, supported);
});
if (!ret.isOk()) {
@@ -98,17 +105,35 @@
return ErrorStatus::GENERAL_FAILURE;
}
return static_cast<ErrorStatus>(ret);
- } else if (mDeviceV1_0 != nullptr && compliantWithV1_0(model)) {
- Return<ErrorStatus> ret = mDeviceV1_0->prepareModel(convertToV1_0(model), callback);
- if (!ret.isOk()) {
- LOG(ERROR) << "prepareModel failure: " << ret.description();
+ } else if (mDeviceV1_0 != nullptr) {
+ bool compliant = false;
+ V1_0::Model model10;
+ {
+ // Attribute time spent in model inspection and conversion to
+ // Runtime, as the time may be substantial (0.03ms for mobilenet,
+ // but could be larger for other models).
+ NNTRACE_FULL_SUBTRACT(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_COMPILATION,
+ "VersionedIDevice::prepareModel");
+ compliant = compliantWithV1_0(model);
+ if (compliant) {
+ model10 = convertToV1_0(model); // copy is elided
+ }
+ }
+ if (compliant) {
+ Return<ErrorStatus> ret = mDeviceV1_0->prepareModel(model10, callback);
+ if (!ret.isOk()) {
+ LOG(ERROR) << "prepareModel failure: " << ret.description();
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+ return static_cast<ErrorStatus>(ret);
+ } else {
+ // TODO: partition the model such that v1.1 ops are not passed to v1.0
+ // device
+ LOG(ERROR) << "Could not handle prepareModel!";
return ErrorStatus::GENERAL_FAILURE;
}
- return static_cast<ErrorStatus>(ret);
} else {
- // TODO: partition the model such that v1.1 ops are not passed to v1.0
- // device
- LOG(ERROR) << "Could not handle prepareModel!";
+ LOG(ERROR) << "prepareModel called with no device";
return ErrorStatus::GENERAL_FAILURE;
}
}
diff --git a/runtime/test/Android.bp b/runtime/test/Android.bp
index 7dbb28a..addc5c0 100644
--- a/runtime/test/Android.bp
+++ b/runtime/test/Android.bp
@@ -22,6 +22,7 @@
shared_libs: [
"libandroid",
"libbase",
+ "libcutils",
"libhidlbase",
"libhidltransport",
"libhidlmemory",
diff --git a/runtime/test/GeneratedUtils.cpp b/runtime/test/GeneratedUtils.cpp
index fc5671f..a9a33fa 100644
--- a/runtime/test/GeneratedUtils.cpp
+++ b/runtime/test/GeneratedUtils.cpp
@@ -31,6 +31,17 @@
#include <map>
#include <thread>
+// Systrace is not available from CTS tests due to platform layering
+// constraints. We reuse the NNTEST_ONLY_PUBLIC_API flag, as that should also be
+// the case for CTS (public APIs only).
+#ifndef NNTEST_ONLY_PUBLIC_API
+#include "Tracing.h"
+#else
+#define NNTRACE_FULL_RAW(...)
+#define NNTRACE_APP(...)
+#define NNTRACE_APP_SWITCH(...)
+#endif
+
namespace generated_tests {
using namespace android::nn::wrapper;
using namespace test_helper;
@@ -62,9 +73,13 @@
}
Compilation createAndCompileModel(Model* model, std::function<void(Model*)> createModel) {
+ NNTRACE_APP(NNTRACE_PHASE_PREPARATION, "createAndCompileModel");
+
createModel(model);
model->finish();
graphDump("", *model);
+
+ NNTRACE_APP_SWITCH(NNTRACE_PHASE_COMPILATION, "createAndCompileModel");
Compilation compilation(model);
compilation.finish();
@@ -86,6 +101,7 @@
// If in relaxed mode, set the error range to be 5ULP of FP16.
float fpRange = !model->isRelaxed() ? 1e-5f : 5.0f * 0.0009765625f;
for (auto& example : examples) {
+ NNTRACE_APP(NNTRACE_PHASE_EXECUTION, "executeWithCompilation example");
SCOPED_TRACE(exampleNo);
// TODO: We leave it as a copy here.
// Should verify if the input gets modified by the test later.
@@ -94,6 +110,7 @@
Execution execution(compilation);
+ NNTRACE_APP_SWITCH(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "executeWithCompilation example");
// Set all inputs
for_all(inputs, [&execution](int idx, const void* p, size_t s) {
const void* buffer = s == 0 ? nullptr : p;
@@ -108,9 +125,11 @@
ASSERT_EQ(Result::NO_ERROR, execution.setOutput(idx, buffer, s));
});
+ NNTRACE_APP_SWITCH(NNTRACE_PHASE_EXECUTION, "executeWithCompilation example");
Result r = execution.compute();
ASSERT_EQ(Result::NO_ERROR, r);
+ NNTRACE_APP_SWITCH(NNTRACE_PHASE_RESULTS, "executeWithCompilation example");
// Dump all outputs for the slicing tool
if (dumpToFile) {
s << "output" << exampleNo << " = {\n";
@@ -133,6 +152,7 @@
std::function<bool(int)> isIgnored,
std::vector<MixedTypedExample>& examples,
std::string dumpFile) {
+ NNTRACE_APP(NNTRACE_PHASE_OVERALL, "executeOnce");
Model model;
Compilation compilation = createAndCompileModel(&model, createModel);
executeWithCompilation(&model, &compilation, isIgnored, examples, dumpFile);
@@ -142,6 +162,7 @@
void executeMultithreadedOwnCompilation(std::function<void(Model*)> createModel,
std::function<bool(int)> isIgnored,
std::vector<MixedTypedExample>& examples) {
+ NNTRACE_APP(NNTRACE_PHASE_OVERALL, "executeMultithreadedOwnCompilation");
SCOPED_TRACE("MultithreadedOwnCompilation");
std::vector<std::thread> threads;
for (int i = 0; i < 10; i++) {
@@ -157,6 +178,7 @@
void executeMultithreadedSharedCompilation(std::function<void(Model*)> createModel,
std::function<bool(int)> isIgnored,
std::vector<MixedTypedExample>& examples) {
+ NNTRACE_APP(NNTRACE_PHASE_OVERALL, "executeMultithreadedSharedCompilation");
SCOPED_TRACE("MultithreadedSharedCompilation");
Model model;
Compilation compilation = createAndCompileModel(&model, createModel);