/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "TelemetryStatsd"

#include "TelemetryStatsd.h"

#include <android-base/logging.h>
#include <android-base/no_destructor.h>
#include <statslog_neuralnetworks.h>

#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <cstring>
#include <limits>
#include <map>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>

#include "FeatureLevel.h"
#include "Telemetry.h"
#include "Tracing.h"

namespace android::nn::telemetry {
namespace {

constexpr uint64_t kNoTimeReportedRuntime = std::numeric_limits<uint64_t>::max();
constexpr int64_t kNoTimeReportedStatsd = std::numeric_limits<int64_t>::max();
constexpr size_t kInitialChannelSize = 100;

// Statsd specifies that "Atom logging frequency should not exceed once per 10 milliseconds (i.e.
// consecutive atom calls should be at least 10 milliseconds apart)." A quiet period of 100ms is
// chosen here to reduce the chance that the NNAPI logs too frequently, even from separate
// applications.
constexpr auto kMinimumLoggingQuietPeriod = std::chrono::milliseconds(100);

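// Caches the result of getuid(). The uid of the calling process does not change over its
// lifetime, so it only needs to be queried once.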
int32_t getUid() {
    static const int32_t uid = getuid();
    return uid;
}

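// Convert a duration from the runtime's nanosecond representation to the granularities statsd
// expects (milliseconds for compilation times, microseconds for execution durations), mapping the
// runtime's "no time reported" sentinel to the statsd sentinel.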
constexpr int64_t nanosToMillis(uint64_t time) {
    constexpr uint64_t kNanosPerMilli = 1'000'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMilli;
}

constexpr int64_t nanosToMicros(uint64_t time) {
    constexpr uint64_t kNanosPerMicro = 1'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMicro;
}

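// Seed an AccumulatedTiming with a single measurement. Keeping the sum of squares alongside the
// sum and count allows the variance to be recovered downstream as
// sumSquaredTime / count - (sumTime / count)^2, without retaining the individual samples.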
AtomValue::AccumulatedTiming accumulatedTimingFrom(int64_t timing) {
    if (timing == kNoTimeReportedStatsd) {
        return {};
    }
    return {
            .sumTime = timing,
            .minTime = timing,
            .maxTime = timing,
            .sumSquaredTime = timing * timing,
            .count = 1,
    };
}

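// Merge one AccumulatedTiming into another. For example, merging a timing of 3 (sum=3, min=3,
// max=3, sumSquared=9, count=1) into a timing of 5 (sum=5, min=5, max=5, sumSquared=25, count=1)
// yields sum=8, min=3, max=5, sumSquared=34, count=2. The early return ensures an empty incoming
// timing (count == 0) cannot perturb the accumulated min and max.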
void combineAccumulatedTiming(AtomValue::AccumulatedTiming* accumulatedTime,
                              const AtomValue::AccumulatedTiming& timing) {
    if (timing.count == 0) {
        return;
    }
    accumulatedTime->sumTime += timing.sumTime;
    accumulatedTime->minTime = std::min(accumulatedTime->minTime, timing.minTime);
    accumulatedTime->maxTime = std::max(accumulatedTime->maxTime, timing.maxTime);
    accumulatedTime->sumSquaredTime += timing.sumSquaredTime;
    accumulatedTime->count += timing.count;
}

stats::BytesField makeBytesField(const ModelArchHash& modelArchHash) {
    return stats::BytesField(reinterpret_cast<const char*>(modelArchHash.data()),
                             modelArchHash.size());
}

ModelArchHash makeModelArchHash(const uint8_t* modelArchHash) {
    ModelArchHash output;
    std::memcpy(output.data(), modelArchHash, BYTE_SIZE_OF_MODEL_ARCH_HASH);
    return output;
}

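// Verify at compile time that the NNAPI enum values used in this file are numerically identical
// to the constants generated for the statsd atoms, so the values can be passed through (or cast)
// directly rather than translated case-by-case.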
#define STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, value)                               \
    static_assert(static_cast<int32_t>(DataClass::value) ==                                 \
                  stats::NEURAL_NETWORKS_##type##__##inout##_DATA_CLASS__DATA_CLASS_##value)

#define STATIC_ASSERT_DATA_CLASS_EQ(type, inout)             \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, UNKNOWN); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, OTHER);   \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT32); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT16); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, QUANT);   \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, MIXED)

STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, OUTPUT);

#define STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, value) \
    static_assert(ANEURALNETWORKS_##value ==            \
                  stats::NEURAL_NETWORKS_##type##__ERROR_CODE__RESULT_CODE_##value)

#define STATIC_ASSERT_RESULT_CODE_EQ(type)                                   \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, NO_ERROR);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUT_OF_MEMORY);                 \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, INCOMPLETE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNEXPECTED_NULL);               \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_DATA);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OP_FAILED);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_STATE);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNMAPPABLE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUTPUT_INSUFFICIENT_SIZE);      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNAVAILABLE_DEVICE);            \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_TRANSIENT);     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_PERSISTENT);    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_TRANSIENT);  \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_PERSISTENT); \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, DEAD_OBJECT)

STATIC_ASSERT_RESULT_CODE_EQ(COMPILATION_FAILED);
STATIC_ASSERT_RESULT_CODE_EQ(EXECUTION_FAILED);

#undef STATIC_ASSERT_DATA_CLASS_EQ_VALUE
#undef STATIC_ASSERT_DATA_CLASS_EQ
#undef STATIC_ASSERT_RESULT_CODE_EQ_VALUE
#undef STATIC_ASSERT_RESULT_CODE_EQ

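// Cast a DataClass to the value statsd expects. The static_asserts above guarantee that every
// known DataClass value matches the corresponding statsd constant; anything out of range is
// reported as UNKNOWN.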
int32_t convertDataClass(DataClass dataClass) {
    switch (dataClass) {
        case DataClass::UNKNOWN:
        case DataClass::OTHER:
        case DataClass::FLOAT32:
        case DataClass::FLOAT16:
        case DataClass::QUANT:
        case DataClass::MIXED:
            return static_cast<int32_t>(dataClass);
    }
    return static_cast<int32_t>(DataClass::UNKNOWN);
}

int32_t convertExecutionMode(ExecutionMode executionMode) {
    switch (executionMode) {
        case ExecutionMode::ASYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC;
        case ExecutionMode::SYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_SYNC;
        case ExecutionMode::BURST:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_BURST;
        case ExecutionMode::ASYNC_WITH_DEPS:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC_WITH_DEPS;
    }
    return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_UNKNOWN;
}

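// Clamp a result code to the range of codes the statsd atoms define. Any value outside
// [ANEURALNETWORKS_NO_ERROR, ANEURALNETWORKS_DEAD_OBJECT] is reported as
// ANEURALNETWORKS_OP_FAILED rather than being logged raw.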
int32_t convertResultCode(int32_t resultCode) {
    return resultCode >= ANEURALNETWORKS_NO_ERROR && resultCode <= ANEURALNETWORKS_DEAD_OBJECT
                   ? resultCode
                   : ANEURALNETWORKS_OP_FAILED;
}

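// Fold the model architecture hash down to a single int64_t by XOR-ing together each successive
// 8-byte word of the hash. This loses information relative to the full hash (which is also logged
// as a bytes field), but gives a compact value that is cheap to group by in metrics.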
int64_t compressTo64(const ModelArchHash& modelArchHash) {
    int64_t hash = 0;
    const char* data = reinterpret_cast<const char*>(modelArchHash.data());
    for (size_t i = 0; i + sizeof(int64_t) <= sizeof(ModelArchHash); i += sizeof(int64_t)) {
        int64_t tmp = 0;
        std::memcpy(&tmp, data + i, sizeof(int64_t));
        hash ^= tmp;
    }
    return hash;
}

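// Write one aggregated atom to statsd. The (isExecution, errorCode) pair in the key selects which
// of the four atom types is logged: compilation completed/failed or execution completed/failed.
// Timing fields are only present on the "completed" atoms.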
void logAtomToStatsd(Atom&& atom) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logAtomToStatsd");
    const auto& [key, value] = atom;

    const auto modelArchHash64 = compressTo64(key.modelArchHash);

    if (!key.isExecution) {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    key.fallbackToCpuFromError, key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries,
                    value.compilationTimeMillis.sumTime, value.compilationTimeMillis.minTime,
                    value.compilationTimeMillis.maxTime, value.compilationTimeMillis.sumSquaredTime,
                    value.compilationTimeMillis.count, value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    convertResultCode(key.errorCode), key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    } else {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), key.introspectionEnabled,
                    key.cacheEnabled, key.hasControlFlow, key.hasDynamicTemporaries,
                    value.durationRuntimeMicros.sumTime, value.durationRuntimeMicros.minTime,
                    value.durationRuntimeMicros.maxTime, value.durationRuntimeMicros.sumSquaredTime,
                    value.durationRuntimeMicros.count, value.durationDriverMicros.sumTime,
                    value.durationDriverMicros.minTime, value.durationDriverMicros.maxTime,
                    value.durationDriverMicros.sumSquaredTime, value.durationDriverMicros.count,
                    value.durationHardwareMicros.sumTime, value.durationHardwareMicros.minTime,
                    value.durationHardwareMicros.maxTime,
                    value.durationHardwareMicros.sumSquaredTime, value.durationHardwareMicros.count,
                    value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), convertResultCode(key.errorCode),
                    key.introspectionEnabled, key.cacheEnabled, key.hasControlFlow,
                    key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    }
}

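// Returns the process-wide logger. base::NoDestructor ensures the AsyncLogger is never destroyed,
// so its destructor (which would join the logging thread) cannot run during static
// deinitialization.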
AsyncLogger& getStatsdLogger() {
    static base::NoDestructor<AsyncLogger> logger(logAtomToStatsd, kMinimumLoggingQuietPeriod);
    return *logger;
}

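// View an AtomKey as a tuple of references so the comparison operators below can be implemented
// with the standard tuple lexicographic comparisons.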
constexpr auto asTuple(const AtomKey& v) {
    return std::tie(v.isExecution, v.modelArchHash, v.deviceId, v.executionMode, v.errorCode,
                    v.inputDataClass, v.outputDataClass, v.fallbackToCpuFromError,
                    v.introspectionEnabled, v.cacheEnabled, v.hasControlFlow,
                    v.hasDynamicTemporaries);
}

}  // namespace

bool operator==(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) == asTuple(rhs);
}

bool operator<(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) < asTuple(rhs);
}

void combineAtomValues(AtomValue* accumulatedValue, const AtomValue& value) {
    accumulatedValue->count += value.count;
    combineAccumulatedTiming(&accumulatedValue->compilationTimeMillis, value.compilationTimeMillis);
    combineAccumulatedTiming(&accumulatedValue->durationRuntimeMicros, value.durationRuntimeMicros);
    combineAccumulatedTiming(&accumulatedValue->durationDriverMicros, value.durationDriverMicros);
    combineAccumulatedTiming(&accumulatedValue->durationHardwareMicros,
                             value.durationHardwareMicros);
}

bool AtomAggregator::empty() const {
    return mOrder.empty();
}

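// Merge the incoming atom into the existing aggregate for its key, or start a new aggregate if
// this key has not been seen before. mOrder records a pointer to each distinct key in arrival
// order, so pop() drains the aggregates FIFO. Storing pointers into mAggregate is safe because
// std::map never invalidates pointers to its elements on insertion. Note that `value` is bound
// before atom.first is moved into the map, and atom.second itself is never moved from.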
void AtomAggregator::push(Atom&& atom) {
    const AtomValue& value = atom.second;
    if (const auto [it, inserted] = mAggregate.try_emplace(std::move(atom.first), value);
        !inserted) {
        combineAtomValues(&it->second, value);
    } else {
        mOrder.push(&it->first);
    }
}

std::pair<AtomKey, AtomValue> AtomAggregator::pop() {
    CHECK(!empty());

    // Find the key of the aggregated atom to log and remove it.
    const AtomKey* key = mOrder.front();
    mOrder.pop();

    // Find the value that corresponds to the key and remove the (key, value) pair from the map.
    auto node = mAggregate.extract(*key);
    CHECK(!node.empty());

    return std::make_pair(std::move(node.key()), node.mapped());
}

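// Spawn the logging thread. The thread repeatedly drains the channel, folds the atoms into a
// local AtomAggregator, and logs at most one aggregated atom per quiet period. Under bursty load
// this collapses many raw atoms into a few aggregated ones, keeping the process within statsd's
// rate limit without dropping data.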
AsyncLogger::AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration) {
    mChannel.reserve(kInitialChannelSize);
    mThread = std::thread([this, log = std::move(logger), loggingQuietPeriodDuration]() {
        AtomAggregator data;
        std::vector<Atom> atoms;
        atoms.reserve(kInitialChannelSize);

        // Loop until the thread is being torn down.
        while (true) {
            // Get data if it's available, blocking only when there is nothing left to log.
            const Result result = takeAll(&atoms, /*blockUntilDataIsAvailable=*/data.empty());
            if (result == Result::TEARDOWN) {
                break;
            }

            // Aggregate the data locally.
            std::for_each(atoms.begin(), atoms.end(),
                          [&data](Atom& atom) { data.push(std::move(atom)); });
            atoms.clear();

            // Log one aggregated atom if available, then observe the quiet period.
            if (!data.empty()) {
                log(data.pop());
                const Result result = sleepFor(loggingQuietPeriodDuration);
                if (result == Result::TEARDOWN) {
                    break;
                }
            }
        }
    });
}

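// Producer side: enqueue an atom for the logging thread. Notify only on the empty-to-non-empty
// transition; if the channel already held data, a prior write has already signaled the consumer,
// so further notifications would be redundant.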
void AsyncLogger::write(Atom&& atom) {
    bool wasEmpty = false;
    {
        std::lock_guard hold(mMutex);
        wasEmpty = mChannel.empty();
        mChannel.push_back(std::move(atom));
    }
    if (wasEmpty) {
        mNotEmptyOrTeardown.notify_one();
    }
}

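// Consumer side: swap the entire channel contents into *output under the lock. When
// blockUntilDataIsAvailable is false, the deadline of TimePoint{} is already in the past, so
// wait_until evaluates the predicate once and the call degenerates to a non-blocking poll.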
AsyncLogger::Result AsyncLogger::takeAll(std::vector<Atom>* output,
                                         bool blockUntilDataIsAvailable) {
    CHECK(output != nullptr);
    CHECK(output->empty());
    const auto blockUntil = blockUntilDataIsAvailable ? TimePoint::max() : TimePoint{};
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_until(
            lock, blockUntil, [this]() REQUIRES(mMutex) { return !mChannel.empty() || mTeardown; });
    std::swap(*output, mChannel);
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

AsyncLogger::Result AsyncLogger::sleepFor(Duration duration) {
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_for(lock, duration, [this]() REQUIRES(mMutex) { return mTeardown; });
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

AsyncLogger::~AsyncLogger() {
    {
        std::lock_guard hold(mMutex);
        mTeardown = true;
    }
    mNotEmptyOrTeardown.notify_one();
    mThread.join();
}

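// Convert a single compilation's diagnostic info into an atom with count == 1; identical atoms
// are coalesced later by AtomAggregator. executionMode has no meaning for a compilation, so it is
// pinned to a fixed value (SYNC), which keeps it from splitting otherwise-identical aggregation
// keys.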
Atom createAtomFrom(const DiagnosticCompilationInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = false,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = ExecutionMode::SYNC,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = info->fallbackToCpuFromError,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto compilationTimeMillis = nanosToMillis(info->compilationTimeNanos);
        value.compilationTimeMillis = accumulatedTimingFrom(compilationTimeMillis);
    }

    return atom;
}

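// As above, but for a single execution. fallbackToCpuFromError applies only to compilations, so
// it is pinned to false here for the same reason executionMode is pinned for compilations.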
Atom createAtomFrom(const DiagnosticExecutionInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = true,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = info->executionMode,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = false,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto durationRuntimeMicros = nanosToMicros(info->durationRuntimeNanos);
        const auto durationDriverMicros = nanosToMicros(info->durationDriverNanos);
        const auto durationHardwareMicros = nanosToMicros(info->durationHardwareNanos);
        value.durationRuntimeMicros = accumulatedTimingFrom(durationRuntimeMicros);
        value.durationDriverMicros = accumulatedTimingFrom(durationDriverMicros);
        value.durationHardwareMicros = accumulatedTimingFrom(durationHardwareMicros);
    }

    return atom;
}

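// Entry points invoked by the runtime's telemetry hooks. Each hands its atom to the shared async
// logger, so the caller never blocks on statsd or on the logging quiet period.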
void logCompilationToStatsd(const DiagnosticCompilationInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logCompilationStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

void logExecutionToStatsd(const DiagnosticExecutionInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logExecutionStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

}  // namespace android::nn::telemetry