Add GPU draw stats to gfxinfo and GraphicsStatsService
Test: ran dumpsys with gfxinfo and graphicsstats
Change-Id: Id9950de87dc4343c6878baa6a6dd42fbc8aeddef
diff --git a/libs/hwui/FrameInfo.cpp b/libs/hwui/FrameInfo.cpp
index 71cc9a8..0698775 100644
--- a/libs/hwui/FrameInfo.cpp
+++ b/libs/hwui/FrameInfo.cpp
@@ -37,13 +37,14 @@
"FrameCompleted",
"DequeueBufferDuration",
"QueueBufferDuration",
+ "GpuCompleted",
};
static_assert((sizeof(FrameInfoNames) / sizeof(FrameInfoNames[0])) ==
static_cast<int>(FrameInfoIndex::NumIndexes),
"size mismatch: FrameInfoNames doesn't match the enum!");
-static_assert(static_cast<int>(FrameInfoIndex::NumIndexes) == 16,
+static_assert(static_cast<int>(FrameInfoIndex::NumIndexes) == 17,
"Must update value in FrameMetrics.java#FRAME_STATS_COUNT (and here)");
void FrameInfo::importUiThreadInfo(int64_t* info) {
diff --git a/libs/hwui/FrameInfo.h b/libs/hwui/FrameInfo.h
index b75192f..51674fb 100644
--- a/libs/hwui/FrameInfo.h
+++ b/libs/hwui/FrameInfo.h
@@ -51,6 +51,8 @@
DequeueBufferDuration,
QueueBufferDuration,
+ GpuCompleted,
+
// Must be the last value!
// Also must be kept in sync with FrameMetrics.java#FRAME_STATS_COUNT
NumIndexes
@@ -143,6 +145,13 @@
return duration(FrameInfoIndex::IntendedVsync, FrameInfoIndex::FrameCompleted);
}
+ inline int64_t gpuDrawTime() const {
+ // GPU start time is approximated to the moment before swapBuffer is invoked.
+ // We could add an EGLSyncKHR fence at the beginning of the frame, but that is an overhead.
+ int64_t endTime = get(FrameInfoIndex::GpuCompleted);
+ return endTime > 0 ? endTime - get(FrameInfoIndex::SwapBuffers) : -1;
+ }
+
inline int64_t& set(FrameInfoIndex index) { return mFrameInfo[static_cast<int>(index)]; }
inline int64_t get(FrameInfoIndex index) const {
diff --git a/libs/hwui/JankTracker.cpp b/libs/hwui/JankTracker.cpp
index 53c5ad8..eae3584 100644
--- a/libs/hwui/JankTracker.cpp
+++ b/libs/hwui/JankTracker.cpp
@@ -232,5 +232,13 @@
: FrameInfoIndex::IntendedVsync;
}
+void JankTracker::finishGpuDraw(const FrameInfo& frame) {
+ int64_t totalGPUDrawTime = frame.gpuDrawTime();
+ if (totalGPUDrawTime >= 0) {
+ mData->reportGPUFrame(totalGPUDrawTime);
+ (*mGlobalData)->reportGPUFrame(totalGPUDrawTime);
+ }
+}
+
} /* namespace uirenderer */
} /* namespace android */
diff --git a/libs/hwui/JankTracker.h b/libs/hwui/JankTracker.h
index 110211e..08059268 100644
--- a/libs/hwui/JankTracker.h
+++ b/libs/hwui/JankTracker.h
@@ -58,6 +58,7 @@
FrameInfo* startFrame() { return &mFrames.next(); }
void finishFrame(const FrameInfo& frame);
+ void finishGpuDraw(const FrameInfo& frame);
void dumpStats(int fd) { dumpData(fd, &mDescription, mData.get()); }
void dumpFrames(int fd);
diff --git a/libs/hwui/ProfileData.cpp b/libs/hwui/ProfileData.cpp
index c7f9232..7921662 100644
--- a/libs/hwui/ProfileData.cpp
+++ b/libs/hwui/ProfileData.cpp
@@ -98,6 +98,10 @@
if (mStatStartTime > other.mStatStartTime || mStatStartTime == 0) {
mStatStartTime = other.mStatStartTime;
}
+ for (size_t i = 0; i < other.mGPUFrameCounts.size(); i++) {
+ mGPUFrameCounts[i] >>= divider;
+ mGPUFrameCounts[i] += other.mGPUFrameCounts[i];
+ }
}
void ProfileData::dump(int fd) const {
@@ -117,6 +121,14 @@
histogramForEach([fd](HistogramEntry entry) {
dprintf(fd, " %ums=%u", entry.renderTimeMs, entry.frameCount);
});
+ dprintf(fd, "\n50th gpu percentile: %ums", findGPUPercentile(50));
+ dprintf(fd, "\n90th gpu percentile: %ums", findGPUPercentile(90));
+ dprintf(fd, "\n95th gpu percentile: %ums", findGPUPercentile(95));
+ dprintf(fd, "\n99th gpu percentile: %ums", findGPUPercentile(99));
+ dprintf(fd, "\nGPU HISTOGRAM:");
+ histogramGPUForEach([fd](HistogramEntry entry) {
+ dprintf(fd, " %ums=%u", entry.renderTimeMs, entry.frameCount);
+ });
}
uint32_t ProfileData::findPercentile(int percentile) const {
@@ -140,6 +152,7 @@
void ProfileData::reset() {
mJankTypeCounts.fill(0);
mFrameCounts.fill(0);
+ mGPUFrameCounts.fill(0);
mSlowFrameCounts.fill(0);
mTotalFrameCount = 0;
mJankFrameCount = 0;
@@ -167,5 +180,40 @@
}
}
+uint32_t ProfileData::findGPUPercentile(int percentile) const {
+ uint32_t totalGPUFrameCount = 0; // this is usually mTotalFrameCount - 3.
+ for (int i = mGPUFrameCounts.size() - 1; i >= 0; i--) {
+ totalGPUFrameCount += mGPUFrameCounts[i];
+ }
+ int pos = percentile * totalGPUFrameCount / 100;
+ int remaining = totalGPUFrameCount - pos;
+ for (int i = mGPUFrameCounts.size() - 1; i >= 0; i--) {
+ remaining -= mGPUFrameCounts[i];
+ if (remaining <= 0) {
+ return GPUFrameTimeForFrameCountIndex(i);
+ }
+ }
+ return 0;
+}
+
+uint32_t ProfileData::GPUFrameTimeForFrameCountIndex(uint32_t index) {
+ return index != 25 ? index + 1 : 4950;
+}
+
+void ProfileData::reportGPUFrame(int64_t duration) {
+ uint32_t index = static_cast<uint32_t>(ns2ms(duration));
+ if (index > 25) {
+ index = 25;
+ }
+
+ mGPUFrameCounts[index]++;
+}
+
+void ProfileData::histogramGPUForEach(const std::function<void(HistogramEntry)>& callback) const {
+ for (size_t i = 0; i < mGPUFrameCounts.size(); i++) {
+ callback(HistogramEntry{GPUFrameTimeForFrameCountIndex(i), mGPUFrameCounts[i]});
+ }
+}
+
} /* namespace uirenderer */
} /* namespace android */
\ No newline at end of file
diff --git a/libs/hwui/ProfileData.h b/libs/hwui/ProfileData.h
index 564920b..ccbffc6 100644
--- a/libs/hwui/ProfileData.h
+++ b/libs/hwui/ProfileData.h
@@ -54,8 +54,10 @@
void mergeWith(const ProfileData& other);
void dump(int fd) const;
uint32_t findPercentile(int percentile) const;
+ uint32_t findGPUPercentile(int percentile) const;
void reportFrame(int64_t duration);
+ void reportGPUFrame(int64_t duration);
void reportJank() { mJankFrameCount++; }
void reportJankType(JankType type) { mJankTypeCounts[static_cast<int>(type)]++; }
@@ -69,15 +71,21 @@
uint32_t frameCount;
};
void histogramForEach(const std::function<void(HistogramEntry)>& callback) const;
+ void histogramGPUForEach(const std::function<void(HistogramEntry)>& callback) const;
constexpr static int HistogramSize() {
return std::tuple_size<decltype(ProfileData::mFrameCounts)>::value +
std::tuple_size<decltype(ProfileData::mSlowFrameCounts)>::value;
}
+ constexpr static int GPUHistogramSize() {
+ return std::tuple_size<decltype(ProfileData::mGPUFrameCounts)>::value;
+ }
+
// Visible for testing
static uint32_t frameTimeForFrameCountIndex(uint32_t index);
static uint32_t frameTimeForSlowFrameCountIndex(uint32_t index);
+ static uint32_t GPUFrameTimeForFrameCountIndex(uint32_t index);
private:
// Open our guts up to unit tests
@@ -88,6 +96,9 @@
std::array<uint32_t, 57> mFrameCounts;
// Holds a histogram of frame times in 50ms increments from 150ms to 5s
std::array<uint16_t, 97> mSlowFrameCounts;
+ // Holds a histogram of GPU draw times in 1ms increments. Frames longer than 25ms are placed in
+ // last bucket.
+ std::array<uint32_t, 26> mGPUFrameCounts;
uint32_t mTotalFrameCount;
uint32_t mJankFrameCount;
diff --git a/libs/hwui/protos/graphicsstats.proto b/libs/hwui/protos/graphicsstats.proto
index 1226d44..0cd5c62 100644
--- a/libs/hwui/protos/graphicsstats.proto
+++ b/libs/hwui/protos/graphicsstats.proto
@@ -46,6 +46,9 @@
// The frame time histogram for the package
repeated GraphicsStatsHistogramBucketProto histogram = 6;
+
+ // The gpu frame time histogram for the package
+ repeated GraphicsStatsHistogramBucketProto gpu_histogram = 7;
}
message GraphicsStatsJankSummaryProto {
diff --git a/libs/hwui/renderthread/CanvasContext.cpp b/libs/hwui/renderthread/CanvasContext.cpp
index d19351b..88a0c6e 100644
--- a/libs/hwui/renderthread/CanvasContext.cpp
+++ b/libs/hwui/renderthread/CanvasContext.cpp
@@ -147,6 +147,7 @@
mNativeSurface = new ReliableSurface{std::move(surface)};
// TODO: Fix error handling & re-shorten timeout
mNativeSurface->setDequeueTimeout(4000_ms);
+ mNativeSurface->enableFrameTimestamps(true);
} else {
mNativeSurface = nullptr;
}
@@ -294,6 +295,7 @@
// just keep using the previous frame's structure instead
if (!wasSkipped(mCurrentFrameInfo)) {
mCurrentFrameInfo = mJankTracker.startFrame();
+ mLast4FrameInfos.next().first = mCurrentFrameInfo;
}
mCurrentFrameInfo->importUiThreadInfo(uiFrameInfo);
mCurrentFrameInfo->set(FrameInfoIndex::SyncQueued) = syncQueued;
@@ -445,7 +447,7 @@
mContentDrawBounds, mOpaque, mLightInfo, mRenderNodes,
&(profiler()));
- int64_t frameCompleteNr = mFrameCompleteCallbacks.size() ? getFrameNumber() : -1;
+ int64_t frameCompleteNr = getFrameNumber();
waitOnFences();
@@ -500,11 +502,13 @@
}
mCurrentFrameInfo->set(FrameInfoIndex::DequeueBufferDuration) = swap.dequeueDuration;
mCurrentFrameInfo->set(FrameInfoIndex::QueueBufferDuration) = swap.queueDuration;
+ mLast4FrameInfos[-1].second = frameCompleteNr;
mHaveNewSurface = false;
mFrameNumber = -1;
} else {
mCurrentFrameInfo->set(FrameInfoIndex::DequeueBufferDuration) = 0;
mCurrentFrameInfo->set(FrameInfoIndex::QueueBufferDuration) = 0;
+ mLast4FrameInfos[-1].second = -1;
}
// TODO: Use a fence for real completion?
@@ -537,6 +541,19 @@
mFrameMetricsReporter->reportFrameMetrics(mCurrentFrameInfo->data());
}
+ if (mLast4FrameInfos.size() == mLast4FrameInfos.capacity()) {
+ // By looking 4 frames back, we guarantee all SF stats are available. There are at
+ // most 3 buffers in BufferQueue. Surface object keeps stats for the last 8 frames.
+ FrameInfo* forthBehind = mLast4FrameInfos.front().first;
+ int64_t composedFrameId = mLast4FrameInfos.front().second;
+ nsecs_t acquireTime = -1;
+ mNativeSurface->getFrameTimestamps(composedFrameId, nullptr, &acquireTime, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr);
+ // Ignore default -1, NATIVE_WINDOW_TIMESTAMP_INVALID and NATIVE_WINDOW_TIMESTAMP_PENDING
+ forthBehind->set(FrameInfoIndex::GpuCompleted) = acquireTime > 0 ? acquireTime : -1;
+ mJankTracker.finishGpuDraw(*forthBehind);
+ }
+
GpuMemoryTracker::onFrameCompleted();
}
diff --git a/libs/hwui/renderthread/CanvasContext.h b/libs/hwui/renderthread/CanvasContext.h
index f9b9310..8a76d6b 100644
--- a/libs/hwui/renderthread/CanvasContext.h
+++ b/libs/hwui/renderthread/CanvasContext.h
@@ -29,6 +29,7 @@
#include "RenderNode.h"
#include "renderthread/RenderTask.h"
#include "renderthread/RenderThread.h"
+#include "utils/RingBuffer.h"
#include <SkBitmap.h>
#include <SkRect.h>
@@ -41,6 +42,7 @@
#include <future>
#include <set>
#include <string>
+#include <utility>
#include <vector>
namespace android {
@@ -260,6 +262,7 @@
std::vector<sp<RenderNode>> mRenderNodes;
FrameInfo* mCurrentFrameInfo = nullptr;
+ RingBuffer<std::pair<FrameInfo*, int64_t>, 4> mLast4FrameInfos;
std::string mName;
JankTracker mJankTracker;
FrameInfoVisualizer mProfiler;
diff --git a/libs/hwui/renderthread/ReliableSurface.h b/libs/hwui/renderthread/ReliableSurface.h
index 41fc35e..7f1a078 100644
--- a/libs/hwui/renderthread/ReliableSurface.h
+++ b/libs/hwui/renderthread/ReliableSurface.h
@@ -49,6 +49,21 @@
return ret;
}
+ status_t getFrameTimestamps(uint64_t frameNumber,
+ nsecs_t* outRequestedPresentTime, nsecs_t* outAcquireTime,
+ nsecs_t* outLatchTime, nsecs_t* outFirstRefreshStartTime,
+ nsecs_t* outLastRefreshStartTime, nsecs_t* outGlCompositionDoneTime,
+ nsecs_t* outDisplayPresentTime, nsecs_t* outDequeueReadyTime,
+ nsecs_t* outReleaseTime) {
+ return mSurface->getFrameTimestamps(frameNumber, outRequestedPresentTime, outAcquireTime,
+ outLatchTime, outFirstRefreshStartTime, outLastRefreshStartTime,
+ outGlCompositionDoneTime, outDisplayPresentTime, outDequeueReadyTime, outReleaseTime);
+ }
+
+ void enableFrameTimestamps(bool enable) {
+ return mSurface->enableFrameTimestamps(enable);
+ }
+
private:
const sp<Surface> mSurface;
diff --git a/libs/hwui/service/GraphicsStatsService.cpp b/libs/hwui/service/GraphicsStatsService.cpp
index 8a16b20..8b5912b 100644
--- a/libs/hwui/service/GraphicsStatsService.cpp
+++ b/libs/hwui/service/GraphicsStatsService.cpp
@@ -40,6 +40,7 @@
static_assert(sizeof(sCurrentFileVersion) == sHeaderSize, "Header size is wrong");
constexpr int sHistogramSize = ProfileData::HistogramSize();
+constexpr int sGPUHistogramSize = ProfileData::GPUHistogramSize();
static bool mergeProfileDataIntoProto(protos::GraphicsStatsProto* proto,
const std::string& package, int64_t versionCode,
@@ -211,6 +212,37 @@
bucket->set_frame_count(bucket->frame_count() + entry.frameCount);
index++;
});
+ if (hitMergeError) return false;
+ // fill in GPU frame time histogram
+ creatingHistogram = false;
+ if (proto->gpu_histogram_size() == 0) {
+ proto->mutable_gpu_histogram()->Reserve(sGPUHistogramSize);
+ creatingHistogram = true;
+ } else if (proto->gpu_histogram_size() != sGPUHistogramSize) {
+ ALOGE("GPU histogram size mismatch, proto is %d expected %d", proto->gpu_histogram_size(),
+ sGPUHistogramSize);
+ return false;
+ }
+ index = 0;
+ data->histogramGPUForEach([&](ProfileData::HistogramEntry entry) {
+ if (hitMergeError) return;
+
+ protos::GraphicsStatsHistogramBucketProto* bucket;
+ if (creatingHistogram) {
+ bucket = proto->add_gpu_histogram();
+ bucket->set_render_millis(entry.renderTimeMs);
+ } else {
+ bucket = proto->mutable_gpu_histogram(index);
+ if (bucket->render_millis() != static_cast<int32_t>(entry.renderTimeMs)) {
+ ALOGW("GPU frame time mistmatch %d vs. %u", bucket->render_millis(),
+ entry.renderTimeMs);
+ hitMergeError = true;
+ return;
+ }
+ }
+ bucket->set_frame_count(bucket->frame_count() + entry.frameCount);
+ index++;
+ });
return !hitMergeError;
}
@@ -226,6 +258,22 @@
return 0;
}
+static int32_t findGPUPercentile(protos::GraphicsStatsProto* proto, int percentile) {
+ uint32_t totalGPUFrameCount = 0; // this is usually proto->summary().total_frames() - 3.
+ for (auto it = proto->gpu_histogram().rbegin(); it != proto->gpu_histogram().rend(); ++it) {
+ totalGPUFrameCount += it->frame_count();
+ }
+ int32_t pos = percentile * totalGPUFrameCount / 100;
+ int32_t remaining = totalGPUFrameCount - pos;
+ for (auto it = proto->gpu_histogram().rbegin(); it != proto->gpu_histogram().rend(); ++it) {
+ remaining -= it->frame_count();
+ if (remaining <= 0) {
+ return it->render_millis();
+ }
+ }
+ return 0;
+}
+
void dumpAsTextToFd(protos::GraphicsStatsProto* proto, int fd) {
// This isn't a full validation, just enough that we can deref at will
if (proto->package_name().empty() || !proto->has_summary()) {
@@ -255,6 +303,14 @@
for (const auto& it : proto->histogram()) {
dprintf(fd, " %dms=%d", it.render_millis(), it.frame_count());
}
+ dprintf(fd, "\n50th gpu percentile: %dms", findGPUPercentile(proto, 50));
+ dprintf(fd, "\n90th gpu percentile: %dms", findGPUPercentile(proto, 90));
+ dprintf(fd, "\n95th gpu percentile: %dms", findGPUPercentile(proto, 95));
+ dprintf(fd, "\n99th gpu percentile: %dms", findGPUPercentile(proto, 99));
+ dprintf(fd, "\nGPU HISTOGRAM:");
+ for (const auto& it : proto->gpu_histogram()) {
+ dprintf(fd, " %dms=%d", it.render_millis(), it.frame_count());
+ }
dprintf(fd, "\n");
}