| // Copyright 2022 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "AstcTexture.h" |
| |
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstring>
#include <limits>
#include <optional>
#include <vector>
| |
| #include "aemu/base/HealthMonitor.h" |
| #include "host-common/logging.h" |
| #include "vulkan/vk_util.h" |
| |
| namespace gfxstream { |
| namespace vk { |
| namespace { |
| |
| using std::chrono::milliseconds; |
| |
| // Print stats each time we decompress this many pixels: |
| constexpr uint64_t kProcessedPixelsLogInterval = 10'000'000; |
| |
| std::atomic<uint64_t> pixels_processed = 0; |
| std::atomic<uint64_t> ms_elapsed = 0; |
| std::atomic<int64_t> bytes_used = 0; |
| |
| uint32_t mipmapSize(uint32_t size, uint32_t mipLevel) { |
| return std::max<uint32_t>(size >> mipLevel, 1); |
| } |
| |
| bool isRegionValid(const VkBufferImageCopy& region, uint32_t width, uint32_t height) { |
| // TODO(gregschlom) deal with those cases. See details at: |
| // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/chap20.html#copies-buffers-images-addressing |
| // https://stackoverflow.com/questions/46501832/vulkan-vkbufferimagecopy-for-partial-transfer |
| |
| if (region.bufferRowLength != 0 || region.bufferImageHeight != 0) { |
| WARN("ASTC CPU decompression skipped: non-packed buffer"); |
| return false; |
| } |
| if (region.imageOffset.x != 0 || region.imageOffset.y != 0) { |
| WARN("ASTC CPU decompression skipped: imageOffset is non-zero"); |
| return false; |
| } |
| if (region.imageExtent.width != width || region.imageExtent.height != height) { |
| WARN("ASTC CPU decompression skipped: imageExtent is less than the entire image"); |
| return false; |
| } |
| return true; |
| } |
| |
| } // namespace |
| |
| AstcTexture::AstcTexture(VulkanDispatch* vk, VkDevice device, VkPhysicalDevice physicalDevice, |
| VkExtent3D imgSize, uint32_t blockWidth, uint32_t blockHeight, |
| AstcCpuDecompressor* decompressor) |
| : mVk(vk), |
| mDevice(device), |
| mPhysicalDevice(physicalDevice), |
| mImgSize(imgSize), |
| mBlockWidth(blockWidth), |
| mBlockHeight(blockHeight), |
| mDecompressor(decompressor) {} |
| |
| AstcTexture::~AstcTexture() { destroyVkBuffer(); } |
| |
| bool AstcTexture::canDecompressOnCpu() const { return mDecompressor->available(); } |
| |
| uint8_t* AstcTexture::createVkBufferAndMapMemory(size_t bufferSize) { |
| VkResult res; |
| mBufferSize = bufferSize; // Save the buffer size, for statistics purpose only |
| bytes_used += bufferSize; |
| |
| if (mDecompBuffer || mDecompBufferMemory) { |
| WARN("ASTC CPU decompression failed: tried to decompress same image more than once."); |
| return nullptr; |
| } |
| |
| VkBufferCreateInfo bufferInfo = { |
| .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| .size = bufferSize, |
| .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, |
| .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| }; |
| res = mVk->vkCreateBuffer(mDevice, &bufferInfo, nullptr, &mDecompBuffer); |
| if (res != VK_SUCCESS) { |
| WARN("ASTC CPU decompression: vkCreateBuffer failed: %d", res); |
| mDecompBuffer = VK_NULL_HANDLE; |
| return nullptr; |
| } |
| |
| VkMemoryRequirements memRequirements; |
| mVk->vkGetBufferMemoryRequirements(mDevice, mDecompBuffer, &memRequirements); |
| |
| std::optional<uint32_t> memIndex = vk_util::findMemoryType( |
| mVk, mPhysicalDevice, memRequirements.memoryTypeBits, |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT); |
| if (!memIndex) { |
| // Do it again, but without VK_MEMORY_PROPERTY_HOST_CACHED_BIT this time |
| memIndex = vk_util::findMemoryType( |
| mVk, mPhysicalDevice, memRequirements.memoryTypeBits, |
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); |
| } |
| if (!memIndex) { |
| WARN("ASTC CPU decompression: no suitable memory type to decompress the image"); |
| return nullptr; |
| } |
| |
| VkMemoryAllocateInfo allocInfo = { |
| .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| .allocationSize = memRequirements.size, |
| .memoryTypeIndex = *memIndex, |
| }; |
| res = mVk->vkAllocateMemory(mDevice, &allocInfo, nullptr, &mDecompBufferMemory); |
| if (res != VK_SUCCESS) { |
| WARN("ASTC CPU decompression: vkAllocateMemory failed: %d", res); |
| mDecompBufferMemory = VK_NULL_HANDLE; |
| return nullptr; |
| } |
| |
| res = mVk->vkBindBufferMemory(mDevice, mDecompBuffer, mDecompBufferMemory, 0); |
| if (res != VK_SUCCESS) { |
| WARN("ASTC CPU decompression: vkBindBufferMemory failed: %d", res); |
| return nullptr; |
| } |
| |
| uint8_t* decompData; |
| res = mVk->vkMapMemory(mDevice, mDecompBufferMemory, 0, bufferSize, 0, (void**)&decompData); |
| if (res != VK_SUCCESS) { |
| WARN("ASTC CPU decompression: vkMapMemory failed: %d", res); |
| return nullptr; |
| } |
| |
| return decompData; |
| } |
| |
| void AstcTexture::destroyVkBuffer() { |
| bytes_used -= mBufferSize; |
| if (mVk && mDevice) { |
| mVk->vkDestroyBuffer(mDevice, mDecompBuffer, nullptr); |
| mVk->vkFreeMemory(mDevice, mDecompBufferMemory, nullptr); |
| mDecompBuffer = VK_NULL_HANDLE; |
| mDecompBufferMemory = VK_NULL_HANDLE; |
| } |
| } |
| |
| template<typename T> |
| void AstcTexture::on_vkCmdCopyBufferToImageImpl(VkCommandBuffer commandBuffer, uint8_t* srcAstcData, |
| size_t astcDataSize, VkImage dstImage, |
| VkImageLayout dstImageLayout, uint32_t regionCount, |
| const T* pRegions, |
| const VkDecoderContext& context) { |
| auto watchdog = |
| WATCHDOG_BUILDER(context.healthMonitor, "AstcTexture::on_vkCmdCopyBufferToImageImpl").build(); |
| auto start_time = std::chrono::steady_clock::now(); |
| mSuccess = false; |
| size_t decompSize = 0; // How many bytes we need to hold the decompressed data |
| |
| // Holds extra data about the region |
| struct RegionInfo { |
| uint32_t width; // actual width (ie: mipmap width) |
| uint32_t height; // actual height (ie: mipmap height) |
| uint32_t compressedSize; // size of ASTC data for that region |
| }; |
| |
| std::vector<RegionInfo> regionInfos; |
| regionInfos.reserve(regionCount); |
| |
| // Make a copy of the regions and update the buffer offset of each to reflect the |
| // correct location of the decompressed data |
| std::vector<VkBufferImageCopy> decompRegions(regionCount); |
| for (size_t i = 0; i < regionCount; ++i) { |
| decompRegions[i] = VkBufferImageCopy { |
| pRegions[i].bufferOffset, |
| pRegions[i].bufferRowLength, |
| pRegions[i].bufferImageHeight, |
| pRegions[i].imageSubresource, |
| pRegions[i].imageOffset, |
| pRegions[i].imageExtent |
| }; |
| } |
| for (auto& decompRegion : decompRegions) { |
| const uint32_t mipLevel = decompRegion.imageSubresource.mipLevel; |
| const uint32_t width = mipmapSize(mImgSize.width, mipLevel); |
| const uint32_t height = mipmapSize(mImgSize.height, mipLevel); |
| const uint32_t numAstcBlocks = ((width + mBlockWidth - 1) / mBlockWidth) * |
| ((height + mBlockHeight - 1) / mBlockHeight); |
| const uint32_t compressedSize = numAstcBlocks * 16; |
| // We haven't updated decompRegion.bufferOffset yet, so it's still the _compressed_ offset. |
| const uint32_t compressedDataOffset = decompRegion.bufferOffset; |
| |
| // Do all the precondition checks |
| if (!isRegionValid(decompRegion, width, height)) return; |
| if (compressedDataOffset + compressedSize > astcDataSize) { |
| WARN("ASTC CPU decompression: data out of bounds. Offset: %llu, Size: %llu, Total %llu", |
| compressedDataOffset, compressedSize, astcDataSize); |
| return; |
| } |
| |
| decompRegion.bufferOffset = decompSize; |
| decompSize += width * height * 4; |
| regionInfos.push_back({width, height, compressedSize}); |
| } |
| |
| // Create a new VkBuffer to hold the decompressed data |
| uint8_t* decompData = createVkBufferAndMapMemory(decompSize); |
| if (!decompData) { |
| destroyVkBuffer(); // The destructor would have done it anyway, but may as well do it early |
| return; |
| } |
| |
| // Decompress each region |
| for (int i = 0; i < regionCount; i++) { |
| const auto& compRegion = pRegions[i]; |
| const auto& decompRegion = decompRegions[i]; |
| const auto& regionInfo = regionInfos[i]; |
| |
| int32_t status = mDecompressor->decompress( |
| regionInfo.width, regionInfo.height, mBlockWidth, mBlockHeight, |
| srcAstcData + compRegion.bufferOffset, regionInfo.compressedSize, |
| decompData + decompRegion.bufferOffset); |
| |
| if (status != 0) { |
| WARN("ASTC CPU decompression failed: %s.", mDecompressor->getStatusString(status)); |
| mVk->vkUnmapMemory(mDevice, mDecompBufferMemory); |
| destroyVkBuffer(); |
| return; |
| } |
| } |
| |
| mVk->vkUnmapMemory(mDevice, mDecompBufferMemory); |
| |
| // Finally, actually copy the buffer to the image |
| mVk->vkCmdCopyBufferToImage(commandBuffer, mDecompBuffer, dstImage, dstImageLayout, |
| decompRegions.size(), decompRegions.data()); |
| |
| mSuccess = true; |
| auto end_time = std::chrono::steady_clock::now(); |
| |
| // Compute stats |
| pixels_processed += decompSize / 4; |
| ms_elapsed += std::chrono::duration_cast<milliseconds>(end_time - start_time).count(); |
| |
| uint64_t total_pixels = pixels_processed.load(); |
| uint64_t total_time = ms_elapsed.load(); |
| |
| if (total_pixels >= kProcessedPixelsLogInterval && total_time > 0) { |
| pixels_processed.store(0); |
| ms_elapsed.store(0); |
| INFO("ASTC CPU decompression: %.2f Mpix in %.2f seconds (%.2f Mpix/s). Total mem: %.2f MB", |
| total_pixels / 1'000'000.0, total_time / 1000.0, |
| (float)total_pixels / total_time / 1000.0, bytes_used / 1000000.0); |
| } |
| } |
| |
| void AstcTexture::on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint8_t* srcAstcData, |
| size_t astcDataSize, VkImage dstImage, |
| VkImageLayout dstImageLayout, uint32_t regionCount, |
| const VkBufferImageCopy* pRegions, |
| const VkDecoderContext& context) { |
| on_vkCmdCopyBufferToImageImpl(commandBuffer, srcAstcData, astcDataSize, dstImage, dstImageLayout, regionCount, pRegions, context); |
| } |
| |
| void AstcTexture::on_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer, uint8_t* srcAstcData, |
| size_t astcDataSize, const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo, |
| const VkDecoderContext& context) { |
| on_vkCmdCopyBufferToImageImpl(commandBuffer, |
| srcAstcData, |
| astcDataSize, |
| pCopyBufferToImageInfo->dstImage, |
| pCopyBufferToImageInfo->dstImageLayout, |
| pCopyBufferToImageInfo->regionCount, |
| pCopyBufferToImageInfo->pRegions, |
| context); |
| } |
| |
| } // namespace vk |
| } // namespace gfxstream |