blob: e15387ea81179957045b2fb2cdf1a956ca83a7da [file] [log] [blame]
//
// Copyright 2021 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CLKernelVk.cpp: Implements the class methods for CLKernelVk.
#include "common/PackedEnums.h"
#include "libANGLE/renderer/vulkan/CLContextVk.h"
#include "libANGLE/renderer/vulkan/CLDeviceVk.h"
#include "libANGLE/renderer/vulkan/CLKernelVk.h"
#include "libANGLE/renderer/vulkan/CLMemoryVk.h"
#include "libANGLE/renderer/vulkan/CLProgramVk.h"
#include "libANGLE/renderer/vulkan/vk_wrapper.h"
#include "libANGLE/CLBuffer.h"
#include "libANGLE/CLContext.h"
#include "libANGLE/CLKernel.h"
#include "libANGLE/CLProgram.h"
#include "libANGLE/cl_utils.h"
#include "spirv/unified1/NonSemanticClspvReflection.h"
namespace rx
{
// Constructor: caches the backend program/context for this kernel and registers
// the program's compute shader module with the shader-program helper.
CLKernelVk::CLKernelVk(const cl::Kernel &kernel,
                       std::string &name,
                       std::string &attributes,
                       CLKernelArguments &args)
    : CLKernelImpl(kernel),
      mProgram(&kernel.getProgram().getImpl<CLProgramVk>()),
      mContext(&kernel.getProgram().getContext().getImpl<CLContextVk>()),
      mName(name),
      mAttributes(attributes),
      mArgs(args),
      mPodBuffer(nullptr)
{
    // mProgram was resolved in the initializer list above; use it directly
    // instead of re-resolving the program through the front-end kernel object.
    mShaderProgramHelper.setShader(gl::ShaderType::Compute, mProgram->getShaderModule());
}
// Destructor: tears down the pipeline cache and shader helper, then drops the
// extra reference held on the POD-argument backing buffer (if one was created).
CLKernelVk::~CLKernelVk()
{
    mComputePipelineCache.destroy(mContext);
    mShaderProgramHelper.destroy(mContext->getRenderer());

    if (mPodBuffer)
    {
        // Assigning the freshly created buffer into mPodBuffer leaves it with a
        // refcount of 2, so one reference must be released here.
        mPodBuffer->release();
    }
}
// Builds the descriptor set layouts, push constant range, POD-argument buffer,
// and pipeline layout description for this kernel from the clspv reflection
// data, then initializes the descriptor pools.
angle::Result CLKernelVk::init()
{
    const CLProgramVk::DeviceProgramData *deviceProgramData =
        mProgram->getDeviceProgramData(mName.c_str());

    // Literal sampler handling: each literal sampler reported by clspv gets its
    // own SAMPLER binding in the LiteralSampler descriptor set.
    for (const ClspvLiteralSampler &literalSampler :
         deviceProgramData->reflectionData.literalSamplers)
    {
        mDescriptorSetLayoutDescs[DescriptorSetIndex::LiteralSampler].addBinding(
            literalSampler.binding, VK_DESCRIPTOR_TYPE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT,
            nullptr);
    }

    vk::DescriptorSetLayoutDesc &descriptorSetLayoutDesc =
        mDescriptorSetLayoutDescs[DescriptorSetIndex::KernelArguments];
    VkPushConstantRange pcRange = deviceProgramData->pushConstRange;
    size_t podBufferSize        = 0;
    bool podFound               = false;

    // Map each kernel argument's clspv reflection type to a Vulkan descriptor
    // type and add a binding for it in the KernelArguments set.
    for (const auto &arg : getArgs())
    {
        VkDescriptorType descType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
        switch (arg.type)
        {
            case NonSemanticClspvReflectionArgumentStorageBuffer:
                descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
                break;
            case NonSemanticClspvReflectionArgumentUniform:
            case NonSemanticClspvReflectionArgumentPointerUniform:
                descType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
                break;
            case NonSemanticClspvReflectionArgumentPodUniform:
            case NonSemanticClspvReflectionArgumentPodStorageBuffer:
            {
                // All POD arguments share one backing buffer; grow the required
                // size to cover the furthest-reaching POD argument.
                uint32_t newPodBufferSize = arg.podStorageBufferOffset + arg.podStorageBufferSize;
                podBufferSize = newPodBufferSize > podBufferSize ? newPodBufferSize : podBufferSize;
                // Only a single binding is added for the shared POD buffer, even
                // when several POD arguments exist.
                if (podFound)
                {
                    continue;
                }
                descType = arg.type == NonSemanticClspvReflectionArgumentPodUniform
                               ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
                               : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
                podFound = true;
                break;
            }
            case NonSemanticClspvReflectionArgumentPodPushConstant:
                // Get existing push constant range and see if we need to update
                // (extend) it to cover this argument; no descriptor binding needed.
                if (arg.pushConstOffset + arg.pushConstantSize > pcRange.offset + pcRange.size)
                {
                    pcRange.size = arg.pushConstOffset + arg.pushConstantSize - pcRange.offset;
                }
                continue;
            case NonSemanticClspvReflectionArgumentSampledImage:
                descType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
                break;
            case NonSemanticClspvReflectionArgumentStorageImage:
                descType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
                break;
            case NonSemanticClspvReflectionArgumentSampler:
                descType = VK_DESCRIPTOR_TYPE_SAMPLER;
                break;
            case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
                descType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
                break;
            case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
                descType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
                break;
            default:
                // Argument kinds with no descriptor (e.g. workgroup/local) are
                // handled elsewhere.
                continue;
        }
        if (descType != VK_DESCRIPTOR_TYPE_MAX_ENUM)
        {
            descriptorSetLayoutDesc.addBinding(arg.descriptorBinding, descType, 1,
                                               VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
        }
    }

    if (podBufferSize > 0)
    {
        // Create the shared backing buffer for POD arguments.
        // NOTE(review): the errcode_ret parameter is nullptr, so a failed buffer
        // creation would go undetected here — confirm createBuffer cannot fail
        // silently in this path.
        mPodBuffer =
            cl::MemoryPtr(cl::Buffer::Cast(this->mContext->getFrontendObject().createBuffer(
                nullptr, cl::MemFlags(CL_MEM_READ_ONLY), podBufferSize, nullptr)));
    }

    if (usesPrintf())
    {
        // Kernels that may call printf get a storage buffer binding for the
        // printf output in the Printf descriptor set.
        mDescriptorSetLayoutDescs[DescriptorSetIndex::Printf].addBinding(
            deviceProgramData->reflectionData.printfBufferStorage.binding,
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr);
    }

    // Get pipeline layout from cache (creates if missed)
    // A given kernel need not have resulted in use of all the descriptor sets. Unless the
    // graphicsPipelineLibrary extension is supported, the pipeline layout needs all the descriptor
    // set layouts to be valid. So set them up in the order of their occurrence.
    mPipelineLayoutDesc = {};
    for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
    {
        if (!mDescriptorSetLayoutDescs[index].empty())
        {
            mPipelineLayoutDesc.updateDescriptorSetLayout(index, mDescriptorSetLayoutDescs[index]);
        }
    }

    // push constant setup
    // push constant size must be multiple of 4
    pcRange.size = roundUpPow2(pcRange.size, 4u);
    mPodArgumentPushConstants.resize(pcRange.size);
    // push constant offset must be multiple of 4, round down to ensure this
    pcRange.offset = roundDownPow2(pcRange.offset, 4u);
    mPipelineLayoutDesc.updatePushConstantRange(pcRange.stageFlags, pcRange.offset, pcRange.size);

    // initialize the descriptor pools
    // descriptor pools are setup as per their indices
    return initializeDescriptorPools();
}
// Records the value for kernel argument `argIndex`. Depending on the argument's
// reflection type, the value is copied into the push-constant staging area,
// copied into the shared POD buffer, turned into a specialization constant, or
// stored as a handle for descriptor update at enqueue time.
angle::Result CLKernelVk::setArg(cl_uint argIndex, size_t argSize, const void *argValue)
{
    auto &arg = mArgs.at(argIndex);
    // Arguments the compiler optimized away are not "used"; setting them is a no-op.
    if (arg.used)
    {
        switch (arg.type)
        {
            case NonSemanticClspvReflectionArgumentPodPushConstant:
                ASSERT(mPodArgumentPushConstants.size() >=
                       arg.pushConstantSize + arg.pushConstOffset);
                arg.handle     = &mPodArgumentPushConstants[arg.pushConstOffset];
                arg.handleSize = argSize;
                if (argSize > 0 && argValue != nullptr)
                {
                    // Copy the contents since app is free to delete/reassign the contents after
                    memcpy(arg.handle, argValue, arg.handleSize);
                }
                break;
            case NonSemanticClspvReflectionArgumentPodUniform:
            case NonSemanticClspvReflectionArgumentPodStorageBuffer:
                // NOTE(review): the assert checks against arg.podUniformOffset while
                // the copy below uses arg.podStorageBufferOffset — confirm both
                // fields are populated consistently for both POD argument kinds.
                ASSERT(mPodBuffer->getSize() >= argSize + arg.podUniformOffset);
                if (argSize > 0 && argValue != nullptr)
                {
                    ANGLE_TRY(mPodBuffer->getImpl<CLBufferVk>().copyFrom(
                        argValue, arg.podStorageBufferOffset, argSize));
                }
                break;
            case NonSemanticClspvReflectionArgumentWorkgroup:
                // Presumably a local-memory argument: only its size matters, recorded
                // as a specialization constant in units of the workgroup element size.
                ASSERT(arg.workgroupSize != 0);
                mSpecConstants.push_back(
                    KernelSpecConstant{.ID = arg.workgroupSpecId,
                                       .data = static_cast<uint32_t>(argSize / arg.workgroupSize)});
                break;
            case NonSemanticClspvReflectionArgumentUniform:
            case NonSemanticClspvReflectionArgumentStorageBuffer:
            case NonSemanticClspvReflectionArgumentStorageImage:
            case NonSemanticClspvReflectionArgumentSampledImage:
            case NonSemanticClspvReflectionArgumentUniformTexelBuffer:
            case NonSemanticClspvReflectionArgumentStorageTexelBuffer:
                // Memory-object arguments: argValue points at a cl_mem; store the
                // dereferenced handle for later descriptor-set updates.
                ASSERT(argSize == sizeof(cl_mem *));
                arg.handle     = *static_cast<const cl_mem *>(argValue);
                arg.handleSize = argSize;
                break;
            default:
                // Just store ptr and size (if we end up here)
                arg.handle     = const_cast<void *>(argValue);
                arg.handleSize = argSize;
                break;
        }
    }
    return angle::Result::Continue;
}
// Fills the front-end CLKernelImpl::Info structure: name, attributes, per-argument
// reflection info, and per-device work-group properties.
angle::Result CLKernelVk::createInfo(CLKernelImpl::Info *info) const
{
    info->functionName = mName;
    info->attributes   = mAttributes;
    info->numArgs      = static_cast<cl_uint>(mArgs.size());

    // Mirror the reflection data for every argument into the Info struct.
    for (const auto &arg : mArgs)
    {
        ArgInfo argInfo;
        argInfo.name             = arg.info.name;
        argInfo.typeName         = arg.info.typeName;
        argInfo.accessQualifier  = arg.info.accessQualifier;
        argInfo.addressQualifier = arg.info.addressQualifier;
        argInfo.typeQualifier    = arg.info.typeQualifier;
        info->args.push_back(std::move(argInfo));
    }

    const auto &ctx         = mKernel.getProgram().getContext();
    const size_t numDevices = ctx.getDevices().size();
    info->workGroups.resize(numDevices);

    // Populate work-group info per device in the context. Devices for which this
    // program was not built are skipped.
    for (size_t deviceIndex = 0u; deviceIndex < numDevices; ++deviceIndex)
    {
        auto &workGroup    = info->workGroups[deviceIndex];
        const auto &device = ctx.getDevices()[deviceIndex];
        const auto deviceVk = &device->getImpl<CLDeviceVk>();
        const CLProgramVk::DeviceProgramData *deviceProgramData =
            mProgram->getDeviceProgramData(device->getNative());
        if (deviceProgramData == nullptr)
        {
            continue;
        }

        // TODO: http://anglebug.com/42267005
        ANGLE_TRY(
            deviceVk->getInfoSizeT(cl::DeviceInfo::MaxWorkGroupSize, &workGroup.workGroupSize));

        // TODO: http://anglebug.com/42267004
        workGroup.privateMemSize = 0;
        workGroup.localMemSize   = 0;

        workGroup.prefWorkGroupSizeMultiple = 16u;
        workGroup.globalWorkSize            = {0, 0, 0};

        // Report the reqd_work_group_size from compile-time reflection when present.
        const auto &compileSizes = deviceProgramData->reflectionData.kernelCompileWorkgroupSize;
        if (compileSizes.contains(mName))
        {
            const auto &sizes              = compileSizes.at(mName);
            workGroup.compileWorkGroupSize = {sizes[0], sizes[1], sizes[2]};
        }
        else
        {
            workGroup.compileWorkGroupSize = {0, 0, 0};
        }
    }

    return angle::Result::Continue;
}
// Resolves (or creates on cache miss) the Vulkan pipeline layout for this kernel
// from the context-wide pipeline layout cache.
angle::Result CLKernelVk::initPipelineLayout()
{
    return mContext->getPipelineLayoutCache()->getPipelineLayout(
        mContext, mPipelineLayoutDesc, mDescriptorSetLayouts, &mPipelineLayout);
}
// Builds the specialization-constant data for the given NDRange and returns a
// compute pipeline for this kernel, creating one on pipeline-cache miss.
angle::Result CLKernelVk::getOrCreateComputePipeline(vk::PipelineCacheAccess *pipelineCache,
                                                     const cl::NDRange &ndrange,
                                                     const cl::Device &device,
                                                     vk::PipelineHelper **pipelineOut)
{
    const CLProgramVk::DeviceProgramData *devProgramData =
        getProgram()->getDeviceProgramData(device.getNative());
    ASSERT(devProgramData != nullptr);

    // Populate program specialization constants (if any). Each constant occupies
    // one uint32_t; entries are packed contiguously in the order the reflection
    // data reports them, with constantDataOffset tracking the running offset.
    uint32_t constantDataOffset = 0;
    std::vector<uint32_t> specConstantData;
    std::vector<VkSpecializationMapEntry> mapEntries;
    for (const auto specConstantUsed : devProgramData->reflectionData.specConstantsUsed)
    {
        switch (specConstantUsed)
        {
            case SpecConstantType::WorkDimension:
                specConstantData.push_back(ndrange.workDimensions);
                break;
            case SpecConstantType::WorkgroupSizeX:
                specConstantData.push_back(ndrange.localWorkSize[0]);
                break;
            case SpecConstantType::WorkgroupSizeY:
                specConstantData.push_back(ndrange.localWorkSize[1]);
                break;
            case SpecConstantType::WorkgroupSizeZ:
                specConstantData.push_back(ndrange.localWorkSize[2]);
                break;
            case SpecConstantType::GlobalOffsetX:
                specConstantData.push_back(ndrange.globalWorkOffset[0]);
                break;
            case SpecConstantType::GlobalOffsetY:
                specConstantData.push_back(ndrange.globalWorkOffset[1]);
                break;
            case SpecConstantType::GlobalOffsetZ:
                specConstantData.push_back(ndrange.globalWorkOffset[2]);
                break;
            default:
                // Unknown spec constant kind: skip without emitting a map entry.
                UNIMPLEMENTED();
                continue;
        }
        mapEntries.push_back(VkSpecializationMapEntry{
            .constantID = devProgramData->reflectionData.specConstantIDs[specConstantUsed],
            .offset     = constantDataOffset,
            .size       = sizeof(uint32_t)});
        constantDataOffset += sizeof(uint32_t);
    }

    // Populate kernel specialization constants (if any) — these come from
    // workgroup (local-memory) arguments recorded in setArg().
    for (const auto &specConstant : mSpecConstants)
    {
        specConstantData.push_back(specConstant.data);
        mapEntries.push_back(VkSpecializationMapEntry{
            .constantID = specConstant.ID, .offset = constantDataOffset, .size = sizeof(uint32_t)});
        constantDataOffset += sizeof(uint32_t);
    }

    VkSpecializationInfo computeSpecializationInfo{
        .mapEntryCount = static_cast<uint32_t>(mapEntries.size()),
        .pMapEntries   = mapEntries.data(),
        .dataSize      = specConstantData.size() * sizeof(uint32_t),
        .pData         = specConstantData.data(),
    };

    // Now get or create (on compute pipeline cache miss) compute pipeline and return it
    vk::ComputePipelineOptions options = vk::GetComputePipelineOptions(
        vk::PipelineRobustness::NonRobust, vk::PipelineProtectedAccess::Unprotected);
    return mShaderProgramHelper.getOrCreateComputePipeline(
        mContext, &mComputePipelineCache, pipelineCache, getPipelineLayout(), options,
        PipelineSource::Draw, pipelineOut, mName.c_str(), &computeSpecializationInfo);
}
bool CLKernelVk::usesPrintf() const
{
return mProgram->getDeviceProgramData(mName.c_str())->getKernelFlags(mName) &
NonSemanticClspvReflectionMayUsePrintf;
}
// Binds a cached descriptor pool for every descriptor set index this kernel
// actually uses; indices with an empty layout description are skipped.
angle::Result CLKernelVk::initializeDescriptorPools()
{
    for (DescriptorSetIndex index : angle::AllEnums<DescriptorSetIndex>())
    {
        if (mDescriptorSetLayoutDescs[index].empty())
        {
            continue;
        }
        ANGLE_TRY(mContext->getMetaDescriptorPool().bindCachedDescriptorPool(
            mContext, mDescriptorSetLayoutDescs[index], 1, mContext->getDescriptorSetLayoutCache(),
            &mDynamicDescriptorPools[index]));
    }
    return angle::Result::Continue;
}
// Ensures mDescriptorSets[index] holds a descriptor set that is safe to update:
// reuses the existing set when it is not referenced by the pending command
// buffer, otherwise allocates a fresh one from the dynamic pool and retains it
// on the command buffer.
angle::Result CLKernelVk::allocateDescriptorSet(
    DescriptorSetIndex index,
    angle::EnumIterator<DescriptorSetIndex> layoutIndex,
    vk::OutsideRenderPassCommandBufferHelper *computePassCommands)
{
    if (mDescriptorSets[index] && mDescriptorSets[index]->valid())
    {
        if (mDescriptorSets[index]->usedByCommandBuffer(computePassCommands->getQueueSerial()))
        {
            // The set is already referenced by the pending command buffer and must
            // not be rewritten; drop it so a fresh one is allocated below.
            mDescriptorSets[index].reset();
        }
        else
        {
            // Valid and not in use by the current commands — reuse as-is.
            return angle::Result::Continue;
        }
    }
    // NOTE(review): if the pool is not valid, this returns Continue with
    // mDescriptorSets[index] possibly reset — confirm callers handle that case.
    if (mDynamicDescriptorPools[index]->valid())
    {
        ANGLE_TRY(mDynamicDescriptorPools[index]->allocateDescriptorSet(
            mContext, *mDescriptorSetLayouts[*layoutIndex], &mDescriptorSets[index]));
        // Keep the set alive for the lifetime of the recording command buffer.
        computePassCommands->retainResource(mDescriptorSets[index].get());
    }
    return angle::Result::Continue;
}
} // namespace rx