| // |
| // Copyright 2021 The ANGLE Project Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // CLProgramVk.cpp: Implements the class methods for CLProgramVk. |
| |
| #include "libANGLE/renderer/vulkan/CLProgramVk.h" |
| #include "libANGLE/renderer/vulkan/CLContextVk.h" |
| #include "libANGLE/renderer/vulkan/clspv_utils.h" |
| #include "libANGLE/renderer/vulkan/vk_cache_utils.h" |
| #include "libANGLE/renderer/vulkan/vk_helpers.h" |
| |
| #include "libANGLE/CLContext.h" |
| #include "libANGLE/CLKernel.h" |
| #include "libANGLE/CLProgram.h" |
| #include "libANGLE/cl_utils.h" |
| |
| #include "common/log_utils.h" |
| #include "common/string_utils.h" |
| #include "common/system_utils.h" |
| |
| #include "clspv/Compiler.h" |
| |
| #include "spirv/unified1/NonSemanticClspvReflection.h" |
| #include "spirv/unified1/spirv.hpp" |
| |
| #include "spirv-tools/libspirv.hpp" |
| #include "spirv-tools/optimizer.hpp" |
| |
| namespace rx |
| { |
| |
| namespace |
| { |
// Compile-time flag: true only when ANGLE is built with ANGLE_ENABLE_ASSERTS defined.
constexpr bool kAngleDebug =
#if defined(ANGLE_ENABLE_ASSERTS)
    true;
#else
    false;
#endif
| |
| // Used by SPIRV-Tools to parse reflection info |
| spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData, |
| const spv_parsed_instruction_t &spvInstr) |
| { |
| // Parse spir-v opcodes |
| switch (spvInstr.opcode) |
| { |
| // --- Clspv specific parsing for below cases --- |
| case spv::OpExtInst: |
| { |
| if (spvInstr.ext_inst_type != SPV_EXT_INST_TYPE_NONSEMANTIC_CLSPVREFLECTION) |
| { |
| break; |
| } |
| switch (spvInstr.words[4]) |
| { |
| case NonSemanticClspvReflectionKernel: |
| { |
| // Extract kernel name and args - add to kernel args map |
| std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]]; |
| uint32_t numArgs = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| reflectionData.kernelArgsMap[functionName] = CLKernelArguments(); |
| reflectionData.kernelArgsMap[functionName].resize(numArgs); |
| |
| // Store kernel flags and attributes |
| reflectionData.kernelFlags[functionName] = |
| reflectionData.spvIntLookup[spvInstr.words[8]]; |
| reflectionData.kernelAttributes[functionName] = |
| reflectionData.spvStrLookup[spvInstr.words[9]]; |
| |
| // Save kernel name to reflection table for later use/lookup in parser routine |
| reflectionData.kernelIDs.insert(spvInstr.words[2]); |
| reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName); |
| |
| // If we already parsed some args ahead of time, populate them now |
| if (reflectionData.kernelArgMap.contains(functionName)) |
| { |
| for (const auto &arg : reflectionData.kernelArgMap) |
| { |
| uint32_t ordinal = arg.second.ordinal; |
| reflectionData.kernelArgsMap[functionName].at(ordinal) = |
| std::move(arg.second); |
| } |
| } |
| break; |
| } |
| case NonSemanticClspvReflectionArgumentInfo: |
| { |
| CLKernelVk::ArgInfo kernelArgInfo; |
| kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]]; |
| // If instruction has more than 5 instruction operands (minus instruction |
| // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as |
| // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg |
| // qualifier operands. |
| // |
| // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ] |
| if (spvInstr.num_operands > 5) |
| { |
| kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]]; |
| kernelArgInfo.addressQualifier = |
| reflectionData.spvIntLookup[spvInstr.words[7]]; |
| kernelArgInfo.accessQualifier = |
| reflectionData.spvIntLookup[spvInstr.words[8]]; |
| kernelArgInfo.typeQualifier = |
| reflectionData.spvIntLookup[spvInstr.words[9]]; |
| } |
| // Store kern arg for later lookup |
| reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo); |
| break; |
| } |
| case NonSemanticClspvReflectionArgumentPodUniform: |
| case NonSemanticClspvReflectionArgumentPointerUniform: |
| case NonSemanticClspvReflectionArgumentPodStorageBuffer: |
| { |
| CLKernelArgument kernelArg; |
| if (spvInstr.num_operands == 11) |
| { |
| const CLKernelVk::ArgInfo &kernelArgInfo = |
| reflectionData.kernelArgInfos[spvInstr.words[11]]; |
| kernelArg.info.name = kernelArgInfo.name; |
| kernelArg.info.typeName = kernelArgInfo.typeName; |
| kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier; |
| kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier; |
| kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier; |
| } |
| kernelArg.type = spvInstr.words[4]; |
| kernelArg.used = true; |
| kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]]; |
| kernelArg.op5 = reflectionData.spvIntLookup[spvInstr.words[9]]; |
| kernelArg.op6 = reflectionData.spvIntLookup[spvInstr.words[10]]; |
| |
| if (reflectionData.kernelIDs.contains(spvInstr.words[5])) |
| { |
| CLKernelArguments &kernelArgs = |
| reflectionData |
| .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]]; |
| kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg); |
| } |
| else |
| { |
| // Reflection kernel not yet parsed, place in temp storage for now |
| reflectionData |
| .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] = |
| std::move(kernelArg); |
| } |
| |
| break; |
| } |
| case NonSemanticClspvReflectionArgumentUniform: |
| case NonSemanticClspvReflectionArgumentWorkgroup: |
| case NonSemanticClspvReflectionArgumentSampler: |
| case NonSemanticClspvReflectionArgumentStorageImage: |
| case NonSemanticClspvReflectionArgumentSampledImage: |
| case NonSemanticClspvReflectionArgumentStorageBuffer: |
| case NonSemanticClspvReflectionArgumentStorageTexelBuffer: |
| case NonSemanticClspvReflectionArgumentUniformTexelBuffer: |
| case NonSemanticClspvReflectionArgumentPodPushConstant: |
| case NonSemanticClspvReflectionArgumentPointerPushConstant: |
| { |
| CLKernelArgument kernelArg; |
| if (spvInstr.num_operands == 9) |
| { |
| const CLKernelVk::ArgInfo &kernelArgInfo = |
| reflectionData.kernelArgInfos[spvInstr.words[9]]; |
| kernelArg.info.name = kernelArgInfo.name; |
| kernelArg.info.typeName = kernelArgInfo.typeName; |
| kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier; |
| kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier; |
| kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier; |
| } |
| |
| kernelArg.type = spvInstr.words[4]; |
| kernelArg.used = true; |
| kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]]; |
| |
| if (reflectionData.kernelIDs.contains(spvInstr.words[5])) |
| { |
| CLKernelArguments &kernelArgs = |
| reflectionData |
| .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]]; |
| kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg); |
| } |
| else |
| { |
| // Reflection kernel not yet parsed, place in temp storage for now |
| reflectionData |
| .kernelArgMap[reflectionData.spvStrLookup[spvInstr.words[5]]] = |
| std::move(kernelArg); |
| } |
| break; |
| } |
| case NonSemanticClspvReflectionPushConstantGlobalSize: |
| case NonSemanticClspvReflectionPushConstantGlobalOffset: |
| case NonSemanticClspvReflectionPushConstantRegionOffset: |
| case NonSemanticClspvReflectionPushConstantNumWorkgroups: |
| case NonSemanticClspvReflectionPushConstantRegionGroupOffset: |
| case NonSemanticClspvReflectionPushConstantEnqueuedLocalSize: |
| { |
| uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]]; |
| uint32_t size = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| reflectionData.pushConstants[spvInstr.words[4]] = { |
| .stageFlags = 0, .offset = offset, .size = size}; |
| break; |
| } |
| case NonSemanticClspvReflectionSpecConstantWorkgroupSize: |
| { |
| reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeX] = |
| reflectionData.spvIntLookup[spvInstr.words[5]]; |
| reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeY] = |
| reflectionData.spvIntLookup[spvInstr.words[6]]; |
| reflectionData.specConstantIDs[SpecConstantType::WorkgroupSizeZ] = |
| reflectionData.spvIntLookup[spvInstr.words[7]]; |
| reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeX] = true; |
| reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeY] = true; |
| reflectionData.specConstantsUsed[SpecConstantType::WorkgroupSizeZ] = true; |
| break; |
| } |
| case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize: |
| { |
| reflectionData.kernelCompileWorkgroupSize |
| [reflectionData.spvStrLookup[spvInstr.words[5]]] = { |
| reflectionData.spvIntLookup[spvInstr.words[6]], |
| reflectionData.spvIntLookup[spvInstr.words[7]], |
| reflectionData.spvIntLookup[spvInstr.words[8]]}; |
| break; |
| } |
| case NonSemanticClspvReflectionSpecConstantWorkDim: |
| { |
| reflectionData.specConstantIDs[SpecConstantType::WorkDimension] = |
| reflectionData.spvIntLookup[spvInstr.words[5]]; |
| reflectionData.specConstantsUsed[SpecConstantType::WorkDimension] = true; |
| break; |
| } |
| case NonSemanticClspvReflectionSpecConstantGlobalOffset: |
| reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetX] = |
| reflectionData.spvIntLookup[spvInstr.words[5]]; |
| reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetY] = |
| reflectionData.spvIntLookup[spvInstr.words[6]]; |
| reflectionData.specConstantIDs[SpecConstantType::GlobalOffsetZ] = |
| reflectionData.spvIntLookup[spvInstr.words[7]]; |
| reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetX] = true; |
| reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetY] = true; |
| reflectionData.specConstantsUsed[SpecConstantType::GlobalOffsetZ] = true; |
| break; |
| case NonSemanticClspvReflectionPrintfInfo: |
| { |
| // Info on the format string used in the builtin printf call in kernel |
| uint32_t printfID = reflectionData.spvIntLookup[spvInstr.words[5]]; |
| std::string formatString = reflectionData.spvStrLookup[spvInstr.words[6]]; |
| reflectionData.printfInfoMap[printfID].id = printfID; |
| reflectionData.printfInfoMap[printfID].formatSpecifier = formatString; |
| for (int i = 6; i < spvInstr.num_operands; i++) |
| { |
| uint16_t offset = spvInstr.operands[i].offset; |
| size_t size = reflectionData.spvIntLookup[spvInstr.words[offset]]; |
| reflectionData.printfInfoMap[printfID].argSizes.push_back( |
| static_cast<uint32_t>(size)); |
| } |
| |
| break; |
| } |
| case NonSemanticClspvReflectionPrintfBufferStorageBuffer: |
| { |
| // Info about the printf storage buffer that contains the formatted content |
| uint32_t set = reflectionData.spvIntLookup[spvInstr.words[5]]; |
| uint32_t binding = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| uint32_t size = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| reflectionData.printfBufferStorage = {set, binding, 0, size}; |
| break; |
| } |
| case NonSemanticClspvReflectionPrintfBufferPointerPushConstant: |
| { |
| ERR() << "Shouldn't be here. Support of printf builtin function is enabled " |
| "through " |
| "PrintfBufferStorageBuffer. Check optins passed down to clspv"; |
| UNREACHABLE(); |
| return SPV_UNSUPPORTED; |
| } |
| case NonSemanticClspvReflectionNormalizedSamplerMaskPushConstant: |
| case NonSemanticClspvReflectionImageArgumentInfoChannelOrderPushConstant: |
| case NonSemanticClspvReflectionImageArgumentInfoChannelDataTypePushConstant: |
| { |
| uint32_t ordinal = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| uint32_t size = reflectionData.spvIntLookup[spvInstr.words[8]]; |
| VkPushConstantRange pcRange = {.stageFlags = 0, .offset = offset, .size = size}; |
| reflectionData.imagePushConstants[spvInstr.words[4]].push_back( |
| {.pcRange = pcRange, .ordinal = ordinal}); |
| break; |
| } |
| case NonSemanticClspvReflectionLiteralSampler: |
| { |
| uint32_t descriptorSet = reflectionData.spvIntLookup[spvInstr.words[5]]; |
| ASSERT(descriptorSet < static_cast<uint32_t>(DescriptorSetIndex::EnumCount)); |
| uint32_t binding = reflectionData.spvIntLookup[spvInstr.words[6]]; |
| uint32_t mask = reflectionData.spvIntLookup[spvInstr.words[7]]; |
| cl_bool normalizedCoords = clspv_cl::IsNormalizedCoords(mask); |
| cl::AddressingMode addressingMode = clspv_cl::GetAddressingMode(mask); |
| cl::FilterMode filterMode = clspv_cl::GetFilterMode(mask); |
| reflectionData.literalSamplers.push_back({.descriptorSet = descriptorSet, |
| .binding = binding, |
| .normalizedCoords = normalizedCoords, |
| .addressingMode = addressingMode, |
| .filterMode = filterMode}); |
| break; |
| } |
| default: |
| break; |
| } |
| break; |
| } |
| // --- Regular SPIR-V opcode parsing for below cases --- |
| case spv::OpString: |
| { |
| reflectionData.spvStrLookup[spvInstr.words[1]] = |
| reinterpret_cast<const char *>(&spvInstr.words[2]); |
| break; |
| } |
| case spv::OpConstant: |
| { |
| reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3]; |
| break; |
| } |
| default: |
| break; |
| } |
| return SPV_SUCCESS; |
| } |
| |
| std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens, |
| CLProgramVk::BuildType buildType) |
| { |
| std::string processedOptions; |
| |
| // Need to remove/replace options that are not 1-1 mapped to clspv |
| for (const std::string &optionToken : optionTokens) |
| { |
| if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK) |
| { |
| processedOptions += " --output-format=bc"; |
| continue; |
| } |
| processedOptions += " " + optionToken; |
| } |
| |
| switch (buildType) |
| { |
| case CLProgramVk::BuildType::COMPILE: |
| processedOptions += " --output-format=bc"; |
| break; |
| case CLProgramVk::BuildType::LINK: |
| processedOptions += " -x ir"; |
| break; |
| default: |
| break; |
| } |
| |
| return processedOptions; |
| } |
| |
| } // namespace |
| |
| void CLAsyncBuildTask::operator()() |
| { |
| ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)"); |
| CLProgramVk::ScopedProgramCallback spc(mNotify); |
| if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType, |
| mLinkProgramsList)) |
| { |
| ERR() << "Async build failed for program (" << mProgramVk |
| << ")! Check the build status or build log for details."; |
| } |
| } |
| |
| CLProgramVk::CLProgramVk(const cl::Program &program) |
| : CLProgramImpl(program), |
| mContext(&program.getContext().getImpl<CLContextVk>()), |
| mAsyncBuildEvent(std::make_shared<angle::WaitableEventDone>()) |
| {} |
| |
| angle::Result CLProgramVk::init() |
| { |
| cl::DevicePtrs devices; |
| ANGLE_TRY(mContext->getDevices(&devices)); |
| |
| // The devices associated with the program object are the devices associated with context |
| for (const cl::DevicePtr &device : devices) |
| { |
| DeviceProgramData deviceProgramData{}; |
| mAssociatedDevicePrograms[device->getNative()] = std::move(deviceProgramData); |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::init(const size_t *lengths, |
| const unsigned char **binaries, |
| cl_int *binaryStatus) |
| { |
| // The devices associated with program come from device_list param from |
| // clCreateProgramWithBinary |
| for (const cl::DevicePtr &device : mProgram.getDevices()) |
| { |
| const unsigned char *binaryHandle = *binaries++; |
| size_t binarySize = *lengths++; |
| |
| // Check for header |
| if (binarySize < sizeof(ProgramBinaryOutputHeader)) |
| { |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_INVALID_BINARY; |
| } |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); |
| } |
| binarySize -= sizeof(ProgramBinaryOutputHeader); |
| |
| // Check for valid binary version from header |
| const ProgramBinaryOutputHeader *binaryHeader = |
| reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle); |
| if (binaryHeader == nullptr) |
| { |
| ERR() << "NULL binary header!"; |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_INVALID_BINARY; |
| } |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); |
| } |
| else if (binaryHeader->headerVersion < kBinaryVersion) |
| { |
| ERR() << "Binary version not compatible with runtime!"; |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_INVALID_BINARY; |
| } |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); |
| } |
| binaryHandle += sizeof(ProgramBinaryOutputHeader); |
| |
| // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode) |
| // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number |
| // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number |
| constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342; |
| constexpr uint32_t SPIRV_MAGIC = 0x07230203; |
| const uint32_t &firstWord = reinterpret_cast<const uint32_t *>(binaryHandle)[0]; |
| bool isBC = firstWord == LLVM_BC_MAGIC; |
| bool isSPV = firstWord == SPIRV_MAGIC; |
| if (!isBC && !isSPV) |
| { |
| ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!"; |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_INVALID_BINARY; |
| } |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); |
| } |
| |
| // Add device binary to program |
| DeviceProgramData deviceBinary; |
| deviceBinary.spirvVersion = device->getImpl<CLDeviceVk>().getSpirvVersion(); |
| deviceBinary.binaryType = binaryHeader->binaryType; |
| deviceBinary.buildStatus = binaryHeader->buildStatus; |
| switch (deviceBinary.binaryType) |
| { |
| case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: |
| deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0); |
| std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize); |
| break; |
| case CL_PROGRAM_BINARY_TYPE_LIBRARY: |
| case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: |
| deviceBinary.IR.assign(binarySize, 0); |
| std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize); |
| break; |
| default: |
| UNREACHABLE(); |
| ERR() << "Invalid binary type!"; |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_INVALID_BINARY; |
| } |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); |
| } |
| mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary); |
| if (binaryStatus) |
| { |
| *binaryStatus++ = CL_SUCCESS; |
| } |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| CLProgramVk::~CLProgramVk() {} |
| |
| angle::Result CLProgramVk::build(const cl::DevicePtrs &devices, |
| const char *options, |
| cl::Program *notify) |
| { |
| BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY; |
| const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices(); |
| |
| setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS); |
| |
| if (notify) |
| { |
| mAsyncBuildEvent = |
| getPlatform()->postMultiThreadWorkerTask(std::make_shared<CLAsyncBuildTask>( |
| this, devicePtrs, std::string(options ? options : ""), "", buildType, |
| LinkProgramsList{}, notify)); |
| ASSERT(mAsyncBuildEvent != nullptr); |
| } |
| else |
| { |
| if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType, |
| LinkProgramsList{})) |
| { |
| ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE); |
| } |
| } |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices, |
| const char *options, |
| const cl::ProgramPtrs &inputHeaders, |
| const char **headerIncludeNames, |
| cl::Program *notify) |
| { |
| const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices(); |
| |
| // Ensure OS temp dir is available |
| std::string internalCompileOpts; |
| Optional<std::string> tmpDir = angle::GetTempDirectory(); |
| if (!tmpDir.valid()) |
| { |
| ERR() << "Failed to open OS temp dir"; |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION); |
| } |
| internalCompileOpts += inputHeaders.empty() ? "" : " -I" + tmpDir.value(); |
| |
| // Dump input headers to OS temp directory |
| for (size_t i = 0; i < inputHeaders.size(); ++i) |
| { |
| const std::string &inputHeaderSrc = |
| inputHeaders.at(i)->getImpl<CLProgramVk>().mProgram.getSource(); |
| std::string headerFilePath(angle::ConcatenatePath(tmpDir.value(), headerIncludeNames[i])); |
| |
| // Sanitize path so we can use "/" as universal path separator |
| angle::MakeForwardSlashThePathSeparator(headerFilePath); |
| size_t baseDirPos = headerFilePath.find_last_of("/"); |
| |
| // Ensure parent dir(s) exists |
| if (!angle::CreateDirectories(headerFilePath.substr(0, baseDirPos))) |
| { |
| ERR() << "Failed to create output path(s) for header(s)!"; |
| ANGLE_CL_RETURN_ERROR(CL_INVALID_OPERATION); |
| } |
| writeFile(headerFilePath.c_str(), inputHeaderSrc.data(), inputHeaderSrc.size()); |
| } |
| |
| setBuildStatus(devicePtrs, CL_BUILD_IN_PROGRESS); |
| |
| // Perform compile |
| if (notify) |
| { |
| mAsyncBuildEvent = mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask( |
| std::make_shared<CLAsyncBuildTask>( |
| this, devicePtrs, std::string(options ? options : ""), internalCompileOpts, |
| BuildType::COMPILE, LinkProgramsList{}, notify)); |
| ASSERT(mAsyncBuildEvent != nullptr); |
| } |
| else |
| { |
| mAsyncBuildEvent = std::make_shared<angle::WaitableEventDone>(); |
| if (!buildInternal(devicePtrs, std::string(options ? options : ""), internalCompileOpts, |
| BuildType::COMPILE, LinkProgramsList{})) |
| { |
| ANGLE_CL_RETURN_ERROR(CL_COMPILE_PROGRAM_FAILURE); |
| } |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::getInfo(cl::ProgramInfo name, |
| size_t valueSize, |
| void *value, |
| size_t *valueSizeRet) const |
| { |
| cl_uint valUInt = 0u; |
| cl_bool valBool = CL_FALSE; |
| void *valPointer = nullptr; |
| const void *copyValue = nullptr; |
| size_t copySize = 0u; |
| unsigned char **outputBins = reinterpret_cast<unsigned char **>(value); |
| std::string kernelNamesList; |
| std::vector<size_t> vBinarySizes; |
| |
| switch (name) |
| { |
| case cl::ProgramInfo::NumKernels: |
| for (const auto &deviceProgram : mAssociatedDevicePrograms) |
| { |
| valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels()); |
| } |
| copyValue = &valUInt; |
| copySize = sizeof(valUInt); |
| break; |
| case cl::ProgramInfo::BinarySizes: |
| { |
| for (const auto &deviceProgram : mAssociatedDevicePrograms) |
| { |
| vBinarySizes.push_back( |
| sizeof(ProgramBinaryOutputHeader) + |
| (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE |
| ? deviceProgram.second.binary.size() * sizeof(uint32_t) |
| : deviceProgram.second.IR.size())); |
| } |
| valPointer = vBinarySizes.data(); |
| copyValue = valPointer; |
| copySize = vBinarySizes.size() * sizeof(size_t); |
| break; |
| } |
| case cl::ProgramInfo::Binaries: |
| for (const auto &deviceProgram : mAssociatedDevicePrograms) |
| { |
| const void *bin = |
| deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE |
| ? reinterpret_cast<const void *>(deviceProgram.second.binary.data()) |
| : reinterpret_cast<const void *>(deviceProgram.second.IR.data()); |
| size_t binSize = |
| deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE |
| ? deviceProgram.second.binary.size() * sizeof(uint32_t) |
| : deviceProgram.second.IR.size(); |
| ProgramBinaryOutputHeader header{.headerVersion = kBinaryVersion, |
| .binaryType = deviceProgram.second.binaryType, |
| .buildStatus = deviceProgram.second.buildStatus}; |
| |
| if (outputBins != nullptr) |
| { |
| if (*outputBins != nullptr) |
| { |
| std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader)); |
| std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin, |
| binSize); |
| } |
| outputBins++; |
| } |
| |
| // Spec just wants pointer size here |
| copySize += sizeof(unsigned char *); |
| } |
| // We already copied the (headers + binaries) over - nothing else left to copy |
| copyValue = nullptr; |
| break; |
| case cl::ProgramInfo::KernelNames: |
| for (const auto &deviceProgram : mAssociatedDevicePrograms) |
| { |
| kernelNamesList = deviceProgram.second.getKernelNames(); |
| } |
| valPointer = kernelNamesList.data(); |
| copyValue = valPointer; |
| copySize = kernelNamesList.size() + 1; |
| break; |
| case cl::ProgramInfo::ScopeGlobalCtorsPresent: |
| case cl::ProgramInfo::ScopeGlobalDtorsPresent: |
| // These are deprecated by version 3.0 and are currently not supported |
| copyValue = &valBool; |
| copySize = sizeof(cl_bool); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| |
| if ((value != nullptr) && (copyValue != nullptr)) |
| { |
| std::memcpy(value, copyValue, copySize); |
| } |
| |
| if (valueSizeRet != nullptr) |
| { |
| *valueSizeRet = copySize; |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::getBuildInfo(const cl::Device &device, |
| cl::ProgramBuildInfo name, |
| size_t valueSize, |
| void *value, |
| size_t *valueSizeRet) const |
| { |
| cl_uint valUInt = 0; |
| cl_build_status valStatus = 0; |
| const void *copyValue = nullptr; |
| size_t copySize = 0; |
| const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative()); |
| |
| switch (name) |
| { |
| case cl::ProgramBuildInfo::Status: |
| valStatus = deviceProgramData->buildStatus; |
| copyValue = &valStatus; |
| copySize = sizeof(valStatus); |
| break; |
| case cl::ProgramBuildInfo::Log: |
| copyValue = deviceProgramData->buildLog.c_str(); |
| copySize = deviceProgramData->buildLog.size() + 1; |
| break; |
| case cl::ProgramBuildInfo::Options: |
| copyValue = mProgramOpts.c_str(); |
| copySize = mProgramOpts.size() + 1; |
| break; |
| case cl::ProgramBuildInfo::BinaryType: |
| valUInt = deviceProgramData->binaryType; |
| copyValue = &valUInt; |
| copySize = sizeof(valUInt); |
| break; |
| case cl::ProgramBuildInfo::GlobalVariableTotalSize: |
| // Returns 0 if device does not support program scope global variables. |
| valUInt = 0; |
| copyValue = &valUInt; |
| copySize = sizeof(valUInt); |
| break; |
| default: |
| UNREACHABLE(); |
| } |
| |
| if ((value != nullptr) && (copyValue != nullptr)) |
| { |
| memcpy(value, copyValue, std::min(valueSize, copySize)); |
| } |
| |
| if (valueSizeRet != nullptr) |
| { |
| *valueSizeRet = copySize; |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel, |
| const char *name, |
| CLKernelImpl::Ptr *kernelOut) |
| { |
| // Wait for the compile to finish |
| mAsyncBuildEvent->wait(); |
| |
| std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex); |
| const auto devProgram = getDeviceProgramData(name); |
| ASSERT(devProgram != nullptr); |
| |
| // Create kernel |
| CLKernelArguments kernelArgs = devProgram->getKernelArguments(name); |
| std::string kernelAttributes = devProgram->getKernelAttributes(name); |
| std::string kernelName = std::string(name ? name : ""); |
| CLKernelVk::Ptr kernelImpl = CLKernelVk::Ptr( |
| new (std::nothrow) CLKernelVk(kernel, kernelName, kernelAttributes, kernelArgs)); |
| if (kernelImpl == nullptr) |
| { |
| ERR() << "Could not create kernel obj!"; |
| ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY); |
| } |
| |
| ANGLE_TRY(kernelImpl->init()); |
| *kernelOut = std::move(kernelImpl); |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result CLProgramVk::createKernels(cl_uint numKernels, |
| CLKernelImpl::CreateFuncs &createFuncs, |
| cl_uint *numKernelsRet) |
| { |
| size_t numDevKernels = 0; |
| for (const auto &dev : mAssociatedDevicePrograms) |
| { |
| numDevKernels += dev.second.numKernels(); |
| } |
| if (numKernelsRet != nullptr) |
| { |
| *numKernelsRet = static_cast<cl_uint>(numDevKernels); |
| } |
| |
| if (numKernels != 0) |
| { |
| for (const auto &dev : mAssociatedDevicePrograms) |
| { |
| for (const auto &kernArgMap : dev.second.getKernelArgsMap()) |
| { |
| createFuncs.emplace_back([this, &kernArgMap](const cl::Kernel &kern) { |
| CLKernelImpl::Ptr implPtr = nullptr; |
| ANGLE_CL_IMPL_TRY(this->createKernel(kern, kernArgMap.first.c_str(), &implPtr)); |
| return CLKernelImpl::Ptr(std::move(implPtr)); |
| }); |
| } |
| } |
| } |
| return angle::Result::Continue; |
| } |
| |
| const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData( |
| const _cl_device_id *device) const |
| { |
| if (!mAssociatedDevicePrograms.contains(device)) |
| { |
| WARN() << "Device (" << device << ") is not associated with program (" << this << ") !"; |
| return nullptr; |
| } |
| return &mAssociatedDevicePrograms.at(device); |
| } |
| |
| const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData( |
| const char *kernelName) const |
| { |
| for (const auto &deviceProgram : mAssociatedDevicePrograms) |
| { |
| if (deviceProgram.second.containsKernel(kernelName)) |
| { |
| return &deviceProgram.second; |
| } |
| } |
| WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this |
| << ") !"; |
| return nullptr; |
| } |
| |
| bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices, |
| std::string options, |
| std::string internalOptions, |
| BuildType buildType, |
| const LinkProgramsList &LinkProgramsList) |
| { |
| std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex); |
| |
| // Cache original options string |
| mProgramOpts = options; |
| |
| // Process options and append any other internal (required) options for clspv |
| std::vector<std::string> optionTokens; |
| angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens); |
| const bool createLibrary = std::find(optionTokens.begin(), optionTokens.end(), |
| "-create-library") != optionTokens.end(); |
| std::string processedOptions = ProcessBuildOptions(optionTokens, buildType); |
| |
| // Build for each associated device |
| for (size_t i = 0; i < devices.size(); ++i) |
| { |
| const cl::RefPointer<cl::Device> &device = devices.at(i); |
| DeviceProgramData &deviceProgramData = mAssociatedDevicePrograms[device->getNative()]; |
| deviceProgramData.spirvVersion = device->getImpl<CLDeviceVk>().getSpirvVersion(); |
| |
| // add clspv compiler options based on device features |
| processedOptions += ClspvGetCompilerOptions(&device->getImpl<CLDeviceVk>()); |
| |
| if (buildType != BuildType::BINARY) |
| { |
| // Invoke clspv |
| switch (buildType) |
| { |
| case BuildType::BUILD: |
| case BuildType::COMPILE: |
| { |
| ScopedClspvContext clspvCtx; |
| const char *clSrc = mProgram.getSource().c_str(); |
| |
| ClspvError clspvRet = ClspvCompileSource( |
| 1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(), |
| &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog); |
| deviceProgramData.buildLog = |
| clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : ""; |
| if (clspvRet != CLSPV_SUCCESS) |
| { |
| ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!"; |
| deviceProgramData.buildStatus = CL_BUILD_ERROR; |
| return false; |
| } |
| |
| if (buildType == BuildType::COMPILE) |
| { |
| deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0); |
| std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin, |
| clspvCtx.mOutputBinSize); |
| deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; |
| } |
| else |
| { |
| deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t), |
| 0); |
| std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin, |
| clspvCtx.mOutputBinSize); |
| deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; |
| } |
| break; |
| } |
| case BuildType::LINK: |
| { |
| ScopedClspvContext clspvCtx; |
| std::vector<size_t> vSizes; |
| std::vector<const char *> vBins; |
| const LinkPrograms &linkPrograms = LinkProgramsList.at(i); |
| for (const CLProgramVk::DeviceProgramData *linkProgramData : linkPrograms) |
| { |
| vSizes.push_back(linkProgramData->IR.size()); |
| vBins.push_back(linkProgramData->IR.data()); |
| } |
| |
| ClspvError clspvRet = ClspvCompileSource( |
| linkPrograms.size(), vSizes.data(), vBins.data(), processedOptions.c_str(), |
| &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog); |
| deviceProgramData.buildLog = |
| clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : ""; |
| if (clspvRet != CLSPV_SUCCESS) |
| { |
| ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!"; |
| deviceProgramData.buildStatus = CL_BUILD_ERROR; |
| return false; |
| } |
| |
| if (createLibrary) |
| { |
| deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0); |
| std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin, |
| clspvCtx.mOutputBinSize); |
| deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; |
| } |
| else |
| { |
| deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t), |
| 0); |
| std::memcpy(deviceProgramData.binary.data(), |
| reinterpret_cast<char *>(clspvCtx.mOutputBin), |
| clspvCtx.mOutputBinSize); |
| deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; |
| } |
| break; |
| } |
| default: |
| UNREACHABLE(); |
| return false; |
| } |
| } |
| |
| // Extract reflection info from spv binary and populate reflection data, as well as create |
| // the shader module |
| if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) |
| { |
| // Report SPIR-V validation failure as a build failure |
| if (!ClspvValidate(mContext->getRenderer(), deviceProgramData.binary)) |
| { |
| ERR() << "Failed to validate SPIR-V binary!"; |
| deviceProgramData.buildStatus = CL_BUILD_ERROR; |
| return false; |
| } |
| |
| spvtools::SpirvTools spvTool(deviceProgramData.spirvVersion); |
| bool parseRet = spvTool.Parse( |
| deviceProgramData.binary, |
| [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) { |
| return SPV_SUCCESS; |
| }, |
| [&deviceProgramData](const spv_parsed_instruction_t &instruction) { |
| return ParseReflection(deviceProgramData.reflectionData, instruction); |
| }); |
| if (!parseRet) |
| { |
| ERR() << "Failed to parse reflection info from SPIR-V!"; |
| deviceProgramData.buildStatus = CL_BUILD_ERROR; |
| return false; |
| } |
| |
| if (mShader) |
| { |
| mShader.reset(); |
| } |
| // Strip SPIR-V binary if Vk implementation does not support non-semantic info |
| angle::spirv::Blob spvBlob = |
| !mContext->getFeatures().supportsShaderNonSemanticInfo.enabled |
| ? stripReflection(&deviceProgramData) |
| : deviceProgramData.binary; |
| ASSERT(!spvBlob.empty()); |
| if (IsError(vk::InitShaderModule(mContext, &mShader, spvBlob.data(), |
| spvBlob.size() * sizeof(uint32_t)))) |
| { |
| ERR() << "Failed to init Vulkan Shader Module!"; |
| deviceProgramData.buildStatus = CL_BUILD_ERROR; |
| return false; |
| } |
| |
| // Set up initial push constant range |
| uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0, |
| pushConstantMaxSize = 0; |
| for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants) |
| { |
| pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet |
| ? pushConstant.second.offset |
| : pushConstantMinOffet; |
| if (pushConstant.second.offset >= pushConstantMaxOffset) |
| { |
| pushConstantMaxOffset = pushConstant.second.offset; |
| pushConstantMaxSize = pushConstant.second.size; |
| } |
| } |
| for (const auto &pushConstant : deviceProgramData.reflectionData.imagePushConstants) |
| { |
| for (const auto imageConstant : pushConstant.second) |
| { |
| pushConstantMinOffet = imageConstant.pcRange.offset < pushConstantMinOffet |
| ? imageConstant.pcRange.offset |
| : pushConstantMinOffet; |
| if (imageConstant.pcRange.offset >= pushConstantMaxOffset) |
| { |
| pushConstantMaxOffset = imageConstant.pcRange.offset; |
| pushConstantMaxSize = imageConstant.pcRange.size; |
| } |
| } |
| } |
| deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; |
| deviceProgramData.pushConstRange.offset = |
| pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet; |
| deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize; |
| |
| if (kAngleDebug) |
| { |
| if (mContext->getFeatures().clDumpVkSpirv.enabled) |
| { |
| angle::spirv::Print(deviceProgramData.binary); |
| } |
| } |
| } |
| deviceProgramData.buildStatus = CL_BUILD_SUCCESS; |
| } |
| return true; |
| } |
| |
| angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData) |
| { |
| angle::spirv::Blob binaryStripped; |
| spvtools::Optimizer opt(deviceProgramData->spirvVersion); |
| opt.RegisterPass(spvtools::CreateStripReflectInfoPass()); |
| spvtools::OptimizerOptions optOptions; |
| optOptions.set_run_validator(false); |
| if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(), |
| &binaryStripped, optOptions)) |
| { |
| ERR() << "Could not strip reflection data from binary!"; |
| } |
| return binaryStripped; |
| } |
| |
| void CLProgramVk::setBuildStatus(const cl::DevicePtrs &devices, cl_build_status status) |
| { |
| std::scoped_lock<angle::SimpleMutex> sl(mProgramMutex); |
| |
| for (const auto &device : devices) |
| { |
| ASSERT(mAssociatedDevicePrograms.contains(device->getNative())); |
| DeviceProgramData &deviceProgram = mAssociatedDevicePrograms.at(device->getNative()); |
| deviceProgram.buildStatus = status; |
| } |
| } |
| |
| const angle::HashMap<uint32_t, ClspvPrintfInfo> *CLProgramVk::getPrintfDescriptors( |
| const std::string &kernelName) const |
| { |
| const DeviceProgramData *deviceProgram = getDeviceProgramData(kernelName.c_str()); |
| if (deviceProgram) |
| { |
| return &deviceProgram->reflectionData.printfInfoMap; |
| } |
| return nullptr; |
| } |
| |
| } // namespace rx |