/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_device.c which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <sys/sysmacros.h>
#include "util/disk_cache.h"
#include "git_sha1.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
#include "vk_format.h"
#include "vk_limits.h"
#include "vk_log.h"
#include "vk_shader_module.h"
#include "vk_util.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_wsi.h"
#include "pan_format.h"
#include "pan_props.h"
#include "genxml/gen_macros.h"
#define ARM_VENDOR_ID 0x13b5
#define MAX_PUSH_DESCRIPTORS 32
/* We reserve one UBO for push constants, one for sysvals and one per set for
 * the descriptor metadata */
#define RESERVED_UBO_COUNT 6
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT)
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16)
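/* A worked example of the budget above (illustrative, assuming the 32-UBO
 * per-stage limit and the 4-set Bifrost maximum used later in this file):
 *
 *   reserved = 1 (push constants) + 1 (sysvals) + 4 (one per set) = 6
 *   inline uniform block descriptors = 32 - 6 = 26
 */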
static VkResult
create_kmod_dev(struct panvk_physical_device *device,
const struct panvk_instance *instance, drmDevicePtr drm_device)
{
const char *path = drm_device->nodes[DRM_NODE_RENDER];
drmVersionPtr version;
int fd;
fd = open(path, O_RDWR | O_CLOEXEC);
if (fd < 0) {
return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"failed to open device %s", path);
}
version = drmGetVersion(fd);
if (!version) {
close(fd);
return panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"failed to query kernel driver version for device %s",
path);
}
if (strcmp(version->name, "panfrost") && strcmp(version->name, "panthor")) {
drmFreeVersion(version);
close(fd);
return VK_ERROR_INCOMPATIBLE_DRIVER;
}
drmFreeVersion(version);
if (instance->debug_flags & PANVK_DEBUG_STARTUP)
vk_logi(VK_LOG_NO_OBJS(instance), "Found compatible device '%s'.", path);
device->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD,
&instance->kmod.allocator);
if (!device->kmod.dev) {
close(fd);
return panvk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
"cannot create device");
}
return VK_SUCCESS;
}
static VkResult
get_drm_device_ids(struct panvk_physical_device *device,
const struct panvk_instance *instance,
drmDevicePtr drm_device)
{
struct stat st;
if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to query render node stat");
}
device->drm.render_rdev = st.st_rdev;
if (drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) {
if (stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to query primary node stat");
}
device->drm.primary_rdev = st.st_rdev;
}
return VK_SUCCESS;
}
static int
get_cache_uuid(uint16_t family, void *uuid)
{
uint32_t mesa_timestamp;
uint16_t f = family;
if (!disk_cache_get_function_timestamp(get_cache_uuid, &mesa_timestamp))
return -1;
memset(uuid, 0, VK_UUID_SIZE);
memcpy(uuid, &mesa_timestamp, 4);
memcpy((char *)uuid + 4, &f, 2);
snprintf((char *)uuid + 6, VK_UUID_SIZE - 10, "pan");
return 0;
}
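/* Resulting 16-byte UUID layout (illustrative):
 *
 *   bytes  0-3 : Mesa build timestamp (uint32)
 *   bytes  4-5 : GPU product family (uint16)
 *   bytes  6-9 : "pan" plus its NUL terminator
 *   bytes 10-15: zero padding left by the memset() above
 *
 * The snprintf() above writes at most VK_UUID_SIZE - 10 = 6 bytes, of which
 * "pan\0" uses 4.
 */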
static VkResult
get_device_sync_types(struct panvk_physical_device *device,
const struct panvk_instance *instance)
{
const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
uint32_t sync_type_count = 0;
device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
if (!device->drm_syncobj_type.features) {
return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to query syncobj features");
}
device->sync_types[sync_type_count++] = &device->drm_syncobj_type;
if (arch >= 10) {
assert(device->drm_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
} else {
/* We don't support timelines in the uAPI yet, and we don't want them
* suddenly getting turned on by vk_drm_syncobj_get_type() before the
* panvk code for them is in place.
*/
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
/* vk_sync_timeline requires VK_SYNC_FEATURE_GPU_MULTI_WAIT. Panfrost
* waits on the underlying dma-fences and supports the feature.
*/
device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;
device->sync_timeline_type =
vk_sync_timeline_get_type(&device->drm_syncobj_type);
device->sync_types[sync_type_count++] = &device->sync_timeline_type.sync;
}
assert(sync_type_count < ARRAY_SIZE(device->sync_types));
device->sync_types[sync_type_count] = NULL;
return VK_SUCCESS;
}
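/* The resulting NULL-terminated sync_types array (illustrative):
 *
 *   arch >= 10 (native timeline support):
 *     { &drm_syncobj_type, NULL }
 *   arch < 10 (timelines emulated through vk_sync_timeline):
 *     { &drm_syncobj_type (binary only), &sync_timeline_type.sync, NULL }
 */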
static void
get_device_extensions(const struct panvk_physical_device *device,
struct vk_device_extension_table *ext)
{
const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
*ext = (struct vk_device_extension_table){
.KHR_8bit_storage = true,
.KHR_16bit_storage = true,
.KHR_bind_memory2 = true,
.KHR_buffer_device_address = true,
.KHR_copy_commands2 = true,
.KHR_create_renderpass2 = true,
.KHR_dedicated_allocation = true,
.KHR_descriptor_update_template = true,
.KHR_device_group = true,
.KHR_driver_properties = true,
.KHR_dynamic_rendering = true,
.KHR_external_fence = true,
.KHR_external_fence_fd = true,
.KHR_external_memory = true,
.KHR_external_memory_fd = true,
.KHR_external_semaphore = true,
.KHR_external_semaphore_fd = true,
.KHR_get_memory_requirements2 = true,
.KHR_global_priority = true,
.KHR_image_format_list = true,
.KHR_index_type_uint8 = true,
.KHR_maintenance1 = true,
.KHR_maintenance2 = true,
.KHR_maintenance3 = true,
.KHR_map_memory2 = true,
.KHR_multiview = arch >= 10,
.KHR_pipeline_executable_properties = true,
.KHR_pipeline_library = true,
.KHR_push_descriptor = true,
.KHR_relaxed_block_layout = true,
.KHR_sampler_mirror_clamp_to_edge = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_expect_assume = true,
.KHR_shader_float16_int8 = true,
.KHR_shader_non_semantic_info = true,
.KHR_shader_relaxed_extended_instruction = true,
.KHR_storage_buffer_storage_class = true,
#ifdef PANVK_USE_WSI_PLATFORM
.KHR_swapchain = true,
#endif
.KHR_synchronization2 = true,
.KHR_timeline_semaphore = true,
.KHR_variable_pointers = true,
.KHR_vertex_attribute_divisor = true,
.KHR_zero_initialize_workgroup_memory = true,
.EXT_4444_formats = true,
.EXT_buffer_device_address = true,
.EXT_custom_border_color = true,
.EXT_depth_clip_enable = true,
.EXT_external_memory_dma_buf = true,
.EXT_global_priority = true,
.EXT_global_priority_query = true,
.EXT_graphics_pipeline_library = true,
.EXT_host_query_reset = true,
.EXT_image_drm_format_modifier = true,
.EXT_image_robustness = true,
.EXT_index_type_uint8 = true,
.EXT_physical_device_drm = true,
.EXT_pipeline_creation_cache_control = true,
.EXT_pipeline_creation_feedback = true,
.EXT_pipeline_robustness = true,
.EXT_private_data = true,
.EXT_provoking_vertex = true,
.EXT_queue_family_foreign = true,
.EXT_sampler_filter_minmax = arch >= 10,
.EXT_scalar_block_layout = true,
.EXT_shader_module_identifier = true,
.EXT_tooling_info = true,
.GOOGLE_decorate_string = true,
.GOOGLE_hlsl_functionality1 = true,
.GOOGLE_user_type = true,
};
}
static void
get_features(const struct panvk_physical_device *device,
struct vk_features *features)
{
unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
*features = (struct vk_features){
/* Vulkan 1.0 */
.depthClamp = true,
.depthBiasClamp = true,
.robustBufferAccess = true,
.fullDrawIndexUint32 = true,
.imageCubeArray = true,
.independentBlend = true,
.sampleRateShading = true,
.logicOp = true,
.wideLines = true,
.largePoints = true,
.occlusionQueryPrecise = true,
.samplerAnisotropy = true,
.textureCompressionETC2 = true,
.textureCompressionASTC_LDR = true,
.fragmentStoresAndAtomics = arch >= 10,
.shaderUniformBufferArrayDynamicIndexing = true,
.shaderSampledImageArrayDynamicIndexing = true,
.shaderStorageBufferArrayDynamicIndexing = true,
.shaderStorageImageArrayDynamicIndexing = true,
.shaderInt16 = true,
.shaderInt64 = true,
.drawIndirectFirstInstance = true,
/* Vulkan 1.1 */
.storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,
.multiview = arch >= 10,
.multiviewGeometryShader = false,
.multiviewTessellationShader = false,
.variablePointersStorageBuffer = true,
.variablePointers = true,
.protectedMemory = false,
.samplerYcbcrConversion = false,
.shaderDrawParameters = true,
/* Vulkan 1.2 */
.samplerMirrorClampToEdge = true,
.drawIndirectCount = false,
.storageBuffer8BitAccess = true,
.uniformAndStorageBuffer8BitAccess = false,
.storagePushConstant8 = false,
.shaderBufferInt64Atomics = false,
.shaderSharedInt64Atomics = false,
.shaderFloat16 = false,
.shaderInt8 = true,
.descriptorIndexing = false,
.shaderInputAttachmentArrayDynamicIndexing = false,
.shaderUniformTexelBufferArrayDynamicIndexing = false,
.shaderStorageTexelBufferArrayDynamicIndexing = false,
.shaderUniformBufferArrayNonUniformIndexing = false,
.shaderSampledImageArrayNonUniformIndexing = false,
.shaderStorageBufferArrayNonUniformIndexing = false,
.shaderStorageImageArrayNonUniformIndexing = false,
.shaderInputAttachmentArrayNonUniformIndexing = false,
.shaderUniformTexelBufferArrayNonUniformIndexing = false,
.shaderStorageTexelBufferArrayNonUniformIndexing = false,
.descriptorBindingUniformBufferUpdateAfterBind = false,
.descriptorBindingSampledImageUpdateAfterBind = false,
.descriptorBindingStorageImageUpdateAfterBind = false,
.descriptorBindingStorageBufferUpdateAfterBind = false,
.descriptorBindingUniformTexelBufferUpdateAfterBind = false,
.descriptorBindingStorageTexelBufferUpdateAfterBind = false,
.descriptorBindingUpdateUnusedWhilePending = false,
.descriptorBindingPartiallyBound = false,
.descriptorBindingVariableDescriptorCount = false,
.runtimeDescriptorArray = false,
.samplerFilterMinmax = arch >= 10,
.scalarBlockLayout = true,
.imagelessFramebuffer = false,
.uniformBufferStandardLayout = false,
.shaderSubgroupExtendedTypes = false,
.separateDepthStencilLayouts = false,
.hostQueryReset = true,
.timelineSemaphore = true,
.bufferDeviceAddress = true,
.bufferDeviceAddressCaptureReplay = false,
.bufferDeviceAddressMultiDevice = false,
.vulkanMemoryModel = false,
.vulkanMemoryModelDeviceScope = false,
.vulkanMemoryModelAvailabilityVisibilityChains = false,
.shaderOutputViewportIndex = false,
.shaderOutputLayer = false,
.subgroupBroadcastDynamicId = false,
/* Vulkan 1.3 */
.robustImageAccess = true,
.inlineUniformBlock = false,
.descriptorBindingInlineUniformBlockUpdateAfterBind = false,
.pipelineCreationCacheControl = true,
.privateData = true,
.shaderDemoteToHelperInvocation = false,
.shaderTerminateInvocation = false,
.subgroupSizeControl = false,
.computeFullSubgroups = false,
.synchronization2 = true,
.textureCompressionASTC_HDR = false,
.shaderZeroInitializeWorkgroupMemory = true,
.dynamicRendering = true,
.shaderIntegerDotProduct = false,
.maintenance4 = false,
/* VK_EXT_graphics_pipeline_library */
.graphicsPipelineLibrary = true,
/* VK_KHR_global_priority */
.globalPriorityQuery = true,
/* VK_KHR_index_type_uint8 */
.indexTypeUint8 = true,
/* VK_KHR_vertex_attribute_divisor */
.vertexAttributeInstanceRateDivisor = true,
.vertexAttributeInstanceRateZeroDivisor = true,
/* VK_EXT_depth_clip_enable */
.depthClipEnable = true,
/* VK_EXT_4444_formats */
.formatA4R4G4B4 = true,
.formatA4B4G4R4 = true,
/* VK_EXT_custom_border_color */
.customBorderColors = true,
/* VK_EXT_provoking_vertex */
.provokingVertexLast = true,
.transformFeedbackPreservesProvokingVertex = false,
/* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to
* make it work, which forces us to apply the same swizzle on the border
* color, meaning we need to know the format when preparing the border
* color.
*/
.customBorderColorWithoutFormat = arch != 7,
/* VK_KHR_pipeline_executable_properties */
.pipelineExecutableInfo = true,
/* VK_EXT_pipeline_robustness */
.pipelineRobustness = true,
/* VK_KHR_shader_relaxed_extended_instruction */
.shaderRelaxedExtendedInstruction = true,
/* VK_KHR_shader_expect_assume */
.shaderExpectAssume = true,
/* VK_EXT_shader_module_identifier */
.shaderModuleIdentifier = true,
};
}
static uint32_t
get_vk_version(void)
{
const uint32_t version_override = vk_get_version_override();
if (version_override)
return version_override;
return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION);
}
static void
get_device_properties(const struct panvk_instance *instance,
const struct panvk_physical_device *device,
struct vk_properties *properties)
{
/* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
VkSampleCountFlags sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
uint64_t os_page_size = 4096;
os_get_page_size(&os_page_size);
unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
/* Ensure that the maximum thread count per workgroup is valid on Bifrost. */
assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
*properties = (struct vk_properties){
.apiVersion = get_vk_version(),
.driverVersion = vk_get_driver_version(),
.vendorID = ARM_VENDOR_ID,
/* Collect arch_major, arch_minor, arch_rev and product_major,
* as done by the Arm driver.
*/
.deviceID = device->kmod.props.gpu_prod_id << 16,
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
/* Vulkan 1.0 limits */
/* Maximum texture dimension is 2^16. */
.maxImageDimension1D = (1 << 16),
.maxImageDimension2D = (1 << 16),
.maxImageDimension3D = (1 << 16),
.maxImageDimensionCube = (1 << 16),
.maxImageArrayLayers = (1 << 16),
/* Currently limited by the 1D texture size, which is 2^16.
* TODO: If we expose buffer views as 2D textures, we can increase the
* limit.
*/
.maxTexelBufferElements = (1 << 16),
/* Each uniform entry is 16 bytes and the number of entries is encoded in a
* 12-bit field with a minus(1) modifier, which gives 2^12 entries, i.e. a
* 2^16-byte range.
*/
.maxUniformBufferRange = 1 << 16,
/* Storage buffer access is lowered to globals, so there's no limit here,
* except for the SW-descriptor we use to encode storage buffer
* descriptors, where the size is a 32-bit field.
*/
.maxStorageBufferRange = UINT32_MAX,
/* 128 bytes of push constants, so we're aligned with the minimum Vulkan
* requirements.
*/
.maxPushConstantsSize = 128,
/* On our kernel drivers we're limited by the available memory rather
* than available allocations. This is better expressed through memory
* properties and budget queries, and by returning
* VK_ERROR_OUT_OF_DEVICE_MEMORY when applicable, rather than
* this limit.
*/
.maxMemoryAllocationCount = UINT32_MAX,
/* On Mali, VkSampler objects do not use any resources other than host
* memory and host address space, availability of which can change
* significantly over time.
*/
.maxSamplerAllocationCount = UINT32_MAX,
/* A cache line. */
.bufferImageGranularity = 64,
/* Sparse binding not supported yet. */
.sparseAddressSpaceSize = 0,
/* On Bifrost, this is a software limit. We pick the minimum required by
* Vulkan, because Bifrost GPUs don't have unified descriptor tables,
* which forces us to aggregate all descriptors from all sets and dispatch
* them to per-type descriptor tables emitted at draw/dispatch time. The
* more sets we support, the more copies we are likely to have to do at
* draw time.
*
* Valhall has native support for descriptor sets, and allows a maximum
* of 16 sets, but we reserve one for our internal use, so we have 15
* left.
*/
.maxBoundDescriptorSets = arch <= 7 ? 4 : 15,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxDescriptorSetSamplers = UINT16_MAX,
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
* for our internal UBOs.
*/
.maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
.maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
/* SSBOs are limited by the size of a uniform buffer which contains our
* panvk_ssbo_desc objects.
* panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
* 16-byte too. The number of entries is encoded in a 12-bit field, with
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
* descriptors.
*/
.maxDescriptorSetStorageBuffers = 1 << 12,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxDescriptorSetSampledImages = UINT16_MAX,
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
*/
.maxDescriptorSetStorageImages = 1 << 8,
/* A maximum of 8 color render targets, and one depth-stencil render
* target.
*/
.maxDescriptorSetInputAttachments = 9,
/* We could theoretically use the maxDescriptor values here (except for
* UBOs where we're really limited to 256 on the shader side), but on
* Bifrost we have to copy some tables around, which comes at an extra
* memory/processing cost, so let's pick something smaller.
*/
.maxPerStageDescriptorInputAttachments = 9,
.maxPerStageDescriptorSampledImages = 256,
.maxPerStageDescriptorSamplers = 128,
.maxPerStageDescriptorStorageBuffers = 64,
.maxPerStageDescriptorStorageImages = 32,
.maxPerStageDescriptorUniformBuffers = 64,
.maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64,
/* Software limits to keep VkCommandBuffer tracking sane. */
.maxDescriptorSetUniformBuffersDynamic = 16,
.maxDescriptorSetStorageBuffersDynamic = 8,
/* Software limit to keep VkCommandBuffer tracking sane. The HW supports
* up to 2^9 vertex attributes.
*/
.maxVertexInputAttributes = 16,
.maxVertexInputBindings = 16,
/* MALI_ATTRIBUTE::offset is 32-bit. */
.maxVertexInputAttributeOffset = UINT32_MAX,
/* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
.maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
/* 32 vec4 varyings. */
.maxVertexOutputComponents = 128,
/* Tessellation shaders not supported. */
.maxTessellationGenerationLevel = 0,
.maxTessellationPatchSize = 0,
.maxTessellationControlPerVertexInputComponents = 0,
.maxTessellationControlPerVertexOutputComponents = 0,
.maxTessellationControlPerPatchOutputComponents = 0,
.maxTessellationControlTotalOutputComponents = 0,
.maxTessellationEvaluationInputComponents = 0,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry shaders not supported. */
.maxGeometryShaderInvocations = 0,
.maxGeometryInputComponents = 0,
.maxGeometryOutputComponents = 0,
.maxGeometryOutputVertices = 0,
.maxGeometryTotalOutputComponents = 0,
/* 32 vec4 varyings. */
.maxFragmentInputComponents = 128,
/* 8 render targets. */
.maxFragmentOutputAttachments = 8,
/* We don't support dual source blending yet. */
.maxFragmentDualSrcAttachments = 0,
/* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
* above).
*/
.maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
/* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
* (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
* really make sense to expose this amount of memory, especially since
* it's backed by global memory anyway.
*/
.maxComputeSharedMemorySize = 32768,
/* Software limit to meet Vulkan 1.0 requirements. We split the
* dispatch into several jobs if it's too big.
*/
.maxComputeWorkGroupCount = {65535, 65535, 65535},
/* We could also split the dispatch into several jobs, but this has many
* limitations, so we cap it at the maximum number of threads per workgroup
* supported by the GPU.
*/
.maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
.maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
device->kmod.props.max_threads_per_wg,
device->kmod.props.max_threads_per_wg},
/* 8-bit subpixel precision. */
.subPixelPrecisionBits = 8,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
/* Software limit. */
.maxDrawIndexedIndexValue = UINT32_MAX,
/* Make it one for now. */
.maxDrawIndirectCount = 1,
.maxSamplerLodBias = (float)INT16_MAX / 256.0f,
.maxSamplerAnisotropy = 16,
.maxViewports = 1,
/* Same as the framebuffer limit. */
.maxViewportDimensions = {(1 << 14), (1 << 14)},
/* Encoded in a 16-bit signed integer. */
.viewportBoundsRange = {INT16_MIN, INT16_MAX},
.viewportSubPixelBits = 0,
/* Align on a page. */
.minMemoryMapAlignment = os_page_size,
/* Some compressed texture formats require 128-byte alignment. */
.minTexelBufferOffsetAlignment = 64,
/* Always aligned on a uniform slot (vec4). */
.minUniformBufferOffsetAlignment = 16,
/* Lowered to global accesses, which happen at the 32-bit granularity. */
.minStorageBufferOffsetAlignment = 4,
/* Signed 4-bit value. */
.minTexelOffset = -8,
.maxTexelOffset = 7,
.minTexelGatherOffset = -8,
.maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = 8,
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
.maxFramebufferLayers = 256,
.framebufferColorSampleCounts = sample_counts,
.framebufferDepthSampleCounts = sample_counts,
.framebufferStencilSampleCounts = sample_counts,
.framebufferNoAttachmentsSampleCounts = sample_counts,
.maxColorAttachments = 8,
.sampledImageColorSampleCounts = sample_counts,
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.sampledImageDepthSampleCounts = sample_counts,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 0,
.maxClipDistances = 0,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 0,
.discreteQueuePriorities = 2,
.pointSizeRange = {0.125, 4095.9375},
.lineWidthRange = {0.0, 7.9921875},
.pointSizeGranularity = (1.0 / 16.0),
.lineWidthGranularity = (1.0 / 128.0),
.strictLines = false,
.standardSampleLocations = true,
.optimalBufferCopyOffsetAlignment = 64,
.optimalBufferCopyRowPitchAlignment = 64,
.nonCoherentAtomSize = 64,
/* Vulkan 1.0 sparse properties */
.sparseResidencyNonResidentStrict = false,
.sparseResidencyAlignedMipSize = false,
.sparseResidencyStandard2DBlockShape = false,
.sparseResidencyStandard2DMultisampleBlockShape = false,
.sparseResidencyStandard3DBlockShape = false,
/* Vulkan 1.1 properties */
/* XXX: 1.1 support */
.subgroupSize = 8,
.subgroupSupportedStages = 0,
.subgroupSupportedOperations = 0,
.subgroupQuadOperationsInAllStages = false,
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
.maxMultiviewViewCount = arch >= 10 ? 8 : 0,
.maxMultiviewInstanceIndex = arch >= 10 ? UINT32_MAX : 0,
.protectedNoFault = false,
.maxPerSetDescriptors = UINT16_MAX,
/* Our buffer size fields allow only this much. */
.maxMemoryAllocationSize = UINT32_MAX,
/* Vulkan 1.2 properties */
/* XXX: 1.2 support */
/* XXX: VK_KHR_depth_stencil_resolve */
.supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.independentResolveNone = true,
.independentResolve = true,
/* VK_KHR_driver_properties */
.driverID = VK_DRIVER_ID_MESA_PANVK,
.conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
/* XXX: VK_KHR_shader_float_controls */
.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.shaderSignedZeroInfNanPreserveFloat16 = true,
.shaderSignedZeroInfNanPreserveFloat32 = true,
.shaderSignedZeroInfNanPreserveFloat64 = false,
.shaderDenormPreserveFloat16 = true,
.shaderDenormPreserveFloat32 = true,
.shaderDenormPreserveFloat64 = false,
.shaderDenormFlushToZeroFloat16 = true,
.shaderDenormFlushToZeroFloat32 = true,
.shaderDenormFlushToZeroFloat64 = false,
.shaderRoundingModeRTEFloat16 = true,
.shaderRoundingModeRTEFloat32 = true,
.shaderRoundingModeRTEFloat64 = false,
.shaderRoundingModeRTZFloat16 = true,
.shaderRoundingModeRTZFloat32 = true,
.shaderRoundingModeRTZFloat64 = false,
/* XXX: VK_EXT_descriptor_indexing */
.maxUpdateAfterBindDescriptorsInAllPools = 0,
.shaderUniformBufferArrayNonUniformIndexingNative = false,
.shaderSampledImageArrayNonUniformIndexingNative = false,
.shaderStorageBufferArrayNonUniformIndexingNative = false,
.shaderStorageImageArrayNonUniformIndexingNative = false,
.shaderInputAttachmentArrayNonUniformIndexingNative = false,
.robustBufferAccessUpdateAfterBind = false,
.quadDivergentImplicitLod = false,
.maxPerStageDescriptorUpdateAfterBindSamplers = 0,
.maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
.maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
.maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
.maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
.maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
.maxPerStageUpdateAfterBindResources = 0,
.maxDescriptorSetUpdateAfterBindSamplers = 0,
.maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
.maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
.maxDescriptorSetUpdateAfterBindSampledImages = 0,
.maxDescriptorSetUpdateAfterBindStorageImages = 0,
.maxDescriptorSetUpdateAfterBindInputAttachments = 0,
.filterMinmaxSingleComponentFormats = arch >= 10,
.filterMinmaxImageComponentMapping = arch >= 10,
.maxTimelineSemaphoreValueDifference = INT64_MAX,
.framebufferIntegerColorSampleCounts = sample_counts,
/* Vulkan 1.3 properties */
/* XXX: 1.3 support */
/* XXX: VK_EXT_subgroup_size_control */
.minSubgroupSize = 8,
.maxSubgroupSize = 8,
.maxComputeWorkgroupSubgroups = 48,
.requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL,
/* XXX: VK_EXT_inline_uniform_block */
.maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
.maxPerStageDescriptorInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxDescriptorSetInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxInlineUniformTotalSize =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
/* XXX: VK_KHR_shader_integer_dot_product */
.integerDotProduct8BitUnsignedAccelerated = true,
.integerDotProduct8BitSignedAccelerated = true,
.integerDotProduct4x8BitPackedUnsignedAccelerated = true,
.integerDotProduct4x8BitPackedSignedAccelerated = true,
/* XXX: VK_EXT_texel_buffer_alignment */
.storageTexelBufferOffsetAlignmentBytes = 64,
.storageTexelBufferOffsetSingleTexelAlignment = false,
.uniformTexelBufferOffsetAlignmentBytes = 4,
.uniformTexelBufferOffsetSingleTexelAlignment = true,
/* XXX: VK_KHR_maintenance4 */
.maxBufferSize = 1 << 30,
/* VK_EXT_custom_border_color */
.maxCustomBorderColorSamplers = 32768,
/* VK_EXT_graphics_pipeline_library */
.graphicsPipelineLibraryFastLinking = true,
.graphicsPipelineLibraryIndependentInterpolationDecoration = true,
/* VK_EXT_pipeline_robustness */
.defaultRobustnessStorageBuffers =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
.defaultRobustnessUniformBuffers =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
.defaultRobustnessVertexInputs =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT,
.defaultRobustnessImages =
VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT,
/* VK_EXT_provoking_vertex */
.provokingVertexModePerPipeline = false,
.transformFeedbackPreservesTriangleFanProvokingVertex = false,
/* VK_KHR_vertex_attribute_divisor */
/* We will have to restrict this a bit for multiview */
.maxVertexAttribDivisor = UINT32_MAX,
.supportsNonZeroFirstInstance = false,
/* VK_KHR_push_descriptor */
.maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
};
snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
device->name);
memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
const struct {
uint16_t vendor_id;
uint32_t device_id;
uint8_t pad[8];
} dev_uuid = {
.vendor_id = ARM_VENDOR_ID,
.device_id = device->model->gpu_id,
};
STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
/* VK_EXT_physical_device_drm */
if (device->drm.primary_rdev) {
properties->drmHasPrimary = true;
properties->drmPrimaryMajor = major(device->drm.primary_rdev);
properties->drmPrimaryMinor = minor(device->drm.primary_rdev);
}
if (device->drm.render_rdev) {
properties->drmHasRender = true;
properties->drmRenderMajor = major(device->drm.render_rdev);
properties->drmRenderMinor = minor(device->drm.render_rdev);
}
/* VK_EXT_shader_module_identifier */
STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
vk_shaderModuleIdentifierAlgorithmUUID,
sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
}
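/* Example of the deviceID packing above (illustrative): a gpu_prod_id of
 * 0x1234 yields a deviceID of 0x12340000, leaving the low 16 bits clear.
 * The dev_uuid struct relies on natural alignment padding: 2 (vendor_id) +
 * 2 (implicit padding) + 4 (device_id) + 8 (pad) = 16 == VK_UUID_SIZE,
 * which is what the STATIC_ASSERT above verifies.
 */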
void
panvk_physical_device_finish(struct panvk_physical_device *device)
{
panvk_wsi_finish(device);
pan_kmod_dev_destroy(device->kmod.dev);
vk_physical_device_finish(&device->vk);
}
VkResult
panvk_physical_device_init(struct panvk_physical_device *device,
struct panvk_instance *instance,
drmDevicePtr drm_device)
{
VkResult result;
result = create_kmod_dev(device, instance, drm_device);
if (result != VK_SUCCESS)
return result;
pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
device->model = panfrost_get_model(device->kmod.props.gpu_prod_id,
device->kmod.props.gpu_variant);
unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
switch (arch) {
case 6:
case 7:
if (!getenv("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"WARNING: panvk is not well-tested on v%d, "
"pass PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
"if you know what you're doing.", arch);
goto fail;
}
break;
case 10:
break;
default:
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"%s not supported", device->model->name);
goto fail;
}
result = get_drm_device_ids(device, instance, drm_device);
if (result != VK_SUCCESS)
goto fail;
device->formats.all = panfrost_format_table(arch);
device->formats.blendable = panfrost_blendable_format_table(arch);
memset(device->name, 0, sizeof(device->name));
sprintf(device->name, "%s", device->model->name);
if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
result = panvk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"cannot generate UUID");
goto fail;
}
result = get_device_sync_types(device, instance);
if (result != VK_SUCCESS)
goto fail;
vk_warn_non_conformant_implementation("panvk");
struct vk_device_extension_table supported_extensions;
get_device_extensions(device, &supported_extensions);
struct vk_features supported_features;
get_features(device, &supported_features);
struct vk_properties properties;
get_device_properties(instance, device, &properties);
struct vk_physical_device_dispatch_table dispatch_table;
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &panvk_physical_device_entrypoints, true);
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_physical_device_entrypoints, false);
result = vk_physical_device_init(&device->vk, &instance->vk,
&supported_extensions, &supported_features,
&properties, &dispatch_table);
if (result != VK_SUCCESS)
goto fail;
device->vk.supported_sync_types = device->sync_types;
result = panvk_wsi_init(device);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
fail:
if (device->vk.instance)
vk_physical_device_finish(&device->vk);
pan_kmod_dev_destroy(device->kmod.dev);
return result;
}
static const VkQueueFamilyProperties panvk_queue_family_properties = {
.queueFlags =
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.timestampValidBits = 0,
.minImageTransferGranularity = {1, 1, 1},
};
static void
panvk_fill_global_priority(const struct panvk_physical_device *physical_device,
VkQueueFamilyGlobalPriorityPropertiesKHR *prio)
{
enum pan_kmod_group_allow_priority_flags prio_mask =
physical_device->kmod.props.allowed_group_priorities_mask;
uint32_t prio_idx = 0;
if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW)
prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM)
prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH)
prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
if (prio_mask & PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME)
prio->priorities[prio_idx++] = VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR;
prio->priorityCount = prio_idx;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceQueueFamilyProperties2(
VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
pQueueFamilyPropertyCount);
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
{
p->queueFamilyProperties = panvk_queue_family_properties;
VkQueueFamilyGlobalPriorityPropertiesKHR *prio =
vk_find_struct(p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR);
if (prio)
panvk_fill_global_priority(physical_device, prio);
}
}
static uint64_t
get_system_heap_size(void)
{
struct sysinfo info;
sysinfo(&info);
uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit;
/* We don't want to burn too much RAM with the GPU. If the user has 4 GiB
* or less, we use at most half. If they have more than 4 GiB, we use 3/4.
*/
uint64_t available_ram;
if (total_ram <= 4ull * 1024 * 1024 * 1024)
available_ram = total_ram / 2;
else
available_ram = total_ram * 3 / 4;
return available_ram;
}
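/* Examples of the split above (illustrative): a 4 GiB system exposes a
 * 2 GiB heap, while an 8 GiB system exposes a 6 GiB heap.
 */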
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceMemoryProperties2(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties){
.memoryHeapCount = 1,
.memoryHeaps[0].size = get_system_heap_size(),
.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
.memoryTypeCount = 1,
.memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.memoryTypes[0].heapIndex = 0,
};
}
#define DEVICE_PER_ARCH_FUNCS(_ver) \
VkResult panvk_v##_ver##_create_device( \
struct panvk_physical_device *physical_device, \
const VkDeviceCreateInfo *pCreateInfo, \
const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); \
\
void panvk_v##_ver##_destroy_device( \
struct panvk_device *device, const VkAllocationCallbacks *pAllocator)
DEVICE_PER_ARCH_FUNCS(6);
DEVICE_PER_ARCH_FUNCS(7);
DEVICE_PER_ARCH_FUNCS(10);
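/* For reference, DEVICE_PER_ARCH_FUNCS(10) expands to the prototypes
 * (illustrative):
 *
 *   VkResult panvk_v10_create_device(
 *      struct panvk_physical_device *physical_device,
 *      const VkDeviceCreateInfo *pCreateInfo,
 *      const VkAllocationCallbacks *pAllocator, VkDevice *pDevice);
 *
 *   void panvk_v10_destroy_device(
 *      struct panvk_device *device, const VkAllocationCallbacks *pAllocator);
 *
 * panvk_arch_dispatch_ret() below then routes to the matching per-arch
 * implementation at runtime based on pan_arch().
 */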
VKAPI_ATTR VkResult VKAPI_CALL
panvk_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
VkResult result = VK_ERROR_INITIALIZATION_FAILED;
panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
pCreateInfo, pAllocator, pDevice);
return result;
}
VKAPI_ATTR void VKAPI_CALL
panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_physical_device *physical_device =
to_panvk_physical_device(device->vk.physical);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
}
static bool
format_is_supported(struct panvk_physical_device *physical_device,
const struct panfrost_format fmt,
enum pipe_format pfmt)
{
/* If the format ID is zero, it's not supported. */
if (!fmt.hw)
return false;
/* Compressed formats (ID < 32) are optional. We need to check against
* the supported formats reported by the GPU. */
if (util_format_is_compressed(pfmt)) {
uint32_t supported_compr_fmts =
panfrost_query_compressed_formats(&physical_device->kmod.props);
if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts))
return false;
}
return true;
}
static void
get_format_properties(struct panvk_physical_device *physical_device,
VkFormat format, VkFormatProperties *out_properties)
{
VkFormatFeatureFlags tex = 0, buffer = 0;
enum pipe_format pfmt = vk_format_to_pipe_format(format);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
if (pfmt == PIPE_FORMAT_NONE)
goto end;
const struct panfrost_format fmt = physical_device->formats.all[pfmt];
if (!format_is_supported(physical_device, fmt, pfmt))
goto end;
/* 3-byte formats are not supported by the buffer <-> image copy helpers. */
if (util_format_get_blocksize(pfmt) == 3)
goto end;
/* Reject sRGB formats (see
* https://github.com/KhronosGroup/Vulkan-Docs/issues/2214).
*/
if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt))
buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
if (fmt.bind & PAN_BIND_SAMPLER_VIEW) {
tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
if (arch >= 10)
tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
/* Integer and scaled formats only support nearest filtering. */
if (!util_format_is_scaled(pfmt) && !util_format_is_pure_integer(pfmt))
tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
if (!util_format_is_depth_or_stencil(pfmt))
buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT;
}
if (fmt.bind & PAN_BIND_RENDER_TARGET) {
tex |= VK_FORMAT_FEATURE_BLIT_DST_BIT;
tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
/* SNORM rendering isn't working yet (nir_lower_blend bugs), disable for
* now.
*
* XXX: Enable once fixed.
*/
if (!util_format_is_snorm(pfmt)) {
tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
}
if (!util_format_is_depth_and_stencil(pfmt))
buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
}
if (pfmt == PIPE_FORMAT_R32_UINT || pfmt == PIPE_FORMAT_R32_SINT) {
buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
}
if (fmt.bind & PAN_BIND_DEPTH_STENCIL)
tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
end:
out_properties->linearTilingFeatures = tex;
out_properties->optimalTilingFeatures = tex;
out_properties->bufferFeatures = buffer;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
VkFormat format,
VkFormatProperties2 *pFormatProperties)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
get_format_properties(physical_device, format,
&pFormatProperties->formatProperties);
VkDrmFormatModifierPropertiesListEXT *list = vk_find_struct(
pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
if (list && pFormatProperties->formatProperties.linearTilingFeatures) {
VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
list->pDrmFormatModifierProperties,
&list->drmFormatModifierCount);
vk_outarray_append_typed(VkDrmFormatModifierPropertiesEXT, &out,
mod_props)
{
mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
mod_props->drmFormatModifierPlaneCount = 1;
mod_props->drmFormatModifierTilingFeatures =
pFormatProperties->formatProperties.linearTilingFeatures;
}
}
}
static VkResult
get_image_format_properties(struct panvk_physical_device *physical_device,
const VkPhysicalDeviceImageFormatInfo2 *info,
VkImageFormatProperties *pImageFormatProperties,
VkFormatFeatureFlags *p_feature_flags)
{
VkFormatProperties format_props;
VkFormatFeatureFlags format_feature_flags;
VkExtent3D maxExtent;
uint32_t maxMipLevels;
uint32_t maxArraySize;
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
enum pipe_format format = vk_format_to_pipe_format(info->format);
get_format_properties(physical_device, info->format, &format_props);
switch (info->tiling) {
case VK_IMAGE_TILING_LINEAR:
format_feature_flags = format_props.linearTilingFeatures;
break;
case VK_IMAGE_TILING_OPTIMAL:
format_feature_flags = format_props.optimalTilingFeatures;
break;
case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: {
const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
vk_find_struct_const(
info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
if (mod_info->drmFormatModifier != DRM_FORMAT_MOD_LINEAR)
goto unsupported;
/* The only difference between optimal and linear is currently whether
* depth/stencil attachments are allowed on depth/stencil formats.
* There's no reason to allow importing depth/stencil textures, so just
* disallow it and then this annoying edge case goes away.
*/
if (util_format_is_depth_or_stencil(format))
goto unsupported;
assert(format_props.optimalTilingFeatures ==
format_props.linearTilingFeatures);
format_feature_flags = format_props.linearTilingFeatures;
break;
}
default:
unreachable("bad VkPhysicalDeviceImageFormatInfo2");
}
if (format_feature_flags == 0)
goto unsupported;
switch (info->type) {
default:
unreachable("bad vkimage type");
case VK_IMAGE_TYPE_1D:
maxExtent.width = 1 << 16;
maxExtent.height = 1;
maxExtent.depth = 1;
maxMipLevels = 17; /* log2(maxWidth) + 1 */
maxArraySize = 1 << 16;
break;
case VK_IMAGE_TYPE_2D:
maxExtent.width = 1 << 16;
maxExtent.height = 1 << 16;
maxExtent.depth = 1;
maxMipLevels = 17; /* log2(maxWidth) + 1 */
maxArraySize = 1 << 16;
break;
case VK_IMAGE_TYPE_3D:
maxExtent.width = 1 << 16;
maxExtent.height = 1 << 16;
maxExtent.depth = 1 << 16;
maxMipLevels = 17; /* log2(maxWidth) + 1 */
maxArraySize = 1;
break;
}
if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
info->type == VK_IMAGE_TYPE_2D &&
(format_feature_flags &
(VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
!(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
}
/* From the Vulkan 1.2.199 spec:
*
* "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be
* created with usage flags that are not supported for the format the
* image is created with but are supported for at least one format a
* VkImageView created from the image can have."
*
* If VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is set, views can be created with
* different usage than the image so we can't always filter on usage.
* There is one exception to this below for storage.
*/
if (!(info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) {
if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
goto unsupported;
}
}
if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
goto unsupported;
}
}
if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ||
((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
!vk_format_is_depth_or_stencil(info->format))) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
goto unsupported;
}
}
if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||
((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) &&
vk_format_is_depth_or_stencil(info->format))) {
if (!(format_feature_flags &
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
goto unsupported;
}
}
}
*pImageFormatProperties = (VkImageFormatProperties){
.maxExtent = maxExtent,
.maxMipLevels = maxMipLevels,
.maxArrayLayers = maxArraySize,
.sampleCounts = sampleCounts,
/* We need to limit images to 32-bit range, because the maximum
* slice-stride is 32-bit wide, meaning that if we allocate an image
* with the maximum width and height, we end up overflowing it.
*
* We get around this by simply limiting the maximum resource size.
*/
.maxResourceSize = UINT32_MAX,
};
if (p_feature_flags)
*p_feature_flags = format_feature_flags;
return VK_SUCCESS;
unsupported:
*pImageFormatProperties = (VkImageFormatProperties){
.maxExtent = {0, 0, 0},
.maxMipLevels = 0,
.maxArrayLayers = 0,
.sampleCounts = 0,
.maxResourceSize = 0,
};
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
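/* Overflow example motivating the UINT32_MAX cap above (illustrative):
 * a 65536 x 65536 RGBA8 image would need a slice stride of
 * 2^16 * 2^16 * 4 = 2^34 bytes, which doesn't fit in the 32-bit
 * slice-stride field.
 */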
static VkResult
panvk_get_external_image_format_properties(
const struct panvk_physical_device *physical_device,
const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
VkExternalMemoryHandleTypeFlagBits handleType,
VkExternalMemoryProperties *external_properties)
{
const VkExternalMemoryHandleTypeFlags supported_handle_types =
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
if (!(handleType & supported_handle_types)) {
return panvk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
"VkExternalMemoryTypeFlagBits(0x%x) unsupported",
handleType);
}
/* pan_image_layout_init requires 2D for explicit layout */
if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D) {
return panvk_errorf(
physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
"VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)",
handleType, pImageFormatInfo->type);
}
/* There is no restriction on opaque fds. But for dma-bufs, we want to
* make sure vkGetImageSubresourceLayout can be used to query the image
* layout of an exported dma-buf. We also want to make sure
* VkImageDrmFormatModifierExplicitCreateInfoEXT can be used to specify the
* image layout of an imported dma-buf. These add restrictions on the
* image tilings.
*/
VkExternalMemoryFeatureFlags features = 0;
if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
} else if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR) {
features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT;
}
if (!features) {
return panvk_errorf(
physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
"VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageTiling(%d)",
handleType, pImageFormatInfo->tiling);
}
*external_properties = (VkExternalMemoryProperties){
.externalMemoryFeatures = features,
.exportFromImportedHandleTypes = supported_handle_types,
.compatibleHandleTypes = supported_handle_types,
};
return VK_SUCCESS;
}
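/* Capability matrix implemented above (illustrative):
 *
 *   handle type | tiling              | features
 *   ------------+---------------------+-----------------
 *   opaque fd   | any                 | import + export
 *   dma-buf     | DRM format modifier | import + export
 *   dma-buf     | linear              | export only
 *   dma-buf     | optimal             | unsupported
 */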
VKAPI_ATTR VkResult VKAPI_CALL
panvk_GetPhysicalDeviceImageFormatProperties2(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceImageFormatInfo2 *base_info,
VkImageFormatProperties2 *base_props)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL;
VkExternalImageFormatProperties *external_props = NULL;
VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL;
VkFormatFeatureFlags format_feature_flags;
VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
VkResult result;
result = get_image_format_properties(physical_device, base_info,
&base_props->imageFormatProperties,
&format_feature_flags);
if (result != VK_SUCCESS)
return result;
/* Extract input structs */
vk_foreach_struct_const(s, base_info->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
external_info = (const void *)s;
break;
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT:
image_view_info = (const void *)s;
break;
default:
break;
}
}
/* Extract output structs */
vk_foreach_struct(s, base_props->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
external_props = (void *)s;
break;
case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT:
cubic_props = (void *)s;
break;
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
ycbcr_props = (void *)s;
break;
default:
break;
}
}
/* From the Vulkan 1.0.42 spec:
*
* If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
* behave as if VkPhysicalDeviceExternalImageFormatInfo was not
* present and VkExternalImageFormatProperties will be ignored.
*/
if (external_info && external_info->handleType != 0) {
VkExternalImageFormatProperties fallback_external_props;
if (!external_props) {
memset(&fallback_external_props, 0, sizeof(fallback_external_props));
external_props = &fallback_external_props;
}
result = panvk_get_external_image_format_properties(
physical_device, base_info, external_info->handleType,
&external_props->externalMemoryProperties);
if (result != VK_SUCCESS)
goto fail;
/* pan_image_layout_init requirements for explicit layout */
base_props->imageFormatProperties.maxMipLevels = 1;
base_props->imageFormatProperties.maxArrayLayers = 1;
base_props->imageFormatProperties.sampleCounts = 1;
}
if (cubic_props) {
/* Note: the blob only allows cubic filtering for 2D and 2D array views.
* It's likely we could enable it for 1D and CUBE views too, but that
* needs testing.
*/
if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D ||
image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) &&
(format_feature_flags &
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) {
cubic_props->filterCubic = true;
cubic_props->filterCubicMinmax = true;
} else {
cubic_props->filterCubic = false;
cubic_props->filterCubicMinmax = false;
}
}
if (ycbcr_props)
ycbcr_props->combinedImageSamplerDescriptorCount = 1;
return VK_SUCCESS;
fail:
if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
/* From the Vulkan 1.0.42 spec:
*
* If the combination of parameters to
* vkGetPhysicalDeviceImageFormatProperties2 is not supported by
* the implementation for use in vkCreateImage, then all members of
* imageFormatProperties will be filled with zero.
*/
base_props->imageFormatProperties = (VkImageFormatProperties){};
}
return result;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling,
uint32_t *pNumProperties, VkSparseImageFormatProperties *pProperties)
{
/* Sparse images are not yet supported. */
*pNumProperties = 0;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties2(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
{
/* Sparse images are not yet supported. */
*pPropertyCount = 0;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalBufferProperties(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
VkExternalBufferProperties *pExternalBufferProperties)
{
const VkExternalMemoryHandleTypeFlags supported_handle_types =
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
/* From the Vulkan 1.3.298 spec:
*
* compatibleHandleTypes must include at least handleType.
*/
VkExternalMemoryHandleTypeFlags handle_types =
pExternalBufferInfo->handleType;
VkExternalMemoryFeatureFlags features = 0;
if (pExternalBufferInfo->handleType & supported_handle_types) {
handle_types |= supported_handle_types;
features |= VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
}
pExternalBufferProperties->externalMemoryProperties =
(VkExternalMemoryProperties){
.externalMemoryFeatures = features,
.exportFromImportedHandleTypes = handle_types,
.compatibleHandleTypes = handle_types,
};
}