blob: 94b6a1ae3e084c78433128f5f87d36e0d3e9213a [file] [log] [blame]
// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
//! Device memory allocation and memory pools.
//!
//! By default, memory allocation is automatically handled by the vulkano library when you create
//! a buffer or an image. But if you want more control, you have the possibility to customise the
//! memory allocation strategy.
//!
//! # Memory types and heaps
//!
//! A physical device is composed of one or more **memory heaps**. A memory heap is a pool of
//! memory that can be allocated.
//!
//! ```
//! // Enumerating memory heaps.
//! # let physical_device: vulkano::device::physical::PhysicalDevice = return;
//! for (index, heap) in physical_device.memory_properties().memory_heaps.iter().enumerate() {
//! println!("Heap #{:?} has a capacity of {:?} bytes", index, heap.size);
//! }
//! ```
//!
//! However you can't allocate directly from a memory heap. A memory heap is shared amongst one or
//! multiple **memory types**, which you can allocate memory from. Each memory type has different
//! characteristics.
//!
//! A memory type may or may not be visible to the host. In other words, it may or may not be
//! directly writable by the CPU. A memory type may or may not be device-local. A device-local
//! memory type has a much quicker access time from the GPU than a non-device-local type. Note
//! that non-device-local memory types are still accessible by the device, they are just slower.
//!
//! ```
//! // Enumerating memory types.
//! # let physical_device: vulkano::device::physical::PhysicalDevice = return;
//! for ty in physical_device.memory_properties().memory_types.iter() {
//! println!("Memory type belongs to heap #{:?}", ty.heap_index);
//! println!("Property flags: {:?}", ty.property_flags);
//! }
//! ```
//!
//! Memory types are ordered from "best" to "worst". In other words, the implementation prefers that
//! you use the memory types that are earlier in the list. This means that selecting a memory type
//! should always be done by enumerating them and taking the first one that matches our criteria.
//!
//! ## In practice
//!
//! In practice, desktop machines usually have two memory heaps: one that represents the RAM of
//! the CPU, and one that represents the RAM of the GPU. The CPU's RAM is host-accessible but not
//! device-local, while the GPU's RAM is not host-accessible but is device-local.
//!
//! Mobile machines usually have a single memory heap that is "equally local" to both the CPU and
//! the GPU. It is both host-accessible and device-local.
//!
//! # Allocating memory and memory pools
//!
//! Allocating memory can be done by calling `DeviceMemory::allocate()`.
//!
//! Here is an example:
//!
//! ```
//! use vulkano::memory::{DeviceMemory, MemoryAllocateInfo};
//!
//! # let device: std::sync::Arc<vulkano::device::Device> = return;
//! // Taking the first memory type for the sake of this example.
//! let memory_type_index = 0;
//!
//! let memory = DeviceMemory::allocate(
//! device.clone(),
//! MemoryAllocateInfo {
//! allocation_size: 1024,
//! memory_type_index,
//! ..Default::default()
//! },
//! ).expect("Failed to allocate memory");
//!
//! // The memory is automatically freed when `memory` is destroyed.
//! ```
//!
//! However allocating and freeing memory is very slow (up to several hundred milliseconds
//! sometimes). Instead you are strongly encouraged to use a memory pool. A memory pool is not
//! a Vulkan concept but a vulkano concept.
//!
//! A memory pool is any object that implements the `MemoryPool` trait. You can implement that
//! trait on your own structure and then use it when you create buffers and images so that they
//! get memory from that pool. By default if you don't specify any pool when creating a buffer or
//! an image, an instance of `StandardMemoryPool` that is shared by the `Device` object is used.
pub use self::alignment::DeviceAlignment;
use self::allocator::DeviceLayout;
pub use self::device_memory::{
DeviceMemory, DeviceMemoryError, ExternalMemoryHandleType, ExternalMemoryHandleTypes,
MappedDeviceMemory, MemoryAllocateFlags, MemoryAllocateInfo, MemoryImportInfo, MemoryMapError,
};
use crate::{
buffer::{sys::RawBuffer, Subbuffer},
image::{sys::RawImage, ImageAccess, ImageAspects},
macros::vulkan_bitflags,
sync::semaphore::Semaphore,
DeviceSize,
};
use std::{
num::NonZeroU64,
ops::{Bound, Range, RangeBounds, RangeTo},
sync::Arc,
};
mod alignment;
pub mod allocator;
mod device_memory;
/// Properties of the memory in a physical device.
///
/// Can be built from the raw `ash::vk::PhysicalDeviceMemoryProperties` through the `From`
/// implementation, which keeps the entries in the order in which they appear in the raw arrays.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryProperties {
/// The available memory types.
///
/// Each [`MemoryType`] names the heap it belongs to through its `heap_index` field.
pub memory_types: Vec<MemoryType>,
/// The available memory heaps.
pub memory_heaps: Vec<MemoryHeap>,
}
impl From<ash::vk::PhysicalDeviceMemoryProperties> for MemoryProperties {
#[inline]
fn from(val: ash::vk::PhysicalDeviceMemoryProperties) -> Self {
Self {
memory_types: val.memory_types[0..val.memory_type_count as usize]
.iter()
.map(|vk_memory_type| MemoryType {
property_flags: vk_memory_type.property_flags.into(),
heap_index: vk_memory_type.heap_index,
})
.collect(),
memory_heaps: val.memory_heaps[0..val.memory_heap_count as usize]
.iter()
.map(|vk_memory_heap| MemoryHeap {
size: vk_memory_heap.size,
flags: vk_memory_heap.flags.into(),
})
.collect(),
}
}
}
/// A memory type in a physical device.
///
/// Memory is allocated from a memory type rather than directly from a heap; one heap may be
/// shared amongst several memory types.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryType {
/// The properties of this memory type.
pub property_flags: MemoryPropertyFlags,
/// The index of the memory heap that this memory type corresponds to.
///
/// This is an index into [`MemoryProperties::memory_heaps`].
pub heap_index: u32,
}
vulkan_bitflags! {
#[non_exhaustive]
/// Properties of a memory type.
///
/// These flags are reported for each memory type in [`MemoryType::property_flags`].
MemoryPropertyFlags = MemoryPropertyFlags(u32);
/// The memory is located on the device, and is allocated from a heap that also has the
/// [`DEVICE_LOCAL`] flag set.
///
/// For some devices, particularly integrated GPUs, the device shares memory with the host and
/// all memory may be device-local, so the distinction is moot. However, if the device has
/// non-device-local memory, it is usually faster for the device to access device-local memory.
/// Therefore, device-local memory is preferred for data that will only be accessed by
/// the device.
///
/// If the device and host do not share memory, data transfer between host and device may
/// involve sending the data over the data bus that connects the two. Accesses are faster if
/// they do not have to cross this barrier: device-local memory is fast for the device to
/// access, but slower to access by the host. However, there are devices that share memory with
/// the host, yet have distinct device-local and non-device-local memory types. In that case,
/// the speed difference may not be large.
///
/// For data transfer between host and device, it is most efficient if the memory is located
/// at the destination of the transfer. Thus, if [`HOST_VISIBLE`] versions of both are
/// available, device-local memory is preferred for host-to-device data transfer, while
/// non-device-local memory is preferred for device-to-host data transfer. This is because data
/// is usually written only once but potentially read several times, and because reads can take
/// advantage of caching while writes cannot.
///
/// Devices may have memory types that are neither `DEVICE_LOCAL` nor [`HOST_VISIBLE`]. This
/// is regular host memory that is made available to the device exclusively. Although it will be
/// slower to access from the device than `DEVICE_LOCAL` memory, it can be faster than
/// [`HOST_VISIBLE`] memory. It can be used as overflow space if the device is out of memory.
///
/// [`DEVICE_LOCAL`]: MemoryHeapFlags::DEVICE_LOCAL
/// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
DEVICE_LOCAL = DEVICE_LOCAL,
/// The memory can be mapped into the memory space of the host and accessed as regular RAM.
///
/// Memory of this type is required to transfer data between the host and the device. If
/// the memory is going to be accessed by the device more than a few times, it is recommended
/// to copy the data to non-`HOST_VISIBLE` memory first if it is available.
///
/// `HOST_VISIBLE` memory is always at least either [`HOST_COHERENT`] or [`HOST_CACHED`],
/// but it can be both.
///
/// [`HOST_COHERENT`]: MemoryPropertyFlags::HOST_COHERENT
/// [`HOST_CACHED`]: MemoryPropertyFlags::HOST_CACHED
HOST_VISIBLE = HOST_VISIBLE,
/// Host access to the memory does not require calling [`invalidate_range`] to make device
/// writes visible to the host, nor [`flush_range`] to flush host writes back to the device.
///
/// [`invalidate_range`]: MappedDeviceMemory::invalidate_range
/// [`flush_range`]: MappedDeviceMemory::flush_range
HOST_COHERENT = HOST_COHERENT,
/// The memory is cached by the host.
///
/// `HOST_CACHED` memory is fast for reads and random access from the host, so it is preferred
/// for device-to-host data transfer. Memory that is [`HOST_VISIBLE`] but not `HOST_CACHED` is
/// often slow for all accesses other than sequential writing, so it is more suited for
/// host-to-device transfer, and it is often beneficial to write the data in sequence.
///
/// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
HOST_CACHED = HOST_CACHED,
/// Allocations made from the memory are lazy.
///
/// This means that no actual allocation is performed. Instead memory is automatically
/// allocated by the Vulkan implementation based on need. You can call
/// [`DeviceMemory::commitment`] to query how much memory is currently committed to an
/// allocation.
///
/// Memory of this type can only be used on images created with a certain flag, and is never
/// [`HOST_VISIBLE`].
///
/// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
LAZILY_ALLOCATED = LAZILY_ALLOCATED,
/// The memory can only be accessed by the device, and allows protected queue access.
///
/// Memory of this type is never [`HOST_VISIBLE`], [`HOST_COHERENT`] or [`HOST_CACHED`].
///
/// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
/// [`HOST_COHERENT`]: MemoryPropertyFlags::HOST_COHERENT
/// [`HOST_CACHED`]: MemoryPropertyFlags::HOST_CACHED
PROTECTED = PROTECTED {
api_version: V1_1,
},
/// Device accesses to the memory are automatically made available and visible to other device
/// accesses.
///
/// Memory of this type is slower to access by the device, so it is best avoided for general
/// purpose use. Because of its coherence properties, however, it may be useful for debugging.
DEVICE_COHERENT = DEVICE_COHERENT_AMD {
device_extensions: [amd_device_coherent_memory],
},
/// The memory is not cached on the device.
///
/// `DEVICE_UNCACHED` memory is always also [`DEVICE_COHERENT`].
///
/// [`DEVICE_COHERENT`]: MemoryPropertyFlags::DEVICE_COHERENT
DEVICE_UNCACHED = DEVICE_UNCACHED_AMD {
device_extensions: [amd_device_coherent_memory],
},
/// Other devices can access the memory via remote direct memory access (RDMA).
RDMA_CAPABLE = RDMA_CAPABLE_NV {
device_extensions: [nv_external_memory_rdma],
},
}
/// A memory heap in a physical device.
///
/// A heap is a pool of memory. Allocations are not made from a heap directly, but through one
/// of the memory types whose [`MemoryType::heap_index`] refers to it.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryHeap {
/// The size of the heap in bytes.
pub size: DeviceSize,
/// Attributes of the heap.
pub flags: MemoryHeapFlags,
}
vulkan_bitflags! {
#[non_exhaustive]
/// Attributes of a memory heap.
///
/// These flags are reported for each heap in [`MemoryHeap::flags`].
MemoryHeapFlags = MemoryHeapFlags(u32);
/// The heap corresponds to device-local memory.
DEVICE_LOCAL = DEVICE_LOCAL,
/// If used on a logical device that represents more than one physical device, allocations are
/// replicated across each physical device's instance of this heap.
MULTI_INSTANCE = MULTI_INSTANCE {
api_version: V1_1,
instance_extensions: [khr_device_group_creation],
},
}
/// Represents the requirements expressed by the Vulkan implementation when it comes to binding
/// memory to a resource.
#[derive(Clone, Copy, Debug)]
pub struct MemoryRequirements {
/// Memory layout required for the resource.
pub layout: DeviceLayout,
/// Indicates which memory types can be used. Each bit that is set to 1 means that the memory
/// type whose index is the same as the position of the bit can be used.
pub memory_type_bits: u32,
/// Whether the implementation prefers to use a dedicated allocation (in other words, to
/// allocate a whole block of memory dedicated to this resource alone).
///
/// This will be `false` if the device API version is less than 1.1 and the
/// [`khr_get_memory_requirements2`](crate::device::DeviceExtensions::khr_get_memory_requirements2)
/// extension is not enabled on the device.
pub prefers_dedicated_allocation: bool,
/// Whether the implementation requires the use of a dedicated allocation (in other words, to
/// allocate a whole block of memory dedicated to this resource alone).
///
/// This will be `false` if the device API version is less than 1.1 and the
/// [`khr_get_memory_requirements2`](crate::device::DeviceExtensions::khr_get_memory_requirements2)
/// extension is not enabled on the device.
pub requires_dedicated_allocation: bool,
}
/// Indicates a specific resource to allocate memory for.
///
/// Using dedicated allocations can yield better performance, but requires the
/// [`khr_dedicated_allocation`](crate::device::DeviceExtensions::khr_dedicated_allocation)
/// extension to be enabled on the device.
///
/// If a dedicated allocation is performed, it must not be bound to any resource other than the
/// one that was passed with the enumeration.
///
/// Each variant borrows the resource for the lifetime `'a`; it does not take ownership of it.
#[derive(Clone, Copy, Debug)]
pub enum DedicatedAllocation<'a> {
/// Allocation dedicated to a buffer.
Buffer(&'a RawBuffer),
/// Allocation dedicated to an image.
Image(&'a RawImage),
}
/// Crate-internal counterpart of [`DedicatedAllocation`] that stores the resource's id instead
/// of borrowing the resource itself.
///
/// Values are obtained from a `DedicatedAllocation` through the `From` implementation, which
/// records the result of calling `id()` on the buffer or image.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum DedicatedTo {
/// The id of the buffer the allocation is dedicated to.
Buffer(NonZeroU64),
/// The id of the image the allocation is dedicated to.
Image(NonZeroU64),
}
impl From<DedicatedAllocation<'_>> for DedicatedTo {
fn from(dedicated_allocation: DedicatedAllocation<'_>) -> Self {
match dedicated_allocation {
DedicatedAllocation::Buffer(buffer) => Self::Buffer(buffer.id()),
DedicatedAllocation::Image(image) => Self::Image(image.id()),
}
}
}
/// The properties for exporting or importing external memory, when a buffer or image is created
/// with a specific configuration.
///
/// Converted from `ash::vk::ExternalMemoryProperties`: the three booleans mirror the
/// `DEDICATED_ONLY`, `EXPORTABLE` and `IMPORTABLE` bits of its `external_memory_features`.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct ExternalMemoryProperties {
/// Whether a dedicated memory allocation is required for the queried external handle type.
pub dedicated_only: bool,
/// Whether memory can be exported to an external source with the queried
/// external handle type.
pub exportable: bool,
/// Whether memory can be imported from an external source with the queried
/// external handle type.
pub importable: bool,
/// Which external handle types can be re-exported after the queried external handle type has
/// been imported.
pub export_from_imported_handle_types: ExternalMemoryHandleTypes,
/// Which external handle types can be enabled along with the queried external handle type
/// when creating the buffer or image.
pub compatible_handle_types: ExternalMemoryHandleTypes,
}
impl From<ash::vk::ExternalMemoryProperties> for ExternalMemoryProperties {
    /// Converts the raw Vulkan structure, splitting its feature bitmask into individual booleans
    /// and converting both handle-type masks.
    #[inline]
    fn from(val: ash::vk::ExternalMemoryProperties) -> Self {
        let features = val.external_memory_features;
        // True when the given feature bit is present in the reported feature mask.
        let supports = |flag: ash::vk::ExternalMemoryFeatureFlags| features.intersects(flag);

        let dedicated_only = supports(ash::vk::ExternalMemoryFeatureFlags::DEDICATED_ONLY);
        let exportable = supports(ash::vk::ExternalMemoryFeatureFlags::EXPORTABLE);
        let importable = supports(ash::vk::ExternalMemoryFeatureFlags::IMPORTABLE);

        Self {
            dedicated_only,
            exportable,
            importable,
            export_from_imported_handle_types: val.export_from_imported_handle_types.into(),
            compatible_handle_types: val.compatible_handle_types.into(),
        }
    }
}
/// Parameters to execute sparse bind operations on a queue.
///
/// One value describes a single batch: semaphores to wait on, the bind operations themselves,
/// and semaphores to signal afterwards. Every list is empty in the `Default` value.
#[derive(Clone, Debug)]
pub struct BindSparseInfo {
/// The semaphores to wait for before beginning the execution of this batch of
/// sparse bind operations.
///
/// The default value is empty.
pub wait_semaphores: Vec<Arc<Semaphore>>,
/// The bind operations to perform for buffers.
///
/// Each entry pairs a buffer with the list of binds to perform on it.
///
/// The default value is empty.
pub buffer_binds: Vec<(Subbuffer<[u8]>, Vec<SparseBufferMemoryBind>)>,
/// The bind operations to perform for images with an opaque memory layout.
///
/// This should be used for mip tail regions, the metadata aspect, and for the normal regions
/// of images that do not have the `sparse_residency` flag set.
///
/// Each entry pairs an image with the list of binds to perform on it.
///
/// The default value is empty.
pub image_opaque_binds: Vec<(Arc<dyn ImageAccess>, Vec<SparseImageOpaqueMemoryBind>)>,
/// The bind operations to perform for images with a known memory layout.
///
/// This type of sparse bind can only be used for images that have the `sparse_residency`
/// flag set.
/// Only the normal texel regions can be bound this way, not the mip tail regions or metadata
/// aspect.
///
/// Each entry pairs an image with the list of binds to perform on it.
///
/// The default value is empty.
pub image_binds: Vec<(Arc<dyn ImageAccess>, Vec<SparseImageMemoryBind>)>,
/// The semaphores to signal after the execution of this batch of sparse bind operations
/// has completed.
///
/// The default value is empty.
pub signal_semaphores: Vec<Arc<Semaphore>>,
// Marker field with no data of its own; the `Default` implementation fills it in, so values
// are presumably meant to be built with `..Default::default()`.
pub _ne: crate::NonExhaustive,
}
impl Default for BindSparseInfo {
#[inline]
fn default() -> Self {
Self {
wait_semaphores: Vec::new(),
buffer_binds: Vec::new(),
image_opaque_binds: Vec::new(),
image_binds: Vec::new(),
signal_semaphores: Vec::new(),
_ne: crate::NonExhaustive(()),
}
}
}
/// Parameters for a single sparse bind operation on a buffer.
///
/// Depending on `memory`, the operation either binds memory to the given region of the buffer
/// or unbinds whatever memory is currently bound there.
#[derive(Clone, Debug, Default)]
pub struct SparseBufferMemoryBind {
/// The offset in bytes from the start of the buffer's memory, where memory is to be (un)bound.
///
/// The default value is `0`.
pub offset: DeviceSize,
/// The size in bytes of the memory to be (un)bound.
///
/// The default value is `0`, which must be overridden.
pub size: DeviceSize,
/// If `Some`, specifies the memory and an offset into that memory that is to be bound.
/// The provided memory must match the buffer's memory requirements.
///
/// The tuple is `(memory, offset into that memory)`.
///
/// If `None`, specifies that existing memory at the specified location is to be unbound.
///
/// The default value is `None`.
pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,
}
/// Parameters for a single sparse bind operation on parts of an image with an opaque memory layout.
///
/// This type of sparse bind should be used for mip tail regions, the metadata aspect, and for the
/// normal regions of images that do not have the `sparse_residency` flag set.
///
/// Depending on `memory`, the operation either binds memory to the given region or unbinds
/// whatever memory is currently bound there.
#[derive(Clone, Debug, Default)]
pub struct SparseImageOpaqueMemoryBind {
/// The offset in bytes from the start of the image's memory, where memory is to be (un)bound.
///
/// The default value is `0`.
pub offset: DeviceSize,
/// The size in bytes of the memory to be (un)bound.
///
/// The default value is `0`, which must be overridden.
pub size: DeviceSize,
/// If `Some`, specifies the memory and an offset into that memory that is to be bound.
/// The provided memory must match the image's memory requirements.
///
/// The tuple is `(memory, offset into that memory)`.
///
/// If `None`, specifies that existing memory at the specified location is to be unbound.
///
/// The default value is `None`.
pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,
/// Sets whether the binding should apply to the metadata aspect of the image, or to the
/// normal texel data.
///
/// `true` selects the metadata aspect, `false` the normal texel data.
///
/// The default value is `false`.
pub metadata: bool,
}
/// Parameters for a single sparse bind operation on parts of an image with a known memory layout.
///
/// This type of sparse bind can only be used for images that have the `sparse_residency` flag set.
/// Only the normal texel regions can be bound this way, not the mip tail regions or metadata
/// aspect.
///
/// The region is addressed by aspect, mip level, array layer, and a texel offset and extent,
/// rather than by a byte offset and size.
#[derive(Clone, Debug, Default)]
pub struct SparseImageMemoryBind {
/// The aspects of the image where memory is to be (un)bound.
///
/// The default value is `ImageAspects::empty()`, which must be overridden.
pub aspects: ImageAspects,
/// The mip level of the image where memory is to be (un)bound.
///
/// The default value is `0`.
pub mip_level: u32,
/// The array layer of the image where memory is to be (un)bound.
///
/// The default value is `0`.
pub array_layer: u32,
/// The offset in texels (or for compressed images, texel blocks) from the origin of the image,
/// where memory is to be (un)bound.
///
/// This must be a multiple of the
/// [`SparseImageFormatProperties::image_granularity`](crate::image::SparseImageFormatProperties::image_granularity)
/// value of the image.
///
/// The default value is `[0; 3]`.
pub offset: [u32; 3],
/// The extent in texels (or for compressed images, texel blocks) of the image where
/// memory is to be (un)bound.
///
/// This must be a multiple of the
/// [`SparseImageFormatProperties::image_granularity`](crate::image::SparseImageFormatProperties::image_granularity)
/// value of the image, or `offset + extent` for that dimension must equal the image's total
/// extent.
///
/// The default value is `[0; 3]`, which must be overridden.
pub extent: [u32; 3],
/// If `Some`, specifies the memory and an offset into that memory that is to be bound.
/// The provided memory must match the image's memory requirements.
///
/// The tuple is `(memory, offset into that memory)`.
///
/// If `None`, specifies that existing memory at the specified location is to be unbound.
///
/// The default value is `None`.
pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,
}
/// Returns `true` if none of the bits of `offset` below `alignment` are set.
///
/// The test is done with a bit mask (`alignment - 1`), so it is only meaningful when the
/// alignment value is a power of two.
#[inline(always)]
pub(crate) fn is_aligned(offset: DeviceSize, alignment: DeviceAlignment) -> bool {
    let low_bits_mask = alignment.as_devicesize() - 1;
    let misalignment = offset & low_bits_mask;

    misalignment == 0
}
/// Performs bounds-checking of a Vulkan memory range. Analog of `std::slice::range`.
///
/// `range` is resolved against `bounds` (which runs from `0` up to `bounds.end`) into a
/// half-open `start..end` range. An unbounded start becomes `0` and an unbounded end becomes
/// `bounds.end`.
///
/// Returns `None` if converting an excluded start or an included end to its half-open form
/// overflows, if the start lies past the end, or if the end lies past `bounds.end`.
pub(crate) fn range(
    range: impl RangeBounds<DeviceSize>,
    bounds: RangeTo<DeviceSize>,
) -> Option<Range<DeviceSize>> {
    let RangeTo { end: limit } = bounds;

    let lower = match range.start_bound() {
        Bound::Unbounded => 0,
        Bound::Included(first) => *first,
        Bound::Excluded(before_first) => before_first.checked_add(1)?,
    };
    let upper = match range.end_bound() {
        Bound::Unbounded => limit,
        Bound::Excluded(past_last) => *past_last,
        Bound::Included(last) => last.checked_add(1)?,
    };

    if lower > upper || upper > limit {
        return None;
    }

    Some(lower..upper)
}