blob: 243fe0d29ac98238a18384275ad09487dc89da46 [file] [log] [blame]
// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! VAAPI backend for both stateless decoders and encoders.
use std::collections::HashSet;
use std::fmt::Debug;
use std::os::fd::AsRawFd;
use anyhow::anyhow;
use byteorder::ByteOrder;
use byteorder::LittleEndian;
use libva::Display;
use libva::VAConfigAttrib;
use libva::VAConfigAttribType;
use crate::utils::DmabufFrame;
use crate::utils::UserPtrFrame;
use crate::DecodedFormat;
pub mod decoder;
pub mod encoder;
pub mod surface_pool;
fn va_rt_format_to_string(va_rt_format: u32) -> String {
String::from(match va_rt_format {
libva::VA_RT_FORMAT_YUV420 => "YUV420",
libva::VA_RT_FORMAT_YUV422 => "YUV422",
libva::VA_RT_FORMAT_YUV444 => "YUV444",
libva::VA_RT_FORMAT_YUV420_10 => "YUV420_10",
libva::VA_RT_FORMAT_YUV420_12 => "YUV420_12",
libva::VA_RT_FORMAT_YUV422_10 => "YUV422_10",
libva::VA_RT_FORMAT_YUV422_12 => "YUV422_12",
libva::VA_RT_FORMAT_YUV444_10 => "YUV444_10",
libva::VA_RT_FORMAT_YUV444_12 => "YUV444_12",
other => return format!("unknown VA rt_format {}", other),
})
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
struct FormatMap {
pub rt_format: u32,
pub va_fourcc: u32,
pub decoded_format: DecodedFormat,
}
/// Maps a given VA_RT_FORMAT to a compatible decoded format in an arbitrary
/// preferred order.
const FORMAT_MAP: [FormatMap; 10] = [
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV420,
va_fourcc: libva::VA_FOURCC_NV12,
decoded_format: DecodedFormat::NV12,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV420,
va_fourcc: libva::VA_FOURCC_I420,
decoded_format: DecodedFormat::I420,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV422,
va_fourcc: libva::VA_FOURCC_422H,
decoded_format: DecodedFormat::I422,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV444,
va_fourcc: libva::VA_FOURCC_444P,
decoded_format: DecodedFormat::I444,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV420_10,
va_fourcc: libva::VA_FOURCC_P010,
decoded_format: DecodedFormat::I010,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV420_12,
va_fourcc: libva::VA_FOURCC_P012,
decoded_format: DecodedFormat::I012,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV422_10,
va_fourcc: libva::VA_FOURCC_Y210,
decoded_format: DecodedFormat::I210,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV422_12,
va_fourcc: libva::VA_FOURCC_Y212,
decoded_format: DecodedFormat::I212,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV444_10,
va_fourcc: libva::VA_FOURCC_Y410,
decoded_format: DecodedFormat::I410,
},
FormatMap {
rt_format: libva::VA_RT_FORMAT_YUV444_12,
va_fourcc: libva::VA_FOURCC_Y412,
decoded_format: DecodedFormat::I412,
},
];
/// Returns a set of supported decoded formats given `rt_format`
fn supported_formats_for_rt_format(
display: &Display,
rt_format: u32,
profile: i32,
entrypoint: u32,
image_formats: &[libva::VAImageFormat],
) -> anyhow::Result<HashSet<FormatMap>> {
let mut attrs = vec![VAConfigAttrib {
type_: VAConfigAttribType::VAConfigAttribRTFormat,
value: 0,
}];
display.get_config_attributes(profile, entrypoint, &mut attrs)?;
// See whether this RT_FORMAT is supported by the given VAProfile and
// VAEntrypoint pair.
if attrs[0].value == libva::VA_ATTRIB_NOT_SUPPORTED || attrs[0].value & rt_format == 0 {
return Err(anyhow!(
"rt_format {:?} not supported for profile {:?} and entrypoint {:?}",
rt_format,
profile,
entrypoint
));
}
let mut supported_formats = HashSet::new();
for format in FORMAT_MAP {
if format.rt_format == rt_format {
supported_formats.insert(format);
}
}
// Only retain those that the hardware can actually map into.
supported_formats.retain(|&entry| {
image_formats
.iter()
.any(|fmt| fmt.fourcc == entry.va_fourcc)
});
Ok(supported_formats)
}
impl TryFrom<&libva::VAImageFormat> for DecodedFormat {
type Error = anyhow::Error;
fn try_from(value: &libva::VAImageFormat) -> Result<Self, Self::Error> {
match value.fourcc {
libva::VA_FOURCC_I420 => Ok(DecodedFormat::I420),
libva::VA_FOURCC_NV12 => Ok(DecodedFormat::NV12),
libva::VA_FOURCC_P010 => Ok(DecodedFormat::I010),
libva::VA_FOURCC_P012 => Ok(DecodedFormat::I012),
libva::VA_FOURCC_Y210 => Ok(DecodedFormat::I210),
libva::VA_FOURCC_Y212 => Ok(DecodedFormat::I212),
libva::VA_FOURCC_Y410 => Ok(DecodedFormat::I410),
libva::VA_FOURCC_Y412 => Ok(DecodedFormat::I412),
_ => Err(anyhow!("Unsupported format")),
}
}
}
/// Copies `src` into `dst` removing all padding and converting from biplanar to triplanar format.
///
/// `useful_pixels` is the number of useful pixels in each sample, e.g. `10` for `P010`, `12` for
/// `P012`, etc.
///
/// This function is VAAPI-specific because of the unusual the source pixels are laid out: VAAPI
/// writes the `useful_pixels` MSBs, but software generally expects the LSBs to contain the data.
fn p01x_to_i01x(
src: &[u8],
dst: &mut [u8],
useful_pixels: usize,
width: usize,
height: usize,
strides: [usize; 3],
offsets: [usize; 3],
) {
let sample_shift = 16 - useful_pixels;
// Copy Y.
//
// VAAPI's Y samples are two byte little endian with the bottom six bits ignored. We need to
// convert that to two byte little endian with top 6 bits ignored.
let src_y_lines = src[offsets[0]..]
.chunks(strides[0])
.map(|line| &line[..width * 2]);
let dst_y_lines = dst.chunks_mut(width * 2);
for (src_line, dst_line) in src_y_lines.zip(dst_y_lines).take(height) {
for (src_y, dst_y) in src_line.chunks(2).zip(dst_line.chunks_mut(2)) {
LittleEndian::write_u16(dst_y, LittleEndian::read_u16(src_y) >> sample_shift);
}
}
let dst_u_offset = width * 2 * height;
// Align width and height to 2 for UV plane.
let width = if width % 2 == 1 { width + 1 } else { width };
let height = if height % 2 == 1 { height + 1 } else { height };
// 1 sample per 4 pixels, but we have two components per line so width remains as-is.
let height = height / 2;
let dst_u_size = width * height;
// Copy U and V and deinterleave into different planes.
//
// We need to perform the same bit shift as luma, but also to de-interleave the data.
let src_uv_lines = src[offsets[1]..]
.chunks(strides[1])
.map(|line| &line[..width * 2]);
let (dst_u_plane, dst_v_plane) = dst[dst_u_offset..].split_at_mut(dst_u_size);
let dst_u_lines = dst_u_plane.chunks_mut(width);
let dst_v_lines = dst_v_plane.chunks_mut(width);
for (src_line, (dst_u_line, dst_v_line)) in
src_uv_lines.zip(dst_u_lines.zip(dst_v_lines)).take(height)
{
for ((src_u, src_v), (dst_u, dst_v)) in src_line
.chunks(4)
.map(|chunk| (&chunk[0..2], &chunk[2..4]))
.zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2)))
{
LittleEndian::write_u16(dst_u, LittleEndian::read_u16(src_u) >> sample_shift);
LittleEndian::write_u16(dst_v, LittleEndian::read_u16(src_v) >> sample_shift);
}
}
}
/// Copies `src` into `dst` as I21x, removing all padding and changing the layout from packed to
/// triplanar.
///
/// `useful_pixels` is the number of useful pixels in each sample, e.g. `10` for `Y210` or `16` for
/// `Y216`.
///
/// This function is VAAPI-specific because of the unusual the source pixels are laid out: VAAPI
/// writes the `useful_pixels` MSBs, but software generally expects the LSBs to contain the data.
///
/// WARNING: this function could not be tested for lack of supporting hardware.
fn y21x_to_i21x(
src: &[u8],
dst: &mut [u8],
useful_pixels: usize,
width: usize,
height: usize,
strides: [usize; 3],
offsets: [usize; 3],
) {
let sample_shift = 16 - useful_pixels;
// Align width to 2 for U and V planes and divide by 2.
// This should not be necessary as the sampling method requires that width is a multiple of 2
// to begin with.
let uv_width = if width % 2 == 1 { width + 1 } else { width } / 2;
// YUYV representation, i.e. 4 16-bit words per two Y samples meaning we have 4 * width bytes
// of data per line.
let src_lines = src[offsets[0]..]
.chunks(strides[0])
.map(|line| &line[..width * 4]);
let dst_y_size = width * 2 * height;
let dst_u_size = uv_width * 2 * height;
let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
let dst_u_lines = dst_u_plane.chunks_mut(uv_width * 2);
let dst_v_lines = dst_v_plane.chunks_mut(uv_width * 2);
for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in src_lines
.zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines)))
.take(height)
{
for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(8).zip(
dst_y_line
.chunks_mut(4)
.zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
) {
let y0 = LittleEndian::read_u16(&src[0..2]) >> sample_shift;
let u = LittleEndian::read_u16(&src[2..4]) >> sample_shift;
let y1 = LittleEndian::read_u16(&src[4..6]) >> sample_shift;
let v = LittleEndian::read_u16(&src[6..8]) >> sample_shift;
LittleEndian::write_u16(&mut dst_y[0..2], y0);
LittleEndian::write_u16(&mut dst_y[2..4], y1);
LittleEndian::write_u16(dst_u, u);
LittleEndian::write_u16(dst_v, v);
}
}
}
/// Copies `src` into `dst` as I412, removing all padding and changing the layout from packed to
/// triplanar. Also drops the alpha channel.
///
/// This function is VAAPI-specific because the samples need to be rolled somehow...
fn y412_to_i412(
src: &[u8],
dst: &mut [u8],
width: usize,
height: usize,
strides: [usize; 3],
offsets: [usize; 3],
) {
let src_lines = src[offsets[0]..]
.chunks(strides[0])
.map(|line| &line[..width * 8]);
let dst_y_size = width * 2 * height;
let dst_u_size = width * 2 * height;
let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
let dst_u_lines = dst_u_plane.chunks_mut(width * 2);
let dst_v_lines = dst_v_plane.chunks_mut(width * 2);
for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in src_lines
.zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines)))
.take(height)
{
for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(8).zip(
dst_y_line
.chunks_mut(2)
.zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
) {
let y = LittleEndian::read_u16(&src[2..4]);
let u = LittleEndian::read_u16(&src[0..2]);
let v = LittleEndian::read_u16(&src[4..6]);
// Why is that rotate_right neeed??
LittleEndian::write_u16(dst_y, y.rotate_right(4));
LittleEndian::write_u16(dst_u, u.rotate_right(4));
LittleEndian::write_u16(dst_v, v.rotate_right(4));
}
}
}
impl libva::ExternalBufferDescriptor for UserPtrFrame {
const MEMORY_TYPE: libva::MemoryType = libva::MemoryType::UserPtr;
type DescriptorAttribute = libva::VASurfaceAttribExternalBuffers;
fn va_surface_attribute(&mut self) -> Self::DescriptorAttribute {
let pitches = self
.layout
.planes
.iter()
.map(|p| p.stride as u32)
.chain(std::iter::repeat(0))
.take(4)
.collect::<Vec<_>>()
.try_into()
.unwrap();
let offsets = self
.layout
.planes
.iter()
.map(|p| p.offset as u32)
.chain(std::iter::repeat(0))
.take(4)
.collect::<Vec<_>>()
.try_into()
.unwrap();
libva::VASurfaceAttribExternalBuffers {
pixel_format: self.layout.format.0.into(),
width: self.layout.size.width,
height: self.layout.size.height,
data_size: self.mem_layout.size() as u32,
num_planes: self.layout.planes.len() as u32,
pitches,
offsets,
buffers: self.buffers.as_mut_ptr() as *mut _,
num_buffers: self.buffers.len() as u32,
flags: 0,
private_data: std::ptr::null_mut(),
}
}
}
impl libva::ExternalBufferDescriptor for DmabufFrame {
const MEMORY_TYPE: libva::MemoryType = libva::MemoryType::DrmPrime2;
type DescriptorAttribute = libva::VADRMPRIMESurfaceDescriptor;
fn va_surface_attribute(&mut self) -> Self::DescriptorAttribute {
let objects = self
.fds
.iter()
.map(|fd| libva::VADRMPRIMESurfaceDescriptorObject {
fd: fd.as_raw_fd(),
size: nix::sys::stat::fstat(fd.as_raw_fd())
.map(|stat| stat.st_size as u32)
// If we don't have the information about the plane fd size, fallback to 0.
// Libva seems to be *sometimes* "happy" with zero.
.unwrap_or(0),
// TODO should the descriptor be moved to individual objects?
drm_format_modifier: self.layout.format.1,
})
.chain(std::iter::repeat(Default::default()))
.take(4)
.collect::<Vec<_>>()
.try_into()
.unwrap();
let layers = [
libva::VADRMPRIMESurfaceDescriptorLayer {
drm_format: self.layout.format.0.into(),
num_planes: self.layout.planes.len() as u32,
object_index: [0, 0, 0, 0],
offset: self
.layout
.planes
.iter()
.map(|p| p.offset as u32)
.chain(std::iter::repeat(0))
.take(4)
.collect::<Vec<_>>()
.try_into()
.unwrap(),
pitch: self
.layout
.planes
.iter()
.map(|p| p.stride as u32)
.chain(std::iter::repeat(0))
.take(4)
.collect::<Vec<_>>()
.try_into()
.unwrap(),
},
Default::default(),
Default::default(),
Default::default(),
];
libva::VADRMPRIMESurfaceDescriptor {
// TODO should we match and use VA_FOURCC_* here?
fourcc: self.layout.format.0.into(),
width: self.layout.size.width,
height: self.layout.size.height,
num_objects: 1,
objects,
num_layers: 1,
layers,
}
}
}