Sync cros-codecs code with ChromeOS

There are various downstream code changes in the cros-codecs directory in
ChromeOS [1]. This CL syncs the upstream-based code [2] with the code
in ChromeOS [3]. This is necessary to pass the build in cros-libva.

[1] https://chromium.googlesource.com/chromiumos/platform2/+/refs/heads/main/cros-codecs/
[2] https://github.com/chromeos/cros-codecs/commit/2c1b9d6c03fa50adeb4c4d3d7d2d77a89e6b3b57
[3] https://chromium.googlesource.com/chromiumos/platform2/+/8917a8d8b806404e0edc57f449d41803e779576b/cros-codecs/

Bug: 380289195, 377619217
Test: mm libcros_codecs
Change-Id: I8a209d3afa8f63e1a2ab5bcc2a2c3b8b7e904ac3
diff --git a/Cargo.toml b/Cargo.toml
index 4c68ab5..ba80813 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,13 +9,14 @@
 
 [features]
 default = []
-vaapi = ["libva", "anyhow", "byteorder", "thiserror", "crc32fast", "nix"]
-v4l2 = ["v4l2r", "anyhow", "byteorder", "thiserror", "crc32fast", "nix"]
+backend = []
+vaapi = ["libva", "anyhow", "byteorder", "thiserror", "crc32fast", "nix", "backend"]
+v4l2 = ["v4l2r", "anyhow", "byteorder", "thiserror", "crc32fast", "nix", "backend"]
 
 [dependencies]
 anyhow = { version = "1.0.75", optional = true }
 byteorder = { version = "1.4.3", optional = true }
-libva = { version = "0.0.7", package = "cros-libva", optional = true }
+libva = { version = "0.0.12", package = "cros-libva", optional = true }
 v4l2r = { version = "0.0.5", package = "v4l2r", optional = true }
 log = { version = "0", features = ["release_max_level_debug"] }
 thiserror = { version = "1.0.58", optional = true }
@@ -36,8 +37,8 @@
 
 [[example]]
 name = "ccdec"
-required-features = ["vaapi"]
+required-features = ["backend"]
 
 [[example]]
 name = "ccenc"
-required-features = ["vaapi"]
+required-features = ["backend"]
diff --git a/examples/ccdec-v4l2-stateless.rs b/examples/ccdec-v4l2-stateless.rs
new file mode 100644
index 0000000..fdee24d
--- /dev/null
+++ b/examples/ccdec-v4l2-stateless.rs
@@ -0,0 +1,187 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::borrow::Cow;
+use std::fs::File;
+use std::io::Read;
+use std::io::Write;
+use std::path::PathBuf;
+use std::str::FromStr;
+
+use argh::FromArgs;
+use cros_codecs::backend::v4l2::decoder::stateless::V4l2StatelessDecoderHandle;
+use cros_codecs::bitstream_utils::NalIterator;
+use cros_codecs::codec::h264::parser::Nalu as H264Nalu;
+use cros_codecs::decoder::stateless::h264::H264;
+use cros_codecs::decoder::stateless::StatelessDecoder;
+use cros_codecs::decoder::stateless::StatelessVideoDecoder;
+use cros_codecs::decoder::BlockingMode;
+use cros_codecs::decoder::DecodedHandle;
+use cros_codecs::decoder::DynDecodedHandle;
+use cros_codecs::multiple_desc_type;
+use cros_codecs::utils::simple_playback_loop;
+use cros_codecs::utils::simple_playback_loop_owned_frames;
+use cros_codecs::utils::DmabufFrame;
+use cros_codecs::utils::UserPtrFrame;
+use cros_codecs::DecodedFormat;
+
+multiple_desc_type! {
+    enum BufferDescriptor {
+        Managed(()),
+        Dmabuf(DmabufFrame),
+        User(UserPtrFrame),
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+enum EncodedFormat {
+    H264,
+    H265,
+    VP8,
+    VP9,
+    AV1,
+}
+
+impl FromStr for EncodedFormat {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "h264" | "H264" => Ok(EncodedFormat::H264),
+            "h265" | "H265" => Ok(EncodedFormat::H265),
+            "vp8" | "VP8" => Ok(EncodedFormat::VP8),
+            "vp9" | "VP9" => Ok(EncodedFormat::VP9),
+            "av1" | "AV1" => Ok(EncodedFormat::AV1),
+            _ => Err("unrecognized input format. Valid values: h264, h265, vp8, vp9, av1"),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+enum FrameMemoryType {
+    Managed,
+    Prime,
+    User,
+}
+
+impl FromStr for FrameMemoryType {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "managed" => Ok(FrameMemoryType::Managed),
+            "prime" => Ok(FrameMemoryType::Prime),
+            "user" => Ok(FrameMemoryType::User),
+            _ => Err("unrecognized memory type. Valid values: managed, prime, user"),
+        }
+    }
+}
+
+/// Simple player using cros-codecs
+#[derive(Debug, FromArgs)]
+struct Args {
+    /// input file
+    #[argh(positional)]
+    input: PathBuf,
+
+    /// output file to write the decoded frames to
+    #[argh(option)]
+    output: Option<PathBuf>,
+
+    /// input format to decode from.
+    #[argh(option)]
+    input_format: EncodedFormat,
+
+    //TODO    /// pixel format to decode into. Default: i420
+    //TODO    #[argh(option, default = "DecodedFormat::I420")]
+    //TODO    output_format: DecodedFormat,
+    /// origin of the memory for decoded buffers (managed, prime or user). Default: managed.
+    #[argh(option, default = "FrameMemoryType::Managed")]
+    frame_memory: FrameMemoryType,
+
+    //TODO    /// path to the GBM device to use if frame-memory=prime
+    //TODO    #[argh(option)]
+    //TODO    gbm_device: Option<PathBuf>,
+    /// whether to decode frames synchronously
+    #[argh(switch)]
+    synchronous: bool,
+    //TODO    /// whether to display the MD5 of the decoded stream, and at which granularity
+    //TODO    /// (stream or frame)
+    //TODO    #[argh(option)]
+    //TODO    compute_md5: Option<Md5Computation>,
+}
+
+fn main() {
+    env_logger::init();
+
+    let args: Args = argh::from_env();
+
+    let input = {
+        let mut buf = Vec::new();
+        File::open(args.input)
+            .expect("error opening input file")
+            .read_to_end(&mut buf)
+            .expect("error reading input file");
+        buf
+    };
+
+    let mut output = args
+        .output
+        .as_ref()
+        .map(|p| File::create(p).expect("Failed to create output file"));
+
+    let blocking_mode = if args.synchronous {
+        todo!() // BlockingMode::Blocking
+    } else {
+        BlockingMode::NonBlocking
+    };
+
+    let (mut decoder, frame_iter) = match args.input_format {
+        EncodedFormat::H264 => {
+            let frame_iter = Box::new(NalIterator::<H264Nalu>::new(&input))
+                as Box<dyn Iterator<Item = Cow<[u8]>>>;
+
+            let decoder = StatelessDecoder::<H264, _>::new_v4l2(blocking_mode).into_trait_object();
+
+            (decoder, frame_iter)
+        }
+        EncodedFormat::VP8 => todo!(),
+        EncodedFormat::VP9 => todo!(),
+        EncodedFormat::H265 => todo!(),
+        EncodedFormat::AV1 => todo!(),
+    };
+
+    let mut on_new_frame = |handle: DynDecodedHandle<()>| {
+        let picture = handle.dyn_picture();
+        let mut handle = picture.dyn_mappable_handle().unwrap();
+        let buffer_size = handle.image_size();
+        let mut frame_data = vec![0; buffer_size];
+        handle.read(&mut frame_data).unwrap();
+        if let Some(output) = &mut output {
+            output
+                .write_all(&frame_data)
+                .expect("Failed to write output file");
+        }
+    };
+
+    simple_playback_loop(
+        decoder.as_mut(),
+        frame_iter,
+        &mut on_new_frame,
+        &mut |stream_info, nb_frames| {
+            Ok(match args.frame_memory {
+                FrameMemoryType::Managed => {
+                    simple_playback_loop_owned_frames(stream_info, nb_frames)?
+                        .into_iter()
+                        .collect()
+                }
+                FrameMemoryType::Prime => todo!(),
+                FrameMemoryType::User => todo!(),
+            })
+        },
+        DecodedFormat::NV12,
+        blocking_mode,
+    )
+    .expect("error during playback loop");
+}
diff --git a/examples/ccdec/main.rs b/examples/ccdec/main.rs
index 81ad007..2547343 100644
--- a/examples/ccdec/main.rs
+++ b/examples/ccdec/main.rs
@@ -1,517 +1,29 @@
-// Copyright 2023 The ChromiumOS Authors
+// Copyright 2024 The ChromiumOS Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-//! ccdec, a simple decoder program using cros-codecs. Capable of computing MD5 checksums from the
-//! input and writing the raw decoded frames to a file.
-
-use std::borrow::Cow;
-use std::ffi::OsStr;
 use std::fs::File;
-use std::io::Cursor;
-use std::io::Read;
-use std::io::Write;
-use std::os::fd::AsFd;
-use std::os::fd::BorrowedFd;
-use std::path::Path;
-use std::path::PathBuf;
-use std::str::FromStr;
 
 mod md5;
+mod util;
+use util::Args;
 
-use argh::FromArgs;
-use cros_codecs::bitstream_utils::IvfIterator;
-use cros_codecs::bitstream_utils::NalIterator;
-use cros_codecs::codec::h264::parser::Nalu as H264Nalu;
-use cros_codecs::codec::h265::parser::Nalu as H265Nalu;
-use cros_codecs::decoder::stateless::av1::Av1;
-use cros_codecs::decoder::stateless::h264::H264;
-use cros_codecs::decoder::stateless::h265::H265;
-use cros_codecs::decoder::stateless::vp8::Vp8;
-use cros_codecs::decoder::stateless::vp9::Vp9;
-use cros_codecs::decoder::stateless::StatelessDecoder;
-use cros_codecs::decoder::stateless::StatelessVideoDecoder;
-use cros_codecs::decoder::BlockingMode;
-use cros_codecs::decoder::DecodedHandle;
-use cros_codecs::decoder::DynDecodedHandle;
-use cros_codecs::decoder::StreamInfo;
-use cros_codecs::multiple_desc_type;
-use cros_codecs::utils::simple_playback_loop;
-use cros_codecs::utils::simple_playback_loop_owned_frames;
-use cros_codecs::utils::simple_playback_loop_userptr_frames;
-use cros_codecs::utils::DmabufFrame;
-use cros_codecs::utils::UserPtrFrame;
-use cros_codecs::DecodedFormat;
-use cros_codecs::Fourcc;
-use cros_codecs::FrameLayout;
-use cros_codecs::PlaneLayout;
-use cros_codecs::Resolution;
-use matroska_demuxer::Frame;
-use matroska_demuxer::MatroskaFile;
-use md5::md5_digest;
-use md5::MD5Context;
+#[cfg(feature = "vaapi")]
+mod vaapi_decoder;
+#[cfg(feature = "vaapi")]
+use vaapi_decoder::do_decode;
 
-// Our buffer descriptor type.
-//
-// We support buffers which memory is managed by the backend, or imported from user memory or a
-// PRIME buffer.
-multiple_desc_type! {
-    enum BufferDescriptor {
-        Managed(()),
-        Dmabuf(DmabufFrame),
-        User(UserPtrFrame),
-    }
-}
-
-/// Export a file descriptor from a GBM `BufferObject` and turn it into a `DmabufFrame` suitable
-/// for using as the target of a decoder.
-fn export_gbm_bo<T>(obj: &gbm::BufferObject<T>) -> anyhow::Result<DmabufFrame> {
-    let fd = obj.fd()?;
-    let modifier = obj.modifier()?;
-    let format = obj.format()?;
-    let planes = (0..obj.plane_count()? as i32)
-        .map(|i| PlaneLayout {
-            buffer_index: 0,
-            offset: obj.offset(i).unwrap() as usize,
-            stride: obj.stride_for_plane(i).unwrap() as usize,
-        })
-        .collect();
-    let size = Resolution::from((obj.width().unwrap(), obj.height().unwrap()));
-
-    Ok(DmabufFrame {
-        fds: vec![fd],
-        layout: FrameLayout {
-            format: (Fourcc::from(format as u32), modifier.into()),
-            size,
-            planes,
-        },
-    })
-}
-
-/// Buffer allocation callback for `simple_playback_loop` to allocate and export buffers from a GBM
-/// device.
-fn simple_playback_loop_prime_frames<D: AsFd>(
-    device: &gbm::Device<D>,
-    stream_info: &StreamInfo,
-    nb_frames: usize,
-) -> anyhow::Result<Vec<DmabufFrame>> {
-    let gbm_fourcc = match stream_info.format {
-        DecodedFormat::I420 | DecodedFormat::NV12 => gbm::Format::Nv12,
-        _ => anyhow::bail!(
-            "{:?} format is unsupported with GBM memory",
-            stream_info.format
-        ),
-    };
-
-    (0..nb_frames)
-        .map(|_| {
-            device
-                .create_buffer_object::<()>(
-                    stream_info.coded_resolution.width,
-                    stream_info.coded_resolution.height,
-                    gbm_fourcc,
-                    gbm::BufferObjectFlags::SCANOUT,
-                )
-                .map_err(|e| anyhow::anyhow!(e))
-                .and_then(|o| export_gbm_bo(&o))
-        })
-        .collect()
-}
-
-#[derive(Debug, PartialEq, Eq, Copy, Clone)]
-enum EncodedFormat {
-    H264,
-    H265,
-    VP8,
-    VP9,
-    AV1,
-}
-
-impl FromStr for EncodedFormat {
-    type Err = &'static str;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "h264" | "H264" => Ok(EncodedFormat::H264),
-            "h265" | "H265" => Ok(EncodedFormat::H265),
-            "vp8" | "VP8" => Ok(EncodedFormat::VP8),
-            "vp9" | "VP9" => Ok(EncodedFormat::VP9),
-            "av1" | "AV1" => Ok(EncodedFormat::AV1),
-            _ => Err("unrecognized input format. Valid values: h264, h265, vp8, vp9, av1"),
-        }
-    }
-}
-
-#[derive(Debug, PartialEq, Eq, Copy, Clone)]
-enum FrameMemoryType {
-    Managed,
-    Prime,
-    User,
-}
-
-impl FromStr for FrameMemoryType {
-    type Err = &'static str;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "managed" => Ok(FrameMemoryType::Managed),
-            "prime" => Ok(FrameMemoryType::Prime),
-            "user" => Ok(FrameMemoryType::User),
-            _ => Err("unrecognized memory type. Valid values: managed, prime, user"),
-        }
-    }
-}
-
-struct MkvFrameIterator<T: AsRef<[u8]>> {
-    input: MatroskaFile<Cursor<T>>,
-    video_track: u64,
-}
-
-impl<T: AsRef<[u8]>> MkvFrameIterator<T> {
-    fn new(input: T) -> anyhow::Result<Self> {
-        let input = MatroskaFile::open(Cursor::new(input))?;
-        let video_track = input
-            .tracks()
-            .iter()
-            .find(|t| t.track_type() == matroska_demuxer::TrackType::Video)
-            .map(|t| t.track_number().get())
-            .ok_or_else(|| anyhow::anyhow!("no video track in input file"))?;
-
-        Ok(Self { input, video_track })
-    }
-}
-
-impl<T: AsRef<[u8]>> Iterator for MkvFrameIterator<T> {
-    type Item = Vec<u8>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut frame = Frame::default();
-        while self.input.next_frame(&mut frame).unwrap() {
-            if frame.track == self.video_track {
-                return Some(frame.data);
-            }
-        }
-
-        None
-    }
-}
-
-#[derive(Debug)]
-enum Md5Computation {
-    Stream,
-    Frame,
-}
-
-impl FromStr for Md5Computation {
-    type Err = &'static str;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "stream" => Ok(Md5Computation::Stream),
-            "frame" => Ok(Md5Computation::Frame),
-            _ => Err("unrecognized MD5 computation option. Valid values: stream, frame"),
-        }
-    }
-}
-
-/// Simple player using cros-codecs
-#[derive(Debug, FromArgs)]
-struct Args {
-    /// input file
-    #[argh(positional)]
-    input: PathBuf,
-
-    /// output file to write the decoded frames to
-    #[argh(option)]
-    output: Option<PathBuf>,
-
-    /// whether to decode a frame per file. Requires "output" to be set.
-    #[argh(switch)]
-    multiple_output_files: bool,
-
-    /// input format to decode from.
-    #[argh(option)]
-    input_format: EncodedFormat,
-
-    /// pixel format to decode into. Default: i420
-    #[argh(option, default = "DecodedFormat::I420")]
-    output_format: DecodedFormat,
-
-    /// origin of the memory for decoded buffers (managed, prime or user). Default: managed.
-    #[argh(option, default = "FrameMemoryType::Managed")]
-    frame_memory: FrameMemoryType,
-
-    /// path to the GBM device to use if frame-memory=prime
-    #[argh(option)]
-    gbm_device: Option<PathBuf>,
-
-    /// whether to decode frames synchronously
-    #[argh(switch)]
-    synchronous: bool,
-
-    /// whether to display the MD5 of the decoded stream, and at which granularity (stream or
-    /// frame)
-    #[argh(option)]
-    compute_md5: Option<Md5Computation>,
-
-    /// path to JSON file containing golden MD5 sums of each frame.
-    #[argh(option)]
-    golden: Option<PathBuf>,
-}
-
-/// Detects the container type (IVF or MKV) and returns the corresponding frame iterator.
-fn create_vpx_frame_iterator(input: &[u8]) -> Box<dyn Iterator<Item = Cow<[u8]>> + '_> {
-    if input.starts_with(&[0x1a, 0x45, 0xdf, 0xa3]) {
-        Box::new(MkvFrameIterator::new(input).unwrap().map(Cow::Owned))
-    } else {
-        Box::new(IvfIterator::new(input).map(Cow::Borrowed))
-    }
-}
-
-/// Decide the output file name when multiple_output_files is set
-fn decide_output_file_name<'a>(output: &'a Path, index: i32) -> PathBuf {
-    let extract_str = |s: Option<&'a OsStr>| s.and_then(|s| s.to_str()).expect("malformed file");
-
-    let [file_name, stem] = [output.file_name(), output.file_stem()].map(extract_str);
-
-    if output.extension().is_some() {
-        let [extension] = [output.extension()].map(extract_str);
-        let new_file_name = format!("{}_{}.{}", stem, index, extension);
-        PathBuf::from(String::from(output.to_str().unwrap()).replace(file_name, &new_file_name))
-    } else {
-        let new_file_name = format!("{}_{}", stem, index);
-        PathBuf::from(String::from(output.to_str().unwrap()).replace(file_name, &new_file_name))
-    }
-}
+#[cfg(feature = "v4l2")]
+mod v4l2_stateless_decoder;
+#[cfg(feature = "v4l2")]
+use v4l2_stateless_decoder::do_decode;
 
 fn main() {
     env_logger::init();
 
     let args: Args = argh::from_env();
 
-    let input = {
-        let mut buf = Vec::new();
-        File::open(args.input)
-            .expect("error opening input file")
-            .read_to_end(&mut buf)
-            .expect("error reading input file");
-        buf
-    };
+    let input = File::open(&args.input).expect("error opening input file");
 
-    let mut output = if !args.multiple_output_files {
-        args.output
-            .as_ref()
-            .map(|p| File::create(p).expect("error creating output file"))
-    } else {
-        None
-    };
-
-    let blocking_mode = if args.synchronous {
-        BlockingMode::Blocking
-    } else {
-        BlockingMode::NonBlocking
-    };
-
-    let golden_md5s: Vec<String> = match args.golden {
-        None => vec![],
-        Some(ref path) => {
-            let mut golden_file_content = String::new();
-            File::open(&path)
-                .expect("error opening golden file")
-                .read_to_string(&mut golden_file_content)
-                .expect("error reading golden file");
-            let parsed_json: serde_json::Value =
-                serde_json::from_str(&golden_file_content).expect("error parsing golden file");
-            match &parsed_json["md5_checksums"] {
-                serde_json::Value::Array(checksums) => checksums
-                    .iter()
-                    .map(|x| match x {
-                        serde_json::Value::String(checksum) => String::from(checksum),
-                        _ => panic!("error parsing golden file"),
-                    })
-                    .collect(),
-                _ => panic!("error parsing golden file"),
-            }
-        }
-    };
-    let mut golden_iter = golden_md5s.iter();
-
-    let gbm = match args.frame_memory {
-        FrameMemoryType::Managed | FrameMemoryType::User => None,
-        FrameMemoryType::Prime => {
-            /// A simple wrapper for a GBM device node.
-            pub struct GbmDevice(std::fs::File);
-
-            impl AsFd for GbmDevice {
-                fn as_fd(&self) -> BorrowedFd<'_> {
-                    self.0.as_fd()
-                }
-            }
-            impl drm::Device for GbmDevice {}
-
-            /// Simple helper methods for opening a `Card`.
-            impl GbmDevice {
-                pub fn open<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
-                    std::fs::OpenOptions::new()
-                        .read(true)
-                        .write(true)
-                        .open(path)
-                        .map(GbmDevice)
-                }
-            }
-
-            let gbm_path = args
-                .gbm_device
-                .unwrap_or(PathBuf::from("/dev/dri/renderD128"));
-            let gbm = GbmDevice::open(gbm_path)
-                .and_then(gbm::Device::new)
-                .expect("failed to create GBM device");
-
-            Some(gbm)
-        }
-    };
-
-    let display = libva::Display::open().expect("failed to open libva display");
-
-    // The created `decoder` is turned into a `DynStatelessVideoDecoder` trait object. This allows
-    // the same code to control the decoder no matter what codec or backend we are using.
-    let (mut decoder, frame_iter) = match args.input_format {
-        EncodedFormat::H264 => {
-            let frame_iter = Box::new(NalIterator::<H264Nalu>::new(&input))
-                as Box<dyn Iterator<Item = Cow<[u8]>>>;
-
-            let decoder = StatelessDecoder::<H264, _>::new_vaapi(display, blocking_mode)
-                .unwrap()
-                .into_trait_object();
-
-            (decoder, frame_iter)
-        }
-        EncodedFormat::VP8 => {
-            let frame_iter = create_vpx_frame_iterator(&input);
-
-            let decoder = StatelessDecoder::<Vp8, _>::new_vaapi(display, blocking_mode)
-                .unwrap()
-                .into_trait_object();
-
-            (decoder, frame_iter)
-        }
-        EncodedFormat::VP9 => {
-            let frame_iter = create_vpx_frame_iterator(&input);
-
-            let decoder = StatelessDecoder::<Vp9, _>::new_vaapi(display, blocking_mode)
-                .unwrap()
-                .into_trait_object();
-
-            (decoder, frame_iter)
-        }
-        EncodedFormat::H265 => {
-            let frame_iter = Box::new(NalIterator::<H265Nalu>::new(&input))
-                as Box<dyn Iterator<Item = Cow<[u8]>>>;
-
-            let decoder = StatelessDecoder::<H265, _>::new_vaapi(display, blocking_mode)
-                .unwrap()
-                .into_trait_object();
-
-            (decoder, frame_iter)
-        }
-        EncodedFormat::AV1 => {
-            let frame_iter = create_vpx_frame_iterator(&input);
-
-            let decoder = StatelessDecoder::<Av1, _>::new_vaapi(display, blocking_mode)
-                .unwrap()
-                .into_trait_object();
-
-            (decoder, frame_iter)
-        }
-    };
-
-    let mut md5_context = MD5Context::new();
-    let mut output_filename_idx = 0;
-    let need_per_frame_md5 = match args.compute_md5 {
-        Some(Md5Computation::Frame) => true,
-        _ => args.golden.is_some(),
-    };
-
-    let mut on_new_frame = |handle: DynDecodedHandle<BufferDescriptor>| {
-        if args.output.is_some() || args.compute_md5.is_some() || args.golden.is_some() {
-            handle.sync().unwrap();
-            let picture = handle.dyn_picture();
-            let mut handle = picture.dyn_mappable_handle().unwrap();
-            let buffer_size = handle.image_size();
-            let mut frame_data = vec![0; buffer_size];
-            handle.read(&mut frame_data).unwrap();
-
-            if args.multiple_output_files {
-                let file_name = decide_output_file_name(
-                    args.output
-                        .as_ref()
-                        .expect("multiple_output_files need output to be set"),
-                    output_filename_idx,
-                );
-
-                let mut output = File::create(file_name).expect("error creating output file");
-                output_filename_idx += 1;
-                output
-                    .write_all(&frame_data)
-                    .expect("failed to write to output file");
-            } else if let Some(output) = &mut output {
-                output
-                    .write_all(&frame_data)
-                    .expect("failed to write to output file");
-            }
-
-            let frame_md5: String = if need_per_frame_md5 {
-                md5_digest(&frame_data)
-            } else {
-                "".to_string()
-            };
-
-            match args.compute_md5 {
-                None => (),
-                Some(Md5Computation::Frame) => println!("{}", frame_md5),
-                Some(Md5Computation::Stream) => md5_context.consume(&frame_data),
-            }
-
-            if args.golden.is_some() {
-                assert_eq!(&frame_md5, golden_iter.next().unwrap());
-            }
-        }
-    };
-
-    simple_playback_loop(
-        decoder.as_mut(),
-        frame_iter,
-        &mut on_new_frame,
-        &mut |stream_info, nb_frames| {
-            Ok(match args.frame_memory {
-                FrameMemoryType::Managed => {
-                    simple_playback_loop_owned_frames(stream_info, nb_frames)?
-                        .into_iter()
-                        .map(BufferDescriptor::Managed)
-                        .collect()
-                }
-                FrameMemoryType::Prime => simple_playback_loop_prime_frames(
-                    gbm.as_ref().unwrap(),
-                    stream_info,
-                    nb_frames,
-                )?
-                .into_iter()
-                .map(BufferDescriptor::Dmabuf)
-                .collect(),
-                FrameMemoryType::User => {
-                    simple_playback_loop_userptr_frames(stream_info, nb_frames)?
-                        .into_iter()
-                        .map(BufferDescriptor::User)
-                        .collect()
-                }
-            })
-        },
-        args.output_format,
-        blocking_mode,
-    )
-    .expect("error during playback loop");
-
-    if let Some(Md5Computation::Stream) = args.compute_md5 {
-        println!("{}", md5_context.flush());
-    }
+    do_decode(input, args);
 }
diff --git a/examples/ccdec/util.rs b/examples/ccdec/util.rs
new file mode 100644
index 0000000..1f11a56
--- /dev/null
+++ b/examples/ccdec/util.rs
@@ -0,0 +1,141 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::ffi::OsStr;
+use std::path::Path;
+use std::path::PathBuf;
+use std::str::FromStr;
+
+use argh::FromArgs;
+
+use cros_codecs::DecodedFormat;
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum EncodedFormat {
+    H264,
+    H265,
+    VP8,
+    VP9,
+    AV1,
+}
+
+impl FromStr for EncodedFormat {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "h264" | "H264" => Ok(EncodedFormat::H264),
+            "h265" | "H265" => Ok(EncodedFormat::H265),
+            "vp8" | "VP8" => Ok(EncodedFormat::VP8),
+            "vp9" | "VP9" => Ok(EncodedFormat::VP9),
+            "av1" | "AV1" => Ok(EncodedFormat::AV1),
+            _ => Err("unrecognized input format. Valid values: h264, h265, vp8, vp9, av1"),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum Md5Computation {
+    Stream,
+    Frame,
+}
+
+impl FromStr for Md5Computation {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "stream" => Ok(Md5Computation::Stream),
+            "frame" => Ok(Md5Computation::Frame),
+            _ => Err("unrecognized MD5 computation option. Valid values: stream, frame"),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum FrameMemoryType {
+    Managed,
+    Prime,
+    User,
+}
+
+impl FromStr for FrameMemoryType {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "managed" => Ok(FrameMemoryType::Managed),
+            "prime" => Ok(FrameMemoryType::Prime),
+            "user" => Ok(FrameMemoryType::User),
+            _ => Err("unrecognized memory type. Valid values: managed, prime, user"),
+        }
+    }
+}
+
+/// Simple player using cros-codecs
+#[derive(Debug, FromArgs)]
+pub struct Args {
+    /// input file
+    #[argh(positional)]
+    pub input: PathBuf,
+
+    /// output file to write the decoded frames to
+    #[argh(option)]
+    pub output: Option<PathBuf>,
+
+    /// whether to decode a frame per file. Requires "output" to be set.
+    #[argh(switch)]
+    pub multiple_output_files: bool,
+
+    /// input format to decode from.
+    #[argh(option)]
+    pub input_format: EncodedFormat,
+
+    /// pixel format to decode into. Default: i420
+    #[argh(option, default = "DecodedFormat::I420")]
+    pub output_format: DecodedFormat,
+
+    /// origin of the memory for decoded buffers (managed, prime or user). Default: managed.
+    #[argh(option, default = "FrameMemoryType::Managed")]
+    pub frame_memory: FrameMemoryType,
+
+    /// path to the GBM device to use if frame-memory=prime
+    #[allow(dead_code)]
+    #[argh(option)]
+    pub gbm_device: Option<PathBuf>,
+
+    /// path to VA-API device. This option is ignored on V4L2 systems.
+    #[argh(option)]
+    #[allow(dead_code)]
+    pub libva_device: Option<PathBuf>,
+
+    /// whether to decode frames synchronously
+    #[argh(switch)]
+    pub synchronous: bool,
+
+    /// whether to display the MD5 of the decoded stream, and at which granularity (stream or
+    /// frame)
+    #[argh(option)]
+    pub compute_md5: Option<Md5Computation>,
+
+    /// path to JSON file containing golden MD5 sums of each frame.
+    #[argh(option)]
+    pub golden: Option<PathBuf>,
+}
+
+/// Decide the output file name when multiple_output_files is set
+pub fn decide_output_file_name<'a>(output: &'a Path, index: i32) -> PathBuf {
+    let extract_str = |s: Option<&'a OsStr>| s.and_then(|s| s.to_str()).expect("malformed file");
+
+    let [file_name, stem] = [output.file_name(), output.file_stem()].map(extract_str);
+
+    if output.extension().is_some() {
+        let [extension] = [output.extension()].map(extract_str);
+        let new_file_name = format!("{}_{}.{}", stem, index, extension);
+        PathBuf::from(String::from(output.to_str().unwrap()).replace(file_name, &new_file_name))
+    } else {
+        let new_file_name = format!("{}_{}", stem, index);
+        PathBuf::from(String::from(output.to_str().unwrap()).replace(file_name, &new_file_name))
+    }
+}
diff --git a/examples/ccdec/v4l2_stateless_decoder.rs b/examples/ccdec/v4l2_stateless_decoder.rs
new file mode 100644
index 0000000..6bf131b
--- /dev/null
+++ b/examples/ccdec/v4l2_stateless_decoder.rs
@@ -0,0 +1,156 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::borrow::Cow;
+use std::fs::File;
+use std::io::Read;
+use std::io::Write;
+
+use cros_codecs::backend::v4l2::decoder::stateless::V4l2StatelessDecoderHandle;
+use cros_codecs::bitstream_utils::NalIterator;
+use cros_codecs::codec::h264::parser::Nalu as H264Nalu;
+use cros_codecs::decoder::stateless::h264::H264;
+use cros_codecs::decoder::stateless::StatelessDecoder;
+use cros_codecs::decoder::stateless::StatelessVideoDecoder;
+use cros_codecs::decoder::BlockingMode;
+use cros_codecs::decoder::DecodedHandle;
+use cros_codecs::decoder::DynDecodedHandle;
+use cros_codecs::multiple_desc_type;
+use cros_codecs::utils::simple_playback_loop;
+use cros_codecs::utils::simple_playback_loop_owned_frames;
+use cros_codecs::utils::DmabufFrame;
+use cros_codecs::utils::UserPtrFrame;
+use cros_codecs::DecodedFormat;
+
+use crate::md5::md5_digest;
+use crate::md5::MD5Context;
+use crate::util::decide_output_file_name;
+use crate::util::Args;
+use crate::util::EncodedFormat;
+use crate::util::FrameMemoryType;
+use crate::util::Md5Computation;
+
// Buffer descriptor type for this example, mirroring the one in the VA-API
// decoder example. Only `Managed` memory is exercised by the V4L2 path below
// (the prime/user allocators are still todo!()).
// NOTE(review): `BufferDescriptor` does not appear to be referenced anywhere
// in this file (`on_new_frame` uses `DynDecodedHandle<()>`) — confirm whether
// it is still needed.
multiple_desc_type! {
    enum BufferDescriptor {
        Managed(()),
        Dmabuf(DmabufFrame),
        User(UserPtrFrame),
    }
}
+
/// Runs the decode loop for `input` using the V4L2 stateless decoder backend,
/// writing raw decoded frames and/or MD5 sums according to `args`.
pub fn do_decode(mut input: File, args: Args) -> () {
    // Read the entire bitstream into memory; the NAL iterator below borrows from it.
    let input = {
        let mut buf = Vec::new();
        input
            .read_to_end(&mut buf)
            .expect("error reading input file");
        buf
    };

    // Single output file created up front; the multiple_output_files case is
    // handled per-frame inside `on_new_frame` instead.
    let mut output = args
        .output
        .as_ref()
        .map(|p| File::create(p).expect("Failed to create output file"));

    // Synchronous (blocking) decoding is not implemented for this backend yet.
    let blocking_mode = if args.synchronous {
        todo!() // BlockingMode::Blocking
    } else {
        BlockingMode::NonBlocking
    };

    // Only H.264 is currently wired up for V4L2 stateless; the other codecs are TODO.
    let (mut decoder, frame_iter) = match args.input_format {
        EncodedFormat::H264 => {
            let frame_iter = Box::new(NalIterator::<H264Nalu>::new(&input))
                as Box<dyn Iterator<Item = Cow<[u8]>>>;

            let decoder = StatelessDecoder::<H264, _>::new_v4l2(blocking_mode).into_trait_object();

            (decoder, frame_iter)
        }
        EncodedFormat::VP8 => todo!(),
        EncodedFormat::VP9 => todo!(),
        EncodedFormat::H265 => todo!(),
        EncodedFormat::AV1 => todo!(),
    };

    let mut md5_context = MD5Context::new();
    let mut output_filename_idx = 0;
    // Per-frame MD5s are needed for frame-level reporting or golden comparison.
    // NOTE(review): `args.golden` sets this flag, but unlike the VA-API path no
    // golden comparison is actually performed in this function — confirm intent.
    let need_per_frame_md5 = match args.compute_md5 {
        Some(Md5Computation::Frame) => true,
        _ => args.golden.is_some(),
    };

    // Invoked for every frame the decoder outputs.
    let mut on_new_frame = |handle: DynDecodedHandle<()>| {
        let timestamp = handle.timestamp();
        log::debug!("{:<20} {:?}\n", "on_new_frame", timestamp);

        // Map the decoded picture and copy its contents into `frame_data`.
        let picture = handle.dyn_picture();
        let mut handle = picture.dyn_mappable_handle().unwrap();
        let buffer_size = handle.image_size();
        let mut frame_data = vec![0; buffer_size];

        handle.read(&mut frame_data).unwrap();
        log::debug!(
            "{:<20} {:?}, {} bytes\n",
            "on_new_frame",
            timestamp,
            buffer_size
        );

        if args.multiple_output_files {
            // One file per frame, named from the output path plus a running index.
            let file_name = decide_output_file_name(
                args.output
                    .as_ref()
                    .expect("multiple_output_files need output to be set"),
                output_filename_idx,
            );

            let mut output = File::create(file_name).expect("error creating output file");
            output_filename_idx += 1;
            output
                .write_all(&frame_data)
                .expect("failed to write to output file");
        } else if let Some(output) = &mut output {
            output
                .write_all(&frame_data)
                .expect("failed to write to output file");
        }

        let frame_md5: String = if need_per_frame_md5 {
            md5_digest(&frame_data)
        } else {
            "".to_string()
        };

        match args.compute_md5 {
            None => (),
            Some(Md5Computation::Frame) => println!("{}", frame_md5),
            Some(Md5Computation::Stream) => md5_context.consume(&frame_data),
        }
    };

    // NOTE(review): the output format is hard-coded to NV12 here, whereas the
    // VA-API path honors `args.output_format` — confirm whether V4L2 should too.
    simple_playback_loop(
        decoder.as_mut(),
        frame_iter,
        &mut on_new_frame,
        &mut |stream_info, nb_frames| {
            Ok(match args.frame_memory {
                FrameMemoryType::Managed => {
                    simple_playback_loop_owned_frames(stream_info, nb_frames)?
                        .into_iter()
                        .collect()
                }
                FrameMemoryType::Prime => todo!(),
                FrameMemoryType::User => todo!(),
            })
        },
        DecodedFormat::NV12,
        blocking_mode,
    )
    .expect("error during playback loop");

    // The stream-level MD5 is printed once, after all frames have been decoded.
    if let Some(Md5Computation::Stream) = args.compute_md5 {
        println!("{}", md5_context.flush());
    }
}
diff --git a/examples/ccdec/vaapi_decoder.rs b/examples/ccdec/vaapi_decoder.rs
new file mode 100644
index 0000000..6037edb
--- /dev/null
+++ b/examples/ccdec/vaapi_decoder.rs
@@ -0,0 +1,396 @@
+// Copyright 2023 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//! ccdec, a simple decoder program using cros-codecs. Capable of computing MD5 checksums from the
+//! input and writing the raw decoded frames to a file.
+
+use std::borrow::Cow;
+use std::fs::File;
+use std::io::Cursor;
+use std::io::Read;
+use std::io::Write;
+use std::os::fd::AsFd;
+use std::os::fd::BorrowedFd;
+use std::path::Path;
+use std::path::PathBuf;
+
+use cros_codecs::bitstream_utils::IvfIterator;
+use cros_codecs::bitstream_utils::NalIterator;
+use cros_codecs::codec::h264::parser::Nalu as H264Nalu;
+use cros_codecs::codec::h265::parser::Nalu as H265Nalu;
+use cros_codecs::decoder::stateless::av1::Av1;
+use cros_codecs::decoder::stateless::h264::H264;
+use cros_codecs::decoder::stateless::h265::H265;
+use cros_codecs::decoder::stateless::vp8::Vp8;
+use cros_codecs::decoder::stateless::vp9::Vp9;
+use cros_codecs::decoder::stateless::StatelessDecoder;
+use cros_codecs::decoder::stateless::StatelessVideoDecoder;
+use cros_codecs::decoder::BlockingMode;
+use cros_codecs::decoder::DecodedHandle;
+use cros_codecs::decoder::DynDecodedHandle;
+use cros_codecs::decoder::StreamInfo;
+use cros_codecs::multiple_desc_type;
+use cros_codecs::utils::simple_playback_loop;
+use cros_codecs::utils::simple_playback_loop_owned_frames;
+use cros_codecs::utils::simple_playback_loop_userptr_frames;
+use cros_codecs::utils::DmabufFrame;
+use cros_codecs::utils::UserPtrFrame;
+use cros_codecs::DecodedFormat;
+use cros_codecs::Fourcc;
+use cros_codecs::FrameLayout;
+use cros_codecs::PlaneLayout;
+use cros_codecs::Resolution;
+use matroska_demuxer::Frame;
+use matroska_demuxer::MatroskaFile;
+
+use crate::md5::md5_digest;
+use crate::md5::MD5Context;
+use crate::util::decide_output_file_name;
+use crate::util::Args;
+use crate::util::EncodedFormat;
+use crate::util::FrameMemoryType;
+use crate::util::Md5Computation;
+
// Our buffer descriptor type.
//
// We support buffers which memory is managed by the backend, or imported from user memory or a
// PRIME buffer.
multiple_desc_type! {
    enum BufferDescriptor {
        // Backend-allocated surfaces (frame-memory=managed).
        Managed(()),
        // PRIME/DMA-buf frames exported from a GBM device (frame-memory=prime).
        Dmabuf(DmabufFrame),
        // Frames backed by user-space memory (frame-memory=user).
        User(UserPtrFrame),
    }
}
+
+/// Export a file descriptor from a GBM `BufferObject` and turn it into a `DmabufFrame` suitable
+/// for using as the target of a decoder.
+fn export_gbm_bo<T>(obj: &gbm::BufferObject<T>) -> anyhow::Result<DmabufFrame> {
+    let fd = obj.fd()?;
+    let modifier = obj.modifier()?;
+    let format = obj.format()?;
+    let planes = (0..obj.plane_count()? as i32)
+        .map(|i| PlaneLayout {
+            buffer_index: 0,
+            offset: obj.offset(i).unwrap() as usize,
+            stride: obj.stride_for_plane(i).unwrap() as usize,
+        })
+        .collect();
+    let size = Resolution::from((obj.width().unwrap(), obj.height().unwrap()));
+
+    Ok(DmabufFrame {
+        fds: vec![fd],
+        layout: FrameLayout {
+            format: (Fourcc::from(format as u32), modifier.into()),
+            size,
+            planes,
+        },
+    })
+}
+
+/// Buffer allocation callback for `simple_playback_loop` to allocate and export buffers from a GBM
+/// device.
+fn simple_playback_loop_prime_frames<D: AsFd>(
+    device: &gbm::Device<D>,
+    stream_info: &StreamInfo,
+    nb_frames: usize,
+) -> anyhow::Result<Vec<DmabufFrame>> {
+    let gbm_fourcc = match stream_info.format {
+        DecodedFormat::I420 | DecodedFormat::NV12 => gbm::Format::Nv12,
+        _ => anyhow::bail!(
+            "{:?} format is unsupported with GBM memory",
+            stream_info.format
+        ),
+    };
+
+    (0..nb_frames)
+        .map(|_| {
+            device
+                .create_buffer_object::<()>(
+                    stream_info.coded_resolution.width,
+                    stream_info.coded_resolution.height,
+                    gbm_fourcc,
+                    gbm::BufferObjectFlags::SCANOUT,
+                )
+                .map_err(|e| anyhow::anyhow!(e))
+                .and_then(|o| export_gbm_bo(&o))
+        })
+        .collect()
+}
/// Iterator over the video frames of an in-memory Matroska (MKV/WebM) file.
struct MkvFrameIterator<T: AsRef<[u8]>> {
    // Demuxer over the in-memory input.
    input: MatroskaFile<Cursor<T>>,
    // Track number of the video track whose frames we yield.
    video_track: u64,
}
+
+impl<T: AsRef<[u8]>> MkvFrameIterator<T> {
+    fn new(input: T) -> anyhow::Result<Self> {
+        let input = MatroskaFile::open(Cursor::new(input))?;
+        let video_track = input
+            .tracks()
+            .iter()
+            .find(|t| t.track_type() == matroska_demuxer::TrackType::Video)
+            .map(|t| t.track_number().get())
+            .ok_or_else(|| anyhow::anyhow!("no video track in input file"))?;
+
+        Ok(Self { input, video_track })
+    }
+}
+
+impl<T: AsRef<[u8]>> Iterator for MkvFrameIterator<T> {
+    type Item = Vec<u8>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut frame = Frame::default();
+        while self.input.next_frame(&mut frame).unwrap() {
+            if frame.track == self.video_track {
+                return Some(frame.data);
+            }
+        }
+
+        None
+    }
+}
+
+/// Detects the container type (IVF or MKV) and returns the corresponding frame iterator.
+fn create_vpx_frame_iterator(input: &[u8]) -> Box<dyn Iterator<Item = Cow<[u8]>> + '_> {
+    if input.starts_with(&[0x1a, 0x45, 0xdf, 0xa3]) {
+        Box::new(MkvFrameIterator::new(input).unwrap().map(Cow::Owned))
+    } else {
+        Box::new(IvfIterator::new(input).map(Cow::Borrowed))
+    }
+}
+
/// Runs the decode loop for `input` using the VA-API backend, writing raw decoded frames, MD5
/// sums, and/or per-frame golden-MD5 comparisons according to `args`.
pub fn do_decode(mut input: File, args: Args) -> () {
    // Read the whole bitstream into memory; the frame iterators below borrow from it.
    let input = {
        let mut buf = Vec::new();
        input
            .read_to_end(&mut buf)
            .expect("error reading input file");
        buf
    };

    // With multiple_output_files, files are created per-frame in `on_new_frame`
    // instead of once here.
    let mut output = if !args.multiple_output_files {
        args.output
            .as_ref()
            .map(|p| File::create(p).expect("error creating output file"))
    } else {
        None
    };

    let blocking_mode = if args.synchronous {
        BlockingMode::Blocking
    } else {
        BlockingMode::NonBlocking
    };

    // Load the golden per-frame MD5 list if provided. Expected JSON shape:
    // { "md5_checksums": ["<hex>", ...] }.
    let golden_md5s: Vec<String> = match args.golden {
        None => vec![],
        Some(ref path) => {
            let mut golden_file_content = String::new();
            File::open(&path)
                .expect("error opening golden file")
                .read_to_string(&mut golden_file_content)
                .expect("error reading golden file");
            let parsed_json: serde_json::Value =
                serde_json::from_str(&golden_file_content).expect("error parsing golden file");
            match &parsed_json["md5_checksums"] {
                serde_json::Value::Array(checksums) => checksums
                    .iter()
                    .map(|x| match x {
                        serde_json::Value::String(checksum) => String::from(checksum),
                        _ => panic!("error parsing golden file"),
                    })
                    .collect(),
                _ => panic!("error parsing golden file"),
            }
        }
    };
    let mut golden_iter = golden_md5s.iter();

    // A GBM device is only opened when PRIME frame memory was requested.
    let gbm = match args.frame_memory {
        FrameMemoryType::Managed | FrameMemoryType::User => None,
        FrameMemoryType::Prime => {
            /// A simple wrapper for a GBM device node.
            pub struct GbmDevice(std::fs::File);

            impl AsFd for GbmDevice {
                fn as_fd(&self) -> BorrowedFd<'_> {
                    self.0.as_fd()
                }
            }
            impl drm::Device for GbmDevice {}

            /// Simple helper methods for opening a `Card`.
            impl GbmDevice {
                pub fn open<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
                    std::fs::OpenOptions::new()
                        .read(true)
                        .write(true)
                        .open(path)
                        .map(GbmDevice)
                }
            }

            // Default to the first render node if no device was specified.
            let gbm_path = args
                .gbm_device
                .unwrap_or(PathBuf::from("/dev/dri/renderD128"));
            let gbm = GbmDevice::open(gbm_path)
                .and_then(gbm::Device::new)
                .expect("failed to create GBM device");

            Some(gbm)
        }
    };

    // Open the VA-API display, either on an explicit DRM device or the default one.
    let display = match args.libva_device {
        Some(libva_device_path) => libva::Display::open_drm_display(libva_device_path.clone())
            .expect(&format!(
                "failed to open libva display {}",
                libva_device_path.display()
            )),
        None => libva::Display::open().expect("failed to open libva display"),
    };

    // The created `decoder` is turned into a `DynStatelessVideoDecoder` trait object. This allows
    // the same code to control the decoder no matter what codec or backend we are using.
    let (mut decoder, frame_iter) = match args.input_format {
        EncodedFormat::H264 => {
            let frame_iter = Box::new(NalIterator::<H264Nalu>::new(&input))
                as Box<dyn Iterator<Item = Cow<[u8]>>>;

            let decoder = StatelessDecoder::<H264, _>::new_vaapi(display, blocking_mode)
                .unwrap()
                .into_trait_object();

            (decoder, frame_iter)
        }
        EncodedFormat::VP8 => {
            let frame_iter = create_vpx_frame_iterator(&input);

            let decoder = StatelessDecoder::<Vp8, _>::new_vaapi(display, blocking_mode)
                .unwrap()
                .into_trait_object();

            (decoder, frame_iter)
        }
        EncodedFormat::VP9 => {
            let frame_iter = create_vpx_frame_iterator(&input);

            let decoder = StatelessDecoder::<Vp9, _>::new_vaapi(display, blocking_mode)
                .unwrap()
                .into_trait_object();

            (decoder, frame_iter)
        }
        EncodedFormat::H265 => {
            let frame_iter = Box::new(NalIterator::<H265Nalu>::new(&input))
                as Box<dyn Iterator<Item = Cow<[u8]>>>;

            let decoder = StatelessDecoder::<H265, _>::new_vaapi(display, blocking_mode)
                .unwrap()
                .into_trait_object();

            (decoder, frame_iter)
        }
        EncodedFormat::AV1 => {
            let frame_iter = create_vpx_frame_iterator(&input);

            let decoder = StatelessDecoder::<Av1, _>::new_vaapi(display, blocking_mode)
                .unwrap()
                .into_trait_object();

            (decoder, frame_iter)
        }
    };

    let mut md5_context = MD5Context::new();
    let mut output_filename_idx = 0;
    // Per-frame MD5s are needed for frame-level reporting or golden comparison.
    let need_per_frame_md5 = match args.compute_md5 {
        Some(Md5Computation::Frame) => true,
        _ => args.golden.is_some(),
    };

    // Invoked for every frame the decoder outputs. Mapping the picture is skipped
    // entirely when nothing would consume the pixel data.
    let mut on_new_frame = |handle: DynDecodedHandle<BufferDescriptor>| {
        if args.output.is_some() || args.compute_md5.is_some() || args.golden.is_some() {
            // Wait for the backend to finish producing the frame before mapping it.
            handle.sync().unwrap();
            let picture = handle.dyn_picture();
            let mut handle = picture.dyn_mappable_handle().unwrap();
            let buffer_size = handle.image_size();
            let mut frame_data = vec![0; buffer_size];
            handle.read(&mut frame_data).unwrap();

            if args.multiple_output_files {
                // One file per frame, named from the output path plus a running index.
                let file_name = decide_output_file_name(
                    args.output
                        .as_ref()
                        .expect("multiple_output_files need output to be set"),
                    output_filename_idx,
                );

                let mut output = File::create(file_name).expect("error creating output file");
                output_filename_idx += 1;
                output
                    .write_all(&frame_data)
                    .expect("failed to write to output file");
            } else if let Some(output) = &mut output {
                output
                    .write_all(&frame_data)
                    .expect("failed to write to output file");
            }

            let frame_md5: String = if need_per_frame_md5 {
                md5_digest(&frame_data)
            } else {
                "".to_string()
            };

            match args.compute_md5 {
                None => (),
                Some(Md5Computation::Frame) => println!("{}", frame_md5),
                Some(Md5Computation::Stream) => md5_context.consume(&frame_data),
            }

            // Compare against the golden sums in decode order; panics on mismatch
            // or if the golden list is shorter than the stream.
            if args.golden.is_some() {
                assert_eq!(&frame_md5, golden_iter.next().unwrap());
            }
        }
    };

    simple_playback_loop(
        decoder.as_mut(),
        frame_iter,
        &mut on_new_frame,
        // Allocate frames according to the requested memory type.
        &mut |stream_info, nb_frames| {
            Ok(match args.frame_memory {
                FrameMemoryType::Managed => {
                    simple_playback_loop_owned_frames(stream_info, nb_frames)?
                        .into_iter()
                        .map(BufferDescriptor::Managed)
                        .collect()
                }
                FrameMemoryType::Prime => simple_playback_loop_prime_frames(
                    gbm.as_ref().unwrap(),
                    stream_info,
                    nb_frames,
                )?
                .into_iter()
                .map(BufferDescriptor::Dmabuf)
                .collect(),
                FrameMemoryType::User => {
                    simple_playback_loop_userptr_frames(stream_info, nb_frames)?
                        .into_iter()
                        .map(BufferDescriptor::User)
                        .collect()
                }
            })
        },
        args.output_format,
        blocking_mode,
    )
    .expect("error during playback loop");

    // The stream-level MD5 is printed once, after all frames have been decoded.
    if let Some(Md5Computation::Stream) = args.compute_md5 {
        println!("{}", md5_context.flush());
    }
}
diff --git a/examples/ccenc/main.rs b/examples/ccenc/main.rs
new file mode 100644
index 0000000..8563827
--- /dev/null
+++ b/examples/ccenc/main.rs
@@ -0,0 +1,28 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::fs::File;
+
+mod util;
+use util::Args;
+
+#[cfg(feature = "vaapi")]
+mod vaapi_encoder;
+#[cfg(feature = "vaapi")]
+use vaapi_encoder::do_encode;
+
+#[cfg(feature = "v4l2")]
+mod v4l2_stateful_encoder;
+#[cfg(feature = "v4l2")]
+use v4l2_stateful_encoder::do_encode;
+
/// Entry point: parses CLI arguments, opens the input file, and hands it to the
/// backend-specific `do_encode` (VA-API or V4L2, selected by cargo feature at
/// compile time via the cfg'd module imports above).
fn main() {
    env_logger::init();

    let args: Args = argh::from_env();

    let input = File::open(&args.input).expect("error opening input file");

    do_encode(input, args);
}
diff --git a/examples/ccenc/util.rs b/examples/ccenc/util.rs
new file mode 100644
index 0000000..ab7558a
--- /dev/null
+++ b/examples/ccenc/util.rs
@@ -0,0 +1,87 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::path::PathBuf;
+use std::str::FromStr;
+
+use argh::FromArgs;
+
+use cros_codecs::DecodedFormat;
+
/// Codecs supported by the encoder example.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Default)]
pub enum Codec {
    #[default]
    H264,
    H265,
    VP8,
    VP9,
    AV1,
}

impl FromStr for Codec {
    type Err = &'static str;

    /// Parses a codec name case-insensitively (e.g. "h264", "H264", "Vp9" all work).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Lowercase once so any capitalization is accepted, generalizing the
        // previous all-lower/all-upper-only matching.
        match s.to_ascii_lowercase().as_str() {
            "h264" => Ok(Self::H264),
            "h265" => Ok(Self::H265),
            "vp8" => Ok(Self::VP8),
            "vp9" => Ok(Self::VP9),
            "av1" => Ok(Self::AV1),
            _ => Err("unrecognized codec. Valid values: h264, h265, vp8, vp9, av1"),
        }
    }
}
+
/// Simple encoder
// NOTE: with `#[derive(FromArgs)]`, argh renders each field's `///` comment as the CLI help
// text, so those lines are user-visible strings rather than ordinary documentation.
#[derive(Debug, FromArgs)]
pub struct Args {
    /// input file
    #[argh(positional)]
    pub input: PathBuf,

    /// input frames width
    #[argh(option)]
    pub width: u32,

    /// input frames height
    #[argh(option)]
    pub height: u32,

    /// input frame coded width
    // Falls back to `width` when unset.
    #[argh(option)]
    pub coded_width: Option<u32>,

    /// input frame coded height
    // Falls back to `height` when unset.
    #[argh(option)]
    pub coded_height: Option<u32>,

    /// input frames count
    #[argh(option)]
    pub count: usize,

    /// input fourcc
    #[argh(option)]
    pub fourcc: DecodedFormat,

    /// codec
    // Falls back to `Codec::default()` (H264) when unset.
    #[argh(option)]
    pub codec: Option<Codec>,

    /// framerate
    #[argh(option, default = "30")]
    pub framerate: u32,

    /// bitrate
    #[argh(option, default = "200000")]
    pub bitrate: u64,

    /// output file to write the decoded frames to
    #[argh(option)]
    pub output: Option<PathBuf>,

    /// set to true if low power version of the API shall be used
    #[argh(switch)]
    pub low_power: bool,
}
diff --git a/examples/ccenc/v4l2_stateful_encoder.rs b/examples/ccenc/v4l2_stateful_encoder.rs
new file mode 100644
index 0000000..e261a0c
--- /dev/null
+++ b/examples/ccenc/v4l2_stateful_encoder.rs
@@ -0,0 +1,396 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::fs::File;
+use std::io::Write;
+use std::os::unix::prelude::FileExt;
+use std::sync::Arc;
+
+use cros_codecs::backend::v4l2::encoder::find_device_with_capture;
+use cros_codecs::backend::v4l2::encoder::v4l2_format_to_frame_layout;
+use cros_codecs::backend::v4l2::encoder::EncoderCodec;
+use cros_codecs::backend::v4l2::encoder::MmapingCapture;
+use cros_codecs::backend::v4l2::encoder::OutputBuffer;
+use cros_codecs::backend::v4l2::encoder::OutputBufferHandle;
+use cros_codecs::backend::v4l2::encoder::V4L2Backend;
+use cros_codecs::bitstream_utils::IvfFileHeader;
+use cros_codecs::bitstream_utils::IvfFrameHeader;
+use cros_codecs::encoder::simple_encode_loop;
+use cros_codecs::encoder::stateful::h264::v4l2::V4L2StatefulH264Encoder;
+use cros_codecs::encoder::stateful::h265::v4l2::V4L2StatefulH265Encoder;
+use cros_codecs::encoder::stateful::vp8::v4l2::V4L2StatefulVP8Encoder;
+use cros_codecs::encoder::stateful::vp9::v4l2::V4L2StatefulVP9Encoder;
+use cros_codecs::encoder::stateful::StatefulEncoder;
+use cros_codecs::encoder::CodedBitstreamBuffer;
+use cros_codecs::encoder::FrameMetadata;
+use cros_codecs::encoder::RateControl;
+use cros_codecs::encoder::Tunings;
+use cros_codecs::image_processing::extend_border_nv12;
+use cros_codecs::image_processing::i420_to_nv12_chroma;
+use cros_codecs::image_processing::nv12_copy;
+use cros_codecs::DecodedFormat;
+use cros_codecs::Fourcc;
+use cros_codecs::FrameLayout;
+use cros_codecs::Resolution;
+
+use v4l2r::device::Device;
+use v4l2r::device::DeviceConfig;
+use v4l2r::memory::MmapHandle;
+
+use crate::util::Args;
+use crate::util::Codec;
+
// "Handle" abstraction for this particular use case. All Encoders take a "Handle" generic type
// that implements the OutputBufferHandle trait, which basically just tells the Encoder how the
// frame data is going to be loaded into the V4L2 output buffers. This is where we add the code to
// load the frames from the disk. Technically we could do the disk load in DiskFrameReader and
// then pass regular u8 buffers to |queue()|, but this way avoids a copy.
struct MmapNM12Frame<'a> {
    // Visible size of the frame.
    resolution: Resolution,
    // Backing raw YUV file; read with `read_at()` so only a shared borrow is needed.
    file: &'a File,
    // Pixel format of the file contents (NV12 or I420; anything else panics in `queue()`).
    input_fourcc: DecodedFormat,
    // Byte offset of this frame within the file.
    pos: u64,
    // Coded (padded) size of the frames as stored in the file.
    input_coded_resolution: Resolution,
    // Layout (strides/sizes) of the V4L2 output-queue buffers this frame is copied into.
    queue_layout: FrameLayout,
}
+
impl OutputBufferHandle for MmapNM12Frame<'_> {
    type PrimitiveBufferHandles = Vec<MmapHandle>;

    /// Reads one frame from the backing file at `self.pos`, converts it to NV12 if necessary,
    /// copies it into the mmap'ed V4L2 output buffer, and queues the buffer.
    fn queue(self, buffer: OutputBuffer<'_, Self::PrimitiveBufferHandles>) -> anyhow::Result<()> {
        // Scratch planes sized for the *input* coded resolution; with 4:2:0 subsampling the
        // chroma plane is half the luma plane's size.
        let mut input_y = vec![0u8; self.input_coded_resolution.get_area()];
        let mut input_uv = vec![0u8; self.input_coded_resolution.get_area() / 2];

        // Use |read_at()| instead of |read()| so we don't need to take a mutable reference to the
        // File. We don't know how many in flight OutputBufferHandles will be created in advance,
        // and we can only mutably borrow once. We could get around this with an Rc RefCell or an
        // Arc if we decide we need to use |read()| because we want to support non-POSIX platforms.
        assert_eq!(
            self.file
                .read_at(input_y.as_mut_slice(), self.pos)
                .expect("Unexpected EOF!"),
            self.input_coded_resolution.get_area()
        );

        match self.input_fourcc {
            // NV12 input: the interleaved UV plane directly follows the Y plane in the file.
            DecodedFormat::NV12 => {
                assert_eq!(
                    self.file
                        .read_at(
                            input_uv.as_mut_slice(),
                            self.pos + self.input_coded_resolution.get_area() as u64
                        )
                        .expect("Unexpected EOF!"),
                    self.input_coded_resolution.get_area() / 2
                );
            }
            // I420 input: read the separate U and V planes and interleave them into NV12's
            // single UV plane.
            DecodedFormat::I420 => {
                let mut input_u = vec![0u8; self.input_coded_resolution.get_area() / 4];
                let mut input_v = vec![0u8; self.input_coded_resolution.get_area() / 4];
                assert_eq!(
                    self.file
                        .read_at(
                            input_u.as_mut_slice(),
                            self.pos + self.input_coded_resolution.get_area() as u64
                        )
                        .expect("Unexpected EOF!"),
                    self.input_coded_resolution.get_area() / 4
                );
                assert_eq!(
                    self.file
                        .read_at(
                            input_v.as_mut_slice(),
                            // V follows Y (area) plus U (area / 4) in the file.
                            self.pos + self.input_coded_resolution.get_area() as u64 * 5 / 4
                        )
                        .expect("Unexpected EOF!"),
                    self.input_coded_resolution.get_area() / 4
                );
                i420_to_nv12_chroma(
                    input_u.as_slice(),
                    input_v.as_slice(),
                    input_uv.as_mut_slice(),
                );
            }
            _ => panic!("Unsupported input format!"),
        };

        // Copy into the queue's planes, converting from the input stride to the queue stride.
        let mut y_plane = buffer.get_plane_mapping(0).unwrap();
        let mut uv_plane = buffer.get_plane_mapping(1).unwrap();
        nv12_copy(
            input_y.as_slice(),
            self.input_coded_resolution.width as usize,
            y_plane.as_mut(),
            self.queue_layout.planes[0].stride,
            input_uv.as_slice(),
            self.input_coded_resolution.width as usize,
            uv_plane.as_mut(),
            self.queue_layout.planes[1].stride,
            self.resolution.width as usize,
            self.resolution.height as usize,
        );
        // Fill the padding between the visible and the queue's coded size.
        // NOTE(review): exact fill behavior inferred from the helper's name — confirm against
        // image_processing::extend_border_nv12.
        extend_border_nv12(
            y_plane.as_mut(),
            uv_plane.as_mut(),
            self.resolution.width as usize,
            self.resolution.height as usize,
            self.queue_layout.planes[0].stride as usize,
            self.queue_layout.size.height as usize,
        );

        buffer.queue(&[y_plane.len(), uv_plane.len()])?;
        Ok(())
    }
}
+
// Generator for MmapNM12Frames. Note that we do the actual loading from disk in |queue()|; this
// basically just keeps track of offsets.
struct DiskFrameReader<'a> {
    // Backing raw YUV file, shared with the handles we hand out.
    file: &'a File,
    // Pixel format of the file contents.
    input_fourcc: DecodedFormat,
    // Visible size of each frame.
    visible_size: Resolution,
    // Coded (padded) size of each frame as stored in the file.
    input_coded_size: Resolution,
    // Layout of the V4L2 output-queue buffers.
    layout: FrameLayout,
    // Byte offset of the next frame to hand out.
    pos: u64,
    // Index of the next frame; also used as its timestamp.
    frame_num: usize,
    // Number of frames to read before the iterator ends.
    total_frames: usize,
}
+
+impl<'a> Iterator for DiskFrameReader<'a> {
+    type Item = (FrameMetadata, MmapNM12Frame<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.frame_num >= self.total_frames {
+            return None;
+        }
+
+        let meta = FrameMetadata {
+            timestamp: self.frame_num as u64,
+            layout: self.layout.clone(),
+            force_keyframe: false,
+        };
+
+        let handle = MmapNM12Frame {
+            resolution: self.visible_size,
+            file: &self.file,
+            input_fourcc: self.input_fourcc.clone(),
+            pos: self.pos,
+            input_coded_resolution: self.input_coded_size,
+            queue_layout: self.layout.clone(),
+        };
+
+        self.frame_num += 1;
+        // The 3/2 is an implicit assumption about 4:2:0 subsampling. We probably don't need to
+        // support other subsampling methods, but if we do, make sure to change this line!
+        self.pos += self.input_coded_size.get_area() as u64 * 3 / 2;
+
+        Some((meta, handle))
+    }
+}
+
+impl<'a> DiskFrameReader<'_> {
+    pub fn new(
+        file: &'a File,
+        input_fourcc: DecodedFormat,
+        visible_size: Resolution,
+        input_coded_size: Resolution,
+        layout: FrameLayout,
+        total_frames: usize,
+    ) -> DiskFrameReader<'a> {
+        DiskFrameReader {
+            file: file,
+            input_fourcc: input_fourcc,
+            visible_size: visible_size,
+            input_coded_size: input_coded_size,
+            layout: layout,
+            pos: 0,
+            frame_num: 0,
+            total_frames: total_frames,
+        }
+    }
+}
+
// V4L2 stateful encoders are all of the form "StatefulEncoder<Handle, V4L2Backend<Handle,
// CaptureBuffers, Codec>>". Since we know that all the encoders in this file are going to be V4L2
// stateful and we know the Handle type is going to be MmapNM12Frame, we can alias this type a
// little bit to make the signature smaller.
type MmapEncoder<'a, Codec> =
    StatefulEncoder<MmapNM12Frame<'a>, V4L2Backend<MmapNM12Frame<'a>, MmapingCapture, Codec>>;
+
+fn codec_to_pixelformat(codec: Codec) -> v4l2r::PixelFormat {
+    match codec {
+        Codec::H264 => v4l2r::PixelFormat::from_fourcc(b"H264"),
+        Codec::H265 => v4l2r::PixelFormat::from_fourcc(b"HEVC"),
+        Codec::VP9 => v4l2r::PixelFormat::from_fourcc(b"VP90"),
+        Codec::VP8 => v4l2r::PixelFormat::from_fourcc(b"VP80"),
+        _ => panic!("Unsupported format!"),
+    }
+}
+
+fn codec_to_ivf_magic(codec: Codec) -> [u8; 4] {
+    match codec {
+        // Note that H264 does not generally use IVF containers.
+        Codec::VP8 => IvfFileHeader::CODEC_VP8,
+        Codec::VP9 => IvfFileHeader::CODEC_VP9,
+        _ => panic!("Unsupported format!"),
+    }
+}
+
+fn do_encode_loop<'a, Codecz>(
+    mut encoder: MmapEncoder<'a, Codecz>,
+    input: &'a File,
+    args: Args,
+) -> ()
+where
+    V4L2Backend<MmapNM12Frame<'a>, MmapingCapture, Codecz>: EncoderCodec,
+{
+    // This is the part where we G_FMT to get the actual dimensions of the queue buffers.
+    let layout = v4l2_format_to_frame_layout(&encoder.backend().output_format().unwrap());
+
+    let mut frame_reader = DiskFrameReader::new(
+        &input,
+        args.fourcc,
+        (args.width, args.height).into(),
+        (
+            args.coded_width.unwrap_or(args.width),
+            args.coded_height.unwrap_or(args.height),
+        )
+            .into(),
+        layout,
+        args.count,
+    );
+
+    let codec = args.codec.unwrap_or_default();
+    let output_file = args.output.map(|path| {
+        let mut output = File::create(path).expect("Error opening output file!");
+
+        if codec != Codec::H264 {
+            let hdr = IvfFileHeader::new(
+                codec_to_ivf_magic(codec),
+                args.width as u16,
+                args.height as u16,
+                args.framerate,
+                args.count as u32,
+            );
+            hdr.writo_into(&mut output)
+                .expect("Error writing IVF file header!");
+        }
+
+        output
+    });
+
+    // Unwrapping an optional takes ownership of it, so we do that outside of the lambda so we
+    // don't violate FnMut's lifetime requirements.
+    match output_file {
+        Some(mut output_file) => {
+            let frame_consumer = |coded_chunk: CodedBitstreamBuffer| {
+                if codec != Codec::H264 {
+                    let hdr = IvfFrameHeader {
+                        timestamp: coded_chunk.metadata.timestamp,
+                        frame_size: coded_chunk.bitstream.len() as u32,
+                    };
+                    hdr.writo_into(&mut output_file)
+                        .expect("Error writing IVF frame header!");
+                }
+
+                let _ = output_file
+                    .write(&coded_chunk.bitstream[..])
+                    .expect("Error writing output file!");
+            };
+            simple_encode_loop(&mut encoder, &mut frame_reader, frame_consumer)
+                .expect("Failed to encode!");
+        }
+        None => {
+            simple_encode_loop(&mut encoder, &mut frame_reader, |_| ()).expect("Failed to encode!")
+        }
+    };
+}
+
+pub fn do_encode(input: File, args: Args) -> () {
+    let codec = args.codec.unwrap_or_default();
+    let device = find_device_with_capture(codec_to_pixelformat(codec))
+        .expect("Could not find an encoder for codec");
+    let device = Device::open(&device, DeviceConfig::new().non_blocking_dqbuf()).expect("open");
+    let device = Arc::new(device);
+
+    let resolution = Resolution {
+        width: args.width,
+        height: args.height,
+    };
+    let queue_fourcc = Fourcc::from(b"NM12");
+    let tunings: Tunings = Tunings {
+        rate_control: RateControl::ConstantBitrate(args.bitrate),
+        framerate: args.framerate,
+        ..Default::default()
+    };
+
+    match codec {
+        Codec::H264 => do_encode_loop(
+            V4L2StatefulH264Encoder::new(
+                device,
+                MmapingCapture,
+                cros_codecs::encoder::h264::EncoderConfig {
+                    resolution: resolution.clone(),
+                    initial_tunings: tunings.clone(),
+                    ..Default::default()
+                },
+                queue_fourcc,
+                resolution,
+                tunings,
+            )
+            .expect("Failed to create encoder"),
+            &input,
+            args,
+        ),
+        Codec::H265 => do_encode_loop(
+            V4L2StatefulH265Encoder::new(
+                device,
+                MmapingCapture,
+                cros_codecs::encoder::h265::EncoderConfig {
+                    resolution: resolution.clone(),
+                    ..Default::default()
+                },
+                queue_fourcc,
+                resolution,
+                tunings,
+            )
+            .expect("Failed to create encoder"),
+            &input,
+            args,
+        ),
+        Codec::VP8 => do_encode_loop(
+            V4L2StatefulVP8Encoder::new(
+                device,
+                MmapingCapture,
+                cros_codecs::encoder::vp8::EncoderConfig {
+                    resolution: resolution.clone(),
+                    ..Default::default()
+                },
+                queue_fourcc,
+                resolution,
+                tunings,
+            )
+            .expect("Failed to create encoder"),
+            &input,
+            args,
+        ),
+        Codec::VP9 => do_encode_loop(
+            V4L2StatefulVP9Encoder::new(
+                device,
+                MmapingCapture,
+                cros_codecs::encoder::vp9::EncoderConfig {
+                    resolution: resolution.clone(),
+                    initial_tunings: tunings.clone(),
+                    ..Default::default()
+                },
+                queue_fourcc,
+                resolution,
+                tunings,
+            )
+            .expect("Failed to create encoder"),
+            &input,
+            args,
+        ),
+        _ => panic!("Unsupported format!"),
+    };
+}
diff --git a/examples/ccenc/vaapi_encoder.rs b/examples/ccenc/vaapi_encoder.rs
new file mode 100644
index 0000000..f3596bd
--- /dev/null
+++ b/examples/ccenc/vaapi_encoder.rs
@@ -0,0 +1,318 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::borrow::Borrow;
+use std::fs::File;
+use std::io::Read;
+use std::io::Write;
+use std::rc::Rc;
+
+use cros_codecs::backend::vaapi::surface_pool::PooledVaSurface;
+use cros_codecs::backend::vaapi::surface_pool::VaSurfacePool;
+use cros_codecs::bitstream_utils::IvfFileHeader;
+use cros_codecs::bitstream_utils::IvfFrameHeader;
+use cros_codecs::decoder::FramePool;
+use cros_codecs::encoder::av1::EncoderConfig as AV1EncoderConfig;
+use cros_codecs::encoder::h264::EncoderConfig as H264EncoderConfig;
+use cros_codecs::encoder::stateless::av1;
+use cros_codecs::encoder::stateless::h264;
+use cros_codecs::encoder::stateless::vp9;
+use cros_codecs::encoder::vp9::EncoderConfig as VP9EncoderConfig;
+use cros_codecs::encoder::FrameMetadata;
+use cros_codecs::encoder::RateControl;
+use cros_codecs::encoder::Tunings;
+use cros_codecs::encoder::VideoEncoder;
+use cros_codecs::image_processing::extend_border_nv12;
+use cros_codecs::image_processing::i420_to_nv12_chroma;
+use cros_codecs::image_processing::nv12_copy;
+use cros_codecs::BlockingMode;
+use cros_codecs::DecodedFormat;
+use cros_codecs::Fourcc;
+use cros_codecs::FrameLayout;
+use cros_codecs::PlaneLayout;
+use cros_codecs::Resolution;
+
+use crate::util::Args;
+use crate::util::Codec;
+
+fn upload_img<M: libva::SurfaceMemoryDescriptor>(
+    display: &Rc<libva::Display>,
+    surface: &libva::Surface<M>,
+    resolution: Resolution,
+    input_coded_resolution: Resolution,
+    data: &[u8],
+    input_fourcc: DecodedFormat,
+) -> FrameLayout {
+    let input_y = &data[0..input_coded_resolution.get_area()];
+    let mut tmp_input_uv: Vec<u8> = Vec::new();
+    let input_uv = match input_fourcc {
+        DecodedFormat::NV12 => {
+            &data[input_coded_resolution.get_area()..(input_coded_resolution.get_area() * 3 / 2)]
+        }
+        DecodedFormat::I420 => {
+            tmp_input_uv.resize(input_coded_resolution.get_area() / 2, 0);
+            let input_u = &data
+                [input_coded_resolution.get_area()..(input_coded_resolution.get_area() * 5 / 4)];
+            let input_v = &data[(input_coded_resolution.get_area() * 5 / 4)
+                ..(input_coded_resolution.get_area() * 3 / 2)];
+            i420_to_nv12_chroma(input_u, input_v, tmp_input_uv.as_mut_slice());
+            tmp_input_uv.as_slice()
+        }
+        _ => panic!("Unsupported input format!"),
+    };
+
+    let image_fmts = display.query_image_formats().unwrap();
+    let image_fmt = image_fmts
+        .into_iter()
+        .find(|f| f.fourcc == libva::VA_FOURCC_NV12)
+        .unwrap();
+    let mut image = libva::Image::create_from(
+        surface,
+        image_fmt,
+        (resolution.width, resolution.height),
+        (resolution.width, resolution.height),
+    )
+    .unwrap();
+    let va_image = *image.image();
+    let dst = image.as_mut();
+    let (dst_y, dst_uv) =
+        (&mut dst[va_image.offsets[0] as usize..]).split_at_mut(va_image.offsets[1] as usize);
+
+    nv12_copy(
+        input_y,
+        input_coded_resolution.width as usize,
+        dst_y,
+        va_image.pitches[0] as usize,
+        input_uv,
+        input_coded_resolution.width as usize,
+        dst_uv,
+        va_image.pitches[1] as usize,
+        resolution.width as usize,
+        resolution.height as usize,
+    );
+    extend_border_nv12(
+        dst_y,
+        dst_uv,
+        resolution.width as usize,
+        resolution.height as usize,
+        va_image.pitches[0] as usize,
+        va_image.height as usize,
+    );
+
+    drop(image);
+
+    surface.sync().unwrap();
+
+    FrameLayout {
+        format: (Fourcc::from(b"NV12"), 0),
+        size: resolution,
+        planes: vec![
+            PlaneLayout {
+                buffer_index: 0,
+                offset: va_image.offsets[0] as usize,
+                stride: va_image.pitches[0] as usize,
+            },
+            PlaneLayout {
+                buffer_index: 0,
+                offset: va_image.offsets[1] as usize,
+                stride: va_image.pitches[1] as usize,
+            },
+        ],
+    }
+}
+
+fn new_h264_vaapi_encoder(
+    args: &Args,
+    display: &Rc<libva::Display>,
+) -> Box<dyn VideoEncoder<PooledVaSurface<()>>> {
+    let resolution = Resolution {
+        width: args.width,
+        height: args.height,
+    };
+
+    let config = H264EncoderConfig {
+        resolution,
+        initial_tunings: Tunings {
+            framerate: args.framerate,
+            rate_control: RateControl::ConstantBitrate(args.bitrate),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+
+    let fourcc = b"NV12".into();
+    let encoder = h264::StatelessEncoder::new_vaapi(
+        Rc::clone(display),
+        config,
+        fourcc,
+        resolution,
+        args.low_power,
+        BlockingMode::Blocking,
+    )
+    .expect("Unable to create encoder");
+
+    Box::new(encoder)
+}
+
+fn new_vp9_vaapi_encoder(
+    args: &Args,
+    display: &Rc<libva::Display>,
+) -> Box<dyn VideoEncoder<PooledVaSurface<()>>> {
+    let resolution = Resolution {
+        width: args.width,
+        height: args.height,
+    };
+
+    let config = VP9EncoderConfig {
+        resolution,
+        initial_tunings: Tunings {
+            framerate: args.framerate,
+            rate_control: RateControl::ConstantBitrate(args.bitrate),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+
+    let fourcc = b"NV12".into();
+    let encoder = vp9::StatelessEncoder::new_vaapi(
+        Rc::clone(display),
+        config,
+        fourcc,
+        resolution,
+        args.low_power,
+        BlockingMode::Blocking,
+    )
+    .expect("Unable to create encoder");
+
+    Box::new(encoder)
+}
+
+fn new_av1_vaapi_encoder(
+    args: &Args,
+    display: &Rc<libva::Display>,
+) -> Box<dyn VideoEncoder<PooledVaSurface<()>>> {
+    let resolution = Resolution {
+        width: args.width,
+        height: args.height,
+    };
+
+    let config = AV1EncoderConfig {
+        resolution,
+        initial_tunings: Tunings {
+            framerate: args.framerate,
+            rate_control: RateControl::ConstantBitrate(args.bitrate),
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+
+    let fourcc = b"NV12".into();
+    let encoder = av1::StatelessEncoder::new_vaapi(
+        Rc::clone(display),
+        config,
+        fourcc,
+        resolution,
+        args.low_power,
+        BlockingMode::Blocking,
+    )
+    .expect("Unable to create encoder");
+
+    Box::new(encoder)
+}
+
+pub fn do_encode(mut input: File, args: Args) -> () {
+    let display = libva::Display::open().unwrap();
+
+    let codec = args.codec.unwrap_or_default();
+
+    let mut encoder = match codec {
+        Codec::H264 => new_h264_vaapi_encoder(&args, &display),
+        Codec::VP9 => new_vp9_vaapi_encoder(&args, &display),
+        Codec::AV1 => new_av1_vaapi_encoder(&args, &display),
+        _ => panic!("Unsupported format!"),
+    };
+
+    let mut pool = VaSurfacePool::new(
+        Rc::clone(&display),
+        libva::VA_RT_FORMAT_YUV420,
+        Some(libva::UsageHint::USAGE_HINT_ENCODER),
+        Resolution {
+            width: args.width,
+            height: args.height,
+        },
+    );
+
+    pool.add_frames(vec![(); 16]).unwrap();
+
+    let coded_width = args.coded_width.unwrap_or(args.width);
+    let coded_height = args.coded_height.unwrap_or(args.height);
+    let coded_frame_size: usize = (coded_width * coded_height * 3 / 2) as usize;
+
+    let mut output = args.output.map(|output| File::create(output).unwrap());
+
+    if let Some(ref mut output) = output {
+        if codec == Codec::VP9 {
+            let hdr = IvfFileHeader::new(
+                IvfFileHeader::CODEC_VP9,
+                args.width as u16,
+                args.height as u16,
+                args.framerate,
+                args.count as u32,
+            );
+            hdr.writo_into(output).unwrap();
+        }
+    }
+
+    let mut buf = vec![0u8; coded_frame_size];
+    for i in 0..args.count {
+        input.read_exact(&mut buf[..]).unwrap();
+        let handle = pool.get_surface().unwrap();
+        let layout = upload_img(
+            &display,
+            handle.borrow(),
+            (args.width, args.height).into(),
+            (coded_width, coded_height).into(),
+            &buf[..],
+            args.fourcc,
+        );
+
+        let input_frame = FrameMetadata {
+            layout,
+            timestamp: i as u64,
+            force_keyframe: false,
+        };
+
+        encoder.encode(input_frame, handle).unwrap();
+        while let Some(coded) = encoder.poll().unwrap() {
+            if let Some(ref mut output) = output {
+                if codec == Codec::VP9 {
+                    let hdr = IvfFrameHeader {
+                        timestamp: coded.metadata.timestamp,
+                        frame_size: coded.bitstream.len() as u32,
+                    };
+
+                    hdr.writo_into(output).unwrap();
+                }
+
+                output.write_all(&coded.bitstream).unwrap();
+            }
+        }
+    }
+
+    encoder.drain().unwrap();
+    while let Some(coded) = encoder.poll().unwrap() {
+        if let Some(ref mut output) = output {
+            if codec == Codec::VP9 {
+                let hdr = IvfFrameHeader {
+                    timestamp: coded.metadata.timestamp,
+                    frame_size: coded.bitstream.len() as u32,
+                };
+
+                hdr.writo_into(output).unwrap();
+            }
+
+            output.write_all(&coded.bitstream).unwrap();
+        }
+    }
+}
diff --git a/src/backend.rs b/src/backend.rs
index d54fb92..a3eec8f 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -10,7 +10,7 @@
 
 #[cfg(any(test, fuzzing))]
 pub(crate) mod dummy;
-#[cfg(feature = "v4l2")]
+#[cfg(any(feature = "v4l2"))]
 pub mod v4l2;
 #[cfg(feature = "vaapi")]
 pub mod vaapi;
diff --git a/src/backend/v4l2.rs b/src/backend/v4l2.rs
index c607540..5de8778 100644
--- a/src/backend/v4l2.rs
+++ b/src/backend/v4l2.rs
@@ -2,8 +2,10 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-//! V4L2 backend for stateful encoders.
+//! V4L2 backend
 
+pub mod decoder;
+#[cfg(feature = "v4l2")]
 pub mod encoder;
 
 impl From<v4l2r::PixelFormat> for crate::Fourcc {
diff --git a/src/backend/v4l2/decoder.rs b/src/backend/v4l2/decoder.rs
new file mode 100644
index 0000000..a520dd7
--- /dev/null
+++ b/src/backend/v4l2/decoder.rs
@@ -0,0 +1,18 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#[cfg(feature = "v4l2")]
+use crate::Resolution;
+
+pub mod stateless;
+
+pub trait V4l2StreamInfo {
+    /// Returns the minimum number of surfaces required to decode the stream.
+    // name was chosen to match vaapi
+    fn min_num_frames(&self) -> usize;
+    /// Returns the coded size of the surfaces required to decode the stream.
+    fn coded_size(&self) -> Resolution;
+    /// Returns the visible rectangle within the coded size for the stream.
+    fn visible_rect(&self) -> ((u32, u32), (u32, u32));
+}
diff --git a/src/backend/v4l2/decoder/stateless.rs b/src/backend/v4l2/decoder/stateless.rs
new file mode 100644
index 0000000..6e3e940
--- /dev/null
+++ b/src/backend/v4l2/decoder/stateless.rs
@@ -0,0 +1,214 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use crate::backend::v4l2::decoder::V4l2StreamInfo;
+use crate::decoder::stateless::PoolLayer;
+use crate::decoder::stateless::StatelessCodec;
+use crate::decoder::stateless::StatelessDecoderBackend;
+use crate::decoder::stateless::TryFormat;
+use crate::decoder::DecodedHandle;
+use crate::decoder::DynHandle;
+use crate::decoder::FramePool;
+use crate::decoder::MappableHandle;
+use crate::decoder::StreamInfo;
+use crate::DecodedFormat;
+use crate::Resolution;
+
+use crate::device::v4l2::stateless::device::V4l2Device;
+use crate::device::v4l2::stateless::request::V4l2Request;
+
+pub struct V4l2Picture {
+    request: V4l2Request,
+    // To properly decode a stream while output and capture queues
+    // are processed independently it's required for v4l2 backend
+    // to maintain DPB buffer recycling. The following vector
+    // is used to prevent reference pictures to be reused while
+    // current picture is still being decoded.
+    // TODO: handle ref list internally by V4l2Request.
+    ref_pictures: Option<Vec<Rc<RefCell<V4l2Picture>>>>,
+}
+
+impl V4l2Picture {
+    pub fn new(request: V4l2Request) -> Self {
+        Self {
+            request,
+            ref_pictures: None,
+        }
+    }
+    pub fn timestamp(&self) -> u64 {
+        self.request.timestamp()
+    }
+    pub fn set_ref_pictures(&mut self, ref_pictures: Vec<Rc<RefCell<V4l2Picture>>>) -> &mut Self {
+        self.ref_pictures = Some(ref_pictures);
+        self
+    }
+    pub fn sync(&mut self) -> &mut Self {
+        self.request.sync();
+        self.ref_pictures = None;
+        self
+    }
+    pub fn request(&mut self) -> &mut V4l2Request {
+        &mut self.request
+    }
+}
+
+impl<'a> MappableHandle for std::cell::Ref<'a, V4l2Picture> {
+    fn read(&mut self, data: &mut [u8]) -> anyhow::Result<()> {
+        self.request.result().read(data);
+        Ok(())
+    }
+    fn image_size(&mut self) -> usize {
+        self.request.result().length()
+    }
+}
+
+pub struct BackendHandle {
+    pub picture: Rc<RefCell<V4l2Picture>>,
+}
+
+impl<'a> DynHandle for std::cell::Ref<'a, BackendHandle> {
+    fn dyn_mappable_handle<'b>(&'b self) -> anyhow::Result<Box<dyn MappableHandle + 'b>> {
+        self.picture.borrow_mut().sync();
+        Ok(Box::new(self.picture.borrow()))
+    }
+}
+
+pub struct V4l2StatelessDecoderHandle {
+    pub handle: Rc<RefCell<BackendHandle>>,
+}
+
+impl Clone for V4l2StatelessDecoderHandle {
+    fn clone(&self) -> Self {
+        Self {
+            handle: Rc::clone(&self.handle),
+        }
+    }
+}
+
+impl DecodedHandle for V4l2StatelessDecoderHandle {
+    type Descriptor = ();
+
+    fn coded_resolution(&self) -> Resolution {
+        todo!();
+    }
+
+    fn display_resolution(&self) -> Resolution {
+        todo!();
+    }
+
+    fn timestamp(&self) -> u64 {
+        self.handle.borrow().picture.borrow().timestamp()
+    }
+
+    fn dyn_picture<'a>(&'a self) -> Box<dyn DynHandle + 'a> {
+        Box::new(self.handle.borrow())
+    }
+
+    fn sync(&self) -> anyhow::Result<()> {
+        Ok(())
+    }
+
+    fn is_ready(&self) -> bool {
+        todo!();
+    }
+
+    fn resource(&self) -> std::cell::Ref<()> {
+        todo!();
+    }
+}
+
+pub struct V4l2StatelessDecoderBackend {
+    pub device: V4l2Device,
+    stream_info: StreamInfo,
+}
+
+impl V4l2StatelessDecoderBackend {
+    pub fn new() -> Self {
+        Self {
+            device: V4l2Device::new(),
+            stream_info: StreamInfo {
+                format: DecodedFormat::I420,
+                min_num_frames: 0,
+                coded_resolution: Resolution::from((0, 0)),
+                display_resolution: Resolution::from((0, 0)),
+            },
+        }
+    }
+}
+
+impl FramePool for V4l2StatelessDecoderBackend {
+    type Descriptor = ();
+
+    fn coded_resolution(&self) -> Resolution {
+        todo!();
+    }
+
+    fn set_coded_resolution(&mut self, _resolution: Resolution) {
+        todo!();
+    }
+
+    fn add_frames(&mut self, _descriptors: Vec<Self::Descriptor>) -> Result<(), anyhow::Error> {
+        todo!();
+    }
+
+    fn num_free_frames(&self) -> usize {
+        self.device.num_free_buffers()
+    }
+
+    fn num_managed_frames(&self) -> usize {
+        self.device.num_buffers()
+    }
+
+    fn clear(&mut self) {
+        todo!();
+    }
+}
+
+impl<Codec: StatelessCodec> TryFormat<Codec> for V4l2StatelessDecoderBackend
+where
+    for<'a> &'a Codec::FormatInfo: V4l2StreamInfo,
+{
+    fn try_format(
+        &mut self,
+        format_info: &Codec::FormatInfo,
+        format: DecodedFormat,
+    ) -> anyhow::Result<()> {
+        // TODO
+        // VIDIOC_S/G_FMT has been called on both output and capture buffers.
+        // The VAAPI implementation looks to do actual format checking here.
+        // The values provided here are directly from the codec (modulo format).
+        // Hardware may handle this differently, i.e. buffer padding.
+        self.stream_info.format = format;
+        let visible_rect = format_info.visible_rect();
+
+        let display_resolution = Resolution {
+            width: visible_rect.1 .0 - visible_rect.0 .0,
+            height: visible_rect.1 .1 - visible_rect.0 .1,
+        };
+
+        self.stream_info.min_num_frames = format_info.min_num_frames();
+        self.stream_info.coded_resolution = format_info.coded_size();
+        self.stream_info.display_resolution = display_resolution;
+        Ok(())
+    }
+}
+
+impl StatelessDecoderBackend for V4l2StatelessDecoderBackend {
+    type Handle = V4l2StatelessDecoderHandle;
+
+    type FramePool = Self;
+
+    fn stream_info(&self) -> Option<&StreamInfo> {
+        // TODO
+        Some(&self.stream_info)
+    }
+
+    fn frame_pool(&mut self, _: PoolLayer) -> Vec<&mut Self::FramePool> {
+        self.device.recycle_buffers();
+        vec![self]
+    }
+}
diff --git a/src/backend/v4l2/encoder.rs b/src/backend/v4l2/encoder.rs
index d4cd0e0..aeb900f 100644
--- a/src/backend/v4l2/encoder.rs
+++ b/src/backend/v4l2/encoder.rs
@@ -1,10 +1,12 @@
 // Copyright 2024 The ChromiumOS Authors
 // Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::os::fd::AsRawFd;
+use std::path::PathBuf;
 use std::sync::Arc;
 
 use nix::sys::stat::fstat;
@@ -23,18 +25,19 @@
 use v4l2r::device::queue::direction::Capture;
 use v4l2r::device::queue::direction::Output;
 use v4l2r::device::queue::dqbuf::DqBuffer;
-use v4l2r::device::queue::qbuf::get_free::GetFreeBufferError;
-use v4l2r::device::queue::qbuf::get_free::GetFreeCaptureBuffer;
-use v4l2r::device::queue::qbuf::get_free::GetFreeOutputBuffer;
-use v4l2r::device::queue::qbuf::OutputQueueable;
-use v4l2r::device::queue::qbuf::OutputQueueableProvider;
 use v4l2r::device::queue::qbuf::QBuffer;
 use v4l2r::device::queue::BuffersAllocated;
 use v4l2r::device::queue::CreateQueueError;
+use v4l2r::device::queue::GetFreeBufferError;
+use v4l2r::device::queue::GetFreeCaptureBuffer;
+use v4l2r::device::queue::GetFreeOutputBuffer;
+use v4l2r::device::queue::OutputQueueable;
+use v4l2r::device::queue::OutputQueueableProvider;
 use v4l2r::device::queue::Queue;
 use v4l2r::device::queue::RequestBuffersError;
 use v4l2r::device::AllocatedQueue;
 use v4l2r::device::Device;
+use v4l2r::device::DeviceConfig;
 use v4l2r::device::Stream;
 use v4l2r::device::TryDequeue;
 use v4l2r::ioctl;
@@ -69,6 +72,7 @@
 use crate::utils::DmabufFrame;
 use crate::utils::UserPtrFrame;
 use crate::Fourcc;
+use crate::FrameLayout;
 use crate::Resolution;
 
 #[derive(Debug, Error)]
@@ -320,7 +324,12 @@
     /// otherwise if the buffer may not be queue returns false.
     fn queue(
         &mut self,
-        buffer: QBuffer<'_, Capture, Vec<Self::PlaneHandle>, Vec<Self::PlaneHandle>>,
+        buffer: QBuffer<
+            Capture,
+            Vec<Self::PlaneHandle>,
+            Vec<Self::PlaneHandle>,
+            &Queue<Capture, BuffersAllocated<Vec<Self::PlaneHandle>>>,
+        >,
     ) -> anyhow::Result<bool>;
 
     /// Maps the the buffer and returns its contents in form of [`Vec<u8>`]
@@ -335,7 +344,12 @@
 
     fn queue(
         &mut self,
-        buffer: QBuffer<'_, Capture, Vec<Self::PlaneHandle>, Vec<Self::PlaneHandle>>,
+        buffer: QBuffer<
+            Capture,
+            Vec<Self::PlaneHandle>,
+            Vec<Self::PlaneHandle>,
+            &Queue<Capture, BuffersAllocated<Vec<Self::PlaneHandle>>>,
+        >,
     ) -> anyhow::Result<bool> {
         buffer.queue()?;
         Ok(true)
@@ -629,8 +643,8 @@
         let output_pixfmt: PixelFormat = fourcc.0.into();
 
         let output_format = Format {
-            width: visible_size.width,
-            height: visible_size.height,
+            width: coded_size.width,
+            height: coded_size.height,
             pixelformat: output_pixfmt,
             // Let the driver pick
             plane_fmt: vec![],
@@ -648,11 +662,13 @@
 
         Self::apply_ctrl(&device, "header mode", VideoHeaderMode::JoinedWith1stFrame)?;
 
-        if visible_size.width > coded_size.width || visible_size.height > coded_size.height {
+        if visible_size.width > output_format.width || visible_size.height > output_format.height {
             return Err(InitializationError::Unsupported(
                 UnsupportedError::FrameUpscaling,
             ));
-        } else if visible_size != coded_size {
+        } else if visible_size.width != output_format.width
+            || visible_size.height != output_format.height
+        {
             log::info!("The frame visible size is not aligned to coded size, applying selection");
             if let Err(err) = Self::apply_selection(&device, visible_size) {
                 log::error!("Failed to set selection: {err:?}");
@@ -958,6 +974,60 @@
     }
 }
 
+pub fn find_device_with_capture(pixfmt: v4l2r::PixelFormat) -> Option<PathBuf> {
+    const MAX_DEVICE_NO: usize = 128;
+    for dev_no in 0..MAX_DEVICE_NO {
+        let device_path = PathBuf::from(format!("/dev/video{dev_no}"));
+        let Ok(device) = Device::open(&device_path, DeviceConfig::new()) else {
+            continue;
+        };
+
+        let device = Arc::new(device);
+
+        let Ok(queue) = Queue::get_capture_mplane_queue(device) else {
+            continue;
+        };
+
+        for fmt in queue.format_iter() {
+            if fmt.pixelformat == pixfmt {
+                return Some(device_path);
+            }
+        }
+    }
+
+    None
+}
+
+pub fn v4l2_format_to_frame_layout(format: &v4l2r::Format) -> FrameLayout {
+    let mut layout = FrameLayout {
+        format: (Fourcc::from(format.pixelformat.to_u32()), 0),
+        size: Resolution {
+            width: format.width,
+            height: format.height,
+        },
+        planes: format
+            .plane_fmt
+            .iter()
+            .map(|plane| crate::PlaneLayout {
+                buffer_index: 0,
+                offset: 0,
+                stride: plane.bytesperline as usize,
+            })
+            .collect(),
+    };
+
+    // Patch FrameLayout
+    match &format.pixelformat.to_fourcc() {
+        b"NM12" if layout.planes.len() == 2 => {
+            layout.planes[1].buffer_index = 1;
+        }
+        b"NV12" if layout.planes.len() == 1 => {}
+        _ => panic!("Unknown format"),
+    };
+
+    layout
+}
+
 #[cfg(test)]
 pub(crate) mod tests {
     use std::os::fd::AsFd;
@@ -979,31 +1049,6 @@
     use crate::encoder::tests::fill_test_frame_nm12;
     use crate::encoder::tests::fill_test_frame_nv12;
     use crate::encoder::tests::get_test_frame_t;
-    use crate::FrameLayout;
-
-    pub fn find_device_with_capture(pixfmt: v4l2r::PixelFormat) -> Option<PathBuf> {
-        const MAX_DEVICE_NO: usize = 128;
-        for dev_no in 0..MAX_DEVICE_NO {
-            let device_path = PathBuf::from(format!("/dev/video{dev_no}"));
-            let Ok(device) = Device::open(&device_path, DeviceConfig::new()) else {
-                continue;
-            };
-
-            let device = Arc::new(device);
-
-            let Ok(queue) = Queue::get_capture_mplane_queue(device) else {
-                continue;
-            };
-
-            for fmt in queue.format_iter() {
-                if fmt.pixelformat == pixfmt {
-                    return Some(device_path);
-                }
-            }
-        }
-
-        None
-    }
 
     /// A simple wrapper for a GBM device node.
     pub struct GbmDevice(std::fs::File);
@@ -1137,36 +1182,6 @@
         }
     }
 
-    pub fn v4l2_format_to_frame_layout(format: &v4l2r::Format) -> FrameLayout {
-        let mut layout = FrameLayout {
-            format: (Fourcc::from(format.pixelformat.to_u32()), 0),
-            size: Resolution {
-                width: format.width,
-                height: format.height,
-            },
-            planes: format
-                .plane_fmt
-                .iter()
-                .map(|plane| crate::PlaneLayout {
-                    buffer_index: 0,
-                    offset: 0,
-                    stride: plane.bytesperline as usize,
-                })
-                .collect(),
-        };
-
-        // Patch FrameLayout
-        match &format.pixelformat.to_fourcc() {
-            b"NM12" if layout.planes.len() == 2 => {
-                layout.planes[1].buffer_index = 1;
-            }
-            b"NV12" if layout.planes.len() == 1 => {}
-            _ => panic!("Unknown format"),
-        };
-
-        layout
-    }
-
     pub struct TestMmapFrame {
         meta: FrameMetadata,
         frame_count: u64,
diff --git a/src/backend/vaapi.rs b/src/backend/vaapi.rs
index 40cb68c..243fe0d 100644
--- a/src/backend/vaapi.rs
+++ b/src/backend/vaapi.rs
@@ -25,15 +25,15 @@
 
 fn va_rt_format_to_string(va_rt_format: u32) -> String {
     String::from(match va_rt_format {
-        libva::constants::VA_RT_FORMAT_YUV420 => "YUV420",
-        libva::constants::VA_RT_FORMAT_YUV422 => "YUV422",
-        libva::constants::VA_RT_FORMAT_YUV444 => "YUV444",
-        libva::constants::VA_RT_FORMAT_YUV420_10 => "YUV420_10",
-        libva::constants::VA_RT_FORMAT_YUV420_12 => "YUV420_12",
-        libva::constants::VA_RT_FORMAT_YUV422_10 => "YUV422_10",
-        libva::constants::VA_RT_FORMAT_YUV422_12 => "YUV422_12",
-        libva::constants::VA_RT_FORMAT_YUV444_10 => "YUV444_10",
-        libva::constants::VA_RT_FORMAT_YUV444_12 => "YUV444_12",
+        libva::VA_RT_FORMAT_YUV420 => "YUV420",
+        libva::VA_RT_FORMAT_YUV422 => "YUV422",
+        libva::VA_RT_FORMAT_YUV444 => "YUV444",
+        libva::VA_RT_FORMAT_YUV420_10 => "YUV420_10",
+        libva::VA_RT_FORMAT_YUV420_12 => "YUV420_12",
+        libva::VA_RT_FORMAT_YUV422_10 => "YUV422_10",
+        libva::VA_RT_FORMAT_YUV422_12 => "YUV422_12",
+        libva::VA_RT_FORMAT_YUV444_10 => "YUV444_10",
+        libva::VA_RT_FORMAT_YUV444_12 => "YUV444_12",
         other => return format!("unknown VA rt_format {}", other),
     })
 }
@@ -49,53 +49,53 @@
 /// preferred order.
 const FORMAT_MAP: [FormatMap; 10] = [
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV420,
-        va_fourcc: libva::constants::VA_FOURCC_NV12,
+        rt_format: libva::VA_RT_FORMAT_YUV420,
+        va_fourcc: libva::VA_FOURCC_NV12,
         decoded_format: DecodedFormat::NV12,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV420,
-        va_fourcc: libva::constants::VA_FOURCC_I420,
+        rt_format: libva::VA_RT_FORMAT_YUV420,
+        va_fourcc: libva::VA_FOURCC_I420,
         decoded_format: DecodedFormat::I420,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV422,
-        va_fourcc: libva::constants::VA_FOURCC_422H,
+        rt_format: libva::VA_RT_FORMAT_YUV422,
+        va_fourcc: libva::VA_FOURCC_422H,
         decoded_format: DecodedFormat::I422,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV444,
-        va_fourcc: libva::constants::VA_FOURCC_444P,
+        rt_format: libva::VA_RT_FORMAT_YUV444,
+        va_fourcc: libva::VA_FOURCC_444P,
         decoded_format: DecodedFormat::I444,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV420_10,
-        va_fourcc: libva::constants::VA_FOURCC_P010,
+        rt_format: libva::VA_RT_FORMAT_YUV420_10,
+        va_fourcc: libva::VA_FOURCC_P010,
         decoded_format: DecodedFormat::I010,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV420_12,
-        va_fourcc: libva::constants::VA_FOURCC_P012,
+        rt_format: libva::VA_RT_FORMAT_YUV420_12,
+        va_fourcc: libva::VA_FOURCC_P012,
         decoded_format: DecodedFormat::I012,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV422_10,
-        va_fourcc: libva::constants::VA_FOURCC_Y210,
+        rt_format: libva::VA_RT_FORMAT_YUV422_10,
+        va_fourcc: libva::VA_FOURCC_Y210,
         decoded_format: DecodedFormat::I210,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV422_12,
-        va_fourcc: libva::constants::VA_FOURCC_Y212,
+        rt_format: libva::VA_RT_FORMAT_YUV422_12,
+        va_fourcc: libva::VA_FOURCC_Y212,
         decoded_format: DecodedFormat::I212,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV444_10,
-        va_fourcc: libva::constants::VA_FOURCC_Y410,
+        rt_format: libva::VA_RT_FORMAT_YUV444_10,
+        va_fourcc: libva::VA_FOURCC_Y410,
         decoded_format: DecodedFormat::I410,
     },
     FormatMap {
-        rt_format: libva::constants::VA_RT_FORMAT_YUV444_12,
-        va_fourcc: libva::constants::VA_FOURCC_Y412,
+        rt_format: libva::VA_RT_FORMAT_YUV444_12,
+        va_fourcc: libva::VA_FOURCC_Y412,
         decoded_format: DecodedFormat::I412,
     },
 ];
@@ -117,9 +117,7 @@
 
     // See whether this RT_FORMAT is supported by the given VAProfile and
     // VAEntrypoint pair.
-    if attrs[0].value == libva::constants::VA_ATTRIB_NOT_SUPPORTED
-        || attrs[0].value & rt_format == 0
-    {
+    if attrs[0].value == libva::VA_ATTRIB_NOT_SUPPORTED || attrs[0].value & rt_format == 0 {
         return Err(anyhow!(
             "rt_format {:?} not supported for profile {:?} and entrypoint {:?}",
             rt_format,
@@ -151,14 +149,14 @@
 
     fn try_from(value: &libva::VAImageFormat) -> Result<Self, Self::Error> {
         match value.fourcc {
-            libva::constants::VA_FOURCC_I420 => Ok(DecodedFormat::I420),
-            libva::constants::VA_FOURCC_NV12 => Ok(DecodedFormat::NV12),
-            libva::constants::VA_FOURCC_P010 => Ok(DecodedFormat::I010),
-            libva::constants::VA_FOURCC_P012 => Ok(DecodedFormat::I012),
-            libva::constants::VA_FOURCC_Y210 => Ok(DecodedFormat::I210),
-            libva::constants::VA_FOURCC_Y212 => Ok(DecodedFormat::I212),
-            libva::constants::VA_FOURCC_Y410 => Ok(DecodedFormat::I410),
-            libva::constants::VA_FOURCC_Y412 => Ok(DecodedFormat::I412),
+            libva::VA_FOURCC_I420 => Ok(DecodedFormat::I420),
+            libva::VA_FOURCC_NV12 => Ok(DecodedFormat::NV12),
+            libva::VA_FOURCC_P010 => Ok(DecodedFormat::I010),
+            libva::VA_FOURCC_P012 => Ok(DecodedFormat::I012),
+            libva::VA_FOURCC_Y210 => Ok(DecodedFormat::I210),
+            libva::VA_FOURCC_Y212 => Ok(DecodedFormat::I212),
+            libva::VA_FOURCC_Y410 => Ok(DecodedFormat::I410),
+            libva::VA_FOURCC_Y412 => Ok(DecodedFormat::I412),
             _ => Err(anyhow!("Unsupported format")),
         }
     }
diff --git a/src/backend/vaapi/decoder.rs b/src/backend/vaapi/decoder.rs
index 940d500..b91ecf5 100644
--- a/src/backend/vaapi/decoder.rs
+++ b/src/backend/vaapi/decoder.rs
@@ -55,7 +55,7 @@
     handle: &Option<DecodedHandle<M>>,
 ) -> libva::VASurfaceID {
     match handle {
-        None => libva::constants::VA_INVALID_SURFACE,
+        None => libva::VA_INVALID_SURFACE,
         Some(handle) => handle.borrow().surface().id(),
     }
 }
@@ -107,7 +107,7 @@
     /// Returns the minimum number of surfaces required to decode the stream.
     fn min_num_surfaces(&self) -> usize;
     /// Returns the coded size of the surfaces required to decode the stream.
-    fn coded_size(&self) -> (u32, u32);
+    fn coded_size(&self) -> Resolution;
     /// Returns the visible rectangle within the coded size for the stream.
     fn visible_rect(&self) -> ((u32, u32), (u32, u32));
 }
@@ -177,8 +177,7 @@
         let va_profile = hdr.va_profile()?;
         let rt_format = hdr.rt_format()?;
 
-        let coded_resolution =
-            Resolution::from(hdr.coded_size()).round(crate::ResolutionRoundMode::Even);
+        let coded_resolution = hdr.coded_size().round(crate::ResolutionRoundMode::Even);
 
         let format_map = if let Some(format_map) = format_map {
             format_map
@@ -291,15 +290,15 @@
                 config,
                 stream_info: StreamInfo {
                     format: match rt_format {
-                        libva::constants::VA_RT_FORMAT_YUV420 => DecodedFormat::I420,
-                        libva::constants::VA_RT_FORMAT_YUV422 => DecodedFormat::I422,
-                        libva::constants::VA_RT_FORMAT_YUV444 => DecodedFormat::I444,
-                        libva::constants::VA_RT_FORMAT_YUV420_10 => DecodedFormat::I010,
-                        libva::constants::VA_RT_FORMAT_YUV420_12 => DecodedFormat::I012,
-                        libva::constants::VA_RT_FORMAT_YUV422_10 => DecodedFormat::I210,
-                        libva::constants::VA_RT_FORMAT_YUV422_12 => DecodedFormat::I212,
-                        libva::constants::VA_RT_FORMAT_YUV444_10 => DecodedFormat::I410,
-                        libva::constants::VA_RT_FORMAT_YUV444_12 => DecodedFormat::I412,
+                        libva::VA_RT_FORMAT_YUV420 => DecodedFormat::I420,
+                        libva::VA_RT_FORMAT_YUV422 => DecodedFormat::I422,
+                        libva::VA_RT_FORMAT_YUV444 => DecodedFormat::I444,
+                        libva::VA_RT_FORMAT_YUV420_10 => DecodedFormat::I010,
+                        libva::VA_RT_FORMAT_YUV420_12 => DecodedFormat::I012,
+                        libva::VA_RT_FORMAT_YUV422_10 => DecodedFormat::I210,
+                        libva::VA_RT_FORMAT_YUV422_12 => DecodedFormat::I212,
+                        libva::VA_RT_FORMAT_YUV444_10 => DecodedFormat::I410,
+                        libva::VA_RT_FORMAT_YUV444_12 => DecodedFormat::I412,
                         _ => panic!("unrecognized RT format {}", rt_format),
                     },
                     coded_resolution,
@@ -476,10 +475,15 @@
         let offsets = image_inner.offsets.map(|x| x as usize);
 
         match image_inner.format.fourcc {
-            libva::constants::VA_FOURCC_NV12 => {
-                nv12_copy(self.as_ref(), buffer, width, height, pitches, offsets);
+            libva::VA_FOURCC_NV12 => {
+                let (src_y, src_uv) = self.as_ref().split_at(offsets[1]);
+                let (dst_y, dst_uv) = buffer.split_at_mut(width * height);
+                nv12_copy(
+                    src_y, pitches[0], dst_y, width, src_uv, pitches[1], dst_uv, width, width,
+                    height,
+                );
             }
-            libva::constants::VA_FOURCC_I420 => {
+            libva::VA_FOURCC_I420 => {
                 i4xx_copy(
                     self.as_ref(),
                     buffer,
@@ -490,7 +494,7 @@
                     (true, true),
                 );
             }
-            libva::constants::VA_FOURCC_422H => {
+            libva::VA_FOURCC_422H => {
                 i4xx_copy(
                     self.as_ref(),
                     buffer,
@@ -501,7 +505,7 @@
                     (true, false),
                 );
             }
-            libva::constants::VA_FOURCC_444P => {
+            libva::VA_FOURCC_444P => {
                 i4xx_copy(
                     self.as_ref(),
                     buffer,
@@ -512,22 +516,22 @@
                     (false, false),
                 );
             }
-            libva::constants::VA_FOURCC_P010 => {
+            libva::VA_FOURCC_P010 => {
                 p01x_to_i01x(self.as_ref(), buffer, 10, width, height, pitches, offsets);
             }
-            libva::constants::VA_FOURCC_P012 => {
+            libva::VA_FOURCC_P012 => {
                 p01x_to_i01x(self.as_ref(), buffer, 12, width, height, pitches, offsets);
             }
-            libva::constants::VA_FOURCC_Y210 => {
+            libva::VA_FOURCC_Y210 => {
                 y21x_to_i21x(self.as_ref(), buffer, 10, width, height, pitches, offsets);
             }
-            libva::constants::VA_FOURCC_Y212 => {
+            libva::VA_FOURCC_Y212 => {
                 y21x_to_i21x(self.as_ref(), buffer, 12, width, height, pitches, offsets);
             }
-            libva::constants::VA_FOURCC_Y410 => {
+            libva::VA_FOURCC_Y410 => {
                 y410_to_i410(self.as_ref(), buffer, width, height, pitches, offsets);
             }
-            libva::constants::VA_FOURCC_Y412 => {
+            libva::VA_FOURCC_Y412 => {
                 y412_to_i412(self.as_ref(), buffer, width, height, pitches, offsets);
             }
             _ => {
@@ -579,7 +583,7 @@
         // Create a pool with reasonable defaults, as we don't know the format of the stream yet.
         let surface_pools = vec![VaSurfacePool::new(
             Rc::clone(&display),
-            libva::constants::VA_RT_FORMAT_YUV420,
+            libva::VA_RT_FORMAT_YUV420,
             Some(libva::UsageHint::USAGE_HINT_DECODER),
             Resolution::from((16, 16)),
         )];
diff --git a/src/backend/vaapi/encoder.rs b/src/backend/vaapi/encoder.rs
index 5558ad6..256161d 100644
--- a/src/backend/vaapi/encoder.rs
+++ b/src/backend/vaapi/encoder.rs
@@ -414,8 +414,8 @@
 pub(crate) mod tests {
     use std::borrow::Borrow;
 
-    use libva::constants::VA_FOURCC_NV12;
-    use libva::constants::VA_FOURCC_P010;
+    use libva::VA_FOURCC_NV12;
+    use libva::VA_FOURCC_P010;
 
     use super::*;
     use crate::encoder::tests::fill_test_frame_nv12;
diff --git a/src/codec/h264/parser.rs b/src/codec/h264/parser.rs
index 4557830..5cc52b5 100644
--- a/src/codec/h264/parser.rs
+++ b/src/codec/h264/parser.rs
@@ -294,6 +294,10 @@
     /// the bottom field of a coded frame specified in clause 8.2.1.
     pub delta_pic_order_cnt: [i32; 2],
 
+    /// This value is required by the V4L2 stateless decode parameters, so the
+    /// parser computes it while processing the slice header.
+    pub pic_order_cnt_bit_size: usize,
+
     /// Shall be equal to 0 for slices and slice data partitions belonging to
     /// the primary coded picture. The value of `redundant_pic_cnt shall` be
     /// greater than 0 for coded slices or coded slice data partitions of a
@@ -343,6 +347,10 @@
     /// Decoded reference picture marking parsed using 7.3.3.3
     pub dec_ref_pic_marking: RefPicMarking,
 
+    /// This value is required by the V4L2 stateless decode parameters, so the
+    /// parser computes it while processing the slice header.
+    pub dec_ref_pic_marking_bit_size: usize,
+
     /// Specifies the index for determining the initialization table used in the
     /// initialization process for context variables.
     pub cabac_init_idc: u8,
@@ -1869,7 +1877,7 @@
                     Parser::parse_scaling_list(r, &mut scaling_lists8x8[i], &mut use_default)?;
 
                     if use_default {
-                        Parser::fill_default_scaling_list_4x4(&mut scaling_lists4x4[i], i);
+                        Parser::fill_default_scaling_list_8x8(&mut scaling_lists8x8[i], i);
                     }
                 } else if !sps.seq_scaling_matrix_present_flag {
                     // Table 7-2: Fallback rule A
@@ -2407,6 +2415,7 @@
     ) -> Result<(), String> {
         let rpm = &mut header.dec_ref_pic_marking;
 
+        let num_bits_left = r.num_bits_left();
         if nalu.header.idr_pic_flag {
             rpm.no_output_of_prior_pics_flag = r.read_bit()?;
             rpm.long_term_reference_flag = r.read_bit()?;
@@ -2445,6 +2454,7 @@
                 }
             }
         }
+        header.dec_ref_pic_marking_bit_size = num_bits_left - r.num_bits_left();
 
         Ok(())
     }
@@ -2511,6 +2521,7 @@
             header.idr_pic_id = r.read_ue_max(0xffff)?;
         }
 
+        let num_bits_left = r.num_bits_left();
         if sps.pic_order_cnt_type == 0 {
             header.pic_order_cnt_lsb =
                 r.read_bits(usize::from(sps.log2_max_pic_order_cnt_lsb_minus4) + 4)?;
@@ -2526,6 +2537,7 @@
                 header.delta_pic_order_cnt[1] = r.read_se()?;
             }
         }
+        header.pic_order_cnt_bit_size = num_bits_left - r.num_bits_left();
 
         if pps.redundant_pic_cnt_present_flag {
             header.redundant_pic_cnt = r.read_ue_max(127)?;
diff --git a/src/decoder/stateless/av1/vaapi.rs b/src/decoder/stateless/av1/vaapi.rs
index 6e566bd..d91f645 100644
--- a/src/decoder/stateless/av1/vaapi.rs
+++ b/src/decoder/stateless/av1/vaapi.rs
@@ -58,9 +58,9 @@
         match self.seq_profile {
             Profile::Profile0 => {
                 if self.bit_depth == BitDepth::Depth8 {
-                    Ok(libva::constants::VA_RT_FORMAT_YUV420)
+                    Ok(libva::VA_RT_FORMAT_YUV420)
                 } else if self.bit_depth == BitDepth::Depth10 {
-                    Ok(libva::constants::VA_RT_FORMAT_YUV420_10)
+                    Ok(libva::VA_RT_FORMAT_YUV420_10)
                 } else {
                     Err(anyhow!(
                         "Unsupported bit depth {:?} for profile {:?}",
@@ -71,9 +71,9 @@
             }
             Profile::Profile1 => {
                 if self.bit_depth == BitDepth::Depth8 {
-                    Ok(libva::constants::VA_RT_FORMAT_YUV444)
+                    Ok(libva::VA_RT_FORMAT_YUV444)
                 } else if self.bit_depth == BitDepth::Depth10 {
-                    Ok(libva::constants::VA_RT_FORMAT_YUV444_10)
+                    Ok(libva::VA_RT_FORMAT_YUV444_10)
                 } else {
                     Err(anyhow!(
                         "Unsupported bit depth {:?} for profile {:?}",
@@ -93,15 +93,15 @@
         NUM_SURFACES
     }
 
-    fn coded_size(&self) -> (u32, u32) {
-        (
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((
             self.max_frame_width_minus_1 as u32 + 1,
             self.max_frame_height_minus_1 as u32 + 1,
-        )
+        ))
     }
 
     fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
-        ((0, 0), self.coded_size())
+        ((0, 0), self.coded_size().into())
     }
 }
 
@@ -423,8 +423,8 @@
             .context("Invalid matrix_coefficients")?,
         &seq_info_fields,
         current_frame,
-        libva::constants::VA_INVALID_SURFACE, /* film grain is unsupported for now */
-        vec![],                               /* anchor_frames_list */
+        libva::VA_INVALID_SURFACE, /* film grain is unsupported for now */
+        vec![],                    /* anchor_frames_list */
         u16::try_from(hdr.upscaled_width - 1).context("Invalid frame width")?,
         u16::try_from(hdr.frame_height - 1).context("Invalid frame height")?,
         0, /* output_frame_width_in_tiles_minus_1 */
diff --git a/src/decoder/stateless/h264.rs b/src/decoder/stateless/h264.rs
index c9976be..b6d7841 100644
--- a/src/decoder/stateless/h264.rs
+++ b/src/decoder/stateless/h264.rs
@@ -4,6 +4,8 @@
 
 #[cfg(any(test, fuzzing))]
 mod dummy;
+#[cfg(feature = "v4l2")]
+mod v4l2;
 #[cfg(feature = "vaapi")]
 mod vaapi;
 
diff --git a/src/decoder/stateless/h264/v4l2.rs b/src/decoder/stateless/h264/v4l2.rs
new file mode 100644
index 0000000..2bdd7fd
--- /dev/null
+++ b/src/decoder/stateless/h264/v4l2.rs
@@ -0,0 +1,180 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use v4l2r::bindings::v4l2_ctrl_h264_pps;
+use v4l2r::bindings::v4l2_ctrl_h264_sps;
+use v4l2r::controls::codec::H264Pps;
+use v4l2r::controls::codec::H264Sps;
+use v4l2r::controls::SafeExtControl;
+
+use crate::backend::v4l2::decoder::stateless::BackendHandle;
+use crate::backend::v4l2::decoder::stateless::V4l2Picture;
+use crate::backend::v4l2::decoder::stateless::V4l2StatelessDecoderBackend;
+use crate::backend::v4l2::decoder::stateless::V4l2StatelessDecoderHandle;
+use crate::backend::v4l2::decoder::V4l2StreamInfo;
+use crate::codec::h264::dpb::Dpb;
+use crate::codec::h264::dpb::DpbEntry;
+use crate::codec::h264::parser::Pps;
+use crate::codec::h264::parser::Slice;
+use crate::codec::h264::parser::SliceHeader;
+use crate::codec::h264::parser::Sps;
+use crate::codec::h264::picture::PictureData;
+use crate::decoder::stateless::h264::StatelessH264DecoderBackend;
+use crate::decoder::stateless::h264::H264;
+use crate::decoder::stateless::NewPictureError;
+use crate::decoder::stateless::NewPictureResult;
+use crate::decoder::stateless::StatelessBackendResult;
+use crate::decoder::stateless::StatelessDecoder;
+use crate::decoder::stateless::StatelessDecoderBackendPicture;
+use crate::decoder::BlockingMode;
+use crate::device::v4l2::stateless::controls::h264::V4l2CtrlH264DecodeMode;
+use crate::device::v4l2::stateless::controls::h264::V4l2CtrlH264DecodeParams;
+use crate::device::v4l2::stateless::controls::h264::V4l2CtrlH264DpbEntry;
+//TODO use crate::device::v4l2::stateless::controls::h264::V4l2CtrlH264ScalingMatrix;
+use crate::Resolution;
+
+impl V4l2StreamInfo for &Rc<Sps> {
+    fn min_num_frames(&self) -> usize {
+        self.max_dpb_frames() + 4
+    }
+
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width(), self.height()))
+    }
+
+    fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
+        let rect = self.visible_rectangle();
+
+        ((rect.min.x, rect.min.y), (rect.max.x, rect.max.y))
+    }
+}
+
+impl StatelessDecoderBackendPicture<H264> for V4l2StatelessDecoderBackend {
+    type Picture = Rc<RefCell<V4l2Picture>>;
+}
+
+impl StatelessH264DecoderBackend for V4l2StatelessDecoderBackend {
+    fn new_sequence(&mut self, sps: &Rc<Sps>) -> StatelessBackendResult<()> {
+        let mb_unit = 16;
+        let map_unit = 16;
+        let resolution = Resolution::from((
+            (sps.pic_width_in_mbs_minus1 + 1) as u32 * mb_unit,
+            (sps.pic_height_in_map_units_minus1 + 1) as u32 * map_unit,
+        ));
+        self.device.set_resolution(resolution);
+        Ok(())
+    }
+
+    fn new_picture(&mut self, timestamp: u64) -> NewPictureResult<Self::Picture> {
+        let request_buffer = match self.device.alloc_request(timestamp) {
+            Ok(buffer) => buffer,
+            _ => return Err(NewPictureError::OutOfOutputBuffers),
+        };
+        Ok(Rc::new(RefCell::new(V4l2Picture::new(request_buffer))))
+    }
+
+    fn new_field_picture(&mut self, _: u64, _: &Self::Handle) -> NewPictureResult<Self::Picture> {
+        todo!()
+    }
+
+    fn start_picture(
+        &mut self,
+        picture: &mut Self::Picture,
+        picture_data: &PictureData,
+        sps: &Sps,
+        pps: &Pps,
+        dpb: &Dpb<Self::Handle>,
+        slice_header: &SliceHeader,
+    ) -> StatelessBackendResult<()> {
+        let mut dpb_entries = Vec::<V4l2CtrlH264DpbEntry>::new();
+        let mut ref_pictures = Vec::<Rc<RefCell<V4l2Picture>>>::new();
+        for entry in dpb.entries() {
+            let ref_picture = match &entry.reference {
+                Some(handle) => handle.handle.borrow().picture.clone(),
+                None => todo!(),
+            };
+            dpb_entries.push(V4l2CtrlH264DpbEntry {
+                timestamp: ref_picture.borrow().timestamp(),
+                pic: entry.pic.clone(),
+            });
+            ref_pictures.push(ref_picture);
+        }
+        //TODO let mut h264_scaling_matrix = V4l2CtrlH264ScalingMatrix::new();
+        let mut h264_decode_params = V4l2CtrlH264DecodeParams::new();
+        let h264_sps = SafeExtControl::<H264Sps>::from(v4l2_ctrl_h264_sps::from(sps));
+        let h264_pps = SafeExtControl::<H264Pps>::from(v4l2_ctrl_h264_pps::from(pps));
+        h264_decode_params
+            .set_picture_data(picture_data)
+            .set_dpb_entries(dpb_entries)
+            .set_slice_header(slice_header);
+        let mut picture = picture.borrow_mut();
+        picture
+            .request()
+            .ioctl(h264_sps)
+            .ioctl(h264_pps)
+            //TODO.ioctl(&h264_scaling_matrix)
+            .ioctl(&h264_decode_params)
+            .ioctl(V4l2CtrlH264DecodeMode::FrameBased);
+        picture.set_ref_pictures(ref_pictures);
+        ////////////////////////////////////////////////////////////////////////
+        // DEBUG
+        ////////////////////////////////////////////////////////////////////////
+        {
+            let mut dpb_timestamps = Vec::<u64>::new();
+            for entry in dpb.entries() {
+                match &entry.reference {
+                    Some(handle) => {
+                        dpb_timestamps.push(handle.handle.borrow().picture.borrow().timestamp())
+                    }
+                    None => todo!(),
+                };
+            }
+            log::debug!(
+                "{:<20} {:?} {:?}\n",
+                "start_picture",
+                picture.timestamp(),
+                dpb_timestamps
+            );
+        }
+        ////////////////////////////////////////////////////////////////////////
+        Ok(())
+    }
+
+    fn decode_slice(
+        &mut self,
+        picture: &mut Self::Picture,
+        slice: &Slice,
+        _: &Sps,
+        _: &Pps,
+        _: &[&DpbEntry<Self::Handle>],
+        _: &[&DpbEntry<Self::Handle>],
+    ) -> StatelessBackendResult<()> {
+        picture.borrow_mut().request().write(slice.nalu.as_ref());
+        Ok(())
+    }
+
+    fn submit_picture(&mut self, picture: Self::Picture) -> StatelessBackendResult<Self::Handle> {
+        let handle = Rc::new(RefCell::new(BackendHandle {
+            picture: picture.clone(),
+        }));
+        log::debug!(
+            "{:<20} {:?}\n",
+            "submit_picture",
+            picture.borrow().timestamp()
+        );
+        picture.borrow_mut().request().submit();
+        Ok(V4l2StatelessDecoderHandle { handle })
+    }
+}
+
+impl StatelessDecoder<H264, V4l2StatelessDecoderBackend> {
+    /// Creates a new instance of the decoder using the v4l2 backend.
+    pub fn new_v4l2(blocking_mode: BlockingMode) -> Self {
+        Self::new(V4l2StatelessDecoderBackend::new(), blocking_mode)
+            .expect("Failed to create v4l2 stateless decoder backend")
+    }
+}
diff --git a/src/decoder/stateless/h264/vaapi.rs b/src/decoder/stateless/h264/vaapi.rs
index bdbac4d..4f5cee6 100644
--- a/src/decoder/stateless/h264/vaapi.rs
+++ b/src/decoder/stateless/h264/vaapi.rs
@@ -42,6 +42,7 @@
 use crate::decoder::stateless::StatelessDecoder;
 use crate::decoder::stateless::StatelessDecoderBackendPicture;
 use crate::decoder::BlockingMode;
+use crate::Resolution;
 
 impl VaStreamInfo for &Rc<Sps> {
     fn va_profile(&self) -> anyhow::Result<i32> {
@@ -81,15 +82,15 @@
         let chroma_format_idc = self.chroma_format_idc;
 
         match (bit_depth_luma, chroma_format_idc) {
-            (8, 0) | (8, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420),
-            (8, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422),
-            (8, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444),
-            (10, 0) | (10, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420_10),
-            (10, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422_10),
-            (10, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444_10),
-            (12, 0) | (12, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420_12),
-            (12, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422_12),
-            (12, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444_12),
+            (8, 0) | (8, 1) => Ok(libva::VA_RT_FORMAT_YUV420),
+            (8, 2) => Ok(libva::VA_RT_FORMAT_YUV422),
+            (8, 3) => Ok(libva::VA_RT_FORMAT_YUV444),
+            (10, 0) | (10, 1) => Ok(libva::VA_RT_FORMAT_YUV420_10),
+            (10, 2) => Ok(libva::VA_RT_FORMAT_YUV422_10),
+            (10, 3) => Ok(libva::VA_RT_FORMAT_YUV444_10),
+            (12, 0) | (12, 1) => Ok(libva::VA_RT_FORMAT_YUV420_12),
+            (12, 2) => Ok(libva::VA_RT_FORMAT_YUV422_12),
+            (12, 3) => Ok(libva::VA_RT_FORMAT_YUV444_12),
             _ => Err(anyhow!(
                 "unsupported bit depth/chroma format pair {}, {}",
                 bit_depth_luma,
@@ -102,8 +103,8 @@
         self.max_dpb_frames() + 4
     }
 
-    fn coded_size(&self) -> (u32, u32) {
-        (self.width(), self.height())
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width(), self.height()))
     }
 
     fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
@@ -121,11 +122,11 @@
 ) -> libva::PictureH264 {
     let mut flags = 0;
     let frame_idx = if matches!(h264_pic.reference(), Reference::LongTerm) {
-        flags |= libva::constants::VA_PICTURE_H264_LONG_TERM_REFERENCE;
+        flags |= libva::VA_PICTURE_H264_LONG_TERM_REFERENCE;
         h264_pic.long_term_frame_idx
     } else {
         if matches!(h264_pic.reference(), Reference::ShortTerm { .. }) {
-            flags |= libva::constants::VA_PICTURE_H264_SHORT_TERM_REFERENCE;
+            flags |= libva::VA_PICTURE_H264_SHORT_TERM_REFERENCE;
         }
 
         h264_pic.frame_num
@@ -145,7 +146,7 @@
                     bottom_field_order_cnt = other_field.borrow().bottom_field_order_cnt
                 }
                 (_, _) => {
-                    flags |= libva::constants::VA_PICTURE_H264_TOP_FIELD;
+                    flags |= libva::VA_PICTURE_H264_TOP_FIELD;
                     bottom_field_order_cnt = 0;
                 }
             }
@@ -158,7 +159,7 @@
                     top_field_order_cnt = other_field.borrow().top_field_order_cnt
                 }
                 (_, _) => {
-                    flags |= libva::constants::VA_PICTURE_H264_BOTTOM_FIELD;
+                    flags |= libva::VA_PICTURE_H264_BOTTOM_FIELD;
                     top_field_order_cnt = 0;
                 }
             }
@@ -180,9 +181,9 @@
 /// array slots there is no data to fill them with.
 fn build_invalid_va_h264_pic() -> libva::PictureH264 {
     libva::PictureH264::new(
-        libva::constants::VA_INVALID_ID,
+        libva::VA_INVALID_ID,
         0,
-        libva::constants::VA_PICTURE_H264_INVALID,
+        libva::VA_PICTURE_H264_INVALID,
         0,
         0,
     )
@@ -428,7 +429,7 @@
     let slice_param = libva::SliceParameterBufferH264::new(
         slice_size as u32,
         0,
-        libva::constants::VA_SLICE_DATA_FLAG_ALL,
+        libva::VA_SLICE_DATA_FLAG_ALL,
         hdr.header_bit_size as u16,
         hdr.first_mb_in_slice as u16,
         hdr.slice_type as u8,
diff --git a/src/decoder/stateless/h265/vaapi.rs b/src/decoder/stateless/h265/vaapi.rs
index b02d87b..a899a37 100644
--- a/src/decoder/stateless/h265/vaapi.rs
+++ b/src/decoder/stateless/h265/vaapi.rs
@@ -109,15 +109,15 @@
         let chroma_format_idc = self.chroma_format_idc;
 
         match (bit_depth, chroma_format_idc) {
-            (8, 0) | (8, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420),
-            (8, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422),
-            (8, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444),
-            (9, 0) | (9, 1) | (10, 0) | (10, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420_10),
-            (9, 2) | (10, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422_10),
-            (9, 3) | (10, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444_10),
-            (11, 0) | (11, 1) | (12, 0) | (12, 1) => Ok(libva::constants::VA_RT_FORMAT_YUV420_12),
-            (11, 2) | (12, 2) => Ok(libva::constants::VA_RT_FORMAT_YUV422_12),
-            (11, 3) | (12, 3) => Ok(libva::constants::VA_RT_FORMAT_YUV444_12),
+            (8, 0) | (8, 1) => Ok(libva::VA_RT_FORMAT_YUV420),
+            (8, 2) => Ok(libva::VA_RT_FORMAT_YUV422),
+            (8, 3) => Ok(libva::VA_RT_FORMAT_YUV444),
+            (9, 0) | (9, 1) | (10, 0) | (10, 1) => Ok(libva::VA_RT_FORMAT_YUV420_10),
+            (9, 2) | (10, 2) => Ok(libva::VA_RT_FORMAT_YUV422_10),
+            (9, 3) | (10, 3) => Ok(libva::VA_RT_FORMAT_YUV444_10),
+            (11, 0) | (11, 1) | (12, 0) | (12, 1) => Ok(libva::VA_RT_FORMAT_YUV420_12),
+            (11, 2) | (12, 2) => Ok(libva::VA_RT_FORMAT_YUV422_12),
+            (11, 3) | (12, 3) => Ok(libva::VA_RT_FORMAT_YUV444_12),
             _ => Err(anyhow!(
                 "unsupported bit depth/chroma format pair {}, {}",
                 bit_depth,
@@ -130,8 +130,8 @@
         self.max_dpb_size() + 4
     }
 
-    fn coded_size(&self) -> (u32, u32) {
-        (self.width().into(), self.height().into())
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width().into(), self.height().into()))
     }
 
     fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
@@ -154,7 +154,7 @@
 
         if let Some(ref_pic_list_entry) = ref_pic_list_entry {
             for (va_ref_idx, va_ref) in va_references.iter().enumerate() {
-                if va_ref.picture_id() == libva::constants::VA_INVALID_ID {
+                if va_ref.picture_id() == libva::VA_INVALID_ID {
                     break;
                 }
 
@@ -183,21 +183,21 @@
         .flatten()
         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
     {
-        libva::constants::VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE
+        libva::VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE
     } else if rps
         .ref_pic_set_st_curr_after
         .iter()
         .flatten()
         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
     {
-        libva::constants::VA_PICTURE_HEVC_RPS_ST_CURR_AFTER
+        libva::VA_PICTURE_HEVC_RPS_ST_CURR_AFTER
     } else if rps
         .ref_pic_set_lt_curr
         .iter()
         .flatten()
         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
     {
-        libva::constants::VA_PICTURE_HEVC_RPS_LT_CURR
+        libva::VA_PICTURE_HEVC_RPS_LT_CURR
     } else {
         0
     }
@@ -206,11 +206,7 @@
 /// Builds an invalid VaPictureHEVC. These pictures are used to fill empty
 /// array slots there is no data to fill them with.
 fn build_invalid_va_hevc_pic() -> libva::PictureHEVC {
-    libva::PictureHEVC::new(
-        libva::constants::VA_INVALID_ID,
-        0,
-        libva::constants::VA_PICTURE_HEVC_INVALID,
-    )
+    libva::PictureHEVC::new(libva::VA_INVALID_ID, 0, libva::VA_PICTURE_HEVC_INVALID)
 }
 
 fn fill_va_hevc_pic<M: SurfaceMemoryDescriptor>(
@@ -221,7 +217,7 @@
     let mut flags = 0;
 
     if matches!(hevc_pic.reference(), Reference::LongTerm) {
-        flags |= libva::constants::VA_PICTURE_HEVC_LONG_TERM_REFERENCE;
+        flags |= libva::VA_PICTURE_HEVC_LONG_TERM_REFERENCE;
     }
 
     flags |= va_rps_flag(hevc_pic, rps);
@@ -775,7 +771,7 @@
         let slice_param = SliceParameterBufferHEVC::new(
             slice.nalu.size as u32,
             0,
-            libva::constants::VA_SLICE_DATA_FLAG_ALL,
+            libva::VA_SLICE_DATA_FLAG_ALL,
             (hdr.header_bit_size / 8) as _,
             hdr.segment_address,
             [ref_pic_list0, ref_pic_list1],
diff --git a/src/decoder/stateless/vp8.rs b/src/decoder/stateless/vp8.rs
index 2a2998c..1a88ea7 100644
--- a/src/decoder/stateless/vp8.rs
+++ b/src/decoder/stateless/vp8.rs
@@ -4,6 +4,8 @@
 
 #[cfg(any(test, fuzzing))]
 mod dummy;
+#[cfg(feature = "v4l2")]
+mod v4l2;
 #[cfg(feature = "vaapi")]
 mod vaapi;
 
diff --git a/src/decoder/stateless/vp8/v4l2.rs b/src/decoder/stateless/vp8/v4l2.rs
new file mode 100644
index 0000000..6484c1d
--- /dev/null
+++ b/src/decoder/stateless/vp8/v4l2.rs
@@ -0,0 +1,107 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use v4l2r::bindings::v4l2_ctrl_vp8_frame;
+use v4l2r::controls::SafeExtControl;
+
+use crate::backend::v4l2::decoder::stateless::BackendHandle;
+use crate::backend::v4l2::decoder::stateless::V4l2Picture;
+use crate::backend::v4l2::decoder::stateless::V4l2StatelessDecoderBackend;
+use crate::backend::v4l2::decoder::stateless::V4l2StatelessDecoderHandle;
+use crate::backend::v4l2::decoder::V4l2StreamInfo;
+
+use crate::codec::vp8::parser::Header;
+use crate::codec::vp8::parser::MbLfAdjustments;
+use crate::codec::vp8::parser::Segmentation;
+
+use crate::decoder::stateless::vp8::StatelessVp8DecoderBackend;
+use crate::decoder::stateless::vp8::Vp8;
+
+use crate::decoder::stateless::NewPictureError;
+use crate::decoder::stateless::NewPictureResult;
+use crate::decoder::stateless::StatelessBackendResult;
+use crate::decoder::stateless::StatelessDecoder;
+use crate::decoder::stateless::StatelessDecoderBackendPicture;
+use crate::decoder::BlockingMode;
+
+use crate::device::v4l2::stateless::controls::vp8::V4l2CtrlVp8FrameParams;
+
+use crate::Resolution;
+
+/// The number of frames to allocate for this codec. Same as GStreamer's vavp8dec.
+const NUM_FRAMES: usize = 7;
+
+impl V4l2StreamInfo for &Header {
+    fn min_num_frames(&self) -> usize {
+        NUM_FRAMES
+    }
+
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width as u32, self.height as u32))
+    }
+
+    fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
+        ((0, 0), self.coded_size().into())
+    }
+}
+
+impl StatelessDecoderBackendPicture<Vp8> for V4l2StatelessDecoderBackend {
+    type Picture = Rc<RefCell<V4l2Picture>>;
+}
+
+impl StatelessVp8DecoderBackend for V4l2StatelessDecoderBackend {
+    fn new_sequence(&mut self, _: &Header) -> StatelessBackendResult<()> {
+        Ok(())
+    }
+
+    fn new_picture(&mut self, timestamp: u64) -> NewPictureResult<Self::Picture> {
+        let request_buffer = match self.device.alloc_request(timestamp) {
+            Ok(buffer) => buffer,
+            _ => return Err(NewPictureError::OutOfOutputBuffers),
+        };
+        Ok(Rc::new(RefCell::new(V4l2Picture::new(request_buffer))))
+    }
+
+    fn submit_picture(
+        &mut self,
+        picture: Self::Picture,
+        hdr: &Header,
+        _: &Option<Self::Handle>,
+        _: &Option<Self::Handle>,
+        _: &Option<Self::Handle>,
+        _: &[u8],
+        segmentation: &Segmentation,
+        mb_lf_adjust: &MbLfAdjustments,
+    ) -> StatelessBackendResult<Self::Handle> {
+        let mut vp8_frame_params = V4l2CtrlVp8FrameParams::new();
+
+        vp8_frame_params
+            .set_loop_filter_params(hdr, mb_lf_adjust)
+            .set_quantization_params(hdr)
+            .set_segmentation_params(segmentation)
+            .set_entropy_params(hdr);
+
+        let handle = Rc::new(RefCell::new(BackendHandle {
+            picture: picture.clone(),
+        }));
+        println!(
+            "{:<20} {:?}\n",
+            "submit_picture",
+            picture.borrow().timestamp()
+        );
+        picture.borrow_mut().request().submit();
+        Ok(V4l2StatelessDecoderHandle { handle })
+    }
+}
+
+impl StatelessDecoder<Vp8, V4l2StatelessDecoderBackend> {
+    /// Creates a new instance of the decoder using the v4l2 backend.
+    pub fn new_v4l2(blocking_mode: BlockingMode) -> Self {
+        Self::new(V4l2StatelessDecoderBackend::new(), blocking_mode)
+            .expect("Failed to create v4l2 stateless decoder backend")
+    }
+}
diff --git a/src/decoder/stateless/vp8/vaapi.rs b/src/decoder/stateless/vp8/vaapi.rs
index e4aa389..3e59a44 100644
--- a/src/decoder/stateless/vp8/vaapi.rs
+++ b/src/decoder/stateless/vp8/vaapi.rs
@@ -43,19 +43,19 @@
     }
 
     fn rt_format(&self) -> anyhow::Result<u32> {
-        Ok(libva::constants::VA_RT_FORMAT_YUV420)
+        Ok(libva::VA_RT_FORMAT_YUV420)
     }
 
     fn min_num_surfaces(&self) -> usize {
         NUM_SURFACES
     }
 
-    fn coded_size(&self) -> (u32, u32) {
-        (self.width as u32, self.height as u32)
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width as u32, self.height as u32))
     }
 
     fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
-        ((0, 0), self.coded_size())
+        ((0, 0), self.coded_size().into())
     }
 }
 
@@ -409,9 +409,9 @@
             &resolution,
             parser.segmentation(),
             parser.mb_lf_adjust(),
-            libva::constants::VA_INVALID_SURFACE,
-            libva::constants::VA_INVALID_SURFACE,
-            libva::constants::VA_INVALID_SURFACE,
+            libva::VA_INVALID_SURFACE,
+            libva::VA_INVALID_SURFACE,
+            libva::VA_INVALID_SURFACE,
         )
         .unwrap();
         let pic_param = match pic_param {
@@ -458,21 +458,15 @@
 
         assert_eq!(pic_param.inner().frame_width, 320);
         assert_eq!(pic_param.inner().frame_height, 240);
-        assert_eq!(
-            pic_param.inner().last_ref_frame,
-            libva::constants::VA_INVALID_SURFACE
-        );
+        assert_eq!(pic_param.inner().last_ref_frame, libva::VA_INVALID_SURFACE);
         assert_eq!(
             pic_param.inner().golden_ref_frame,
-            libva::constants::VA_INVALID_SURFACE
+            libva::VA_INVALID_SURFACE
         );
-        assert_eq!(
-            pic_param.inner().alt_ref_frame,
-            libva::constants::VA_INVALID_SURFACE
-        );
+        assert_eq!(pic_param.inner().alt_ref_frame, libva::VA_INVALID_SURFACE);
         assert_eq!(
             pic_param.inner().out_of_loop_frame,
-            libva::constants::VA_INVALID_SURFACE
+            libva::VA_INVALID_SURFACE
         );
 
         // Safe because this bitfield is initialized by the decoder.
@@ -593,7 +587,7 @@
         assert_eq!(pic_param.inner().alt_ref_frame, 0);
         assert_eq!(
             pic_param.inner().out_of_loop_frame,
-            libva::constants::VA_INVALID_SURFACE
+            libva::VA_INVALID_SURFACE
         );
 
         // Safe because this bitfield is initialized by the decoder.
@@ -707,7 +701,7 @@
         assert_eq!(pic_param.inner().alt_ref_frame, 0);
         assert_eq!(
             pic_param.inner().out_of_loop_frame,
-            libva::constants::VA_INVALID_SURFACE
+            libva::VA_INVALID_SURFACE
         );
 
         // Safe because this bitfield is initialized by the decoder.
diff --git a/src/decoder/stateless/vp9/vaapi.rs b/src/decoder/stateless/vp9/vaapi.rs
index 1212ca7..bd94079 100644
--- a/src/decoder/stateless/vp9/vaapi.rs
+++ b/src/decoder/stateless/vp9/vaapi.rs
@@ -34,6 +34,7 @@
 use crate::decoder::stateless::StatelessDecoder;
 use crate::decoder::stateless::StatelessDecoderBackendPicture;
 use crate::decoder::BlockingMode;
+use crate::Resolution;
 
 /// The number of surfaces to allocate for this codec.
 const NUM_SURFACES: usize = 12;
@@ -46,12 +47,12 @@
     subsampling_y: bool,
 ) -> anyhow::Result<u32> {
     match profile {
-        Profile::Profile0 => Ok(libva::constants::VA_RT_FORMAT_YUV420),
+        Profile::Profile0 => Ok(libva::VA_RT_FORMAT_YUV420),
         Profile::Profile1 => {
             if subsampling_x && !subsampling_y {
-                Ok(libva::constants::VA_RT_FORMAT_YUV422)
+                Ok(libva::VA_RT_FORMAT_YUV422)
             } else if !subsampling_x && !subsampling_y {
-                Ok(libva::constants::VA_RT_FORMAT_YUV444)
+                Ok(libva::VA_RT_FORMAT_YUV444)
             } else {
                 Err(anyhow!(
                     "Unsupported subsampling for profile 1: X: {:?} Y: {:?}",
@@ -65,8 +66,8 @@
                 "Unsupported bit depth for profile 2: {:?}",
                 bit_depth
             )),
-            BitDepth::Depth10 => Ok(libva::constants::VA_RT_FORMAT_YUV420_10),
-            BitDepth::Depth12 => Ok(libva::constants::VA_RT_FORMAT_YUV420_12),
+            BitDepth::Depth10 => Ok(libva::VA_RT_FORMAT_YUV420_10),
+            BitDepth::Depth12 => Ok(libva::VA_RT_FORMAT_YUV420_12),
         },
         Profile::Profile3 => {
             if subsampling_x && !subsampling_y {
@@ -77,8 +78,8 @@
                             subsampling_y,
                             bit_depth
                         )),
-                        BitDepth::Depth10 => Ok(libva::constants::VA_RT_FORMAT_YUV422_10),
-                        BitDepth::Depth12 => Ok(libva::constants::VA_RT_FORMAT_YUV422_12),
+                        BitDepth::Depth10 => Ok(libva::VA_RT_FORMAT_YUV422_10),
+                        BitDepth::Depth12 => Ok(libva::VA_RT_FORMAT_YUV422_12),
                     }
             } else if !subsampling_x && !subsampling_y {
                 match bit_depth {
@@ -88,8 +89,8 @@
                             subsampling_y,
                             bit_depth
                         )),
-                        BitDepth::Depth10 => Ok(libva::constants::VA_RT_FORMAT_YUV444_10),
-                        BitDepth::Depth12 => Ok(libva::constants::VA_RT_FORMAT_YUV444_12),
+                        BitDepth::Depth10 => Ok(libva::VA_RT_FORMAT_YUV444_10),
+                        BitDepth::Depth12 => Ok(libva::VA_RT_FORMAT_YUV444_12),
                     }
             } else {
                 Err(anyhow!(
@@ -126,12 +127,12 @@
         NUM_SURFACES
     }
 
-    fn coded_size(&self) -> (u32, u32) {
-        (self.width, self.height)
+    fn coded_size(&self) -> Resolution {
+        Resolution::from((self.width, self.height))
     }
 
     fn visible_rect(&self) -> ((u32, u32), (u32, u32)) {
-        ((0, 0), self.coded_size())
+        ((0, 0), self.coded_size().into())
     }
 }
 
@@ -232,7 +233,7 @@
         libva::SliceParameter::VP9(libva::SliceParameterBufferVP9::new(
             slice_size as u32,
             0,
-            libva::constants::VA_SLICE_DATA_FLAG_ALL,
+            libva::VA_SLICE_DATA_FLAG_ALL,
             seg_params,
         )),
     ))
@@ -504,11 +505,8 @@
 
         assert_eq!(frame.as_ref().len(), 10674);
 
-        let pic_param = build_pic_param(
-            &frame.header,
-            [libva::constants::VA_INVALID_SURFACE; NUM_REF_FRAMES],
-        )
-        .unwrap();
+        let pic_param =
+            build_pic_param(&frame.header, [libva::VA_INVALID_SURFACE; NUM_REF_FRAMES]).unwrap();
         let pic_param = match pic_param {
             BufferType::PictureParameter(PictureParameter::VP9(pic_param)) => pic_param,
             _ => panic!(),
@@ -525,7 +523,7 @@
         assert_eq!(pic_param.inner().frame_height, 240);
         assert_eq!(
             pic_param.inner().reference_frames,
-            [libva::constants::VA_INVALID_SURFACE; NUM_REF_FRAMES]
+            [libva::VA_INVALID_SURFACE; NUM_REF_FRAMES]
         );
 
         // Safe because this bitfield is initialized by the decoder.
@@ -552,7 +550,7 @@
         assert_eq!(slice_param.inner().slice_data_offset, 0);
         assert_eq!(
             slice_param.inner().slice_data_flag,
-            libva::constants::VA_SLICE_DATA_FLAG_ALL
+            libva::VA_SLICE_DATA_FLAG_ALL
         );
 
         for seg_param in &slice_param.inner().seg_param {
@@ -613,7 +611,7 @@
         assert_eq!(slice_param.inner().slice_data_offset, 0);
         assert_eq!(
             slice_param.inner().slice_data_flag,
-            libva::constants::VA_SLICE_DATA_FLAG_ALL
+            libva::VA_SLICE_DATA_FLAG_ALL
         );
 
         for seg_param in &slice_param.inner().seg_param {
@@ -675,7 +673,7 @@
         assert_eq!(slice_param.inner().slice_data_offset, 0);
         assert_eq!(
             slice_param.inner().slice_data_flag,
-            libva::constants::VA_SLICE_DATA_FLAG_ALL
+            libva::VA_SLICE_DATA_FLAG_ALL
         );
 
         for seg_param in &slice_param.inner().seg_param {
diff --git a/src/device.rs b/src/device.rs
new file mode 100644
index 0000000..e82a330
--- /dev/null
+++ b/src/device.rs
@@ -0,0 +1,6 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#[cfg(feature = "v4l2")]
+pub mod v4l2;
diff --git a/src/device/v4l2.rs b/src/device/v4l2.rs
new file mode 100644
index 0000000..9ce03c2
--- /dev/null
+++ b/src/device/v4l2.rs
@@ -0,0 +1,5 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+pub mod stateless;
diff --git a/src/device/v4l2/stateless.rs b/src/device/v4l2/stateless.rs
new file mode 100644
index 0000000..0a6a11b
--- /dev/null
+++ b/src/device/v4l2/stateless.rs
@@ -0,0 +1,8 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+pub mod controls;
+pub mod device;
+pub mod queue;
+pub mod request;
diff --git a/src/device/v4l2/stateless/controls.rs b/src/device/v4l2/stateless/controls.rs
new file mode 100644
index 0000000..2aec394
--- /dev/null
+++ b/src/device/v4l2/stateless/controls.rs
@@ -0,0 +1,6 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+pub mod h264;
+pub mod vp8;
diff --git a/src/device/v4l2/stateless/controls/h264.rs b/src/device/v4l2/stateless/controls/h264.rs
new file mode 100644
index 0000000..f29f7ab
--- /dev/null
+++ b/src/device/v4l2/stateless/controls/h264.rs
@@ -0,0 +1,282 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use crate::codec::h264::parser::Pps;
+use crate::codec::h264::parser::SliceHeader;
+use crate::codec::h264::parser::Sps;
+use crate::codec::h264::picture::Field;
+use crate::codec::h264::picture::IsIdr;
+use crate::codec::h264::picture::PictureData;
+use crate::codec::h264::picture::RcPictureData;
+use crate::codec::h264::picture::Reference;
+
+use v4l2r::bindings::v4l2_ctrl_h264_decode_params;
+use v4l2r::bindings::v4l2_ctrl_h264_pps;
+use v4l2r::bindings::v4l2_ctrl_h264_scaling_matrix;
+use v4l2r::bindings::v4l2_ctrl_h264_sps;
+use v4l2r::bindings::v4l2_h264_dpb_entry;
+use v4l2r::bindings::v4l2_stateless_h264_decode_mode_V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED as V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED;
+use v4l2r::bindings::v4l2_stateless_h264_decode_mode_V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED as V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED;
+use v4l2r::bindings::V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD;
+use v4l2r::bindings::V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC;
+use v4l2r::bindings::V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;
+use v4l2r::bindings::V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
+use v4l2r::bindings::V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
+use v4l2r::bindings::V4L2_H264_DPB_ENTRY_FLAG_VALID;
+use v4l2r::bindings::V4L2_H264_FRAME_REF;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE;
+use v4l2r::bindings::V4L2_H264_PPS_FLAG_WEIGHTED_PRED;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET0_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET1_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET2_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET3_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET4_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_CONSTRAINT_SET5_FLAG;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS;
+use v4l2r::bindings::V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
+use v4l2r::controls::codec::H264DecodeMode;
+use v4l2r::controls::codec::H264DecodeParams;
+use v4l2r::controls::codec::H264ScalingMatrix;
+use v4l2r::controls::SafeExtControl;
+
+impl From<&Sps> for v4l2_ctrl_h264_sps {
+    fn from(sps: &Sps) -> Self {
+        let mut constraint_set_flags: u32 = 0;
+        if sps.constraint_set0_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET0_FLAG;
+        }
+        if sps.constraint_set1_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET1_FLAG;
+        }
+        if sps.constraint_set2_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET2_FLAG;
+        }
+        if sps.constraint_set3_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET3_FLAG;
+        }
+        if sps.constraint_set4_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET4_FLAG;
+        }
+        if sps.constraint_set5_flag {
+            constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET5_FLAG;
+        }
+        let mut flags: u32 = 0;
+        if sps.separate_colour_plane_flag {
+            flags |= V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
+        }
+        if sps.qpprime_y_zero_transform_bypass_flag {
+            flags |= V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS;
+        }
+        if sps.delta_pic_order_always_zero_flag {
+            flags |= V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO;
+        }
+        if sps.gaps_in_frame_num_value_allowed_flag {
+            flags |= V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED;
+        }
+        if sps.frame_mbs_only_flag {
+            flags |= V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY;
+        }
+        if sps.mb_adaptive_frame_field_flag {
+            flags |= V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
+        }
+        if sps.direct_8x8_inference_flag {
+            flags |= V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE;
+        }
+        Self {
+            profile_idc: sps.profile_idc,
+            constraint_set_flags: constraint_set_flags as u8,
+            level_idc: sps.level_idc as u8,
+            seq_parameter_set_id: sps.seq_parameter_set_id,
+            chroma_format_idc: sps.chroma_format_idc,
+            bit_depth_luma_minus8: sps.bit_depth_luma_minus8,
+            bit_depth_chroma_minus8: sps.bit_depth_chroma_minus8,
+            log2_max_frame_num_minus4: sps.log2_max_frame_num_minus4,
+            pic_order_cnt_type: sps.pic_order_cnt_type,
+            log2_max_pic_order_cnt_lsb_minus4: sps.log2_max_pic_order_cnt_lsb_minus4,
+            max_num_ref_frames: sps.max_num_ref_frames as u8,
+            num_ref_frames_in_pic_order_cnt_cycle: sps.num_ref_frames_in_pic_order_cnt_cycle,
+            offset_for_ref_frame: sps.offset_for_ref_frame,
+            offset_for_non_ref_pic: sps.offset_for_non_ref_pic,
+            offset_for_top_to_bottom_field: sps.offset_for_top_to_bottom_field,
+            pic_width_in_mbs_minus1: sps.pic_width_in_mbs_minus1 as u16,
+            pic_height_in_map_units_minus1: sps.pic_height_in_map_units_minus1 as u16,
+            flags,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<&Pps> for v4l2_ctrl_h264_pps {
+    fn from(pps: &Pps) -> Self {
+        let mut flags: u32 = 0;
+        if pps.entropy_coding_mode_flag {
+            flags |= V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE;
+        }
+        if pps.bottom_field_pic_order_in_frame_present_flag {
+            flags |= V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT;
+        }
+        if pps.weighted_pred_flag {
+            flags |= V4L2_H264_PPS_FLAG_WEIGHTED_PRED;
+        }
+        if pps.deblocking_filter_control_present_flag {
+            flags |= V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT;
+        }
+        if pps.constrained_intra_pred_flag {
+            flags |= V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED;
+        }
+        if pps.redundant_pic_cnt_present_flag {
+            flags |= V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT;
+        }
+        if pps.transform_8x8_mode_flag {
+            flags |= V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE;
+        }
+        if pps.pic_scaling_matrix_present_flag {
+            flags |= V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT;
+        }
+        Self {
+            pic_parameter_set_id: pps.pic_parameter_set_id,
+            seq_parameter_set_id: pps.seq_parameter_set_id,
+            num_slice_groups_minus1: pps.num_slice_groups_minus1 as u8,
+            num_ref_idx_l0_default_active_minus1: pps.num_ref_idx_l0_default_active_minus1,
+            num_ref_idx_l1_default_active_minus1: pps.num_ref_idx_l1_default_active_minus1,
+            weighted_bipred_idc: pps.weighted_bipred_idc,
+            pic_init_qp_minus26: pps.pic_init_qp_minus26,
+            pic_init_qs_minus26: pps.pic_init_qs_minus26,
+            chroma_qp_index_offset: pps.chroma_qp_index_offset,
+            second_chroma_qp_index_offset: pps.second_chroma_qp_index_offset,
+            flags: flags as u16,
+            ..Default::default()
+        }
+    }
+}
+
+pub struct V4l2CtrlH264DpbEntry {
+    pub timestamp: u64,
+    pub pic: RcPictureData,
+}
+
+impl From<&V4l2CtrlH264DpbEntry> for v4l2_h264_dpb_entry {
+    fn from(dpb: &V4l2CtrlH264DpbEntry) -> Self {
+        let pic: &PictureData = &dpb.pic.borrow();
+        // TODO     DCHECK_EQ(pic->field, H264Picture::FIELD_NONE)
+        // TODO         << "Interlacing not supported";
+
+        let (frame_num, pic_num): (u16, u32) = match pic.reference() {
+            Reference::LongTerm => (pic.long_term_pic_num as u16, pic.long_term_frame_idx),
+            _ => (pic.frame_num as u16, pic.pic_num as u32),
+        };
+
+        let mut flags: u32 = V4L2_H264_DPB_ENTRY_FLAG_VALID;
+        if pic.nal_ref_idc != 0 {
+            flags |= V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
+        }
+        if matches!(pic.reference(), Reference::LongTerm) {
+            flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
+        }
+
+        Self {
+            reference_ts: dpb.timestamp * 1000, // usec to nsec
+            frame_num,
+            pic_num,
+            fields: V4L2_H264_FRAME_REF as u8,
+            top_field_order_cnt: pic.top_field_order_cnt,
+            bottom_field_order_cnt: pic.bottom_field_order_cnt,
+            flags,
+            ..Default::default()
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct V4l2CtrlH264ScalingMatrix {
+    handle: v4l2_ctrl_h264_scaling_matrix,
+}
+
+impl V4l2CtrlH264ScalingMatrix {
+    pub fn new() -> Self {
+        Default::default()
+    }
+    pub fn set(&mut self) -> &mut Self {
+        todo!()
+    }
+}
+
+impl From<&V4l2CtrlH264ScalingMatrix> for SafeExtControl<H264ScalingMatrix> {
+    fn from(scaling_matrix: &V4l2CtrlH264ScalingMatrix) -> Self {
+        SafeExtControl::<H264ScalingMatrix>::from(scaling_matrix.handle)
+    }
+}
+
+#[derive(Default)]
+pub struct V4l2CtrlH264DecodeParams {
+    handle: v4l2_ctrl_h264_decode_params,
+}
+
+impl V4l2CtrlH264DecodeParams {
+    pub fn new() -> Self {
+        Default::default()
+    }
+    pub fn set_picture_data(&mut self, pic: &PictureData) -> &mut Self {
+        self.handle.top_field_order_cnt = pic.top_field_order_cnt;
+        self.handle.bottom_field_order_cnt = pic.bottom_field_order_cnt;
+        self.handle.flags |= match pic.field {
+            Field::Top => V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC,
+            Field::Bottom => {
+                V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC | V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD
+            }
+            _ => 0,
+        };
+        self.handle.flags |= match pic.is_idr {
+            IsIdr::Yes { idr_pic_id: _ } => V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC,
+            _ => 0,
+        };
+        self.handle.nal_ref_idc = pic.nal_ref_idc as u16;
+        self
+    }
+    pub fn set_dpb_entries(&mut self, dpb: Vec<V4l2CtrlH264DpbEntry>) -> &mut Self {
+        for i in 0..dpb.len() {
+            self.handle.dpb[i] = v4l2_h264_dpb_entry::from(&dpb[i]);
+        }
+        self
+    }
+    pub fn set_slice_header(&mut self, slice_header: &SliceHeader) -> &mut Self {
+        self.handle.frame_num = slice_header.frame_num;
+        self.handle.idr_pic_id = slice_header.idr_pic_id;
+        self.handle.pic_order_cnt_lsb = slice_header.pic_order_cnt_lsb;
+        self.handle.delta_pic_order_cnt_bottom = slice_header.delta_pic_order_cnt_bottom;
+        self.handle.delta_pic_order_cnt0 = slice_header.delta_pic_order_cnt[0];
+        self.handle.delta_pic_order_cnt1 = slice_header.delta_pic_order_cnt[1];
+        self.handle.dec_ref_pic_marking_bit_size = slice_header.dec_ref_pic_marking_bit_size as u32;
+        self.handle.pic_order_cnt_bit_size = slice_header.pic_order_cnt_bit_size as u32;
+        self
+    }
+}
+
+impl From<&V4l2CtrlH264DecodeParams> for SafeExtControl<H264DecodeParams> {
+    fn from(decode_params: &V4l2CtrlH264DecodeParams) -> Self {
+        SafeExtControl::<H264DecodeParams>::from(decode_params.handle)
+    }
+}
+
+pub enum V4l2CtrlH264DecodeMode {
+    SliceBased = V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED as isize,
+    FrameBased = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED as isize,
+}
+
+impl From<V4l2CtrlH264DecodeMode> for SafeExtControl<H264DecodeMode> {
+    fn from(decode_mode: V4l2CtrlH264DecodeMode) -> Self {
+        SafeExtControl::<H264DecodeMode>::from_value(decode_mode as i32)
+    }
+}
diff --git a/src/device/v4l2/stateless/controls/vp8.rs b/src/device/v4l2/stateless/controls/vp8.rs
new file mode 100644
index 0000000..44e9523
--- /dev/null
+++ b/src/device/v4l2/stateless/controls/vp8.rs
@@ -0,0 +1,128 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use crate::codec::vp8::parser::Header;
+use crate::codec::vp8::parser::MbLfAdjustments;
+use crate::codec::vp8::parser::Segmentation;
+
+use v4l2r::bindings::v4l2_ctrl_vp8_frame;
+use v4l2r::bindings::V4L2_VP8_COEFF_PROB_CNT;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_EXPERIMENTAL;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_KEY_FRAME;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_SHOW_FRAME;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT;
+use v4l2r::bindings::V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN;
+use v4l2r::bindings::V4L2_VP8_LF_ADJ_ENABLE;
+use v4l2r::bindings::V4L2_VP8_LF_DELTA_UPDATE;
+use v4l2r::bindings::V4L2_VP8_LF_FILTER_TYPE_SIMPLE;
+use v4l2r::bindings::V4L2_VP8_MV_PROB_CNT;
+use v4l2r::bindings::V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE;
+use v4l2r::bindings::V4L2_VP8_SEGMENT_FLAG_ENABLED;
+use v4l2r::bindings::V4L2_VP8_SEGMENT_FLAG_UPDATE_FEATURE_DATA;
+use v4l2r::bindings::V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP;
+
+use v4l2r::controls::codec::Vp8Frame;
+use v4l2r::controls::SafeExtControl;
+
+#[derive(Default)]
+pub struct V4l2CtrlVp8FrameParams {
+    handle: v4l2_ctrl_vp8_frame,
+}
+
+impl V4l2CtrlVp8FrameParams {
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    pub fn set_loop_filter_params(
+        &mut self,
+        hdr: &Header,
+        mb_lf_adjust: &MbLfAdjustments,
+    ) -> &mut Self {
+        self.handle.lf.sharpness_level = hdr.sharpness_level;
+        self.handle.lf.level = hdr.loop_filter_level;
+
+        let mut flags: u32 = 0;
+        if hdr.filter_type {
+            flags |= V4L2_VP8_LF_FILTER_TYPE_SIMPLE;
+        }
+        if mb_lf_adjust.loop_filter_adj_enable {
+            flags |= V4L2_VP8_LF_ADJ_ENABLE;
+        }
+        if mb_lf_adjust.mode_ref_lf_delta_update {
+            flags |= V4L2_VP8_LF_DELTA_UPDATE;
+        }
+        self.handle.lf.flags = flags;
+
+        for i in 0..4 {
+            self.handle.lf.ref_frm_delta[i] = mb_lf_adjust.ref_frame_delta[i];
+            self.handle.lf.mb_mode_delta[i] = mb_lf_adjust.mb_mode_delta[i];
+        }
+
+        self
+    }
+
+    pub fn set_quantization_params(&mut self, hdr: &Header) -> &mut Self {
+        self.handle.quant.y_ac_qi =
+            u8::try_from(hdr.quant_indices.y_ac_qi).expect("Value out of range for u8");
+
+        self.handle.quant.y_dc_delta =
+            i8::try_from(hdr.quant_indices.y_dc_delta).expect("Value out of range for u8");
+        self.handle.quant.y2_dc_delta =
+            i8::try_from(hdr.quant_indices.y2_dc_delta).expect("Value out of range for u8");
+        self.handle.quant.y2_ac_delta =
+            i8::try_from(hdr.quant_indices.y2_ac_delta).expect("Value out of range for u8");
+        self.handle.quant.uv_dc_delta =
+            i8::try_from(hdr.quant_indices.uv_dc_delta).expect("Value out of range for u8");
+        self.handle.quant.uv_ac_delta =
+            i8::try_from(hdr.quant_indices.uv_ac_delta).expect("Value out of range for u8");
+        self
+    }
+
+    pub fn set_segmentation_params(&mut self, segmentation: &Segmentation) -> &mut Self {
+        let mut flags: u32 = 0;
+
+        if segmentation.segmentation_enabled {
+            flags |= V4L2_VP8_SEGMENT_FLAG_ENABLED;
+        }
+        if segmentation.update_mb_segmentation_map {
+            flags |= V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP;
+        }
+        if segmentation.update_segment_feature_data {
+            flags |= V4L2_VP8_SEGMENT_FLAG_UPDATE_FEATURE_DATA;
+        }
+        if segmentation.segment_feature_mode == false {
+            flags |= V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE;
+        }
+        self.handle.segment.flags = flags;
+
+        for i in 0..4 {
+            self.handle.segment.quant_update[i] = segmentation.quantizer_update_value[i];
+            self.handle.segment.lf_update[i] = segmentation.lf_update_value[i];
+        }
+
+        for i in 0..3 {
+            self.handle.segment.segment_probs[i] = segmentation.segment_prob[i];
+        }
+
+        self.handle.segment.padding = 0;
+
+        self
+    }
+
+    pub fn set_entropy_params(&mut self, hdr: &Header) -> &mut Self {
+        self.handle.entropy.coeff_probs = hdr.coeff_prob;
+        self.handle.entropy.y_mode_probs = hdr.mode_probs.intra_16x16_prob;
+        self.handle.entropy.uv_mode_probs = hdr.mode_probs.intra_chroma_prob;
+        self.handle.entropy.mv_probs = hdr.mv_prob;
+        self
+    }
+}
+
+impl From<&V4l2CtrlVp8FrameParams> for SafeExtControl<Vp8Frame> {
+    fn from(decode_params: &V4l2CtrlVp8FrameParams) -> Self {
+        SafeExtControl::<Vp8Frame>::from(decode_params.handle)
+    }
+}
diff --git a/src/device/v4l2/stateless/device.rs b/src/device/v4l2/stateless/device.rs
new file mode 100644
index 0000000..4d5661a
--- /dev/null
+++ b/src/device/v4l2/stateless/device.rs
@@ -0,0 +1,150 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use crate::decoder::stateless::DecodeError;
+use crate::device::v4l2::stateless::queue::V4l2CaptureBuffer;
+use crate::device::v4l2::stateless::queue::V4l2CaptureQueue;
+use crate::device::v4l2::stateless::queue::V4l2OutputBuffer;
+use crate::device::v4l2::stateless::queue::V4l2OutputQueue;
+use crate::device::v4l2::stateless::request::V4l2Request;
+use crate::Resolution;
+
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::os::fd::AsRawFd;
+use std::os::fd::RawFd;
+use std::path::Path;
+use std::rc::Rc;
+use std::sync::Arc;
+
+use v4l2r::device::Device as VideoDevice;
+use v4l2r::device::DeviceConfig;
+use v4l2r::ioctl;
+use v4l2r::nix::fcntl::open;
+use v4l2r::nix::fcntl::OFlag;
+use v4l2r::nix::sys::stat::Mode;
+
+//TODO: handle memory backends other than mmap
+//TODO: handle video formats other than h264
+//TODO: handle queue start/stop at runtime
+//TODO: handle DRC at runtime
+/// Owns the video/media device handles and both V4L2 queues for one decoder.
+struct DeviceHandle {
+    video_device: Arc<VideoDevice>,
+    // Raw fd of the media controller device, used to allocate requests.
+    media_device: RawFd,
+    output_queue: V4l2OutputQueue,
+    capture_queue: V4l2CaptureQueue,
+    // Decoded capture buffers keyed by timestamp, waiting to be claimed by
+    // sync().
+    capture_buffers: HashMap<u64, V4l2CaptureBuffer>,
+}
+
+impl DeviceHandle {
+    fn new() -> Self {
+        // TODO: pass video device path and config via function arguments
+        let video_device_path = Path::new("/dev/video-dec0");
+        let video_device_config = DeviceConfig::new().non_blocking_dqbuf();
+        let video_device = Arc::new(
+            VideoDevice::open(video_device_path, video_device_config)
+                .expect("Failed to open video device"),
+        );
+        // TODO: probe capabilities to find the related media device path
+        let media_device_path = Path::new("/dev/media-dec0");
+        let media_device = open(
+            media_device_path,
+            OFlag::O_RDWR | OFlag::O_CLOEXEC,
+            Mode::empty(),
+        )
+        .unwrap_or_else(|_| panic!("Cannot open {}", media_device_path.display()));
+        // TODO: handle custom configuration
+        const NUM_OUTPUT_BUFFERS: u32 = 8;
+        const NUM_CAPTURE_BUFFERS: u32 = 8;
+        let output_queue = V4l2OutputQueue::new(video_device.clone(), NUM_OUTPUT_BUFFERS);
+        let capture_queue = V4l2CaptureQueue::new(video_device.clone(), NUM_CAPTURE_BUFFERS);
+        Self {
+            video_device,
+            media_device,
+            output_queue,
+            capture_queue,
+            // The key/value types are fixed by the field declaration, so the
+            // turbofish is unnecessary.
+            capture_buffers: HashMap::new(),
+        }
+    }
+    /// Allocates a new media request on the media controller device.
+    fn alloc_request(&self) -> ioctl::Request {
+        ioctl::Request::alloc(&self.media_device).expect("Failed to alloc request handle")
+    }
+    /// Grabs a free OUTPUT (bitstream) buffer, if one is available.
+    fn alloc_buffer(&self) -> Result<V4l2OutputBuffer, DecodeError> {
+        self.output_queue.alloc_buffer()
+    }
+    /// Blocks until the capture buffer tagged with `timestamp` is available.
+    fn sync(&mut self, timestamp: u64) -> V4l2CaptureBuffer {
+        // TODO: handle synced buffers internally by capture queue
+        // NOTE(review): this busy-waits on the non-blocking queues; a
+        // poll/select on the device fd would avoid spinning.
+        loop {
+            if let Some(buffer) = self.capture_buffers.remove(&timestamp) {
+                return buffer;
+            }
+            self.recycle_buffers(); // TODO: poll/select
+        }
+    }
+    /// Returns completed buffers to circulation: drains finished OUTPUT
+    /// buffers, stashes decoded CAPTURE buffers by timestamp, and re-queues
+    /// all free CAPTURE buffers.
+    fn recycle_buffers(&mut self) {
+        self.output_queue.drain();
+        // TODO: handle synced buffers internally by capture queue
+        while let Some(buffer) = self.capture_queue.dequeue_buffer() {
+            self.capture_buffers.insert(buffer.timestamp(), buffer);
+        }
+        self.capture_queue.refill();
+    }
+}
+
+/// Cheaply cloneable handle to the decoder device, shared by all requests.
+#[derive(Clone)]
+pub struct V4l2Device {
+    handle: Rc<RefCell<DeviceHandle>>,
+}
+
+impl V4l2Device {
+    pub fn new() -> Self {
+        Self {
+            handle: Rc::new(RefCell::new(DeviceHandle::new())),
+        }
+    }
+    pub fn num_free_buffers(&self) -> usize {
+        self.handle.borrow().output_queue.num_free_buffers()
+    }
+    pub fn num_buffers(&self) -> usize {
+        self.handle.borrow().output_queue.num_buffers()
+    }
+    /// Applies `resolution` to both queues. A single mutable borrow covers
+    /// both updates instead of borrowing the RefCell twice.
+    pub fn set_resolution(&mut self, resolution: Resolution) -> &mut Self {
+        {
+            let mut handle = self.handle.borrow_mut();
+            handle.output_queue.set_resolution(resolution);
+            handle.capture_queue.set_resolution(resolution);
+        }
+        self
+    }
+    /// Allocates a media request paired with a free output buffer for the
+    /// frame tagged `timestamp`.
+    pub fn alloc_request(&self, timestamp: u64) -> Result<V4l2Request, DecodeError> {
+        let output_buffer = self.handle.borrow().alloc_buffer()?;
+
+        Ok(V4l2Request::new(
+            self.clone(),
+            timestamp,
+            self.handle.borrow().alloc_request(),
+            output_buffer,
+        ))
+    }
+    /// Blocks until the frame tagged with `timestamp` has been decoded.
+    pub fn sync(&self, timestamp: u64) -> V4l2CaptureBuffer {
+        self.handle.borrow_mut().sync(timestamp)
+    }
+    pub fn recycle_buffers(&self) {
+        self.handle.borrow_mut().recycle_buffers()
+    }
+}
+
+// Satisfies clippy's `new_without_default`; equivalent to `new()`.
+impl Default for V4l2Device {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl AsRawFd for V4l2Device {
+    // Exposes the underlying video device fd (e.g. for polling).
+    fn as_raw_fd(&self) -> i32 {
+        self.handle.borrow().video_device.as_raw_fd()
+    }
+}
diff --git a/src/device/v4l2/stateless/queue.rs b/src/device/v4l2/stateless/queue.rs
new file mode 100644
index 0000000..966700c
--- /dev/null
+++ b/src/device/v4l2/stateless/queue.rs
@@ -0,0 +1,338 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use anyhow::anyhow;
+use std::cell::RefCell;
+use std::rc::Rc;
+use std::sync::Arc;
+
+use v4l2r::bindings::v4l2_format;
+use v4l2r::device::queue::direction::Capture;
+use v4l2r::device::queue::direction::Output;
+use v4l2r::device::queue::dqbuf::DqBuffer;
+use v4l2r::device::queue::qbuf::QBuffer;
+use v4l2r::device::queue::BuffersAllocated;
+use v4l2r::device::queue::GetFreeCaptureBuffer;
+use v4l2r::device::queue::GetFreeOutputBuffer;
+use v4l2r::device::queue::Queue;
+use v4l2r::device::queue::QueueInit;
+use v4l2r::device::AllocatedQueue;
+use v4l2r::device::Device;
+use v4l2r::device::Stream;
+use v4l2r::device::TryDequeue;
+use v4l2r::memory::MemoryType;
+use v4l2r::memory::MmapHandle;
+use v4l2r::nix::sys::time::TimeVal;
+use v4l2r::Format;
+use v4l2r::PixelFormat;
+use v4l2r::PlaneLayout;
+
+use crate::decoder::stateless::DecodeError;
+use crate::Resolution;
+
+//TODO: handle memory backends other than mmap
+/// A writable OUTPUT (bitstream) buffer obtained from the output queue.
+pub struct V4l2OutputBuffer {
+    queue: V4l2OutputQueue,
+    handle: QBuffer<
+        Output,
+        Vec<MmapHandle>,
+        Vec<MmapHandle>,
+        Rc<Queue<Output, BuffersAllocated<Vec<MmapHandle>>>>,
+    >,
+    // Number of bytes written so far into the buffer's first plane.
+    length: usize,
+}
+
+impl V4l2OutputBuffer {
+    fn new(
+        queue: V4l2OutputQueue,
+        handle: QBuffer<
+            Output,
+            Vec<MmapHandle>,
+            Vec<MmapHandle>,
+            Rc<Queue<Output, BuffersAllocated<Vec<MmapHandle>>>>,
+        >,
+    ) -> Self {
+        Self {
+            queue,
+            handle,
+            length: 0,
+        }
+    }
+    pub fn index(&self) -> usize {
+        self.handle.index()
+    }
+    pub fn length(&self) -> usize {
+        self.length
+    }
+    /// Appends an Annex-B start code (00 00 01) followed by `data`.
+    ///
+    /// Panics (via slice indexing) if the plane mapping is too small to hold
+    /// the payload.
+    pub fn write(&mut self, data: &[u8]) -> &mut Self {
+        let mut mapping = self
+            .handle
+            .get_plane_mapping(0)
+            .expect("Failed to mmap output buffer");
+
+        mapping.as_mut()[self.length..self.length + 3].copy_from_slice(&[0, 0, 1]);
+        self.length += 3;
+
+        mapping.as_mut()[self.length..self.length + data.len()].copy_from_slice(data);
+        self.length += data.len();
+
+        drop(mapping);
+        self
+    }
+    /// Queues the buffer with `timestamp`, associated with the media request
+    /// identified by `request_fd`.
+    pub fn submit(self, timestamp: u64, request_fd: i32) {
+        // The queue must be streaming before a buffer can be submitted. The
+        // previous code bound the inner queue to an unused variable; only the
+        // state assertion is needed.
+        if !matches!(
+            &*self.queue.handle.borrow(),
+            V4l2OutputQueueHandle::Streaming(_)
+        ) {
+            panic!("ERROR");
+        }
+        self.handle
+            .set_timestamp(TimeVal::new(/* FIXME: sec */ 0, timestamp as i64))
+            .set_request(request_fd)
+            .queue(&[self.length])
+            .expect("Failed to queue output buffer");
+    }
+}
+
+//TODO: handle memory backends other than mmap
+//TODO: handle video formats other than h264
+//TODO: handle queue start/stop at runtime
+//TODO: handle DRC at runtime
+#[derive(Default)]
+enum V4l2OutputQueueHandle {
+    // Queue created but format/buffers not configured yet.
+    Init(Queue<Output, QueueInit>),
+    // Format applied, buffers allocated, streaming started.
+    Streaming(Rc<Queue<Output, BuffersAllocated<Vec<MmapHandle>>>>),
+    // Transient placeholder left behind by `RefCell::take()` during a state
+    // transition.
+    #[default]
+    Unknown,
+}
+
+/// Cloneable handle to the OUTPUT (bitstream) queue state machine.
+#[derive(Clone)]
+pub struct V4l2OutputQueue {
+    handle: Rc<RefCell<V4l2OutputQueueHandle>>,
+    // Number of buffers to request when the queue is configured.
+    num_buffers: u32,
+}
+
+impl V4l2OutputQueue {
+    /// Creates the queue in the `Init` state; buffers are allocated later by
+    /// `set_resolution`.
+    pub fn new(device: Arc<Device>, num_buffers: u32) -> Self {
+        let handle = Queue::get_output_mplane_queue(device).expect("Failed to get output queue");
+        log::debug!("Output queue:\n\tstate: None -> Init\n");
+        let handle = Rc::new(RefCell::new(V4l2OutputQueueHandle::Init(handle)));
+        Self {
+            handle,
+            num_buffers,
+        }
+    }
+    /// Configures the format for `res`, allocates buffers and starts
+    /// streaming (Init -> Streaming).
+    pub fn set_resolution(&mut self, res: Resolution) -> &mut Self {
+        self.handle.replace(match self.handle.take() {
+            V4l2OutputQueueHandle::Init(mut handle) => {
+                let (width, height) = res.into();
+
+                handle
+                    .change_format()
+                    .expect("Failed to change output format")
+                    .set_size(width as usize, height as usize)
+                    .set_pixelformat(PixelFormat::from_fourcc(b"S264"))
+                    // 1 MB per decoding unit should be enough for most streams.
+                    .set_planes_layout(vec![PlaneLayout {
+                        sizeimage: 1024 * 1024,
+                        ..Default::default()
+                    }])
+                    .apply::<v4l2_format>()
+                    .expect("Failed to apply output format");
+
+                let format: Format = handle.get_format().expect("Failed to get output format");
+                log::debug!("Output format:\n\t{:?}\n", format);
+
+                let handle = handle
+                    .request_buffers_generic::<Vec<MmapHandle>>(MemoryType::Mmap, self.num_buffers)
+                    .expect("Failed to request output buffers");
+                log::debug!(
+                    "Output queue:\n\t
+                    num_buffers: {}\n\t
+                    num_queued_buffers: {}\n\t
+                    num_free_buffers: {}\n",
+                    handle.num_buffers(),
+                    handle.num_queued_buffers(),
+                    handle.num_free_buffers()
+                );
+
+                // TODO: handle start/stop at runtime
+                handle.stream_on().expect("Failed to start output queue");
+
+                log::debug!("Output queue:\n\tstate: Init -> Streaming\n");
+                V4l2OutputQueueHandle::Streaming(handle.into())
+            }
+            _ => {
+                /* TODO: handle DRC */
+                todo!()
+            }
+        });
+        self
+    }
+    /// Total allocated buffers; 0 unless streaming.
+    pub fn num_buffers(&self) -> usize {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2OutputQueueHandle::Streaming(handle) => handle.num_buffers(),
+            _ => 0,
+        }
+    }
+    /// Buffers currently available for writing; 0 unless streaming.
+    pub fn num_free_buffers(&self) -> usize {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2OutputQueueHandle::Streaming(handle) => handle.num_free_buffers(),
+            _ => 0,
+        }
+    }
+    /// Grabs a free buffer, or `NotEnoughOutputBuffers` when all are in use.
+    pub fn alloc_buffer(&self) -> Result<V4l2OutputBuffer, DecodeError> {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2OutputQueueHandle::Streaming(handle) => match handle.try_get_free_buffer() {
+                Ok(buffer) => Ok(V4l2OutputBuffer::new(self.clone(), buffer)),
+                Err(_) => Err(DecodeError::NotEnoughOutputBuffers(1)),
+            },
+            _ => Err(DecodeError::DecoderError(anyhow!(
+                "Invalid hardware handle"
+            ))),
+        }
+    }
+    /// Dequeues every finished buffer, dropping each one so it returns to the
+    /// free pool.
+    pub fn drain(&self) {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2OutputQueueHandle::Streaming(handle) => {
+                // The dequeued buffer is dropped immediately; only the side
+                // effect of freeing it matters here.
+                while handle.try_dequeue().is_ok() {}
+            }
+            _ => panic!("ERROR"),
+        }
+    }
+}
+
+// TODO: handle other memory backends
+/// A dequeued CAPTURE buffer holding one decoded frame.
+pub struct V4l2CaptureBuffer {
+    handle: DqBuffer<Capture, Vec<MmapHandle>>,
+}
+
+impl V4l2CaptureBuffer {
+    fn new(handle: DqBuffer<Capture, Vec<MmapHandle>>) -> Self {
+        Self { handle }
+    }
+    pub fn index(&self) -> usize {
+        self.handle.data.index() as usize
+    }
+    /// Frame identifier: the driver copies the timestamp set on the matching
+    /// OUTPUT buffer, which this backend stores in the tv_usec field.
+    pub fn timestamp(&self) -> u64 {
+        self.handle.data.timestamp().tv_usec as u64
+    }
+    /// Total size in bytes across all planes.
+    pub fn length(&self) -> usize {
+        let mut length = 0;
+        for i in 0..self.handle.data.num_planes() {
+            let mapping = self
+                .handle
+                .get_plane_mapping(i)
+                .expect("Failed to mmap capture buffer");
+            length += mapping.size();
+            drop(mapping);
+        }
+        length
+    }
+    /// Copies all planes, concatenated in plane order, into `data`.
+    /// `data` must be at least `length()` bytes or the copy panics.
+    pub fn read(&self, data: &mut [u8]) {
+        let mut offset = 0;
+        for i in 0..self.handle.data.num_planes() {
+            let mapping = self
+                .handle
+                .get_plane_mapping(i)
+                .expect("Failed to mmap capture buffer");
+            data[offset..offset + mapping.size()].copy_from_slice(&mapping);
+            offset += mapping.size();
+            drop(mapping);
+        }
+    }
+}
+
+//TODO: handle memory backends other than mmap
+//TODO: handle video formats other than h264
+//TODO: handle queue start/stop at runtime
+//TODO: handle DRC at runtime
+//TODO: handle synced buffers in Streaming state
+#[derive(Default)]
+enum V4l2CaptureQueueHandle {
+    // Queue created but buffers not allocated yet.
+    Init(Queue<Capture, QueueInit>),
+    // Buffers allocated and streaming started.
+    Streaming(Queue<Capture, BuffersAllocated<Vec<MmapHandle>>>),
+    // Transient placeholder left behind by `RefCell::take()` during a state
+    // transition.
+    #[default]
+    Unknown,
+}
+
+/// Handle to the CAPTURE (decoded frame) queue state machine.
+pub struct V4l2CaptureQueue {
+    handle: RefCell<V4l2CaptureQueueHandle>,
+    // Number of buffers to request when the queue is configured.
+    num_buffers: u32,
+}
+
+impl V4l2CaptureQueue {
+    /// Creates the queue in the `Init` state; buffers are allocated later by
+    /// `set_resolution`.
+    pub fn new(device: Arc<Device>, num_buffers: u32) -> Self {
+        let handle = Queue::get_capture_mplane_queue(device).expect("Failed to get capture queue");
+        log::debug!("Capture queue:\n\tstate: None -> Init\n");
+        let handle = RefCell::new(V4l2CaptureQueueHandle::Init(handle));
+        Self {
+            handle,
+            num_buffers,
+        }
+    }
+    /// Allocates buffers and starts streaming (Init -> Streaming). The
+    /// resolution argument is currently unused: the capture format is whatever
+    /// the driver derived from the output format.
+    pub fn set_resolution(&mut self, _: Resolution) -> &mut Self {
+        self.handle.replace(match self.handle.take() {
+            V4l2CaptureQueueHandle::Init(handle) => {
+                let format: Format = handle.get_format().expect("Failed to get capture format");
+                log::debug!("Capture format:\n\t{:?}\n", format);
+
+                let handle = handle
+                    .request_buffers_generic::<Vec<MmapHandle>>(MemoryType::Mmap, self.num_buffers)
+                    .expect("Failed to request capture buffers");
+                log::debug!(
+                    "Capture queue:\n\t
+                    num_buffers: {}\n\t
+                    num_queued_buffers: {}\n\t
+                    num_free_buffers: {}\n",
+                    handle.num_buffers(),
+                    handle.num_queued_buffers(),
+                    handle.num_free_buffers()
+                );
+
+                // TODO: handle start/stop at runtime
+                handle.stream_on().expect("Failed to start capture queue");
+
+                log::debug!("Capture queue:\n\tstate: Init -> Streaming\n");
+                V4l2CaptureQueueHandle::Streaming(handle)
+            }
+            _ => {
+                /* TODO: handle DRC */
+                todo!()
+            }
+        });
+        self
+    }
+    /// Non-blocking dequeue of one decoded buffer; `None` when none is ready.
+    /// Panics if the queue is not streaming.
+    pub fn dequeue_buffer(&self) -> Option<V4l2CaptureBuffer> {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2CaptureQueueHandle::Streaming(handle) => match handle.try_dequeue() {
+                Ok(buffer) => Some(V4l2CaptureBuffer::new(buffer)),
+                _ => None,
+            },
+            _ => panic!("ERROR"),
+        }
+    }
+    /// Queues every currently-free buffer back to the driver so it always has
+    /// buffers to decode into. Panics if the queue is not streaming.
+    pub fn refill(&self) {
+        let handle = &*self.handle.borrow();
+        match handle {
+            V4l2CaptureQueueHandle::Streaming(handle) => {
+                while handle.num_free_buffers() != 0 {
+                    let buffer = handle
+                        .try_get_free_buffer()
+                        .expect("Failed to alloc capture buffer");
+                    log::debug!("capture >> index: {}\n", buffer.index());
+                    buffer.queue().expect("Failed to queue capture buffer");
+                }
+            }
+            _ => panic!("ERROR"),
+        }
+    }
+}
diff --git a/src/device/v4l2/stateless/request.rs b/src/device/v4l2/stateless/request.rs
new file mode 100644
index 0000000..c831eff
--- /dev/null
+++ b/src/device/v4l2/stateless/request.rs
@@ -0,0 +1,203 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::cell::RefCell;
+use std::os::fd::AsRawFd;
+use std::rc::Rc;
+
+use v4l2r::controls::ExtControlTrait;
+use v4l2r::controls::SafeExtControl;
+use v4l2r::ioctl;
+
+use crate::device::v4l2::stateless::device::V4l2Device;
+use crate::device::v4l2::stateless::queue::V4l2CaptureBuffer;
+use crate::device::v4l2::stateless::queue::V4l2OutputBuffer;
+
+/// A freshly-allocated request: controls and bitstream can still be written.
+struct InitRequestHandle {
+    device: V4l2Device,
+    timestamp: u64,
+    handle: ioctl::Request,
+    buffer: V4l2OutputBuffer,
+}
+
+impl InitRequestHandle {
+    fn new(
+        device: V4l2Device,
+        timestamp: u64,
+        handle: ioctl::Request,
+        buffer: V4l2OutputBuffer,
+    ) -> Self {
+        Self {
+            device,
+            timestamp,
+            handle,
+            buffer,
+        }
+    }
+    /// Attaches an extended control to this request via s_ext_ctrls, targeted
+    /// at the request fd rather than the device-global control state.
+    fn ioctl<C, T>(&mut self, ctrl: C) -> &mut Self
+    where
+        C: Into<SafeExtControl<T>>,
+        T: ExtControlTrait,
+    {
+        let which = ioctl::CtrlWhich::Request(self.handle.as_raw_fd());
+        let mut ctrl: SafeExtControl<T> = ctrl.into();
+        ioctl::s_ext_ctrls(&self.device, which, &mut ctrl).expect("Failed to set output control");
+        self
+    }
+    /// Appends bitstream data to the request's output buffer.
+    fn write(&mut self, data: &[u8]) -> &mut Self {
+        self.buffer.write(data);
+        self
+    }
+    /// Queues the output buffer and the request itself; the request is now
+    /// owned by the driver and only identified by its timestamp.
+    fn submit(self) -> PendingRequestHandle {
+        self.buffer.submit(self.timestamp, self.handle.as_raw_fd());
+        self.handle.queue().expect("Failed to queue request handle");
+        PendingRequestHandle {
+            device: self.device.clone(),
+            timestamp: self.timestamp,
+        }
+    }
+}
+
+/// A submitted request awaiting decode; identified only by its timestamp.
+struct PendingRequestHandle {
+    device: V4l2Device,
+    timestamp: u64,
+}
+
+impl PendingRequestHandle {
+    /// Blocks until the device produces the capture buffer for this request.
+    fn sync(self) -> DoneRequestHandle {
+        DoneRequestHandle {
+            buffer: Rc::new(RefCell::new(self.device.sync(self.timestamp))),
+        }
+    }
+}
+
+/// A completed request holding the decoded capture buffer.
+struct DoneRequestHandle {
+    buffer: Rc<RefCell<V4l2CaptureBuffer>>,
+}
+
+impl DoneRequestHandle {
+    /// Returns a shareable view of the decoded buffer.
+    fn result(&self) -> V4l2Result {
+        V4l2Result {
+            buffer: self.buffer.clone(),
+        }
+    }
+}
+
+/// State machine for a decode request: Init -> Pending -> Done.
+/// `Unknown` is the transient placeholder left by `std::mem::take` while a
+/// transition constructs the next state.
+#[derive(Default)]
+enum RequestHandle {
+    Init(InitRequestHandle),
+    Pending(PendingRequestHandle),
+    Done(DoneRequestHandle),
+    #[default]
+    Unknown,
+}
+
+impl RequestHandle {
+    fn new(
+        device: V4l2Device,
+        timestamp: u64,
+        handle: ioctl::Request,
+        buffer: V4l2OutputBuffer,
+    ) -> Self {
+        Self::Init(InitRequestHandle::new(device, timestamp, handle, buffer))
+    }
+    /// Timestamp identifying this request in any state.
+    fn timestamp(&self) -> u64 {
+        match self {
+            Self::Init(handle) => handle.timestamp,
+            Self::Pending(handle) => handle.timestamp,
+            Self::Done(handle) => handle.buffer.borrow().timestamp(),
+            _ => panic!("ERROR"),
+        }
+    }
+    /// Only valid in the Init state (before submission).
+    fn ioctl<C, T>(&mut self, ctrl: C) -> &mut Self
+    where
+        C: Into<SafeExtControl<T>>,
+        T: ExtControlTrait,
+    {
+        match self {
+            Self::Init(handle) => handle.ioctl(ctrl),
+            _ => panic!("ERROR"),
+        };
+        self
+    }
+    /// Only valid in the Init state (before submission).
+    fn write(&mut self, data: &[u8]) -> &mut Self {
+        match self {
+            Self::Init(handle) => handle.write(data),
+            _ => panic!("ERROR"),
+        };
+        self
+    }
+
+    // This method can modify in-place instead of returning a new value. This removes the need for
+    // a RefCell in V4l2Request.
+    fn submit(&mut self) {
+        match std::mem::take(self) {
+            Self::Init(handle) => *self = Self::Pending(handle.submit()),
+            _ => panic!("ERROR"),
+        }
+    }
+    /// Transitions Pending -> Done (blocking); a no-op if already Done.
+    fn sync(&mut self) {
+        match std::mem::take(self) {
+            Self::Pending(handle) => *self = Self::Done(handle.sync()),
+            // Already synced: put the state back untouched.
+            s @ Self::Done(_) => *self = s,
+            _ => panic!("ERROR"),
+        }
+    }
+    /// Only valid in the Done state (after sync()).
+    fn result(&self) -> V4l2Result {
+        match self {
+            Self::Done(handle) => handle.result(),
+            _ => panic!("ERROR"),
+        }
+    }
+}
+
+/// Public wrapper over the request state machine; panics on calls made in
+/// the wrong state (e.g. `write` after `submit`).
+pub struct V4l2Request(RequestHandle);
+
+impl V4l2Request {
+    pub fn new(
+        device: V4l2Device,
+        timestamp: u64,
+        handle: ioctl::Request,
+        buffer: V4l2OutputBuffer,
+    ) -> Self {
+        Self(RequestHandle::new(device, timestamp, handle, buffer))
+    }
+    pub fn timestamp(&self) -> u64 {
+        self.0.timestamp()
+    }
+    /// Attaches an extended control; valid before `submit` only.
+    pub fn ioctl<C, T>(&mut self, ctrl: C) -> &mut Self
+    where
+        C: Into<SafeExtControl<T>>,
+        T: ExtControlTrait,
+    {
+        self.0.ioctl(ctrl);
+        self
+    }
+    /// Appends bitstream data; valid before `submit` only.
+    pub fn write(&mut self, data: &[u8]) -> &mut Self {
+        self.0.write(data);
+        self
+    }
+    pub fn submit(&mut self) {
+        self.0.submit();
+    }
+    pub fn sync(&mut self) {
+        self.0.sync();
+    }
+    /// Valid after `sync` only.
+    pub fn result(&self) -> V4l2Result {
+        self.0.result()
+    }
+}
+
+/// Read-only view of a decoded frame's capture buffer.
+pub struct V4l2Result {
+    buffer: Rc<RefCell<V4l2CaptureBuffer>>,
+}
+
+impl V4l2Result {
+    /// Total decoded size in bytes across all planes.
+    pub fn length(&self) -> usize {
+        self.buffer.borrow().length()
+    }
+    /// Copies the decoded planes into `data` (must be `length()` bytes).
+    pub fn read(&self, data: &mut [u8]) {
+        self.buffer.borrow().read(data)
+    }
+}
diff --git a/src/encoder/stateful/h264/v4l2.rs b/src/encoder/stateful/h264/v4l2.rs
index 8bd3704..eb30523 100644
--- a/src/encoder/stateful/h264/v4l2.rs
+++ b/src/encoder/stateful/h264/v4l2.rs
@@ -168,12 +168,12 @@
     use v4l2r::device::Device;
     use v4l2r::device::DeviceConfig;
 
-    use crate::backend::v4l2::encoder::tests::find_device_with_capture;
+    use crate::backend::v4l2::encoder::find_device_with_capture;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_dmabuf_test;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_mmap_test;
-    use crate::backend::v4l2::encoder::tests::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::tests::BoPoolAllocator;
     use crate::backend::v4l2::encoder::tests::GbmDevice;
+    use crate::backend::v4l2::encoder::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::MmapingCapture;
     use crate::encoder::simple_encode_loop;
     use crate::encoder::tests::userptr_test_frame_generator;
diff --git a/src/encoder/stateful/h265/v4l2.rs b/src/encoder/stateful/h265/v4l2.rs
index 6e34ef1..0ce10b7 100644
--- a/src/encoder/stateful/h265/v4l2.rs
+++ b/src/encoder/stateful/h265/v4l2.rs
@@ -159,12 +159,12 @@
     use v4l2r::device::Device;
     use v4l2r::device::DeviceConfig;
 
-    use crate::backend::v4l2::encoder::tests::find_device_with_capture;
+    use crate::backend::v4l2::encoder::find_device_with_capture;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_dmabuf_test;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_mmap_test;
-    use crate::backend::v4l2::encoder::tests::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::tests::BoPoolAllocator;
     use crate::backend::v4l2::encoder::tests::GbmDevice;
+    use crate::backend::v4l2::encoder::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::MmapingCapture;
     use crate::encoder::simple_encode_loop;
     use crate::encoder::tests::userptr_test_frame_generator;
diff --git a/src/encoder/stateful/vp8/v4l2.rs b/src/encoder/stateful/vp8/v4l2.rs
index 626cfd2..ea21930 100644
--- a/src/encoder/stateful/vp8/v4l2.rs
+++ b/src/encoder/stateful/vp8/v4l2.rs
@@ -113,12 +113,12 @@
     use v4l2r::device::Device;
     use v4l2r::device::DeviceConfig;
 
-    use crate::backend::v4l2::encoder::tests::find_device_with_capture;
+    use crate::backend::v4l2::encoder::find_device_with_capture;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_dmabuf_test;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_mmap_test;
-    use crate::backend::v4l2::encoder::tests::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::tests::BoPoolAllocator;
     use crate::backend::v4l2::encoder::tests::GbmDevice;
+    use crate::backend::v4l2::encoder::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::MmapingCapture;
     use crate::encoder::simple_encode_loop;
     use crate::encoder::tests::userptr_test_frame_generator;
diff --git a/src/encoder/stateful/vp9/v4l2.rs b/src/encoder/stateful/vp9/v4l2.rs
index 2543e8b..3b1d98b 100644
--- a/src/encoder/stateful/vp9/v4l2.rs
+++ b/src/encoder/stateful/vp9/v4l2.rs
@@ -130,12 +130,12 @@
     use v4l2r::device::Device;
     use v4l2r::device::DeviceConfig;
 
-    use crate::backend::v4l2::encoder::tests::find_device_with_capture;
+    use crate::backend::v4l2::encoder::find_device_with_capture;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_dmabuf_test;
     use crate::backend::v4l2::encoder::tests::perform_v4l2_encoder_mmap_test;
-    use crate::backend::v4l2::encoder::tests::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::tests::BoPoolAllocator;
     use crate::backend::v4l2::encoder::tests::GbmDevice;
+    use crate::backend::v4l2::encoder::v4l2_format_to_frame_layout;
     use crate::backend::v4l2::encoder::MmapingCapture;
     use crate::encoder::simple_encode_loop;
     use crate::encoder::tests::userptr_test_frame_generator;
diff --git a/src/encoder/stateless/av1/vaapi.rs b/src/encoder/stateless/av1/vaapi.rs
index 0652fee..64b461e 100644
--- a/src/encoder/stateless/av1/vaapi.rs
+++ b/src/encoder/stateless/av1/vaapi.rs
@@ -5,7 +5,6 @@
 use std::num::TryFromIntError;
 use std::rc::Rc;
 
-use libva::constants::VA_INVALID_ID;
 use libva::AV1EncLoopFilterFlags;
 use libva::AV1EncLoopRestorationFlags;
 use libva::AV1EncModeControlFlags;
@@ -26,6 +25,7 @@
 use libva::VAProfile::VAProfileAV1Profile0;
 use libva::VAProfile::VAProfileAV1Profile1;
 use libva::VaError;
+use libva::VA_INVALID_ID;
 
 use crate::backend::vaapi::encoder::CodedOutputPromise;
 use crate::backend::vaapi::encoder::Reconstructed;
@@ -230,8 +230,11 @@
         // Current we always expect the reconstructed frame.
         const DISABLE_FRAME_RECON: bool = false;
 
-        // Palette mode is not used.
+        // Palette mode is not used. This also implies force_integer_mv and
+        // allow_screen_content_tools should be false.
         const PALETTE_MODE_ENABLE: bool = false;
+        const FORCE_INTEGER_MV: bool = false;
+        const ALLOW_SCREEN_CONTENT_TOOLS: bool = false;
 
         // Use 16x16 block size for now.
         // TODO: Use maximum available
@@ -392,6 +395,8 @@
                 DISABLE_FRAME_RECON,
                 request.frame.allow_intrabc,
                 PALETTE_MODE_ENABLE,
+                ALLOW_SCREEN_CONTENT_TOOLS,
+                FORCE_INTEGER_MV,
             ),
             SEG_ID_BLOCK_SIZE,
             NUM_TILE_GROUPS_MINUS1,
@@ -574,7 +579,7 @@
             va_profile,
             fourcc,
             coded_size,
-            libva::constants::VA_RC_CQP,
+            libva::VA_RC_CQP,
             low_power,
         )?;
 
@@ -584,12 +589,12 @@
 
 #[cfg(test)]
 mod tests {
-    use libva::constants::VA_RT_FORMAT_YUV420;
-    use libva::constants::VA_RT_FORMAT_YUV420_10;
     use libva::Display;
     use libva::UsageHint;
     use libva::VAEntrypoint::VAEntrypointEncSliceLP;
     use libva::VAProfile::VAProfileAV1Profile0;
+    use libva::VA_RT_FORMAT_YUV420;
+    use libva::VA_RT_FORMAT_YUV420_10;
 
     use super::*;
     use crate::backend::vaapi::encoder::tests::upload_test_frame_nv12;
@@ -673,7 +678,7 @@
                 width: WIDTH,
                 height: HEIGHT,
             },
-            libva::constants::VA_RC_CQP,
+            libva::VA_RC_CQP,
             low_power,
         )
         .unwrap();
diff --git a/src/encoder/stateless/h264/vaapi.rs b/src/encoder/stateless/h264/vaapi.rs
index 47c3165..7d7ab1d 100644
--- a/src/encoder/stateless/h264/vaapi.rs
+++ b/src/encoder/stateless/h264/vaapi.rs
@@ -7,9 +7,6 @@
 use std::rc::Rc;
 
 use anyhow::Context;
-use libva::constants::VA_INVALID_ID;
-use libva::constants::VA_PICTURE_H264_LONG_TERM_REFERENCE;
-use libva::constants::VA_PICTURE_H264_SHORT_TERM_REFERENCE;
 use libva::BufferType;
 use libva::Display;
 use libva::EncCodedBuffer;
@@ -28,6 +25,9 @@
 use libva::Surface;
 use libva::SurfaceMemoryDescriptor;
 use libva::VAProfile;
+use libva::VA_INVALID_ID;
+use libva::VA_PICTURE_H264_LONG_TERM_REFERENCE;
+use libva::VA_PICTURE_H264_SHORT_TERM_REFERENCE;
 
 use crate::backend::vaapi::encoder::tunings_to_libva_rc;
 use crate::backend::vaapi::encoder::CodedOutputPromise;
@@ -79,9 +79,9 @@
     /// holder to fill staticly sized array.
     fn build_invalid_va_h264_pic_enc() -> libva::PictureH264 {
         libva::PictureH264::new(
-            libva::constants::VA_INVALID_ID,
+            libva::VA_INVALID_ID,
             0,
-            libva::constants::VA_PICTURE_H264_INVALID,
+            libva::VA_PICTURE_H264_INVALID,
             0,
             0,
         )
@@ -414,10 +414,15 @@
             tunings_to_libva_rc::<{ MIN_QP as u32 }, { MAX_QP as u32 }>(&request.tunings)?;
         let rc_param = BufferType::EncMiscParameter(libva::EncMiscParameter::RateControl(rc_param));
 
+        let framerate_param = BufferType::EncMiscParameter(libva::EncMiscParameter::FrameRate(
+            libva::EncMiscParameterFrameRate::new(request.tunings.framerate, 0),
+        ));
+
         picture.add_buffer(self.context().create_buffer(seq_param)?);
         picture.add_buffer(self.context().create_buffer(pic_param)?);
         picture.add_buffer(self.context().create_buffer(slice_param)?);
         picture.add_buffer(self.context().create_buffer(rc_param)?);
+        picture.add_buffer(self.context().create_buffer(framerate_param)?);
 
         // Start processing the picture encoding
         let picture = picture.begin().context("picture begin")?;
@@ -462,8 +467,8 @@
         };
 
         let bitrate_control = match config.initial_tunings.rate_control {
-            RateControl::ConstantBitrate(_) => libva::constants::VA_RC_CBR,
-            RateControl::ConstantQuality(_) => libva::constants::VA_RC_CQP,
+            RateControl::ConstantBitrate(_) => libva::VA_RC_CBR,
+            RateControl::ConstantQuality(_) => libva::VA_RC_CQP,
         };
 
         let backend = VaapiBackend::new(
@@ -481,11 +486,11 @@
 
 #[cfg(test)]
 pub(super) mod tests {
-    use libva::constants::VA_RT_FORMAT_YUV420;
     use libva::Display;
     use libva::UsageHint;
     use libva::VAEntrypoint::VAEntrypointEncSliceLP;
     use libva::VAProfile::VAProfileH264Main;
+    use libva::VA_RT_FORMAT_YUV420;
 
     use super::*;
     use crate::backend::vaapi::encoder::tests::upload_test_frame_nv12;
@@ -553,7 +558,7 @@
                 width: WIDTH,
                 height: HEIGHT,
             },
-            libva::constants::VA_RC_CBR,
+            libva::VA_RC_CBR,
             low_power,
         )
         .unwrap();
diff --git a/src/encoder/stateless/vp9/vaapi.rs b/src/encoder/stateless/vp9/vaapi.rs
index e71be45..580dcad 100644
--- a/src/encoder/stateless/vp9/vaapi.rs
+++ b/src/encoder/stateless/vp9/vaapi.rs
@@ -7,7 +7,6 @@
 use std::rc::Rc;
 
 use anyhow::Context;
-use libva::constants::VA_INVALID_SURFACE;
 use libva::BufferType;
 use libva::Display;
 use libva::EncPictureParameter;
@@ -21,6 +20,7 @@
 use libva::VAProfile::VAProfileVP9Profile2;
 use libva::VP9EncPicFlags;
 use libva::VP9EncRefFlags;
+use libva::VA_INVALID_SURFACE;
 
 use crate::backend::vaapi::encoder::tunings_to_libva_rc;
 use crate::backend::vaapi::encoder::CodedOutputPromise;
@@ -240,9 +240,14 @@
             request.input,
         );
 
+        let framerate_param = BufferType::EncMiscParameter(libva::EncMiscParameter::FrameRate(
+            libva::EncMiscParameterFrameRate::new(request.tunings.framerate, 0),
+        ));
+
         picture.add_buffer(self.context().create_buffer(seq_param)?);
         picture.add_buffer(self.context().create_buffer(pic_param)?);
         picture.add_buffer(self.context().create_buffer(rc_param)?);
+        picture.add_buffer(self.context().create_buffer(framerate_param)?);
 
         // Start processing the picture encoding
         let picture = picture.begin().context("picture begin")?;
@@ -274,8 +279,8 @@
         blocking_mode: BlockingMode,
     ) -> EncodeResult<Self> {
         let bitrate_control = match config.initial_tunings.rate_control {
-            RateControl::ConstantBitrate(_) => libva::constants::VA_RC_CBR,
-            RateControl::ConstantQuality(_) => libva::constants::VA_RC_CQP,
+            RateControl::ConstantBitrate(_) => libva::VA_RC_CBR,
+            RateControl::ConstantQuality(_) => libva::VA_RC_CQP,
         };
 
         let va_profile = match config.bit_depth {
@@ -299,11 +304,11 @@
 pub(super) mod tests {
     use std::rc::Rc;
 
-    use libva::constants::VA_RT_FORMAT_YUV420;
-    use libva::constants::VA_RT_FORMAT_YUV420_10;
     use libva::Display;
     use libva::UsageHint;
     use libva::VAEntrypoint::VAEntrypointEncSliceLP;
+    use libva::VA_RT_FORMAT_YUV420;
+    use libva::VA_RT_FORMAT_YUV420_10;
 
     use super::*;
     use crate::backend::vaapi::encoder::tests::upload_test_frame_nv12;
@@ -371,7 +376,7 @@
                 width: WIDTH,
                 height: HEIGHT,
             },
-            libva::constants::VA_RC_CBR,
+            libva::VA_RC_CBR,
             low_power,
         )
         .unwrap();
diff --git a/src/image_processing.rs b/src/image_processing.rs
index 7094b5e..2786ee7 100644
--- a/src/image_processing.rs
+++ b/src/image_processing.rs
@@ -6,39 +6,61 @@
 use byteorder::ByteOrder;
 use byteorder::LittleEndian;
 
-/// Copies `src` into `dst` as NV12, removing any extra padding.
+/// Copies `src` into `dst` as NV12, handling padding.
 pub fn nv12_copy(
-    src: &[u8],
-    dst: &mut [u8],
+    src_y: &[u8],
+    src_y_stride: usize,
+    dst_y: &mut [u8],
+    dst_y_stride: usize,
+    src_uv: &[u8],
+    src_uv_stride: usize,
+    dst_uv: &mut [u8],
+    dst_uv_stride: usize,
     width: usize,
     height: usize,
-    strides: [usize; 3],
-    offsets: [usize; 3],
 ) {
-    // Copy Y.
-    let src_y_lines = src[offsets[0]..]
-        .chunks(strides[0])
-        .map(|line| &line[..width]);
-    let dst_y_lines = dst.chunks_mut(width);
-
-    for (src_line, dst_line) in src_y_lines.zip(dst_y_lines).take(height) {
-        dst_line.copy_from_slice(src_line);
+    for y in 0..height {
+        dst_y[(y * dst_y_stride)..(y * dst_y_stride + width)]
+            .copy_from_slice(&src_y[(y * src_y_stride)..(y * src_y_stride + width)]);
     }
+    for y in 0..(height / 2) {
+        dst_uv[(y * dst_uv_stride)..(y * dst_uv_stride + width)]
+            .copy_from_slice(&src_uv[(y * src_uv_stride)..(y * src_uv_stride + width)]);
+    }
+}
 
-    let dst_u_offset = width * height;
-
-    // Align width and height to 2 for UV plane.
-    // 1 sample per 4 pixels, but we have two components per line so width can remain as-is.
-    let uv_width = if width % 2 == 1 { width + 1 } else { width };
-    let uv_height = if height % 2 == 1 { height + 1 } else { height } / 2;
-
-    // Copy UV.
-    let src_uv_lines = src[offsets[1]..]
-        .chunks(strides[1])
-        .map(|line| &line[..uv_width]);
-    let dst_uv_lines = dst[dst_u_offset..].chunks_mut(uv_width);
-    for (src_line, dst_line) in src_uv_lines.zip(dst_uv_lines).take(uv_height) {
-        dst_line.copy_from_slice(src_line);
+/// Replace 0 padding with the last pixels of the real image. This helps reduce compression
+/// artifacts caused by the sharp transition between real image data and 0.
+pub fn extend_border_nv12(
+    y_plane: &mut [u8],
+    uv_plane: &mut [u8],
+    visible_width: usize,
+    visible_height: usize,
+    coded_width: usize,
+    coded_height: usize,
+) {
+    assert!(visible_width > 1);
+    assert!(visible_height > 1);
+    for y in 0..visible_height {
+        let row_start = y * coded_width;
+        for x in visible_width..coded_width {
+            y_plane[row_start + x] = y_plane[row_start + x - 1]
+        }
+    }
+    for y in visible_height..coded_height {
+        let (src, dst) = y_plane.split_at_mut(y * coded_width);
+        dst[0..coded_width].copy_from_slice(&src[((y - 1) * coded_width)..(y * coded_width)]);
+    }
+    for y in 0..(visible_height / 2) {
+        let row_start = y * coded_width;
+        for x in visible_width..coded_width {
+            // We use minus 2 here because we want to actually repeat the last 2 UV values.
+            uv_plane[row_start + x] = uv_plane[row_start + x - 2]
+        }
+    }
+    for y in (visible_height / 2)..(coded_height / 2) {
+        let (src, dst) = uv_plane.split_at_mut(y * coded_width);
+        dst[0..coded_width].copy_from_slice(&src[((y - 1) * coded_width)..(y * coded_width)]);
     }
 }
 
@@ -140,3 +162,154 @@
         }
     }
 }
+
+/// Simple implementation of MM21 to NV12 detiling. Note that this Rust-only implementation is
+/// unlikely to be fast enough for production code, and is for testing purposes only.
+/// TODO(b:380280455): We will want to speed this up and also add MT2T support.
+pub fn detile_plane(
+    src: &[u8],
+    dst: &mut [u8],
+    width: usize,
+    height: usize,
+    tile_width: usize,
+    tile_height: usize,
+) -> Result<(), String> {
+    if width % tile_width != 0 || height % tile_height != 0 {
+        return Err("Buffers must be aligned to tile dimensions for detiling".to_owned());
+    }
+
+    let tile_size = tile_width * tile_height;
+    let mut output_idx = 0;
+    for y_start in (0..height).step_by(tile_height) {
+        let tile_row_start = y_start * width;
+        for y in 0..tile_height {
+            let row_start = tile_row_start + y * tile_width;
+            for x in (0..width).step_by(tile_width) {
+                let input_idx = row_start + x / tile_width * tile_size;
+                dst[output_idx..(output_idx + tile_width)]
+                    .copy_from_slice(&src[input_idx..(input_idx + tile_width)]);
+                output_idx += tile_width;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+pub fn mm21_to_nv12(
+    src_y: &[u8],
+    dst_y: &mut [u8],
+    src_uv: &[u8],
+    dst_uv: &mut [u8],
+    width: usize,
+    height: usize,
+) -> Result<(), String> {
+    let y_tile_width = 16;
+    let y_tile_height = 32;
+    detile_plane(src_y, dst_y, width, height, y_tile_width, y_tile_height)?;
+    detile_plane(
+        src_uv,
+        dst_uv,
+        width,
+        height / 2,
+        y_tile_width,
+        y_tile_height / 2,
+    )
+}
+
+/// Simple implementation of NV12 to I420. Again, probably not fast enough for production.
+/// TODO(b:380280455): We may want to speed this up.
+pub fn nv12_to_i420_chroma(src_uv: &[u8], dst_u: &mut [u8], dst_v: &mut [u8]) {
+    for i in 0..src_uv.len() {
+        if i % 2 == 0 {
+            dst_u[i / 2] = src_uv[i];
+        } else {
+            dst_v[i / 2] = src_uv[i];
+        }
+    }
+}
+
+pub fn nv12_to_i420(
+    src_y: &[u8],
+    dst_y: &mut [u8],
+    src_uv: &[u8],
+    dst_u: &mut [u8],
+    dst_v: &mut [u8],
+) {
+    dst_y.copy_from_slice(src_y);
+    nv12_to_i420_chroma(src_uv, dst_u, dst_v);
+}
+
+pub fn i420_to_nv12_chroma(src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
+    for i in 0..dst_uv.len() {
+        if i % 2 == 0 {
+            dst_uv[i] = src_u[i / 2];
+        } else {
+            dst_uv[i] = src_v[i / 2];
+        }
+    }
+}
+
+pub fn i420_to_nv12(src_y: &[u8], dst_y: &mut [u8], src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
+    dst_y.copy_from_slice(src_y);
+    i420_to_nv12_chroma(src_u, src_v, dst_uv);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_mm21_to_nv12() {
+        let test_input = include_bytes!("test_data/puppets-480x270_20230825.mm21.yuv");
+        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
+
+        let mut test_output = [0u8; 480 * 288 * 3 / 2];
+        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
+        mm21_to_nv12(
+            &test_input[0..480 * 288],
+            test_y_output,
+            &test_input[480 * 288..480 * 288 * 3 / 2],
+            test_uv_output,
+            480,
+            288,
+        )
+        .expect("Failed to detile!");
+        assert_eq!(test_output, *test_expected_output);
+    }
+
+    #[test]
+    fn test_nv12_to_i420() {
+        let test_input = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
+        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");
+
+        let mut test_output = [0u8; 480 * 288 * 3 / 2];
+        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
+        let (test_u_output, test_v_output) = test_uv_output.split_at_mut(480 * 288 / 4);
+        nv12_to_i420(
+            &test_input[0..480 * 288],
+            test_y_output,
+            &test_input[480 * 288..480 * 288 * 3 / 2],
+            test_u_output,
+            test_v_output,
+        );
+        assert_eq!(test_output, *test_expected_output);
+    }
+
+    #[test]
+    fn test_i420_to_nv12() {
+        let test_input = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");
+        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
+
+        let mut test_output = [0u8; 480 * 288 * 3 / 2];
+        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
+        i420_to_nv12(
+            &test_input[0..(480 * 288)],
+            test_y_output,
+            &test_input[(480 * 288)..(480 * 288 * 5 / 4)],
+            &test_input[(480 * 288 * 5 / 4)..(480 * 288 * 3 / 2)],
+            test_uv_output,
+        );
+        assert_eq!(test_output, *test_expected_output);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 35c9905..9cbd6af 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -27,6 +27,8 @@
 pub mod backend;
 #[cfg(any(feature = "vaapi", feature = "v4l2"))]
 pub mod decoder;
+#[cfg(feature = "v4l2")]
+pub mod device;
 #[cfg(any(feature = "vaapi", feature = "v4l2"))]
 pub mod encoder;
 #[cfg(any(feature = "vaapi", feature = "v4l2"))]
@@ -77,6 +79,10 @@
 
         self
     }
+
+    pub fn get_area(&self) -> usize {
+        (self.width as usize) * (self.height as usize)
+    }
 }
 
 impl From<(u32, u32)> for Resolution {
diff --git a/src/test_data/puppets-480x270_20230825.i420.yuv b/src/test_data/puppets-480x270_20230825.i420.yuv
new file mode 100644
index 0000000..c0822eb
--- /dev/null
+++ b/src/test_data/puppets-480x270_20230825.i420.yuv
Binary files differ
diff --git a/src/test_data/puppets-480x270_20230825.mm21.yuv b/src/test_data/puppets-480x270_20230825.mm21.yuv
new file mode 100644
index 0000000..7c3153c
--- /dev/null
+++ b/src/test_data/puppets-480x270_20230825.mm21.yuv
Binary files differ
diff --git a/src/test_data/puppets-480x270_20230825.nv12.yuv b/src/test_data/puppets-480x270_20230825.nv12.yuv
new file mode 100644
index 0000000..1a32af3
--- /dev/null
+++ b/src/test_data/puppets-480x270_20230825.nv12.yuv
Binary files differ