From c2b1135066bc7942d4b046464399656b12c44587 Mon Sep 17 00:00:00 2001 From: awxkee Date: Tue, 29 Oct 2024 12:20:58 +0000 Subject: [PATCH] Added YUV decoder, drop dcv, added high bit depth support --- Cargo.toml | 3 +- src/codecs/avif/decoder.rs | 460 +++++++++++++++---- src/codecs/avif/mod.rs | 2 + src/codecs/avif/yuv.rs | 891 +++++++++++++++++++++++++++++++++++++ 4 files changed, 1270 insertions(+), 86 deletions(-) create mode 100644 src/codecs/avif/yuv.rs diff --git a/Cargo.toml b/Cargo.toml index 4a6108a26c..000e85b90f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,6 @@ num-traits = { version = "0.2.0" } # Optional dependencies color_quant = { version = "1.1", optional = true } dav1d = { version = "0.10.3", optional = true } -dcv-color-primitives = { version = "0.6.1", optional = true } exr = { version = "1.5.0", optional = true } gif = { version = "0.13", optional = true } image-webp = { version = "0.2.0", optional = true } @@ -88,7 +87,7 @@ webp = ["dep:image-webp"] rayon = ["dep:rayon", "ravif?/threading"] # Enables multi-threading nasm = ["ravif?/asm"] # Enables use of nasm by rav1e (requires nasm to be installed) color_quant = ["dep:color_quant"] # Enables color quantization -avif-native = ["dep:mp4parse", "dep:dcv-color-primitives", "dep:dav1d"] # Enable native dependency libdav1d +avif-native = ["dep:mp4parse", "dep:dav1d"] # Enable native dependency libdav1d benchmarks = [] # Build some inline benchmarks. Useful only during development (requires nightly Rust) [[bench]] diff --git a/src/codecs/avif/decoder.rs b/src/codecs/avif/decoder.rs index 15b2244b09..5a28e55f8d 100644 --- a/src/codecs/avif/decoder.rs +++ b/src/codecs/avif/decoder.rs @@ -10,8 +10,8 @@ use std::marker::PhantomData; use crate::error::{DecodingError, ImageFormatHint, UnsupportedError, UnsupportedErrorKind}; use crate::{ColorType, ImageDecoder, ImageError, ImageFormat, ImageResult}; +use crate::codecs::avif::yuv::*; use dav1d::{PixelLayout, PlanarImageComponent}; -use dcv_color_primitives as dcp; use mp4parse::{read_avif, ParseStrictness}; fn error_map>>(err: E) -> ImageError { @@ -56,17 +56,7 @@ impl AvifDecoder { match picture.bit_depth() { 8 => (), - 10 | 12 => { - return ImageResult::Err(ImageError::Unsupported( - UnsupportedError::from_format_and_kind( - ImageFormatHint::Exact(ImageFormat::Avif), - UnsupportedErrorKind::GenericFeature(format!( - "Only 8 bit depth is supported but was {}", - picture.bit_depth() - )), - ), - )) - } + 10 | 12 => (), _ => { return ImageResult::Err(ImageError::Decoding(DecodingError::new( ImageFormatHint::Exact(ImageFormat::Avif), @@ -86,13 +76,30 @@ impl AvifDecoder { } } +fn reshape_plane(source: &[u8], stride: usize, width: usize, height: usize) -> Vec { + let mut target_plane = vec![0u16; width * height]; + for (shaped_row, src_row) in target_plane + .chunks_exact_mut(width) + .zip(source.chunks_exact(stride)) + { + for (dst, src) in shaped_row.iter_mut().zip(src_row.chunks_exact(2)) { + *dst = u16::from_le_bytes([src[0], src[1]]); + } + } + target_plane +} + impl ImageDecoder for AvifDecoder { fn dimensions(&self) -> (u32, u32) { (self.picture.width(), self.picture.height()) } fn color_type(&self) -> ColorType { - ColorType::Rgba8 + if self.picture.bit_depth() == 8 { + ColorType::Rgba8 + } else { + ColorType::Rgba16 + } } fn icc_profile(&mut self) -> ImageResult>> { @@ -102,84 +109,369 @@ impl ImageDecoder for AvifDecoder { fn read_image(self, buf: &mut [u8]) -> ImageResult<()> { assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes())); - if 
self.picture.pixel_layout() != PixelLayout::I400 { - let pixel_format = match self.picture.pixel_layout() { - PixelLayout::I400 => todo!(), - PixelLayout::I420 => dcp::PixelFormat::I420, - PixelLayout::I422 => dcp::PixelFormat::I422, - PixelLayout::I444 => dcp::PixelFormat::I444, - }; - let src_color_space = match (self.picture.color_primaries(), self.picture.color_range()) - { - (dav1d::pixel::ColorPrimaries::BT709, dav1d::pixel::YUVRange::Full) => { - dcp::ColorSpace::Bt709FR - } - (dav1d::pixel::ColorPrimaries::BT709, dav1d::pixel::YUVRange::Limited) => { - dcp::ColorSpace::Bt709 - } - (_, dav1d::pixel::YUVRange::Full) => dcp::ColorSpace::Bt601FR, - (_, dav1d::pixel::YUVRange::Limited) => dcp::ColorSpace::Bt601, - }; - let src_format = dcp::ImageFormat { - pixel_format, - color_space: src_color_space, - num_planes: 3, - }; - let dst_format = dcp::ImageFormat { - pixel_format: dcp::PixelFormat::Rgba, - color_space: dcp::ColorSpace::Rgb, - num_planes: 1, - }; - let (width, height) = self.dimensions(); - let planes = &[ - self.picture.plane(PlanarImageComponent::Y), - self.picture.plane(PlanarImageComponent::U), - self.picture.plane(PlanarImageComponent::V), - ]; - let src_buffers = planes.iter().map(AsRef::as_ref).collect::>(); - let strides = &[ - self.picture.stride(PlanarImageComponent::Y) as usize, - self.picture.stride(PlanarImageComponent::U) as usize, - self.picture.stride(PlanarImageComponent::V) as usize, - ]; - let dst_buffers = &mut [&mut buf[..]]; - dcp::convert_image( - width, - height, - &src_format, - Some(strides), - &src_buffers, - &dst_format, - None, - dst_buffers, - ) - .map_err(error_map)?; - } else { - let plane = self.picture.plane(PlanarImageComponent::Y); - buf.copy_from_slice(plane.as_ref()); - } + let (width, height) = self.dimensions(); - if let Some(picture) = self.alpha_picture { - if picture.pixel_layout() != PixelLayout::I400 { + let yuv_range = match self.picture.color_range() { + dav1d::pixel::YUVRange::Limited => YuvIntensityRange::Tv, + dav1d::pixel::YUVRange::Full => YuvIntensityRange::Pc, + }; + let color_matrix = match self.picture.color_primaries() { + dav1d::pixel::ColorPrimaries::Reserved0 | dav1d::pixel::ColorPrimaries::Reserved => { + return Err(ImageError::Unsupported( + UnsupportedError::from_format_and_kind( + ImageFormat::Avif.into(), + UnsupportedErrorKind::GenericFeature( + "Using 'Reserved' color matrix is not supported".to_string(), + ), + ), + )); + } + dav1d::pixel::ColorPrimaries::BT709 => YuvStandardMatrix::Bt709, + // This is arguable, some applications prefer to go with Bt.709 as default some applications as Bt.601 + // For ex. 
chrome always prefer Bt.709 even for SD content + // However, nowadays standard should be Bt.709 for HD+ size otherwise Bt.601 + dav1d::pixel::ColorPrimaries::Unspecified => YuvStandardMatrix::Bt709, + dav1d::pixel::ColorPrimaries::BT470M => YuvStandardMatrix::Bt470_6, + dav1d::pixel::ColorPrimaries::BT470BG => YuvStandardMatrix::Bt601, + dav1d::pixel::ColorPrimaries::ST170M => YuvStandardMatrix::Smpte240, + dav1d::pixel::ColorPrimaries::ST240M => YuvStandardMatrix::Smpte240, + dav1d::pixel::ColorPrimaries::Film => YuvStandardMatrix::Bt2020, + dav1d::pixel::ColorPrimaries::BT2020 => YuvStandardMatrix::Bt2020, + dav1d::pixel::ColorPrimaries::ST428 => YuvStandardMatrix::Bt709, + dav1d::pixel::ColorPrimaries::P3DCI => YuvStandardMatrix::Bt709, + dav1d::pixel::ColorPrimaries::P3Display => YuvStandardMatrix::Bt709, + dav1d::pixel::ColorPrimaries::Tech3213 => { return Err(ImageError::Unsupported( UnsupportedError::from_format_and_kind( ImageFormat::Avif.into(), - UnsupportedErrorKind::GenericFeature(format!( - "Alpha must be PixelLayout::I400 but was: {:?}", - picture.pixel_layout() // PixelLayout does not implement display - )), + UnsupportedErrorKind::GenericFeature("Unknown color matrix".to_string()), ), )); } - let stride = picture.stride(PlanarImageComponent::Y) as usize; - let plane = picture.plane(PlanarImageComponent::Y); - let width = picture.width(); - for (buf, slice) in Iterator::zip( - buf.chunks_exact_mut(width as usize * 4), - plane.as_ref().chunks_exact(stride), - ) { - for i in 0..width as usize { - buf[3 + i * 4] = slice[i]; + }; + + if self.picture.bit_depth() == 8 { + if self.picture.pixel_layout() != PixelLayout::I400 { + let worker = match self.picture.pixel_layout() { + PixelLayout::I400 => unreachable!(), + PixelLayout::I420 => yuv420_to_rgba, + PixelLayout::I422 => yuv422_to_rgba, + PixelLayout::I444 => yuv444_to_rgba, + }; + + let ref_y = self.picture.plane(PlanarImageComponent::Y); + let ref_u = self.picture.plane(PlanarImageComponent::U); + let ref_v = self.picture.plane(PlanarImageComponent::V); + let image = YuvPlanarImage::new( + ref_y.as_ref(), + self.picture.stride(PlanarImageComponent::Y) as usize, + ref_u.as_ref(), + self.picture.stride(PlanarImageComponent::U) as usize, + ref_v.as_ref(), + self.picture.stride(PlanarImageComponent::V) as usize, + width as usize, + height as usize, + ); + + let res = worker(image, buf, 8, yuv_range, color_matrix); + + if let Err(err) = res { + return Err(ImageError::Unsupported( + UnsupportedError::from_format_and_kind( + ImageFormat::Avif.into(), + UnsupportedErrorKind::GenericFeature(err), + ), + )); + } + } else { + let plane = self.picture.plane(PlanarImageComponent::Y); + + let gray_image = YuvGrayImage::new( + plane.as_ref(), + self.picture.stride(PlanarImageComponent::Y) as usize, + width as usize, + height as usize, + ); + + let cr = yuv400_to_rgba(gray_image, buf, 8, yuv_range, color_matrix); + if let Err(err) = cr { + return Err(ImageError::Unsupported( + UnsupportedError::from_format_and_kind( + ImageFormat::Avif.into(), + UnsupportedErrorKind::GenericFeature(err), + ), + )); + } + } + + if let Some(picture) = self.alpha_picture { + if picture.pixel_layout() != PixelLayout::I400 { + return Err(ImageError::Unsupported( + UnsupportedError::from_format_and_kind( + ImageFormat::Avif.into(), + UnsupportedErrorKind::GenericFeature(format!( + "Alpha must be PixelLayout::I400 but was: {:?}", + picture.pixel_layout() // PixelLayout does not implement display + )), + ), + )); + } + let stride = 
picture.stride(PlanarImageComponent::Y) as usize; + let plane = picture.plane(PlanarImageComponent::Y); + let width = picture.width(); + for (buf, slice) in Iterator::zip( + buf.chunks_exact_mut(width as usize * 4), + plane.as_ref().chunks_exact(stride), + ) { + for (rgba, a_src) in buf.chunks_exact_mut(4).zip(slice) { + rgba[3] = *a_src; + } + } + } + } else { + // 8+ bit-depth case + let rgba16_buf: &mut [u16] = match bytemuck::try_cast_slice_mut(buf) { + Ok(slice) => slice, + Err(_) => { + return Err(ImageError::Unsupported( + UnsupportedError::from_format_and_kind( + ImageFormat::Avif.into(), + UnsupportedErrorKind::GenericFeature( + "Incorrectly determined image type".to_string(), + ), + ), + )); + } + }; + + // dav1d may return not aligned and not correctly constrained data, + // or at least I can't find guarantees on that + // so if it is happened, instead casting we'll need to reshape it into a target slice + // required criteria: bytemuck allows this data align, and stride must be dividable by 2 + + let mut y_plane_stride = self.picture.stride(PlanarImageComponent::Y) >> 1; + + let ref_y = self.picture.plane(PlanarImageComponent::Y); + let mut _bind_y = vec![]; + + let mut shape_y_plane = || { + y_plane_stride = width; + _bind_y = reshape_plane( + ref_y.as_ref(), + self.picture.stride(PlanarImageComponent::Y) as usize, + width as usize, + height as usize, + ); + }; + + let y_plane: &[u16] = if self.picture.stride(PlanarImageComponent::Y) as usize & 1 == 0 + { + match bytemuck::try_cast_slice(ref_y.as_ref()) { + Ok(slice) => slice, + Err(_) => { + shape_y_plane(); + _bind_y.as_slice() + } + } + } else { + shape_y_plane(); + _bind_y.as_slice() + }; + + if self.picture.pixel_layout() != PixelLayout::I400 { + let mut u_plane_stride = self.picture.stride(PlanarImageComponent::U) >> 1; + + let ref_u = self.picture.plane(PlanarImageComponent::U); + let mut _bind_u = vec![]; + let ref_v = self.picture.plane(PlanarImageComponent::V); + let mut _bind_v = vec![]; + + let mut shape_u_plane = || { + u_plane_stride = match self.picture.pixel_layout() { + PixelLayout::I400 => unreachable!(), + PixelLayout::I420 | PixelLayout::I422 => (width + 1) / 2, + PixelLayout::I444 => width, + }; + let u_plane_height = match self.picture.pixel_layout() { + PixelLayout::I400 => unreachable!(), + PixelLayout::I420 => (height + 1) / 2, + PixelLayout::I422 | PixelLayout::I444 => height, + }; + _bind_u = reshape_plane( + ref_u.as_ref(), + self.picture.stride(PlanarImageComponent::U) as usize, + u_plane_stride as usize, + u_plane_height as usize, + ); + }; + + let u_plane: &[u16] = + if self.picture.stride(PlanarImageComponent::U) as usize & 1 == 0 { + match bytemuck::try_cast_slice(ref_u.as_ref()) { + Ok(slice) => slice, + Err(_) => { + shape_u_plane(); + _bind_u.as_slice() + } + } + } else { + shape_u_plane(); + _bind_u.as_slice() + }; + + let mut v_plane_stride = self.picture.stride(PlanarImageComponent::V) >> 1; + + let mut shape_v_plane = || { + v_plane_stride = match self.picture.pixel_layout() { + PixelLayout::I400 => unreachable!(), + PixelLayout::I420 | PixelLayout::I422 => (width + 1) / 2, + PixelLayout::I444 => width, + }; + let v_plane_height = match self.picture.pixel_layout() { + PixelLayout::I400 => unreachable!(), + PixelLayout::I420 => (height + 1) / 2, + PixelLayout::I422 | PixelLayout::I444 => height, + }; + _bind_v = reshape_plane( + ref_v.as_ref(), + self.picture.stride(PlanarImageComponent::V) as usize, + v_plane_stride as usize, + v_plane_height as usize, + ); + }; + + let v_plane: &[u16] = 
+                    if self.picture.stride(PlanarImageComponent::V) as usize & 1 == 0 {
+                        match bytemuck::try_cast_slice(ref_v.as_ref()) {
+                            Ok(slice) => slice,
+                            Err(_) => {
+                                shape_v_plane();
+                                _bind_v.as_slice()
+                            }
+                        }
+                    } else {
+                        shape_v_plane();
+                        _bind_v.as_slice()
+                    };
+
+                let worker = match self.picture.pixel_layout() {
+                    PixelLayout::I400 => unreachable!(),
+                    PixelLayout::I420 => yuv420_to_rgba,
+                    PixelLayout::I422 => yuv422_to_rgba,
+                    PixelLayout::I444 => yuv444_to_rgba,
+                };
+
+                let image = YuvPlanarImage::new(
+                    y_plane,
+                    y_plane_stride as usize,
+                    u_plane,
+                    u_plane_stride as usize,
+                    v_plane,
+                    v_plane_stride as usize,
+                    width as usize,
+                    height as usize,
+                );
+
+                let res = worker(
+                    image,
+                    rgba16_buf,
+                    self.picture.bit_depth() as u32,
+                    yuv_range,
+                    color_matrix,
+                );
+
+                if let Err(err) = res {
+                    return Err(ImageError::Unsupported(
+                        UnsupportedError::from_format_and_kind(
+                            ImageFormat::Avif.into(),
+                            UnsupportedErrorKind::GenericFeature(err),
+                        ),
+                    ));
+                }
+            } else {
+                let gray_image = YuvGrayImage::new(
+                    y_plane,
+                    y_plane_stride as usize,
+                    width as usize,
+                    height as usize,
+                );
+                let cr = yuv400_to_rgba(
+                    gray_image,
+                    rgba16_buf,
+                    self.picture.bit_depth() as u32,
+                    yuv_range,
+                    color_matrix,
+                );
+                if let Err(err) = cr {
+                    return Err(ImageError::Unsupported(
+                        UnsupportedError::from_format_and_kind(
+                            ImageFormat::Avif.into(),
+                            UnsupportedErrorKind::GenericFeature(err),
+                        ),
+                    ));
+                }
+            }
+
+            // Squash the alpha plane into the RGBA output
+            if let Some(picture) = self.alpha_picture {
+                if picture.pixel_layout() != PixelLayout::I400 {
+                    return Err(ImageError::Unsupported(
+                        UnsupportedError::from_format_and_kind(
+                            ImageFormat::Avif.into(),
+                            UnsupportedErrorKind::GenericFeature(format!(
+                                "Alpha must be PixelLayout::I400 but was: {:?}",
+                                picture.pixel_layout() // PixelLayout does not implement display
+                            )),
+                        ),
+                    ));
+                }
+                let ref_a = picture.plane(PlanarImageComponent::Y);
+                let mut _bind_a = vec![];
+
+                let mut a_plane_stride = picture.stride(PlanarImageComponent::Y) >> 1;
+
+                let mut shape_a_plane = || {
+                    a_plane_stride = width;
+                    _bind_a = reshape_plane(
+                        ref_a.as_ref(),
+                        picture.stride(PlanarImageComponent::Y) as usize,
+                        width as usize,
+                        height as usize,
+                    );
+                };
+
+                let a_plane: &[u16] = if picture.stride(PlanarImageComponent::Y) as usize & 1 == 0 {
+                    match bytemuck::try_cast_slice(ref_a.as_ref()) {
+                        Ok(slice) => slice,
+                        Err(_) => {
+                            shape_a_plane();
+                            _bind_a.as_slice()
+                        }
+                    }
+                } else {
+                    shape_a_plane();
+                    _bind_a.as_slice()
+                };
+
+                let width = picture.width();
+                for (buf, slice) in Iterator::zip(
+                    rgba16_buf.chunks_exact_mut(width as usize * 4),
+                    a_plane.as_ref().chunks_exact(a_plane_stride as usize),
+                ) {
+                    for (rgba, a_src) in buf.chunks_exact_mut(4).zip(slice) {
+                        rgba[3] = *a_src;
+                    }
+                }
+            }
+
+            // Expand the current bit depth to the 16-bit output range
+            let target_expand_bits = 16u32.saturating_sub(self.picture.bit_depth() as u32);
+            if target_expand_bits > 0 {
+                for rgba in rgba16_buf.chunks_exact_mut(4) {
+                    rgba[0] = rgba[0] << target_expand_bits;
+                    rgba[1] = rgba[1] << target_expand_bits;
+                    rgba[2] = rgba[2] << target_expand_bits;
+                    rgba[3] = rgba[3] << target_expand_bits;
                 }
             }
         }
diff --git a/src/codecs/avif/mod.rs b/src/codecs/avif/mod.rs
index 89edfc2c97..7c18cc5f30 100644
--- a/src/codecs/avif/mod.rs
+++ b/src/codecs/avif/mod.rs
@@ -12,3 +12,5 @@ pub use self::encoder::{AvifEncoder, ColorSpace};
 mod decoder;
 #[cfg(feature = "avif")]
 mod encoder;
+#[cfg(feature = "avif-native")]
+mod yuv;
diff --git a/src/codecs/avif/yuv.rs b/src/codecs/avif/yuv.rs
new file mode 100644
index 0000000000..41f566ba97
--- /dev/null
+++ b/src/codecs/avif/yuv.rs
@@ -0,0 +1,891 @@
+use num_traits::AsPrimitive;
+
+#[derive(Debug, Copy, Clone)]
+struct CbCrInverseTransform<T> {
+    pub y_coef: T,
+    pub cr_coef: T,
+    pub cb_coef: T,
+    pub g_coeff_1: T,
+    pub g_coeff_2: T,
+}
+
+impl<T> CbCrInverseTransform<T> {
+    fn new(
+        y_coef: T,
+        cr_coef: T,
+        cb_coef: T,
+        g_coeff_1: T,
+        g_coeff_2: T,
+    ) -> CbCrInverseTransform<T> {
+        CbCrInverseTransform {
+            y_coef,
+            cr_coef,
+            cb_coef,
+            g_coeff_1,
+            g_coeff_2,
+        }
+    }
+}
+
+impl CbCrInverseTransform<f32> {
+    fn to_integers(self, precision: u32) -> CbCrInverseTransform<i32> {
+        let precision_scale: i32 = 1i32 << (precision as i32);
+        let cr_coef = (self.cr_coef * precision_scale as f32) as i32;
+        let cb_coef = (self.cb_coef * precision_scale as f32) as i32;
+        let y_coef = (self.y_coef * precision_scale as f32) as i32;
+        let g_coef_1 = (self.g_coeff_1 * precision_scale as f32) as i32;
+        let g_coef_2 = (self.g_coeff_2 * precision_scale as f32) as i32;
+        CbCrInverseTransform::<i32> {
+            y_coef,
+            cr_coef,
+            cb_coef,
+            g_coeff_1: g_coef_1,
+            g_coeff_2: g_coef_2,
+        }
+    }
+}
+
+/// Computes the integer YUV -> RGB (inverse) transform coefficients as specified in [ITU-R](https://www.itu.int/rec/T-REC-H.273/en)
+fn get_inverse_transform(
+    range_bgra: u32,
+    range_y: u32,
+    range_uv: u32,
+    kr: f32,
+    kb: f32,
+    precision: u32,
+) -> Result<CbCrInverseTransform<i32>, String> {
+    let range_uv = range_bgra as f32 / range_uv as f32;
+    let y_coef = range_bgra as f32 / range_y as f32;
+    let cr_coeff = (2f32 * (1f32 - kr)) * range_uv;
+    let cb_coeff = (2f32 * (1f32 - kb)) * range_uv;
+    let kg = 1.0f32 - kr - kb;
+    if kg == 0f32 {
+        return Err("1.0 - kr - kb must not be 0".to_string());
+    }
+    let g_coeff_1 = (2f32 * ((1f32 - kr) * kr / kg)) * range_uv;
+    let g_coeff_2 = (2f32 * ((1f32 - kb) * kb / kg)) * range_uv;
+    let exact_transform =
+        CbCrInverseTransform::new(y_coef, cr_coeff, cb_coeff, g_coeff_1, g_coeff_2);
+    Ok(exact_transform.to_integers(precision))
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
+/// Declares the YUV intensity range: TV (limited) or full (PC),
+/// see [ITU-R](https://www.itu.int/rec/T-REC-H.273/en) for more info
+pub(crate) enum YuvIntensityRange {
+    /// Limited (TV) range: Y ∈ [16 << (depth - 8), 235 << (depth - 8)],
+    /// UV ∈ [16 << (depth - 8), 240 << (depth - 8)]
+    Tv,
+    /// Full (PC) range: Y ∈ [0, (1 << depth) - 1],
+    /// UV ∈ [0, (1 << depth) - 1]
+    Pc,
+}
+
+#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
+struct YuvChromaRange {
+    pub bias_y: u32,
+    pub bias_uv: u32,
+    pub range_y: u32,
+    pub range_uv: u32,
+    pub range: YuvIntensityRange,
+}
+
+const fn get_yuv_range(depth: u32, range: YuvIntensityRange) -> YuvChromaRange {
+    match range {
+        YuvIntensityRange::Tv => YuvChromaRange {
+            bias_y: 16 << (depth - 8),
+            bias_uv: 1 << (depth - 1),
+            range_y: 219 << (depth - 8),
+            range_uv: 224 << (depth - 8),
+            range,
+        },
+        YuvIntensityRange::Pc => YuvChromaRange {
+            bias_y: 0,
+            bias_uv: 1 << (depth - 1),
+            range_uv: (1 << depth) - 1,
+            range_y: (1 << depth) - 1,
+            range,
+        },
+    }
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
+/// Declares standard prebuilt YUV conversion matrices,
+/// see [ITU-R](https://www.itu.int/rec/T-REC-H.273/en) for more info
+pub(crate) enum YuvStandardMatrix {
+    Bt601,
+    Bt709,
+    Bt2020,
+    Smpte240,
+    Bt470_6,
+}
+
+#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
+struct YuvBias {
+    pub kr: f32,
+    pub kb: f32,
+}
+
+const fn get_kr_kb(matrix: YuvStandardMatrix) -> YuvBias {
+    match matrix {
+        YuvStandardMatrix::Bt601 => YuvBias {
+            kr: 0.299f32,
+            kb: 0.114f32,
+        },
+        YuvStandardMatrix::Bt709 => YuvBias {
+            kr: 0.2126f32,
+            kb: 0.0722f32,
+        },
+        YuvStandardMatrix::Bt2020 => YuvBias {
+            kr: 0.2627f32,
+            kb: 0.0593f32,
+        },
+        YuvStandardMatrix::Smpte240 => YuvBias {
+            kr: 0.087f32,
+            kb: 0.212f32,
+        },
+        YuvStandardMatrix::Bt470_6 => YuvBias {
+            kr: 0.2220f32,
+            kb: 0.0713f32,
+        },
+    }
+}
+
+pub(crate) struct YuvPlanarImage<'a, T> {
+    y_plane: &'a [T],
+    y_stride: usize,
+    u_plane: &'a [T],
+    u_stride: usize,
+    v_plane: &'a [T],
+    v_stride: usize,
+    width: usize,
+    height: usize,
+}
+
+impl<'a, T> YuvPlanarImage<'a, T> {
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn new(
+        y_plane: &'a [T],
+        y_stride: usize,
+        u_plane: &'a [T],
+        u_stride: usize,
+        v_plane: &'a [T],
+        v_stride: usize,
+        width: usize,
+        height: usize,
+    ) -> Self {
+        YuvPlanarImage {
+            y_plane,
+            y_stride,
+            u_plane,
+            u_stride,
+            v_plane,
+            v_stride,
+            width,
+            height,
+        }
+    }
+}
+
+pub(crate) struct YuvGrayImage<'a, T> {
+    y_plane: &'a [T],
+    y_stride: usize,
+    width: usize,
+    height: usize,
+}
+
+impl<'a, T> YuvGrayImage<'a, T> {
+    pub(crate) fn new(y_plane: &'a [T], y_stride: usize, width: usize, height: usize) -> Self {
+        YuvGrayImage {
+            y_plane,
+            y_stride,
+            width,
+            height,
+        }
+    }
+}
+
+/// Converts a YUV 400 (grayscale) planar image to RGBA
+///
+/// Data does not need to be tightly packed: rows are read through the plane stride,
+/// so padded or cropped planes are handled in place.
+///
+/// # Arguments
+///
+/// * `image`: see [YuvGrayImage]
+/// * `rgba`: RGBA image layout
+/// * `bit_depth`: YUV and RGBA bit depth
+/// * `range`: see [YuvIntensityRange]
+/// * `matrix`: see [YuvStandardMatrix]
+///
+pub(crate) fn yuv400_to_rgba<V: Copy + AsPrimitive<i32> + 'static>(
+    image: YuvGrayImage<V>,
+    rgba: &mut [V],
+    bit_depth: u32,
+    range: YuvIntensityRange,
+    matrix: YuvStandardMatrix,
+) -> Result<(), String>
+where
+    i32: AsPrimitive<V>,
+{
+    let y_plane = image.y_plane;
+    let y_stride = image.y_stride;
+    let height = image.height;
+    let width = image.width;
+    if y_plane.len() != y_stride * height {
+        return Err(format!(
+            "Luma plane expected {} bytes, got {}",
+            y_stride * height,
+            y_plane.len()
+        ));
+    }
+    const CHANNELS: usize = 4;
+    let rgba_stride = width * CHANNELS;
+
+    // If the luma plane is already full range it can be redistributed across the image directly
+    if range == YuvIntensityRange::Pc {
+        let max_value: V = ((1i32 << bit_depth) - 1).as_();
+        let y_iter = y_plane.chunks_exact(y_stride);
+        let rgb_iter = rgba.chunks_exact_mut(rgba_stride);
+
+        for (y_src, rgb) in y_iter.zip(rgb_iter) {
+            let rgb_chunks = rgb.chunks_exact_mut(CHANNELS);
+
+            for (y_src, rgb_dst) in y_src.iter().zip(rgb_chunks) {
+                let r = *y_src;
+                rgb_dst[0] = r;
+                rgb_dst[1] = r;
+                rgb_dst[2] = r;
+                rgb_dst[3] = max_value;
+            }
+        }
+        return Ok(());
+    }
+
+    let range = get_yuv_range(bit_depth, range);
+    let kr_kb = get_kr_kb(matrix);
+    const PRECISION: i32 = 11;
+    const ROUNDING: i32 = 1 << (PRECISION - 1);
+    let inverse_transform = get_inverse_transform(
+        (1 << bit_depth) - 1,
+        range.range_y,
+        range.range_uv,
+        kr_kb.kr,
+        kr_kb.kb,
+        PRECISION as u32,
+    )?;
+    let y_coef = inverse_transform.y_coef;
+
+    let bias_y = range.bias_y as i32;
+
+    if rgba.len() != width * height * CHANNELS {
+        return Err(format!(
"RGB image layout expected {} bytes, got {}", + width * height * CHANNELS, + rgba.len() + )); + } + + let max_value = (1 << bit_depth) - 1; + + let y_iter = y_plane.chunks_exact(y_stride); + let rgb_iter = rgba.chunks_exact_mut(rgba_stride); + + for (y_src, rgb) in y_iter.zip(rgb_iter) { + let rgb_chunks = rgb.chunks_exact_mut(CHANNELS); + + for (y_src, rgb_dst) in y_src.iter().zip(rgb_chunks) { + let y_value = (y_src.as_() - bias_y) * y_coef; + + let r = ((y_value + ROUNDING) >> PRECISION).clamp(0, max_value); + rgb_dst[0] = r.as_(); + rgb_dst[1] = r.as_(); + rgb_dst[2] = r.as_(); + rgb_dst[3] = max_value.as_(); + } + } + + Ok(()) +} + +/// Converts YUV420 to Rgb +/// +/// This support not tightly packed data and crop image using stride in place. +/// Stride here is not supports u16 as it can be in passed from FFI. +/// +/// # Arguments +/// +/// * `image`: see [YuvPlanarImage] +/// * `rgb`: RGB image layout +/// * `range`: see [YuvIntensityRange] +/// * `matrix`: see [YuvStandardMatrix] +/// +/// +pub(crate) fn yuv420_to_rgba + 'static>( + image: YuvPlanarImage, + rgb: &mut [V], + bit_depth: u32, + range: YuvIntensityRange, + matrix: YuvStandardMatrix, +) -> Result<(), String> +where + i32: AsPrimitive, +{ + let y_plane = image.y_plane; + let u_plane = image.u_plane; + let v_plane = image.v_plane; + let y_stride = image.y_stride; + let u_stride = image.u_stride; + let v_stride = image.v_stride; + let chroma_height = (image.height + 1) / 2; + if y_plane.len() != y_stride * image.height { + return Err(format!( + "Luma plane expected {} bytes, got {}", + y_stride * image.height, + y_plane.len() + )); + } + + if u_plane.len() != u_stride * chroma_height { + return Err(format!( + "U plane expected {} bytes, got {}", + u_stride * chroma_height, + u_plane.len() + )); + } + + if v_plane.len() != v_stride * chroma_height { + return Err(format!( + "V plane expected {} bytes, got {}", + v_stride * chroma_height, + v_plane.len() + )); + } + + let max_value = (1 << bit_depth) - 1; + + const PRECISION: i32 = 11; + const ROUNDING: i32 = 1 << (PRECISION - 1); + + let range = get_yuv_range(bit_depth, range); + let kr_kb = get_kr_kb(matrix); + let inverse_transform = get_inverse_transform( + (1 << bit_depth) - 1, + range.range_y, + range.range_uv, + kr_kb.kr, + kr_kb.kb, + PRECISION as u32, + )?; + let cr_coef = inverse_transform.cr_coef; + let cb_coef = inverse_transform.cb_coef; + let y_coef = inverse_transform.y_coef; + let g_coef_1 = inverse_transform.g_coeff_1; + let g_coef_2 = inverse_transform.g_coeff_2; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + const CHANNELS: usize = 4; + + if rgb.len() != image.width * image.height * CHANNELS { + return Err(format!( + "RGB image layout expected {} bytes, got {}", + image.width * image.height * CHANNELS, + rgb.len() + )); + } + + let rgb_stride = image.width * CHANNELS; + + let y_iter = y_plane.chunks_exact(y_stride * 2); + let rgb_iter = rgb.chunks_exact_mut(rgb_stride * 2); + let u_iter = u_plane.chunks_exact(u_stride); + let v_iter = v_plane.chunks_exact(v_stride); + + /* + Sample 4x4 YUV420 planar image + start_y + 0: Y00 Y01 Y02 Y03 + start_y + 4: Y04 Y05 Y06 Y07 + start_y + 8: Y08 Y09 Y10 Y11 + start_y + 12: Y12 Y13 Y14 Y15 + start_cb + 0: Cb00 Cb01 + start_cb + 2: Cb02 Cb03 + start_cr + 0: Cr00 Cr01 + start_cr + 2: Cr02 Cr03 + + For 4 luma components (2x2 on rows and cols) there are 1 chroma Cb/Cr components. + Luma channel must have always exact size as RGB target layout, but chroma is not. 
+ + We're sectioning an image by pair of rows, then for each pair of luma and RGB row, + there is one chroma row. + + As chroma is shrunk by factor of 2 then we're processing by pairs of RGB and luma, + for each RGB and luma pair there is one chroma component. + + If image have odd width then luma channel must be exact, and we're replicating last + chroma component. + + If image have odd height then luma channel is exact, and we're replicating last chroma rows. + */ + + for (((y_src, u_src), v_src), rgb) in y_iter.zip(u_iter).zip(v_iter).zip(rgb_iter) { + // Since we're processing two rows in one loop we need to re-slice once more + let y_iter = y_src.chunks_exact(y_stride); + let rgb_iter = rgb.chunks_exact_mut(rgb_stride); + for (y_src, rgb) in y_iter.zip(rgb_iter) { + let y_iter = y_src.chunks_exact(2); + let rgb_chunks = rgb.chunks_exact_mut(CHANNELS * 2); + for (((y_src, &u_src), &v_src), rgb_dst) in y_iter.zip(u_src).zip(v_src).zip(rgb_chunks) + { + let y_value: i32 = (y_src[0].as_() - bias_y) * y_coef; + let cb_value: i32 = u_src.as_() - bias_uv; + let cr_value: i32 = v_src.as_() - bias_uv; + + let r = + ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = + ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) + >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + + let y_value = (y_src[1].as_() - bias_y) * y_coef; + + let r = + ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = + ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) + >> PRECISION) + .clamp(0, max_value); + + rgb_dst[4] = r.as_(); + rgb_dst[5] = g.as_(); + rgb_dst[6] = b.as_(); + rgb_dst[7] = max_value.as_(); + } + + // Process remainder if width is odd. 
+ if image.width & 1 != 0 { + let y_left = y_src.chunks_exact(2).remainder(); + let rgb_chunks = rgb + .chunks_exact_mut(CHANNELS * 2) + .into_remainder() + .chunks_exact_mut(CHANNELS); + let u_iter = u_src.iter().rev(); + let v_iter = v_src.iter().rev(); + + for (((y_src, u_src), v_src), rgb_dst) in + y_left.iter().zip(u_iter).zip(v_iter).zip(rgb_chunks) + { + let y_value = (y_src.as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) + >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + } + } + } + } + + // Process remainder if height is odd + + let y_iter = y_plane + .chunks_exact(y_stride * 2) + .remainder() + .chunks_exact(y_stride); + let rgb_iter = rgb.chunks_exact_mut(rgb_stride).rev(); + let u_iter = u_plane.chunks_exact(u_stride).rev(); + let v_iter = v_plane.chunks_exact(v_stride).rev(); + + for (((y_src, u_src), v_src), rgb) in y_iter.zip(u_iter).zip(v_iter).zip(rgb_iter) { + let y_iter = y_src.chunks_exact(2); + let rgb_chunks = rgb.chunks_exact_mut(CHANNELS * 2); + for (((y_src, u_src), v_src), rgb_dst) in y_iter.zip(u_src).zip(v_src).zip(rgb_chunks) { + let y_value = (y_src[0].as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + + let y_value = (y_src[1].as_() - bias_y) * y_coef; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[4] = r.as_(); + rgb_dst[5] = g.as_(); + rgb_dst[6] = b.as_(); + rgb_dst[7] = max_value.as_(); + } + + let y_left = y_src.chunks_exact(2).remainder(); + let rgb_chunks = rgb + .chunks_exact_mut(CHANNELS * 2) + .into_remainder() + .chunks_exact_mut(CHANNELS); + let u_iter = u_plane.iter().rev(); + let v_iter = v_plane.iter().rev(); + + // Process remainder if width is odd. + + for (((y_src, u_src), v_src), rgb_dst) in + y_left.iter().zip(u_iter).zip(v_iter).zip(rgb_chunks) + { + let y_value = (y_src.as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + } + } + + Ok(()) +} + +/// Converts Yuv 422 planar format to Rgba +/// +/// This support not tightly packed data and crop image using stride in place. 
+/// +/// # Arguments +/// +/// * `image`: see [YuvPlanarImage] +/// * `rgb`: RGB image layout +/// * `range`: see [YuvIntensityRange] +/// * `matrix`: see [YuvStandardMatrix] +/// +/// +pub(crate) fn yuv422_to_rgba + 'static>( + image: YuvPlanarImage, + rgb: &mut [V], + bit_depth: u32, + range: YuvIntensityRange, + matrix: YuvStandardMatrix, +) -> Result<(), String> +where + i32: AsPrimitive, +{ + let y_plane = image.y_plane; + let u_plane = image.u_plane; + let v_plane = image.v_plane; + let y_stride = image.y_stride; + let u_stride = image.u_stride; + let v_stride = image.v_stride; + let height = image.height; + let width = image.width; + if y_plane.len() != y_stride * height { + return Err(format!( + "Luma plane expected {} bytes, got {}", + y_stride * height, + y_plane.len() + )); + } + + if u_plane.len() != u_stride * height { + return Err(format!( + "U plane expected {} bytes, got {}", + u_stride * height, + u_plane.len() + )); + } + + if v_plane.len() != v_stride * height { + return Err(format!( + "V plane expected {} bytes, got {}", + v_stride * height, + v_plane.len() + )); + } + + let max_value = (1 << bit_depth) - 1; + + let range = get_yuv_range(bit_depth, range); + let kr_kb = get_kr_kb(matrix); + const PRECISION: i32 = 11; + const ROUNDING: i32 = 1 << (PRECISION - 1); + let inverse_transform = get_inverse_transform( + (1 << bit_depth) - 1, + range.range_y, + range.range_uv, + kr_kb.kr, + kr_kb.kb, + PRECISION as u32, + )?; + let cr_coef = inverse_transform.cr_coef; + let cb_coef = inverse_transform.cb_coef; + let y_coef = inverse_transform.y_coef; + let g_coef_1 = inverse_transform.g_coeff_1; + let g_coef_2 = inverse_transform.g_coeff_2; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + const CHANNELS: usize = 4; + + if rgb.len() != width * height * CHANNELS { + return Err(format!( + "RGB image layout expected {} bytes, got {}", + width * height * CHANNELS, + rgb.len() + )); + } + + /* + Sample 4x4 YUV422 planar image + start_y + 0: Y00 Y01 Y02 Y03 + start_y + 4: Y04 Y05 Y06 Y07 + start_y + 8: Y08 Y09 Y10 Y11 + start_y + 12: Y12 Y13 Y14 Y15 + start_cb + 0: Cb00 Cb01 + start_cb + 2: Cb02 Cb03 + start_cb + 4: Cb04 Cb05 + start_cb + 6: Cb06 Cb07 + start_cr + 0: Cr00 Cr01 + start_cr + 2: Cr02 Cr03 + start_cr + 4: Cr04 Cr05 + start_cr + 6: Cr06 Cr07 + + For 2 luma components there are 1 chroma Cb/Cr components. + Luma channel must have always exact size as RGB target layout, but chroma is not. + + As chroma is shrunk by factor of 2 then we're processing by pairs of RGB and luma, + for each RGB and luma pair there is one chroma component. + + If image have odd width then luma channel must be exact, and we're replicating last + chroma component. 
+ */ + + let rgb_stride = width * CHANNELS; + + let y_iter = y_plane.chunks_exact(y_stride); + let rgb_iter = rgb.chunks_exact_mut(rgb_stride); + let u_iter = u_plane.chunks_exact(u_stride); + let v_iter = v_plane.chunks_exact(v_stride); + + for (((y_src, u_src), v_src), rgb) in y_iter.zip(u_iter).zip(v_iter).zip(rgb_iter) { + let y_iter = y_src.chunks_exact(2); + let rgb_chunks = rgb.chunks_exact_mut(CHANNELS * 2); + + for (((y_src, u_src), v_src), rgb_dst) in y_iter.zip(u_src).zip(v_src).zip(rgb_chunks) { + let y_value = (y_src[0].as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + + let y_value = (y_src[1].as_() - bias_y) * y_coef; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[4] = r.as_(); + rgb_dst[5] = g.as_(); + rgb_dst[6] = b.as_(); + rgb_dst[7] = max_value.as_(); + } + + // Process left pixels for odd images, this should work since luma must be always exact + if width & 1 != 0 { + let y_left = y_src.chunks_exact(2).remainder(); + let rgb_chunks = rgb + .chunks_exact_mut(CHANNELS * 2) + .into_remainder() + .chunks_exact_mut(CHANNELS); + let u_iter = u_src.iter().rev(); + let v_iter = v_src.iter().rev(); + + for (((y_src, u_src), v_src), rgb_dst) in + y_left.iter().zip(u_iter).zip(v_iter).zip(rgb_chunks) + { + let y_value = (y_src.as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = + ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = + ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) + >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + } + } + } + + Ok(()) +} + +/// Converts YUV444 to Rgb +/// +/// This support not tightly packed data and crop image using stride in place. 
+/// +/// # Arguments +/// +/// * `image`: see [YuvPlanarImage] +/// * `rgb`: RGB image layout +/// * `range`: see [YuvIntensityRange] +/// * `matrix`: see [YuvStandardMatrix] +/// +/// +pub(crate) fn yuv444_to_rgba + 'static>( + image: YuvPlanarImage, + rgb: &mut [V], + bit_depth: u32, + range: YuvIntensityRange, + matrix: YuvStandardMatrix, +) -> Result<(), String> +where + i32: AsPrimitive, +{ + let y_plane = image.y_plane; + let u_plane = image.u_plane; + let v_plane = image.v_plane; + let y_stride = image.y_stride; + let u_stride = image.u_stride; + let v_stride = image.v_stride; + let height = image.height; + let width = image.width; + if y_plane.len() != y_stride * height { + return Err(format!( + "Luma plane expected {} bytes, got {}", + y_stride * height, + y_plane.len() + )); + } + + if u_plane.len() != u_stride * height { + return Err(format!( + "U plane expected {} bytes, got {}", + u_stride * height, + u_plane.len() + )); + } + + if v_plane.len() != v_stride * height { + return Err(format!( + "V plane expected {} bytes, got {}", + v_stride * height, + v_plane.len() + )); + } + + let range = get_yuv_range(bit_depth, range); + let kr_kb = get_kr_kb(matrix); + const PRECISION: i32 = 11; + const ROUNDING: i32 = 1 << (PRECISION - 1); + let inverse_transform = get_inverse_transform( + (1 << bit_depth) - 1, + range.range_y, + range.range_uv, + kr_kb.kr, + kr_kb.kb, + PRECISION as u32, + )?; + let cr_coef = inverse_transform.cr_coef; + let cb_coef = inverse_transform.cb_coef; + let y_coef = inverse_transform.y_coef; + let g_coef_1 = inverse_transform.g_coeff_1; + let g_coef_2 = inverse_transform.g_coeff_2; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + const CHANNELS: usize = 4; + + if rgb.len() != width * height * CHANNELS { + return Err(format!( + "RGB image layout expected {} bytes, got {}", + width * height * CHANNELS, + rgb.len() + )); + } + + let max_value = (1 << bit_depth) - 1; + + let rgb_stride = width * CHANNELS; + + let y_iter = y_plane.chunks_exact(y_stride); + let rgb_iter = rgb.chunks_exact_mut(rgb_stride); + let u_iter = u_plane.chunks_exact(u_stride); + let v_iter = v_plane.chunks_exact(v_stride); + + for (((y_src, u_src), v_src), rgb) in y_iter.zip(u_iter).zip(v_iter).zip(rgb_iter) { + let rgb_chunks = rgb.chunks_exact_mut(CHANNELS); + + for (((y_src, u_src), v_src), rgb_dst) in y_src.iter().zip(u_src).zip(v_src).zip(rgb_chunks) + { + let y_value = (y_src.as_() - bias_y) * y_coef; + let cb_value = u_src.as_() - bias_uv; + let cr_value = v_src.as_() - bias_uv; + + let r = ((y_value + cr_coef * cr_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let b = ((y_value + cb_coef * cb_value + ROUNDING) >> PRECISION).clamp(0, max_value); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value + ROUNDING) >> PRECISION) + .clamp(0, max_value); + + rgb_dst[0] = r.as_(); + rgb_dst[1] = g.as_(); + rgb_dst[2] = b.as_(); + rgb_dst[3] = max_value.as_(); + } + } + + Ok(()) +}
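
A minimal sanity-check sketch for the new conversion path, assuming it were added as a unit test inside src/codecs/avif/yuv.rs (the test name and the sample values are illustrative only, not part of the patch). With neutral chroma (Cb = Cr = 128 at 8-bit) the Cb/Cr terms cancel, so every output pixel must come out gray with opaque alpha, which exercises the limited-range fixed-point path end to end:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn yuv420_neutral_chroma_produces_gray_pixels() {
        // 2x2 image with 4:2:0 subsampling: four luma samples share one Cb/Cr pair.
        let y = [50u8, 100, 150, 200];
        let u = [128u8];
        let v = [128u8];
        let mut rgba = [0u8; 2 * 2 * 4];

        let image = YuvPlanarImage::new(&y[..], 2, &u[..], 1, &v[..], 1, 2, 2);
        yuv420_to_rgba(
            image,
            &mut rgba[..],
            8,
            YuvIntensityRange::Tv,
            YuvStandardMatrix::Bt601,
        )
        .expect("conversion should succeed");

        for px in rgba.chunks_exact(4) {
            // A zero chroma offset feeds identical values into the R, G and B equations,
            // and the alpha channel is always set to the maximum for the bit depth.
            assert_eq!(px[0], px[1]);
            assert_eq!(px[1], px[2]);
            assert_eq!(px[3], 255);
        }
    }
}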