From 7ffae8116d8f81ff5f4767325037dc5ddf32e594 Mon Sep 17 00:00:00 2001 From: Scott Lamb Date: Mon, 12 Aug 2024 14:54:00 -0400 Subject: [PATCH] touch-ups to `rbsp` interface These improve suitability for parsing H.265 as well as H.264. The changes to `ByteReader` construction are because H.265 has two-byte NAL headers, so automatically skipping one byte is super confusing. Arguably it was confusing even with H.264. Make the skip explicit. Additionally, `BitRead::read_to` is nice for loading stuff directly into primitives such as `[u8; 11]`. That's the representation that makes the most sense for the H.265 profile (as defined in section 7.3.3, `profile_tier_level`, `if( profilePresentFlag )` block). When constructing a `HEVCDecoderConfigurationRecord`, we need that full 11-byte span; when constructing the RFC 6381 codec string, we need a 6-byte span. So parsing the whole thing into individual fields means reconstructing these later. And finally, I renamed methods to more closely match `bitstream-io`, which is what `h264-reader` is using internally now and is overwhelmingly the commonly used crate for bitstream parsing in general. --- CHANGELOG.md | 6 ++ Cargo.toml | 2 +- src/lib.rs | 2 + src/nal/mod.rs | 8 +-- src/nal/pps.rs | 4 +- src/nal/sei/buffering_period.rs | 5 +- src/nal/sei/pic_timing.rs | 26 ++++---- src/nal/slice/mod.rs | 6 +- src/nal/sps.rs | 39 ++++++----- src/rbsp.rs | 111 +++++++++++++++++++++++--------- 10 files changed, 131 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d36e46c..63eca0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ * BREAKING CHANGE: The `ParamSetId` type has been removed and replaced with separate `PicParamSetId` and `SeqParamSetId` types, since the allowed range of values needs to be different in these two usages. +* BREAKING CHANGE: The `rbsp::ByteReader::new` constructor has been removed in favor of more explicit + `ByteReader::skipping_h264_header`, alongside the new `ByteReader::without_skip` and `ByteReader::skipping_bytes` + that are suitable for other situations or parsing H.265 streams with two-byte NAL headers. +* BREAKING CHANGE: the `rbsp::BitReaderError::ReadError` has been removed; methods consistently return + the variant `rbsp::BitReaderError::ReadErrorFor` which additionally supplies the field name. +* BREAKING CHANGE: some methods in `rbsp::BitRead` have been renamed to match the `bitstream-io` conventions. ## 0.7.0 - 2023-05-30 diff --git a/Cargo.toml b/Cargo.toml index cc1554e..298bf95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "h264-reader" -version = "0.7.1-dev" +version = "0.8.0-dev" authors = ["David Holroyd "] license = "MIT/Apache-2.0" description = "Reader for H264 bitstream syntax" diff --git a/src/lib.rs b/src/lib.rs index a7a7bb7..1c6add6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ use std::fmt::Debug; +pub use bitstream_io; + pub mod annexb; pub mod avcc; pub mod nal; diff --git a/src/nal/mod.rs b/src/nal/mod.rs index 23797cb..13d69c2 100644 --- a/src/nal/mod.rs +++ b/src/nal/mod.rs @@ -194,9 +194,9 @@ impl fmt::Debug for NalHeader { /// /// // Reading RBSP as a bit sequence: /// let mut r = nal.rbsp_bits(); -/// assert_eq!(r.read_u8(4, "first nibble").unwrap(), 0x1); -/// assert_eq!(r.read_u8(4, "second nibble").unwrap(), 0x2); -/// assert_eq!(r.read_u32(23, "23 bits at a time").unwrap(), 0x1a_00_00); +/// assert_eq!(r.read::(4, "first nibble").unwrap(), 0x1); +/// assert_eq!(r.read::(4, "second nibble").unwrap(), 0x2); +/// assert_eq!(r.read::(23, "23 bits at a time").unwrap(), 0x1a_00_00); /// assert!(r.has_more_rbsp_data("more left").unwrap()); /// ``` pub trait Nal { @@ -217,7 +217,7 @@ pub trait Nal { /// emulation-prevention-three-bytes). #[inline] fn rbsp_bytes(&self) -> rbsp::ByteReader { - rbsp::ByteReader::new(self.reader()) + rbsp::ByteReader::skipping_h264_header(self.reader()) } /// Reads bits within the RBSP form. diff --git a/src/nal/pps.rs b/src/nal/pps.rs index 8b4f6d2..dc3dd3a 100644 --- a/src/nal/pps.rs +++ b/src/nal/pps.rs @@ -134,7 +134,7 @@ impl SliceGroup { let size = (1f64 + f64::from(num_slice_groups_minus1)).log2().ceil() as u32; let mut run_length_minus1 = Vec::with_capacity(num_slice_groups_minus1 as usize + 1); for _ in 0..pic_size_in_map_units_minus1 + 1 { - run_length_minus1.push(r.read_u32(size, "slice_group_id")?); + run_length_minus1.push(r.read(size, "slice_group_id")?); } Ok(run_length_minus1) } @@ -271,7 +271,7 @@ impl PicParameterSet { "num_ref_idx_l1_default_active_minus1", )?, weighted_pred_flag: r.read_bool("weighted_pred_flag")?, - weighted_bipred_idc: r.read_u8(2, "weighted_bipred_idc")?, + weighted_bipred_idc: r.read(2, "weighted_bipred_idc")?, pic_init_qp_minus26: r.read_se("pic_init_qp_minus26")?, pic_init_qs_minus26: r.read_se("pic_init_qs_minus26")?, chroma_qp_index_offset: r.read_se("chroma_qp_index_offset")?, diff --git a/src/nal/sei/buffering_period.rs b/src/nal/sei/buffering_period.rs index d4983d4..05d5483 100644 --- a/src/nal/sei/buffering_period.rs +++ b/src/nal/sei/buffering_period.rs @@ -36,9 +36,8 @@ fn read_cpb_removal_delay_list( let mut res = vec![]; for _ in 0..count { res.push(InitialCpbRemoval { - initial_cpb_removal_delay: r.read_u32(length, "initial_cpb_removal_delay")?, - initial_cpb_removal_delay_offset: r - .read_u32(length, "initial_cpb_removal_delay_offset")?, + initial_cpb_removal_delay: r.read(length, "initial_cpb_removal_delay")?, + initial_cpb_removal_delay_offset: r.read(length, "initial_cpb_removal_delay_offset")?, }); } Ok(res) diff --git a/src/nal/sei/pic_timing.rs b/src/nal/sei/pic_timing.rs index 4c2c96e..f44b3ca 100644 --- a/src/nal/sei/pic_timing.rs +++ b/src/nal/sei/pic_timing.rs @@ -171,25 +171,25 @@ impl ClockTimestamp { r: &mut R, sps: &sps::SeqParameterSet, ) -> Result { - let ct_type = CtType::from_id(r.read_u8(2, "ct_type")?); + let ct_type = CtType::from_id(r.read(2, "ct_type")?); let nuit_field_based_flag = r.read_bool("nuit_field_based_flag")?; - let counting_type = CountingType::from_id(r.read_u8(5, "counting_type")?); + let counting_type = CountingType::from_id(r.read(5, "counting_type")?); let full_timestamp_flag = r.read_bool("full_timestamp_flag")?; let discontinuity_flag = r.read_bool("discontinuity_flag")?; let cnt_dropped_flag = r.read_bool("cnt_dropped_flag")?; - let n_frames = r.read_u8(8, "n_frames")?; + let n_frames = r.read(8, "n_frames")?; let smh = if full_timestamp_flag { SecMinHour::SMH( - r.read_u8(6, "seconds_value")?, - r.read_u8(6, "minutes_value")?, - r.read_u8(5, "hours_value")?, + r.read(6, "seconds_value")?, + r.read(6, "minutes_value")?, + r.read(5, "hours_value")?, ) } else if r.read_bool("seconds_flag")? { - let seconds = r.read_u8(6, "seconds_value")?; + let seconds = r.read(6, "seconds_value")?; if r.read_bool("minutes_flag")? { - let minutes = r.read_u8(6, "minutes_value")?; + let minutes = r.read(6, "minutes_value")?; if r.read_bool("hours_flag")? { - let hours = r.read_u8(5, "hours_value")?; + let hours = r.read(5, "hours_value")?; SecMinHour::SMH(seconds, minutes, hours) } else { SecMinHour::SM(seconds, minutes) @@ -214,7 +214,7 @@ impl ClockTimestamp { let time_offset = if time_offset_length == 0 { None } else { - Some(r.read_i32(u32::from(time_offset_length), "time_offset_length")?) + Some(r.read(u32::from(time_offset_length), "time_offset_length")?) }; Ok(ClockTimestamp { ct_type, @@ -269,11 +269,11 @@ impl PicTiming { .or_else(|| vui_params.nal_hrd_parameters.as_ref()) { Some(Delays { - cpb_removal_delay: r.read_u32( + cpb_removal_delay: r.read( u32::from(hrd.cpb_removal_delay_length_minus1) + 1, "cpb_removal_delay", )?, - dpb_output_delay: r.read_u32( + dpb_output_delay: r.read( u32::from(hrd.dpb_output_delay_length_minus1) + 1, "dpb_output_delay", )?, @@ -292,7 +292,7 @@ impl PicTiming { ) -> Result, PicTimingError> { Ok(if let Some(ref vui_params) = sps.vui_parameters { if vui_params.pic_struct_present_flag { - let pic_struct = PicStructType::from_id(r.read_u8(4, "pic_struct")?)?; + let pic_struct = PicStructType::from_id(r.read(4, "pic_struct")?)?; let clock_timestamps = Self::read_clock_timestamps(r, &pic_struct, sps)?; Some(PicStruct { diff --git a/src/nal/slice/mod.rs b/src/nal/slice/mod.rs index 68feb23..41e1f21 100644 --- a/src/nal/slice/mod.rs +++ b/src/nal/slice/mod.rs @@ -453,11 +453,11 @@ impl SliceHeader { SliceHeaderError::UndefinedSeqParamSetId(pps.seq_parameter_set_id), )?; let colour_plane = if sps.chroma_info.separate_colour_plane_flag { - Some(ColourPlane::from_id(r.read_u8(2, "colour_plane_id")?)?) + Some(ColourPlane::from_id(r.read(2, "colour_plane_id")?)?) } else { None }; - let frame_num = r.read_u16(u32::from(sps.log2_max_frame_num()), "frame_num")?; + let frame_num = r.read(u32::from(sps.log2_max_frame_num()), "frame_num")?; let field_pic = if let sps::FrameMbsFlags::Fields { .. } = sps.frame_mbs_flags { if r.read_bool("field_pic_flag")? { if r.read_bool("bottom_field_flag")? { @@ -481,7 +481,7 @@ impl SliceHeader { sps::PicOrderCntType::TypeZero { log2_max_pic_order_cnt_lsb_minus4, } => { - let pic_order_cnt_lsb = r.read_u32( + let pic_order_cnt_lsb = r.read( u32::from(log2_max_pic_order_cnt_lsb_minus4) + 4, "pic_order_cnt_lsb", )?; diff --git a/src/nal/sps.rs b/src/nal/sps.rs index 8ba2ac4..112c589 100644 --- a/src/nal/sps.rs +++ b/src/nal/sps.rs @@ -545,7 +545,7 @@ impl AspectRatioInfo { fn read(r: &mut R) -> Result, BitReaderError> { let aspect_ratio_info_present_flag = r.read_bool("aspect_ratio_info_present_flag")?; Ok(if aspect_ratio_info_present_flag { - let aspect_ratio_idc = r.read_u8(8, "aspect_ratio_idc")?; + let aspect_ratio_idc = r.read(8, "aspect_ratio_idc")?; Some(match aspect_ratio_idc { 0 => AspectRatioInfo::Unspecified, 1 => AspectRatioInfo::Ratio1_1, @@ -564,10 +564,9 @@ impl AspectRatioInfo { 14 => AspectRatioInfo::Ratio4_3, 15 => AspectRatioInfo::Ratio3_2, 16 => AspectRatioInfo::Ratio2_1, - 255 => AspectRatioInfo::Extended( - r.read_u16(16, "sar_width")?, - r.read_u16(16, "sar_height")?, - ), + 255 => { + AspectRatioInfo::Extended(r.read(16, "sar_width")?, r.read(16, "sar_height")?) + } _ => AspectRatioInfo::Reserved(aspect_ratio_idc), }) } else { @@ -670,9 +669,9 @@ impl ColourDescription { let colour_description_present_flag = r.read_bool("colour_description_present_flag")?; Ok(if colour_description_present_flag { Some(ColourDescription { - colour_primaries: r.read_u8(8, "colour_primaries")?, - transfer_characteristics: r.read_u8(8, "transfer_characteristics")?, - matrix_coefficients: r.read_u8(8, "matrix_coefficients")?, + colour_primaries: r.read(8, "colour_primaries")?, + transfer_characteristics: r.read(8, "transfer_characteristics")?, + matrix_coefficients: r.read(8, "matrix_coefficients")?, }) } else { None @@ -691,7 +690,7 @@ impl VideoSignalType { let video_signal_type_present_flag = r.read_bool("video_signal_type_present_flag")?; Ok(if video_signal_type_present_flag { Some(VideoSignalType { - video_format: VideoFormat::from(r.read_u8(3, "video_format")?), + video_format: VideoFormat::from(r.read(3, "video_format")?), video_full_range_flag: r.read_bool("video_full_range_flag")?, colour_description: ColourDescription::read(r)?, }) @@ -732,8 +731,8 @@ impl TimingInfo { let timing_info_present_flag = r.read_bool("timing_info_present_flag")?; Ok(if timing_info_present_flag { Some(TimingInfo { - num_units_in_tick: r.read_u32(32, "num_units_in_tick")?, - time_scale: r.read_u32(32, "time_scale")?, + num_units_in_tick: r.read(32, "num_units_in_tick")?, + time_scale: r.read(32, "time_scale")?, fixed_frame_rate_flag: r.read_bool("fixed_frame_rate_flag")?, }) } else { @@ -782,14 +781,14 @@ impl HrdParameters { } let cpb_cnt = cpb_cnt_minus1 + 1; Some(HrdParameters { - bit_rate_scale: r.read_u8(4, "bit_rate_scale")?, - cpb_size_scale: r.read_u8(4, "cpb_size_scale")?, + bit_rate_scale: r.read(4, "bit_rate_scale")?, + cpb_size_scale: r.read(4, "cpb_size_scale")?, cpb_specs: Self::read_cpb_specs(r, cpb_cnt)?, initial_cpb_removal_delay_length_minus1: r - .read_u8(5, "initial_cpb_removal_delay_length_minus1")?, - cpb_removal_delay_length_minus1: r.read_u8(5, "cpb_removal_delay_length_minus1")?, - dpb_output_delay_length_minus1: r.read_u8(5, "dpb_output_delay_length_minus1")?, - time_offset_length: r.read_u8(5, "time_offset_length")?, + .read(5, "initial_cpb_removal_delay_length_minus1")?, + cpb_removal_delay_length_minus1: r.read(5, "cpb_removal_delay_length_minus1")?, + dpb_output_delay_length_minus1: r.read(5, "dpb_output_delay_length_minus1")?, + time_offset_length: r.read(5, "time_offset_length")?, }) } else { None @@ -894,11 +893,11 @@ pub struct SeqParameterSet { } impl SeqParameterSet { pub fn from_bits(mut r: R) -> Result { - let profile_idc = r.read_u8(8, "profile_idc")?.into(); + let profile_idc = r.read::(8, "profile_idc")?.into(); let sps = SeqParameterSet { profile_idc, - constraint_flags: r.read_u8(8, "constraint_flags")?.into(), - level_idc: r.read_u8(8, "level_idc")?, + constraint_flags: r.read::(8, "constraint_flags")?.into(), + level_idc: r.read(8, "level_idc")?, seq_parameter_set_id: SeqParamSetId::from_u32(r.read_ue("seq_parameter_set_id")?) .map_err(SpsError::BadSeqParamSetId)?, chroma_info: ChromaInfo::read(&mut r, profile_idc)?, diff --git a/src/rbsp.rs b/src/rbsp.rs index e3044fd..704a098 100644 --- a/src/rbsp.rs +++ b/src/rbsp.rs @@ -26,19 +26,27 @@ use bitstream_io::read::BitRead as _; use std::borrow::Cow; use std::io::BufRead; use std::io::Read; +use std::num::NonZeroUsize; #[derive(Copy, Clone, Debug)] enum ParseState { Start, OneZero, TwoZero, - HeaderByte, + Skip(NonZeroUsize), Three, PostThree, } -/// [`BufRead`] adapter which returns RBSP bytes given NAL bytes by removing -/// the NAL header and `emulation-prevention-three` bytes. +const H264_HEADER_LEN: NonZeroUsize = match NonZeroUsize::new(1) { + Some(one) => one, + None => panic!("1 should be non-zero"), +}; + +/// [`BufRead`] adapter which returns RBSP from NAL bytes. +/// +/// This optionally skips a given number of leading bytes, then returns any bytes except the +/// `emulation-prevention-three` bytes. /// /// See also [module docs](self). /// @@ -57,16 +65,39 @@ pub struct ByteReader { i: usize, /// The maximum number of bytes in a fresh chunk. Surprisingly, it's - /// significantly faster to limit this, maybe due to CPU cache effects. + /// significantly faster to limit this, maybe due to CPU cache effects, or + /// maybe because it's common to examine at most the headers of large slice NALs. max_fill: usize, } impl ByteReader { - /// Constructs an adapter from the given [BufRead]. The NAL header byte is - /// expected to be present. - pub fn new(inner: R) -> Self { + /// Constructs an adapter from the given [`BufRead`] which does not skip any initial bytes. + pub fn without_skip(inner: R) -> Self { + Self { + inner, + state: ParseState::Start, + i: 0, + max_fill: 128, + } + } + + /// Constructs an adapter from the given [`BufRead`] which skips the 1-byte H.264 header. + pub fn skipping_h264_header(inner: R) -> Self { + Self { + inner, + state: ParseState::Skip(H264_HEADER_LEN), + i: 0, + max_fill: 128, + } + } + + /// Constructs an adapter from the given [`BufRead`] which will skip over the first `skip` bytes. + /// + /// This can be useful for parsing H.265, which uses the same + /// `emulation-prevention-three-bytes` convention but two-byte NAL headers. + pub fn skipping_bytes(inner: R, skip: NonZeroUsize) -> Self { Self { inner, - state: ParseState::HeaderByte, + state: ParseState::Skip(skip), i: 0, max_fill: 128, } @@ -112,10 +143,13 @@ impl ByteReader { } _ => self.state = ParseState::Start, }, - ParseState::HeaderByte => { + ParseState::Skip(remaining) => { debug_assert_eq!(self.i, 0); - self.inner.consume(1); - self.state = ParseState::Start; + let skip = std::cmp::min(chunk.len(), remaining.get()); + self.inner.consume(skip); + self.state = NonZeroUsize::new(remaining.get() - skip) + .map(ParseState::Skip) + .unwrap_or(ParseState::Start); break; } ParseState::Three => { @@ -197,7 +231,7 @@ impl BufRead for ByteReader { pub fn decode_nal<'a>(nal_unit: &'a [u8]) -> Result, std::io::Error> { let mut reader = ByteReader { inner: nal_unit, - state: ParseState::HeaderByte, + state: ParseState::Skip(H264_HEADER_LEN), i: 0, max_fill: usize::MAX, // to borrow if at all possible. }; @@ -221,7 +255,6 @@ pub fn decode_nal<'a>(nal_unit: &'a [u8]) -> Result, std::io::Erro #[derive(Debug)] pub enum BitReaderError { - ReaderError(std::io::Error), ReaderErrorFor(&'static str, std::io::Error), /// An Exp-Golomb-coded syntax elements value has more than 32 bits. @@ -233,14 +266,30 @@ pub enum BitReaderError { Unaligned, } +pub use bitstream_io::{Numeric, Primitive}; + pub trait BitRead { + /// Reads an unsigned Exp-Golomb-coded value, as defined in the H.264 spec. fn read_ue(&mut self, name: &'static str) -> Result; + + /// Reads a signed Exp-Golomb-coded value, as defined in the H.264 spec. fn read_se(&mut self, name: &'static str) -> Result; + + /// Reads a single bit, as in [`crate::bitstream_io::read::BitRead::read_bool`]. fn read_bool(&mut self, name: &'static str) -> Result; - fn read_u8(&mut self, bit_count: u32, name: &'static str) -> Result; - fn read_u16(&mut self, bit_count: u32, name: &'static str) -> Result; - fn read_u32(&mut self, bit_count: u32, name: &'static str) -> Result; - fn read_i32(&mut self, bit_count: u32, name: &'static str) -> Result; + + /// Reads an unsigned value from the bitstream with the given number of bytes, as in + /// [`crate::bitstream_io::read::BitRead::read`]. + fn read(&mut self, bit_count: u32, name: &'static str) + -> Result; + + /// Reads a whole value from the bitstream whose size is equal to its byte size, as in + /// [`crate::bitstream_io::read::BitRead::read_to`]. + fn read_to(&mut self, name: &'static str) -> Result; + + /// Skips the given number of bits in the bitstream, as in + /// [`crate::bitstream_io::read::BitRead::skip`]. + fn skip(&mut self, bit_count: u32, name: &'static str) -> Result<(), BitReaderError>; /// Returns true if positioned before the RBSP trailing bits. /// @@ -294,7 +343,7 @@ impl BitRead for BitReader { if count > 31 { return Err(BitReaderError::ExpGolombTooLarge(name)); } else if count > 0 { - let val = self.read_u32(count, name)?; + let val: u32 = self.read(count, name)?; Ok((1 << count) - 1 + val) } else { Ok(0) @@ -311,27 +360,25 @@ impl BitRead for BitReader { .map_err(|e| BitReaderError::ReaderErrorFor(name, e)) } - fn read_u8(&mut self, bit_count: u32, name: &'static str) -> Result { - self.reader - .read(bit_count) - .map_err(|e| BitReaderError::ReaderErrorFor(name, e)) - } - - fn read_u16(&mut self, bit_count: u32, name: &'static str) -> Result { + fn read( + &mut self, + bit_count: u32, + name: &'static str, + ) -> Result { self.reader .read(bit_count) .map_err(|e| BitReaderError::ReaderErrorFor(name, e)) } - fn read_u32(&mut self, bit_count: u32, name: &'static str) -> Result { + fn read_to(&mut self, name: &'static str) -> Result { self.reader - .read(bit_count) + .read_to() .map_err(|e| BitReaderError::ReaderErrorFor(name, e)) } - fn read_i32(&mut self, bit_count: u32, name: &'static str) -> Result { + fn skip(&mut self, bit_count: u32, name: &'static str) -> Result<(), BitReaderError> { self.reader - .read(bit_count) + .skip(bit_count) .map_err(|e| BitReaderError::ReaderErrorFor(name, e)) } @@ -404,7 +451,7 @@ mod tests { for i in 1..data.len() - 1 { let (head, tail) = data.split_at(i); let r = head.chain(tail); - let mut r = ByteReader::new(r); + let mut r = ByteReader::skipping_h264_header(r); let mut rbsp = Vec::new(); r.read_to_end(&mut rbsp).unwrap(); let expected = hex!( @@ -426,13 +473,13 @@ mod tests { // Should work when the end bit is byte-aligned. let mut reader = BitReader::new(&[0x12, 0x80][..]); assert!(reader.has_more_rbsp_data("call 1").unwrap()); - assert_eq!(reader.read_u8(8, "u8 1").unwrap(), 0x12); + assert_eq!(reader.read::(8, "u8 1").unwrap(), 0x12); assert!(!reader.has_more_rbsp_data("call 2").unwrap()); // and when it's not. let mut reader = BitReader::new(&[0x18][..]); assert!(reader.has_more_rbsp_data("call 3").unwrap()); - assert_eq!(reader.read_u8(4, "u8 2").unwrap(), 0x1); + assert_eq!(reader.read::(4, "u8 2").unwrap(), 0x1); assert!(!reader.has_more_rbsp_data("call 4").unwrap()); // should also work when there are cabac-zero-words.