Skip to content

Commit

Permalink
add rbsp::ByteReader
Browse files Browse the repository at this point in the history
I haven't removed RbspDecoder or decode_nal yet.

FIXME BEFORE MERGING: this uses unsafe. It shouldn't need to.
  • Loading branch information
scottlamb committed Jun 11, 2021
1 parent 86ac91d commit f04a8c3
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 42 deletions.
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Parser for H264 bitstream syntax. Not a video decoder.
#![forbid(unsafe_code)]
// TODO: put this back. See rbsp::ByteReader::fill_buf.
// #![forbid(unsafe_code)]
#![deny(rust_2018_idioms)]

pub mod rbsp;
Expand Down
13 changes: 8 additions & 5 deletions src/nal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod slice;
use crate::annexb::NalReader;
use std::cell::RefCell;
use crate::Context;
use crate::rbsp;
use std::fmt;
use log::*;

Expand Down Expand Up @@ -165,12 +166,14 @@ pub trait Nal {
/// If the NAL is incomplete, reads may fail with `ErrorKind::WouldBlock`.
fn reader(&self) -> Self::BufRead;

// /// Reads the bytes in RBSP form (skipping header byte and
// /// emulation-prevention-three-bytes).
// fn rbsp_bytes(&self) -> rbsp::ByteReader<Self::BufRead> { ... }
/// Reads the bytes in RBSP form (skipping header byte and
/// emulation-prevention-three-bytes).
///
/// This is a required method: each implementation builds the returned
/// [`rbsp::ByteReader`] around its own [`Self::BufRead`]. `rbsp::ByteReader::new`
/// expects its caller to have skipped the NAL header byte already, so that
/// skipping is the implementation's job.
fn rbsp_bytes(&self) -> rbsp::ByteReader<Self::BufRead>;

// /// Reads bits within the RBSP form.
// fn rbsp_bits(&self) -> rbsp::BitReader<rbsp::ByteReader<Self::BufRead>> { ... }
/// Reads bits within the RBSP form.
///
/// Default implementation: obtains the byte-level RBSP reader from
/// `rbsp_bytes` and layers an [`rbsp::BitReader`] on top of it.
fn rbsp_bits(&self) -> rbsp::BitReader<rbsp::ByteReader<Self::BufRead>> {
    let rbsp_bytes = self.rbsp_bytes();
    rbsp::BitReader::new(rbsp_bytes)
}
}

#[derive(Debug)]
Expand Down
17 changes: 17 additions & 0 deletions src/push/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Push parsing of encoded NALs.
use crate::nal::{NalHeader, NalHeaderError};
use crate::rbsp;

use super::nal::Nal;

Expand Down Expand Up @@ -122,6 +123,16 @@ impl<'a> Nal for AccumulatedNal<'a> {
complete: self.complete,
}
}

/// Returns a reader over this NAL's RBSP bytes: everything after the NAL
/// header byte, with emulation-prevention-three-bytes removed by
/// [`rbsp::ByteReader`].
///
/// # Panics
///
/// Panics if `first_chunk` is empty (unchanged from the previous behavior).
// NOTE(review): presumably construction guarantees a non-empty first chunk — confirm.
#[inline]
fn rbsp_bytes(&self) -> rbsp::ByteReader<Self::BufRead> {
    // Skip the NAL header byte.
    let after_header = &self.first_chunk[1..];

    // If the first chunk held only the header byte, start directly on the
    // second chunk rather than handing the reader an empty `cur` while `next`
    // still has data.
    // NOTE(review): presumably AccumulatedNalReader only moves from `cur` to
    // `next` on `consume`, so an initially-empty `cur` would be reported as
    // end-of-data (or WouldBlock) before `next` is ever looked at — confirm.
    let empty: &[u8] = &[];
    let (cur, next) = if after_header.is_empty() {
        (self.second_chunk, empty)
    } else {
        (after_header, self.second_chunk)
    };

    rbsp::ByteReader::new(AccumulatedNalReader {
        cur,
        next,
        complete: self.complete,
    })
}
}
impl<'a> std::fmt::Debug for AccumulatedNal<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Expand Down Expand Up @@ -254,6 +265,12 @@ mod test {
assert_eq!(r.fill_buf().unwrap(), &[1, 2, 3, 4]);
r.consume(4);
assert_eq!(r.fill_buf().unwrap(), &[]);

// RBSP.
let nal = AccumulatedNal::new([&[0b0101_0001, 1, 0, 0], &[3, 2, 0, 3, 5]], true);
let mut rbsp = Vec::new();
nal.rbsp_bytes().read_to_end(&mut rbsp).unwrap();
assert_eq!(&rbsp[..], &[1, 0, 0, 2, 0, 3, 5]);
}

#[test]
Expand Down
177 changes: 141 additions & 36 deletions src/rbsp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,126 @@
use bitstream_io::read::BitRead;
use std::borrow::Cow;
use std::io::BufRead;
use std::io::Read;
use crate::nal::{NalHandler, NalHeader};
use crate::Context;

/// Progress through a potential `00 00 03` emulation-prevention sequence.
///
/// The state always describes the bytes *preceding* the next byte to be
/// examined, so it can be carried across chunk boundaries.
// A single derive list: stacking a second `#[derive(Debug)]` on top of this one
// would produce conflicting `Debug` implementations.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum ParseState {
    /// Not inside a candidate sequence; the scanner looks for the next `0x00`.
    Start,
    /// The preceding byte was a `0x00` that began a candidate sequence.
    OneZero,
    /// The preceding two bytes were `0x00 0x00`; a following `0x03` is an
    /// emulation_prevention_three_byte.
    TwoZero,
}

/// [BufRead] adapter which removes `emulation-prevention-three-byte`s.
///
/// The adapter keeps no buffer of its own: the slices it hands out from
/// `fill_buf` are prefixes of the inner reader's current buffer, and `consume`
/// is forwarded to the inner reader.
pub struct ByteReader<R: BufRead> {
// Invariants tying the three fields together:
//
// self.inner[0..self.i] hasn't yet been emitted and is RBSP (has no
// emulation_prevention_three_bytes).
//
// self.state describes the state before self.inner[self.i].
//
// self.inner[self.i..] has yet to be examined.

// The underlying reader, positioned after the NAL header byte (see `new`).
inner: R,
// Scanner state for the `00 00 03` search, carried across inner chunks.
state: ParseState,
// Number of leading bytes of the inner buffer already verified as RBSP and
// ready to be handed to the caller.
i: usize,
}
impl<R: BufRead> ByteReader<R> {
/// Constructs an adapter from the given [BufRead]. The caller is expected to have skipped
/// the NAL header byte already.
pub fn new(inner: R) -> Self {
Self {
inner,
state: ParseState::Start,
i: 0,
}
}
}
impl<R: BufRead> Read for ByteReader<R> {
    /// Copies as many ready RBSP bytes as fit into `buf`, via `fill_buf` and
    /// `consume`, and returns the number of bytes copied. A single call never
    /// copies more than one `fill_buf` result.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let available = self.fill_buf()?;
        let n = buf.len().min(available.len());
        match n {
            // Same special case as std's `Read` impl for `&[u8]`: a one-byte
            // copy is apparently faster as a plain assignment.
            1 => buf[0] = available[0],
            _ => buf[..n].copy_from_slice(&available[..n]),
        }
        self.consume(n);
        Ok(n)
    }
}
impl<R: BufRead> BufRead for ByteReader<R> {
    /// Returns the next run of RBSP bytes: a prefix of the inner reader's
    /// buffer that ends at the next emulation_prevention_three_byte or at the
    /// end of the inner chunk, whichever comes first.
    ///
    /// Returns an empty slice only when the inner reader does. Errors from the
    /// inner reader are propagated unchanged.
    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
        // `self.i == 0` means no examined-but-unemitted bytes are pending, so
        // more of the inner buffer must be scanned. Nothing borrowed from
        // `self.inner` inside this loop is ever returned, which is what lets
        // the borrow checker accept it without `unsafe`.
        while self.i == 0 {
            let chunk = self.inner.fill_buf()?;
            if chunk.is_empty() {
                return Ok(&[]);
            }
            if find_three(&mut self.state, &mut self.i, chunk) && self.i == 0 {
                // The very next inner byte is an emulation_prevention_three_byte:
                // drop it and rescan from the byte after it.
                // TODO: didn't really need to re-examine this. could have Skip state.
                // Also, it could be used to skip over the header byte.
                self.inner.consume(1);
                self.state = ParseState::Start;
            }
            // Otherwise find_three advanced `self.i` past at least one byte
            // (up to a three-byte or to the end of the chunk) and the loop ends.
        }

        // Re-borrow the inner buffer. Nothing has been consumed since the
        // first `self.i` bytes were examined, so they are still at the front.
        let chunk = self.inner.fill_buf()?;
        Ok(&chunk[..self.i])
    }

    /// Marks `amt` bytes of the last `fill_buf` result as consumed.
    ///
    /// # Panics
    ///
    /// Panics if `amt` exceeds the number of bytes last returned by `fill_buf`.
    fn consume(&mut self, amt: usize) {
        self.i = self
            .i
            .checked_sub(amt)
            .expect("consume amt must not exceed the length last returned by fill_buf");
        self.inner.consume(amt);
    }
}

/// Scans `chunk` from index `*i` for an emulation_prevention_three_byte,
/// advancing `*i` and `*state` as it goes.
///
/// Returns `true` when one is found: `*i` then indexes the `0x03` byte and
/// `*state` is left untouched, so the caller must finish updating `state`/`i`
/// itself (the two callers do different things). Returns `false` once the whole
/// chunk has been examined, with `*i == chunk.len()`.
fn find_three(state: &mut ParseState, i: &mut usize, chunk: &[u8]) -> bool {
    while let Some(&byte) = chunk.get(*i) {
        let next_state = match *state {
            // Jump straight to the next zero byte, if there is one.
            ParseState::Start => match memchr::memchr(0x00, &chunk[*i..]) {
                Some(zero_offset) => {
                    *i += zero_offset;
                    ParseState::OneZero
                }
                None => {
                    *i = chunk.len();
                    return false;
                }
            },
            ParseState::OneZero if byte == 0x00 => ParseState::TwoZero,
            ParseState::OneZero => ParseState::Start,
            ParseState::TwoZero if byte == 0x03 => return true,
            // I see example PES packet payloads that end with 0x80 0x00 0x00 0x00,
            // which triggered this error; guess the example is correct and this code
            // was wrong, but not sure why!
            // 0x00 => return Err(...),
            ParseState::TwoZero => ParseState::Start,
        };
        *state = next_state;
        *i += 1;
    }
    false
}

/// Push parser which removes _emulation prevention_ as it calls
/// an inner [NalHandler]. Expects to be called without the NAL header byte.
pub struct RbspDecoder<R>
Expand Down Expand Up @@ -84,39 +194,14 @@ impl<R> NalHandler for RbspDecoder<R>
// buf[i..] has yet to be examined.
let mut i = 0;
while i < buf.len() {
match self.state {
ParseState::Start => match memchr::memchr(0x00, &buf[i..]) {
Some(nonzero_len) => {
i += nonzero_len;
self.to(ParseState::OneZero);
},
None => break,
},
ParseState::OneZero => match buf[i] {
0x00 => self.to(ParseState::TwoZero),
_ => self.to(ParseState::Start),
},
ParseState::TwoZero => match buf[i] {
0x03 => {
// Found an emulation_prevention_three_byte; skip it.
let (rbsp, three_onward) = buf.split_at(i);
self.emit(ctx, rbsp);
buf = &three_onward[1..];
i = 0;
// TODO: per spec, the next byte should be either 0x00, 0x1, 0x02 or
// 0x03, but at the moment we assume this without checking for
// correctness
self.to(ParseState::Start);
continue; // don't increment i; buf[0] hasn't been examined yet.
},
// I see example PES packet payloads that end with 0x80 0x00 0x00 0x00,
// which triggered this error; guess the example is correct and this code
// was wrong, but not sure why!
// 0x00 => { self.err(b); },
_ => self.to(ParseState::Start),
},
if find_three(&mut self.state, &mut i, buf) {
// i now indexes the emulation_prevention_three_byte.
let (rbsp, three_onward) = buf.split_at(i);
self.emit(ctx, rbsp);
buf = &three_onward[1..];
i = 0;
self.state = ParseState::Start;
}
i += 1;
}

// buf is now entirely RBSP.
Expand Down Expand Up @@ -196,10 +281,10 @@ pub enum BitReaderError {

/// Reads H.264 bitstream syntax elements from an RBSP representation (no NAL
/// header byte or emulation prevention three bytes).
pub struct BitReader<R: std::io::BufRead> {
pub struct BitReader<R: BufRead> {
reader: bitstream_io::read::BitReader<R, bitstream_io::BigEndian>,
}
impl<R: std::io::BufRead> BitReader<R> {
impl<R: BufRead> BitReader<R> {
pub fn new(inner: R) -> Self {
Self { reader: bitstream_io::read::BitReader::new(inner) }
}
Expand Down Expand Up @@ -271,6 +356,7 @@ mod tests {
use std::rc::Rc;
use std::cell::RefCell;
use hex_literal::*;
use hex_slice::AsHex;

struct State {
started: bool,
Expand Down Expand Up @@ -304,7 +390,7 @@ mod tests {
}

#[test]
fn it_works() {
fn push_decoder() {
let data = hex!(
"67 64 00 0A AC 72 84 44 26 84 00 00 03
00 04 00 00 03 00 CA 3C 48 96 11 80");
Expand All @@ -328,6 +414,25 @@ mod tests {
}
}

/// Feeds the same encoded NAL through `ByteReader` split at every interior
/// point, checking that both emulation-prevention bytes are removed no matter
/// where the chunk boundary falls.
#[test]
fn byte_reader() {
    let data = hex!(
        "67 64 00 0A AC 72 84 44 26 84 00 00 03
00 04 00 00 03 00 CA 3C 48 96 11 80");
    // Same bytes with the two 0x03 emulation-prevention bytes removed.
    let expected = hex!(
        "67 64 00 0A AC 72 84 44 26 84 00 00
00 04 00 00 00 CA 3C 48 96 11 80");
    for split in 1..data.len()-1 {
        println!("split point {}", split);
        let (head, tail) = data.split_at(split);
        let mut reader = ByteReader::new(head.chain(tail));
        let mut rbsp = Vec::new();
        reader.read_to_end(&mut rbsp).unwrap();
        assert!(rbsp == &expected[..], "Mismatch:\nrbsp {:02x}\nexpected {:02x}", rbsp.as_hex(), expected.as_hex());
    }
}

#[test]
fn decode_single_nal() {
let data = hex!(
Expand Down

0 comments on commit f04a8c3

Please sign in to comment.