diff --git a/MSVC/m4acut.vcxproj b/MSVC/m4acut.vcxproj index 6f286d1..befdca9 100644 --- a/MSVC/m4acut.vcxproj +++ b/MSVC/m4acut.vcxproj @@ -1,4 +1,4 @@ - + @@ -85,6 +85,7 @@ + @@ -94,6 +95,7 @@ + @@ -105,4 +107,4 @@ - + \ No newline at end of file diff --git a/MSVC/m4acut.vcxproj.filters b/MSVC/m4acut.vcxproj.filters index 74a08b9..722b592 100644 --- a/MSVC/m4acut.vcxproj.filters +++ b/MSVC/m4acut.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -36,6 +36,9 @@ Source Files + + Source Files + @@ -62,5 +65,8 @@ Header Files + + Header Files + - + \ No newline at end of file diff --git a/Makefile.am b/Makefile.am index 9d2646e..04cb592 100644 --- a/Makefile.am +++ b/Makefile.am @@ -11,6 +11,7 @@ dist_man_MANS = man/m4acut.1 m4acut_SOURCES = src/M4ATrimmer.cpp \ src/MP4Edits.cpp \ src/StringConverterUTF8.cpp \ + src/bitstream.cpp \ src/cuesheet.cpp \ src/main.cpp diff --git a/src/M4ATrimmer.cpp b/src/M4ATrimmer.cpp index 1ff920b..8199e33 100644 --- a/src/M4ATrimmer.cpp +++ b/src/M4ATrimmer.cpp @@ -8,6 +8,89 @@ #include "M4ATrimmer.h" #include #include +#include "bitstream.h" + +void parse_ASC(const void *data, size_t size, + uint8_t *aot, uint32_t *sample_rate) +{ + BitStream bs(static_cast(data), size); + *aot = bs.get(5); + if (*aot != 2 && *aot != 5 && *aot != 29) + throw std::runtime_error("Unsupported AudioSpecificConfig"); + static const unsigned sftab[] = { + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, + 16000, 12000, 11025, 8000, 7350, 0, 0, 0 + }; + *sample_rate = sftab[bs.get(4)]; + uint8_t chan_config = bs.get(4); + if (*aot == 5 || *aot == 29) { + *sample_rate = sftab[bs.get(4)]; + bs.advance(5); // AOT + } + // GASpecificConfig + bs.advance(1); // frameLengthFlag + if (bs.get(1)) bs.advance(14); // dependsOnCoreCoder + bs.advance(1); // extensionFlag + if (!chan_config) { + bs.advance(10); // element_instance_tag, object_type, sf_index + uint8_t nfront = bs.get(4); + uint8_t nside = bs.get(4); + uint8_t nback = bs.get(4); + uint8_t nlfe = bs.get(2); + uint8_t nassoc = bs.get(3); + uint8_t ncc = bs.get(4); + if (bs.get(1)) bs.get(4); // mono_mixdown + if (bs.get(1)) bs.get(4); // stereo_mixdown + if (bs.get(1)) bs.get(3); // matrix_mixdown + uint8_t nfront_channels = 0; + uint8_t nside_channels = 0; + uint8_t nback_channels = 0; + for (uint8_t i = 0; i < nfront; ++i) { + if (bs.get(1)) // is_cpe + nfront_channels += 2; + else + nfront_channels += 1; + bs.advance(4); // element_tag_select + } + for (uint8_t i = 0; i < nside; ++i) { + if (bs.get(1)) // is_cpe + nside_channels += 2; + else + nside_channels += 1; + bs.advance(4); // element_tag_select + } + for (uint8_t i = 0; i < nback; ++i) { + if (bs.get(1)) // is_cpe + nback_channels += 2; + else + nback_channels += 1; + bs.advance(4); // element_tag_select + } + for (uint8_t i = 0; i < nlfe; ++i) + bs.advance(4); + for (uint8_t i = 0; i < nassoc; ++i) + bs.advance(4); + for (uint8_t i = 0; i < ncc; ++i) + bs.advance(5); + // byte align + bs.advance((8 - (bs.position() & 7)) & 7); + uint8_t comment_len = bs.get(1); + bs.advance(8 * comment_len); + } + if (size * 8 - bs.position() >= 16) { + if (bs.get(11) == 0x2b7) { + uint8_t tmp = bs.get(5); + if (tmp == 5 && bs.get(1)) { + *aot = tmp; + *sample_rate = sftab[bs.get(4)]; + } + if (size * 8 - bs.position() >= 12) { + if (bs.get(11) == 0x548 && bs.get(1)) + *aot = 29; + } + } + } +} void M4ATrimmer::open_input(const std::string &filename) { @@ -40,8 +123,7 @@ void M4ATrimmer::open_input(const std::string &filename) } fetch_chapters(); if (!m_input.track.edits.count()) { - int64_t duration = - m_input.track.media_params.duration >> m_input.track.upsampled; + int64_t duration = m_input.track.media_params.duration; m_input.track.edits.add_entry(0, duration); } } @@ -65,7 +147,7 @@ void M4ATrimmer::open_output(const std::string &filename) { lsmash_movie_parameters_t omp; lsmash_initialize_movie_parameters(&omp); - omp.timescale = m_input.track.timescale(); + omp.timescale = timescale(); DieIF(lsmash_set_movie_parameters(mov, &omp)); } add_audio_track(); @@ -74,18 +156,21 @@ void M4ATrimmer::open_output(const std::string &filename) void M4ATrimmer::select_cut_point(const TimeSpec &startspec, const TimeSpec &endspec) { - int64_t start = startspec.is_samples ? - startspec.value.samples >> m_input.track.upsampled - : startspec.value.seconds * m_input.track.timescale() + .5; - int64_t end = endspec.is_samples ? - endspec.value.samples >> m_input.track.upsampled - : endspec.value.seconds * m_input.track.timescale() + .5; - - if (start > int64_t(m_input.track.duration())) + double starttime = startspec.is_samples ? + double(startspec.value.samples) / m_input.track.sample_rate + : startspec.value.seconds; + double endtime = endspec.is_samples ? + double(endspec.value.samples) / m_input.track.sample_rate + : endspec.value.seconds; + + int64_t start = int64_t(starttime * timescale() + .5); + int64_t end = int64_t(endtime * timescale() + .5); + + if (start > (int64_t)duration()) throw std::runtime_error("the start position for trimming exceeds " "the length of input"); if (end <= 0) - end = m_input.track.duration(); + end = duration(); if (end <= start) throw std::runtime_error("the end position of trimming is before " "the start position"); @@ -95,13 +180,19 @@ void M4ATrimmer::select_cut_point(const TimeSpec &startspec, edits.crop(start, end); int64_t media_start = edits.minimum_media_position(); int64_t media_end = edits.maximum_media_position(); + uint32_t au_size = m_input.track.access_unit_size(); m_cut_start = m_current_au = - std::max(static_cast(0), media_start - 1024) / 1024; - m_cut_end = (media_end + 1023) / 1024 + 1; + std::max(static_cast(0), media_start - au_size) / au_size; + m_cut_end = (media_end + au_size - 1) / au_size; + if (m_input.track.aot != 2) { + unsigned delay = unsigned(962.0 / m_input.track.sample_rate * timescale() + .5); + if (m_cut_end * au_size - media_end < delay) + ++m_cut_end; + } uint64_t num_au = m_input.track.num_access_units(); if (m_cut_end > num_au) m_cut_end = num_au; if (m_cut_start > 0) - edits.shift(-1 * m_cut_start * 1024); + edits.shift(-1 * m_cut_start * au_size); unsigned count = m_output.track.edits.count(); for (unsigned i = 0; i < count; ++i) { @@ -137,7 +228,8 @@ bool M4ATrimmer::copy_next_access_unit() m_current_au + 1); if (!sample) return false; - sample->dts = sample->cts = (m_current_au - m_cut_start) * 1024; + uint32_t au_size = m_input.track.access_unit_size(); + sample->dts = sample->cts = (m_current_au - m_cut_start) * au_size; /* * XXX: leaks a sample when lsmash_append_sample() fails. * Otherwise samples is deallocated internally by lsmash_append_sample() @@ -151,7 +243,8 @@ bool M4ATrimmer::copy_next_access_unit() void M4ATrimmer::finish_write(lsmash_adhoc_remux_callback cb, void *cookie) { lsmash_root_t *mov = m_output.movie.get(); - DieIF(lsmash_flush_pooled_samples(mov, m_output.track.id(), 1024)); + uint32_t au_size = m_input.track.access_unit_size(); + DieIF(lsmash_flush_pooled_samples(mov, m_output.track.id(), au_size)); if (m_output.track.edits.count() == 1) set_iTunSMPB(); for (auto e = m_itunes_metadata.begin(); e != m_itunes_metadata.end(); ++e) @@ -184,7 +277,9 @@ uint32_t M4ATrimmer::find_aac_track() continue; lsmash_audio_summary_t *asummary = reinterpret_cast(summary); - if (asummary->aot != MP4A_AUDIO_OBJECT_TYPE_AAC_LC) + if (asummary->aot != MP4A_AUDIO_OBJECT_TYPE_AAC_LC && + asummary->aot != MP4A_AUDIO_OBJECT_TYPE_SBR && + asummary->aot != MP4A_AUDIO_OBJECT_TYPE_PS) continue; lsmash_cleanup_summary(summary); return track_id; @@ -210,12 +305,22 @@ void M4ATrimmer::fetch_track_info(Track *t, uint32_t track_id) t->summary = std::shared_ptr(summary, lsmash_cleanup_summary); - uint32_t au_duration = retrieve_au_duration(track_id); - if (au_duration != 1024 && au_duration != 2048) - throw std::runtime_error("unexpected access unit duration"); - if (au_duration == 2048) - t->upsampled = 1; - + uint32_t ncs = lsmash_count_codec_specific_data(summary); + for (uint32_t i = 1; i <= ncs; ++i) { + auto c = lsmash_get_codec_specific_data(summary, i); + if (c->type != LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG) + continue; + auto p = + static_cast(c->data.structured); + uint8_t *data; + uint32_t size; + lsmash_get_mp4sys_decoder_specific_info(p, &data, &size); + std::vector cookie(data, data + size); + lsmash_free(data); + parse_ASC(cookie.data(), size, &t->aot, &t->sample_rate); + t->frames_per_packet = (t->aot == 2) ? 1024 : 2048; + break; + } if (m_input.movie_params.timescale >= t->media_params.timescale) { uint32_t nedits = lsmash_count_explicit_timeline_map(mov, track_id); for (uint32_t i = 1; i <= nedits; ++i) { @@ -226,31 +331,11 @@ void M4ATrimmer::fetch_track_info(Track *t, uint32_t track_id) duration *= t->media_params.timescale; if (duration == 0.0) duration = t->media_params.duration - edit.start_time; - t->edits.add_entry(edit.start_time >> t->upsampled, - int64_t(duration + .5) >> t->upsampled); + t->edits.add_entry(edit.start_time, int64_t(duration + .5)); } } } -uint32_t M4ATrimmer::retrieve_au_duration(uint32_t track_id) -{ - /* - * Sometimes the last delta can be subtracted by the amount of padding. - * Therefore, we compute the first delta when possible and pick it. - */ - lsmash_root_t *mov = m_input.movie.get(); - uint64_t delta = lsmash_get_last_sample_delta(mov, track_id); - if (lsmash_get_sample_count_in_media_timeline(mov, track_id) > 1) { - lsmash_sample_t *s1, *s2; - s1 = lsmash_get_sample_from_media_timeline(mov, track_id, 1); - s2 = lsmash_get_sample_from_media_timeline(mov, track_id, 2); - if (s1 && s2) delta = s2->dts - s1->dts; - if (s1) lsmash_delete_sample(s1); - if (s2) lsmash_delete_sample(s2); - } - return static_cast(delta); -} - bool M4ATrimmer::parse_iTunSMPB(const lsmash_itunes_metadata_t &item) { if (item.item != ITUNES_METADATA_ITEM_CUSTOM @@ -282,8 +367,6 @@ bool M4ATrimmer::parse_iTunSMPB(const lsmash_itunes_metadata_t &item) >> std::hex >> padding >> std::hex >> duration) { - priming >>= m_input.track.upsampled; - duration >>= m_input.track.upsampled; m_input.track.edits.add_entry(priming, duration); } return true; @@ -463,7 +546,8 @@ void M4ATrimmer::set_iTunSMPB() "00000000 00000000 00000000 00000000 00000000 00000000"; char buf[256]; - uint64_t total_duration = (m_current_au - m_cut_start) * 1024; + uint64_t total_duration = + uint64_t(m_current_au - m_cut_start) * m_input.track.access_unit_size(); unsigned offset = m_output.track.edits.offset(0); uint64_t duration = m_output.track.edits.duration(0); int32_t padding = total_duration - offset - duration; diff --git a/src/M4ATrimmer.h b/src/M4ATrimmer.h index 09b3ed8..b6d39c3 100644 --- a/src/M4ATrimmer.h +++ b/src/M4ATrimmer.h @@ -61,6 +61,9 @@ class M4ATrimmer { lsmash_track_parameters_t track_params; lsmash_media_parameters_t media_params; std::shared_ptr summary; + uint8_t aot; + uint32_t sample_rate; + uint32_t frames_per_packet; uint8_t upsampled; /* * 1: dual-rate SBR is stored in * upsampled timescale @@ -76,12 +79,19 @@ class M4ATrimmer { uint32_t id() const { return track_params.track_ID; } uint32_t timescale() const { - return media_params.timescale >> upsampled; + return media_params.timescale; + } + uint32_t access_unit_size() const + { + return uint32_t(double(frames_per_packet) * timescale() + / sample_rate + .5); } uint64_t num_access_units() const { - return ((media_params.duration >> upsampled) + 1023) / 1024; + uint32_t au_size = access_unit_size(); + return (media_params.duration + au_size - 1) / au_size; } + // duration in track timescale uint64_t duration() const { return edits.total_duration(); @@ -148,7 +158,7 @@ class M4ATrimmer { void shift_edits(int64_t offset) { Track &t = m_input.track; - t.edits.shift(offset, t.media_params.duration >> t.upsampled); + t.edits.shift(offset, t.media_params.duration); } void set_text_tag(lsmash_itunes_metadata_item fcc, const std::string &s); void set_custom_tag(const std::string &name, const std::string &value); @@ -164,7 +174,6 @@ class M4ATrimmer { } uint32_t find_aac_track(); void fetch_track_info(Track *t, uint32_t track_id); - uint32_t retrieve_au_duration(uint32_t track_id); bool parse_iTunSMPB(const lsmash_itunes_metadata_t &item); void populate_itunes_metadata(const lsmash_itunes_metadata_t &item); void fetch_chapters() diff --git a/src/bitstream.cpp b/src/bitstream.cpp new file mode 100644 index 0000000..f40a319 --- /dev/null +++ b/src/bitstream.cpp @@ -0,0 +1,56 @@ +#include +#include "bitstream.h" + +uint32_t BitStream::peek(uint32_t nbits) +{ + uint8_t *p = &m_buffer[m_cur]; + uint32_t v = (*p++ << m_pos) & 0xff; + if (nbits <= 8 - m_pos) + return v >> (8 - nbits); + v >>= m_pos; + nbits = nbits - 8 + m_pos; + for (; nbits >= 8; nbits -= 8) + v = v << 8 | *p++; + if (nbits > 0) + v = v << nbits | (*p << nbits) >> 8; + return v; +} + +uint32_t BitStream::get(uint32_t nbits) +{ + uint32_t value = peek(nbits); + advance(nbits); + return value; +} + +void BitStream::put(uint32_t value, uint32_t nbits) +{ + uint32_t free_bits = 8 - m_pos; + while (nbits > 0) { + uint32_t width = std::min(free_bits, nbits); + uint32_t new_free_bits = free_bits - width; + uint32_t v = value >> (nbits - width); + uint32_t mask = 0xffu >> (8 - width); + mask <<= new_free_bits; + v = (v << new_free_bits) & mask; + while (m_buffer.size() <= m_cur) + m_buffer.push_back(0); + m_buffer[m_cur] = (m_buffer[m_cur] & ~mask) | v; + nbits -= width; + free_bits = new_free_bits; + if (free_bits == 0) { + ++m_cur; + free_bits = 8; + } + } + m_pos = 8 - free_bits; +} + +void BitStream::advance(size_t nbits) +{ + if (nbits) { + m_pos += nbits; + m_cur += (m_pos >> 3); + m_pos &= 7; + } +} diff --git a/src/bitstream.h b/src/bitstream.h new file mode 100644 index 0000000..8bfa91a --- /dev/null +++ b/src/bitstream.h @@ -0,0 +1,33 @@ +#ifndef BITSTREAM_H +#define BITSTREAM_H + +#include +#include +#include + +class BitStream { + std::vector m_buffer; + size_t m_cur, m_pos; +public: + BitStream(): m_cur(0), m_pos(0) + {} + BitStream(const uint8_t *data, size_t size): + m_buffer(data, data + size), m_cur(0), m_pos(0) + {} + size_t position() const { return (m_cur << 3) + m_pos; } + const uint8_t *data() const { return &m_buffer[0]; } + uint32_t peek(uint32_t nbits); + uint32_t get(uint32_t nbits); + void put(uint32_t value, uint32_t nbits); + void advance(size_t nbits); + void rewind() { m_cur = m_pos = 0; } + void byteAlign() { if (m_pos) put(0, 8 - m_pos); } + uint32_t copy(BitStream &src, uint32_t nbits) + { + uint32_t val = src.get(nbits); + put(val, nbits); + return val; + } +}; + +#endif