Skip to content

Commit

Permalink
better handling of timescale and parse AudioSpecificConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
nu774 committed Sep 26, 2015
1 parent bc326c6 commit deaf662
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 56 deletions.
6 changes: 4 additions & 2 deletions MSVC/m4acut.vcxproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
Expand Down Expand Up @@ -85,6 +85,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\missings\getopt.c" />
<ClCompile Include="..\src\bitstream.cpp" />
<ClCompile Include="..\src\compat_win32.c" />
<ClCompile Include="..\src\cuesheet.cpp" />
<ClCompile Include="..\src\M4ATrimmer.cpp" />
Expand All @@ -94,6 +95,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\missings\getopt.h" />
<ClInclude Include="..\src\bitstream.h" />
<ClInclude Include="..\src\compat.h" />
<ClInclude Include="..\src\cuesheet.h" />
<ClInclude Include="..\src\die.h" />
Expand All @@ -105,4 +107,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>
10 changes: 8 additions & 2 deletions MSVC/m4acut.vcxproj.filters
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
Expand Down Expand Up @@ -36,6 +36,9 @@
<ClCompile Include="..\missings\getopt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\bitstream.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\missings\getopt.h">
Expand All @@ -62,5 +65,8 @@
<ClInclude Include="..\src\version.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\bitstream.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>
</Project>
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dist_man_MANS = man/m4acut.1
m4acut_SOURCES = src/M4ATrimmer.cpp \
src/MP4Edits.cpp \
src/StringConverterUTF8.cpp \
src/bitstream.cpp \
src/cuesheet.cpp \
src/main.cpp

Expand Down
180 changes: 132 additions & 48 deletions src/M4ATrimmer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,89 @@
#include "M4ATrimmer.h"
#include <sstream>
#include <algorithm>
#include "bitstream.h"

void parse_ASC(const void *data, size_t size,
uint8_t *aot, uint32_t *sample_rate)
{
BitStream bs(static_cast<const uint8_t *>(data), size);
*aot = bs.get(5);
if (*aot != 2 && *aot != 5 && *aot != 29)
throw std::runtime_error("Unsupported AudioSpecificConfig");
static const unsigned sftab[] = {
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
16000, 12000, 11025, 8000, 7350, 0, 0, 0
};
*sample_rate = sftab[bs.get(4)];
uint8_t chan_config = bs.get(4);
if (*aot == 5 || *aot == 29) {
*sample_rate = sftab[bs.get(4)];
bs.advance(5); // AOT
}
// GASpecificConfig
bs.advance(1); // frameLengthFlag
if (bs.get(1)) bs.advance(14); // dependsOnCoreCoder
bs.advance(1); // extensionFlag
if (!chan_config) {
bs.advance(10); // element_instance_tag, object_type, sf_index
uint8_t nfront = bs.get(4);
uint8_t nside = bs.get(4);
uint8_t nback = bs.get(4);
uint8_t nlfe = bs.get(2);
uint8_t nassoc = bs.get(3);
uint8_t ncc = bs.get(4);
if (bs.get(1)) bs.get(4); // mono_mixdown
if (bs.get(1)) bs.get(4); // stereo_mixdown
if (bs.get(1)) bs.get(3); // matrix_mixdown
uint8_t nfront_channels = 0;
uint8_t nside_channels = 0;
uint8_t nback_channels = 0;
for (uint8_t i = 0; i < nfront; ++i) {
if (bs.get(1)) // is_cpe
nfront_channels += 2;
else
nfront_channels += 1;
bs.advance(4); // element_tag_select
}
for (uint8_t i = 0; i < nside; ++i) {
if (bs.get(1)) // is_cpe
nside_channels += 2;
else
nside_channels += 1;
bs.advance(4); // element_tag_select
}
for (uint8_t i = 0; i < nback; ++i) {
if (bs.get(1)) // is_cpe
nback_channels += 2;
else
nback_channels += 1;
bs.advance(4); // element_tag_select
}
for (uint8_t i = 0; i < nlfe; ++i)
bs.advance(4);
for (uint8_t i = 0; i < nassoc; ++i)
bs.advance(4);
for (uint8_t i = 0; i < ncc; ++i)
bs.advance(5);
// byte align
bs.advance((8 - (bs.position() & 7)) & 7);
uint8_t comment_len = bs.get(1);
bs.advance(8 * comment_len);
}
if (size * 8 - bs.position() >= 16) {
if (bs.get(11) == 0x2b7) {
uint8_t tmp = bs.get(5);
if (tmp == 5 && bs.get(1)) {
*aot = tmp;
*sample_rate = sftab[bs.get(4)];
}
if (size * 8 - bs.position() >= 12) {
if (bs.get(11) == 0x548 && bs.get(1))
*aot = 29;
}
}
}
}

void M4ATrimmer::open_input(const std::string &filename)
{
Expand Down Expand Up @@ -40,8 +123,7 @@ void M4ATrimmer::open_input(const std::string &filename)
}
fetch_chapters();
if (!m_input.track.edits.count()) {
int64_t duration =
m_input.track.media_params.duration >> m_input.track.upsampled;
int64_t duration = m_input.track.media_params.duration;
m_input.track.edits.add_entry(0, duration);
}
}
Expand All @@ -65,7 +147,7 @@ void M4ATrimmer::open_output(const std::string &filename)
{
lsmash_movie_parameters_t omp;
lsmash_initialize_movie_parameters(&omp);
omp.timescale = m_input.track.timescale();
omp.timescale = timescale();
DieIF(lsmash_set_movie_parameters(mov, &omp));
}
add_audio_track();
Expand All @@ -74,18 +156,21 @@ void M4ATrimmer::open_output(const std::string &filename)
void M4ATrimmer::select_cut_point(const TimeSpec &startspec,
const TimeSpec &endspec)
{
int64_t start = startspec.is_samples ?
startspec.value.samples >> m_input.track.upsampled
: startspec.value.seconds * m_input.track.timescale() + .5;
int64_t end = endspec.is_samples ?
endspec.value.samples >> m_input.track.upsampled
: endspec.value.seconds * m_input.track.timescale() + .5;

if (start > int64_t(m_input.track.duration()))
double starttime = startspec.is_samples ?
double(startspec.value.samples) / m_input.track.sample_rate
: startspec.value.seconds;
double endtime = endspec.is_samples ?
double(endspec.value.samples) / m_input.track.sample_rate
: endspec.value.seconds;

int64_t start = int64_t(starttime * timescale() + .5);
int64_t end = int64_t(endtime * timescale() + .5);

if (start > (int64_t)duration())
throw std::runtime_error("the start position for trimming exceeds "
"the length of input");
if (end <= 0)
end = m_input.track.duration();
end = duration();
if (end <= start)
throw std::runtime_error("the end position of trimming is before "
"the start position");
Expand All @@ -95,13 +180,19 @@ void M4ATrimmer::select_cut_point(const TimeSpec &startspec,
edits.crop(start, end);
int64_t media_start = edits.minimum_media_position();
int64_t media_end = edits.maximum_media_position();
uint32_t au_size = m_input.track.access_unit_size();
m_cut_start = m_current_au =
std::max(static_cast<int64_t>(0), media_start - 1024) / 1024;
m_cut_end = (media_end + 1023) / 1024 + 1;
std::max(static_cast<int64_t>(0), media_start - au_size) / au_size;
m_cut_end = (media_end + au_size - 1) / au_size;
if (m_input.track.aot != 2) {
unsigned delay = unsigned(962.0 / m_input.track.sample_rate * timescale() + .5);
if (m_cut_end * au_size - media_end < delay)
++m_cut_end;
}
uint64_t num_au = m_input.track.num_access_units();
if (m_cut_end > num_au) m_cut_end = num_au;
if (m_cut_start > 0)
edits.shift(-1 * m_cut_start * 1024);
edits.shift(-1 * m_cut_start * au_size);

unsigned count = m_output.track.edits.count();
for (unsigned i = 0; i < count; ++i) {
Expand Down Expand Up @@ -137,7 +228,8 @@ bool M4ATrimmer::copy_next_access_unit()
m_current_au + 1);
if (!sample)
return false;
sample->dts = sample->cts = (m_current_au - m_cut_start) * 1024;
uint32_t au_size = m_input.track.access_unit_size();
sample->dts = sample->cts = (m_current_au - m_cut_start) * au_size;
/*
* XXX: leaks a sample when lsmash_append_sample() fails.
* Otherwise samples is deallocated internally by lsmash_append_sample()
Expand All @@ -151,7 +243,8 @@ bool M4ATrimmer::copy_next_access_unit()
void M4ATrimmer::finish_write(lsmash_adhoc_remux_callback cb, void *cookie)
{
lsmash_root_t *mov = m_output.movie.get();
DieIF(lsmash_flush_pooled_samples(mov, m_output.track.id(), 1024));
uint32_t au_size = m_input.track.access_unit_size();
DieIF(lsmash_flush_pooled_samples(mov, m_output.track.id(), au_size));
if (m_output.track.edits.count() == 1)
set_iTunSMPB();
for (auto e = m_itunes_metadata.begin(); e != m_itunes_metadata.end(); ++e)
Expand Down Expand Up @@ -184,7 +277,9 @@ uint32_t M4ATrimmer::find_aac_track()
continue;
lsmash_audio_summary_t *asummary =
reinterpret_cast<lsmash_audio_summary_t*>(summary);
if (asummary->aot != MP4A_AUDIO_OBJECT_TYPE_AAC_LC)
if (asummary->aot != MP4A_AUDIO_OBJECT_TYPE_AAC_LC &&
asummary->aot != MP4A_AUDIO_OBJECT_TYPE_SBR &&
asummary->aot != MP4A_AUDIO_OBJECT_TYPE_PS)
continue;
lsmash_cleanup_summary(summary);
return track_id;
Expand All @@ -210,12 +305,22 @@ void M4ATrimmer::fetch_track_info(Track *t, uint32_t track_id)
t->summary =
std::shared_ptr<lsmash_summary_t>(summary, lsmash_cleanup_summary);

uint32_t au_duration = retrieve_au_duration(track_id);
if (au_duration != 1024 && au_duration != 2048)
throw std::runtime_error("unexpected access unit duration");
if (au_duration == 2048)
t->upsampled = 1;

uint32_t ncs = lsmash_count_codec_specific_data(summary);
for (uint32_t i = 1; i <= ncs; ++i) {
auto c = lsmash_get_codec_specific_data(summary, i);
if (c->type != LSMASH_CODEC_SPECIFIC_DATA_TYPE_MP4SYS_DECODER_CONFIG)
continue;
auto p =
static_cast<lsmash_mp4sys_decoder_parameters_t*>(c->data.structured);
uint8_t *data;
uint32_t size;
lsmash_get_mp4sys_decoder_specific_info(p, &data, &size);
std::vector<uint8_t> cookie(data, data + size);
lsmash_free(data);
parse_ASC(cookie.data(), size, &t->aot, &t->sample_rate);
t->frames_per_packet = (t->aot == 2) ? 1024 : 2048;
break;
}
if (m_input.movie_params.timescale >= t->media_params.timescale) {
uint32_t nedits = lsmash_count_explicit_timeline_map(mov, track_id);
for (uint32_t i = 1; i <= nedits; ++i) {
Expand All @@ -226,31 +331,11 @@ void M4ATrimmer::fetch_track_info(Track *t, uint32_t track_id)
duration *= t->media_params.timescale;
if (duration == 0.0)
duration = t->media_params.duration - edit.start_time;
t->edits.add_entry(edit.start_time >> t->upsampled,
int64_t(duration + .5) >> t->upsampled);
t->edits.add_entry(edit.start_time, int64_t(duration + .5));
}
}
}

uint32_t M4ATrimmer::retrieve_au_duration(uint32_t track_id)
{
/*
* Sometimes the last delta can be subtracted by the amount of padding.
* Therefore, we compute the first delta when possible and pick it.
*/
lsmash_root_t *mov = m_input.movie.get();
uint64_t delta = lsmash_get_last_sample_delta(mov, track_id);
if (lsmash_get_sample_count_in_media_timeline(mov, track_id) > 1) {
lsmash_sample_t *s1, *s2;
s1 = lsmash_get_sample_from_media_timeline(mov, track_id, 1);
s2 = lsmash_get_sample_from_media_timeline(mov, track_id, 2);
if (s1 && s2) delta = s2->dts - s1->dts;
if (s1) lsmash_delete_sample(s1);
if (s2) lsmash_delete_sample(s2);
}
return static_cast<uint32_t>(delta);
}

bool M4ATrimmer::parse_iTunSMPB(const lsmash_itunes_metadata_t &item)
{
if (item.item != ITUNES_METADATA_ITEM_CUSTOM
Expand Down Expand Up @@ -282,8 +367,6 @@ bool M4ATrimmer::parse_iTunSMPB(const lsmash_itunes_metadata_t &item)
>> std::hex >> padding
>> std::hex >> duration)
{
priming >>= m_input.track.upsampled;
duration >>= m_input.track.upsampled;
m_input.track.edits.add_entry(priming, duration);
}
return true;
Expand Down Expand Up @@ -463,7 +546,8 @@ void M4ATrimmer::set_iTunSMPB()
"00000000 00000000 00000000 00000000 00000000 00000000";
char buf[256];

uint64_t total_duration = (m_current_au - m_cut_start) * 1024;
uint64_t total_duration =
uint64_t(m_current_au - m_cut_start) * m_input.track.access_unit_size();
unsigned offset = m_output.track.edits.offset(0);
uint64_t duration = m_output.track.edits.duration(0);
int32_t padding = total_duration - offset - duration;
Expand Down
17 changes: 13 additions & 4 deletions src/M4ATrimmer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class M4ATrimmer {
lsmash_track_parameters_t track_params;
lsmash_media_parameters_t media_params;
std::shared_ptr<lsmash_summary_t> summary;
uint8_t aot;
uint32_t sample_rate;
uint32_t frames_per_packet;
uint8_t upsampled; /*
* 1: dual-rate SBR is stored in
* upsampled timescale
Expand All @@ -76,12 +79,19 @@ class M4ATrimmer {
uint32_t id() const { return track_params.track_ID; }
uint32_t timescale() const
{
return media_params.timescale >> upsampled;
return media_params.timescale;
}
uint32_t access_unit_size() const
{
return uint32_t(double(frames_per_packet) * timescale()
/ sample_rate + .5);
}
uint64_t num_access_units() const
{
return ((media_params.duration >> upsampled) + 1023) / 1024;
uint32_t au_size = access_unit_size();
return (media_params.duration + au_size - 1) / au_size;
}
// duration in track timescale
uint64_t duration() const
{
return edits.total_duration();
Expand Down Expand Up @@ -148,7 +158,7 @@ class M4ATrimmer {
void shift_edits(int64_t offset)
{
Track &t = m_input.track;
t.edits.shift(offset, t.media_params.duration >> t.upsampled);
t.edits.shift(offset, t.media_params.duration);
}
void set_text_tag(lsmash_itunes_metadata_item fcc, const std::string &s);
void set_custom_tag(const std::string &name, const std::string &value);
Expand All @@ -164,7 +174,6 @@ class M4ATrimmer {
}
uint32_t find_aac_track();
void fetch_track_info(Track *t, uint32_t track_id);
uint32_t retrieve_au_duration(uint32_t track_id);
bool parse_iTunSMPB(const lsmash_itunes_metadata_t &item);
void populate_itunes_metadata(const lsmash_itunes_metadata_t &item);
void fetch_chapters()
Expand Down
Loading

0 comments on commit deaf662

Please sign in to comment.