/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "mp4.h"
/*
 * Run the MP4 parser over infile with seeking disabled (last argument 0),
 * handing info and tags straight to _mp4_parse().
 *
 * The parser state returned by _mp4_parse() is not needed once the call has
 * finished, so it is released here. Always returns 0.
 */
static int
get_mp4tags(PerlIO *infile, char *file, HV *info, HV *tags)
{
  mp4info *parsed;

  parsed = _mp4_parse(infile, file, info, tags, 0);
  Safefree(parsed);

  return 0;
}
// Convenience wrapper around mp4_find_frame_return_info() that hands back
// only the file offset.
//
// offset is forwarded unchanged. The seek is performed against a temporary
// info hash; the result is the "seek_offset" value left in that hash, or -1
// when no such key was stored. The temporary hash is released before returning.
int
mp4_find_frame(PerlIO *infile, char *file, int offset)
{
  int result = -1;
  HV *scratch = newHV();

  mp4_find_frame_return_info(infile, file, offset, scratch);

  if ( my_hv_exists(scratch, "seek_offset") ) {
    SV **seek_sv = my_hv_fetch(scratch, "seek_offset");

    result = SvIV(*seek_sv);
  }

  SvREFCNT_dec(scratch);

  return result;
}
// offset is in ms
// This is based on code from Rockbox
//
// Find the sample that corresponds to `offset` and build a replacement MP4
// header that starts at that sample.
//
// The file is parsed once with seeking enabled so that the stts/stsc/stsz/stco
// tables are loaded. Trimmed copies of those four boxes are built, then the
// header is read a second time from the start of the file and copied into a
// new header with the four boxes substituted and the parent box sizes adjusted.
//
// On success returns 1 and stores in info:
//   seek_offset - file offset of the selected sample
//   seek_header - the rewritten header bytes
// On failure returns -1 and stores seek_offset = -1.
// Files with more than one track are not supported and fail.
int
mp4_find_frame_return_info(PerlIO *infile, char *file, int offset, HV *info)
{
  int ret = 1;
  // 32 bits wide: rates such as 88200 or 96000 Hz do not fit in 16 bits
  uint32_t samplerate = 0;
  uint32_t sound_sample_loc;
  uint32_t i = 0;
  uint32_t j = 0;
  uint32_t new_sample = 0;
  uint32_t new_sound_sample = 0;
  uint32_t chunk = 1;
  uint32_t range_samples = 0;
  uint32_t total_samples = 0;
  uint32_t skipped_samples = 0;
  uint32_t chunk_sample;
  uint32_t prev_chunk;
  uint32_t prev_chunk_samples;
  uint32_t file_offset;
  uint32_t chunk_offset;
  uint32_t box_size = 0;
  Buffer tmp_buf;
  char tmp_size[4];
  SV **audio_offset_sv;

  // We need to read all info first to get some data we need to calculate
  HV *tags = newHV();
  mp4info *mp4 = _mp4_parse(infile, file, info, tags, 1);

  // Init seek buffer
  buffer_init(&tmp_buf, MP4_BLOCK_SIZE);

  // Seeking not yet supported for files with multiple tracks
  if (mp4->track_count > 1) {
    ret = -1;
    goto out;
  }

  if ( !my_hv_exists(info, "samplerate") ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: unknown sample rate\n");
    ret = -1;
    goto out;
  }

  // Pull out the samplerate
  samplerate = SvIV( *( my_hv_fetch( info, "samplerate" ) ) );

  // convert offset to sound_sample_loc
  sound_sample_loc = (offset / 10) * (samplerate / 100);

  DEBUG_TRACE("Looking for target sample %u\n", sound_sample_loc);

  // Make sure we have the necessary metadata
  if (
       !mp4->num_time_to_samples
    || !mp4->num_sample_byte_sizes
    || !mp4->num_sample_to_chunks
    || !mp4->num_chunk_offsets
  ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: File does not contain seek metadata: %s\n", file);
    ret = -1;
    goto out;
  }

  // Find the destination block from time_to_sample array
  while ( (i < mp4->num_time_to_samples) &&
    (new_sound_sample < sound_sample_loc)
  ) {
    if (mp4->time_to_sample[i].sample_duration == 0) {
      // Zero-length samples advance the sample index but not the time;
      // handling them here also avoids dividing by zero below
      new_sample += mp4->time_to_sample[i].sample_count;
      i++;
      continue;
    }

    j = (sound_sample_loc - new_sound_sample) / mp4->time_to_sample[i].sample_duration;

    DEBUG_TRACE(
      "i = %d / j = %d, sample_count[i]: %d, sample_duration[i]: %d\n",
      i, j,
      mp4->time_to_sample[i].sample_count,
      mp4->time_to_sample[i].sample_duration
    );

    if (j <= mp4->time_to_sample[i].sample_count) {
      // Target lies inside this run of equal-duration samples
      new_sample += j;
      new_sound_sample += j * mp4->time_to_sample[i].sample_duration;
      break;
    }
    else {
      // XXX need test for this bit of code (variable stts)
      new_sound_sample += (mp4->time_to_sample[i].sample_duration
        * mp4->time_to_sample[i].sample_count);
      new_sample += mp4->time_to_sample[i].sample_count;
      i++;
    }
  }

  if ( new_sample >= mp4->num_sample_byte_sizes ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: Offset out of range (%d >= %d)\n", new_sample, mp4->num_sample_byte_sizes);
    ret = -1;
    goto out;
  }

  DEBUG_TRACE("new_sample: %d, new_sound_sample: %d\n", new_sample, new_sound_sample);

  // Write new stts box
  {
    int i;
    uint32_t total_sample_count = _mp4_total_samples(mp4);
    uint32_t stts_entries;
    uint32_t cur_duration = 0;
    struct tts *stts;
    int32_t stts_index = -1;

    // The subtraction below is unsigned; refuse tables that would make it wrap
    if (total_sample_count < new_sample) {
      PerlIO_printf(PerlIO_stderr(), "find_frame: stts sample count out of range (%u < %u)\n", total_sample_count, new_sample);
      ret = -1;
      goto out;
    }

    // Worst case is one entry per remaining sample; runs are merged below
    stts_entries = total_sample_count - new_sample;

    // Newz takes an element count, not a byte count
    Newz(0, stts, stts_entries, struct tts);

    for (i = new_sample; i < total_sample_count; i++) {
      uint32_t duration = _mp4_get_sample_duration(mp4, i);

      if (cur_duration && cur_duration == duration) {
        // same as previous entry, combine together
        stts_entries--;
        stts[stts_index].sample_count++;
      }
      else {
        stts_index++;
        stts[stts_index].sample_count = 1;
        stts[stts_index].sample_duration = duration;
        cur_duration = duration;
      }
    }

    DEBUG_TRACE("Writing new stts (entries: %d)\n", stts_entries);

    buffer_put_int(&tmp_buf, stts_entries);

    for (i = 0; i < stts_entries; i++) {
      DEBUG_TRACE("  sample_count %d, sample_duration %d\n", stts[i].sample_count, stts[i].sample_duration);

      buffer_put_int(&tmp_buf, stts[i].sample_count);
      buffer_put_int(&tmp_buf, stts[i].sample_duration);
    }

    // Box = size(4) + fourcc(4) + version/flags(4) + payload
    mp4->new_stts = newSVpv("", 0);
    put_u32( tmp_size, buffer_len(&tmp_buf) + 12 );
    sv_catpvn( mp4->new_stts, tmp_size, 4 );
    sv_catpvn( mp4->new_stts, "stts", 4 );
    sv_catpvn( mp4->new_stts, "\0\0\0\0", 4 );
    sv_catpvn( mp4->new_stts, (char *)buffer_ptr(&tmp_buf), buffer_len(&tmp_buf) );

    //buffer_dump(&tmp_buf, 0);
    buffer_clear(&tmp_buf);

    Safefree(stts);
  }

  // We know the new block, now calculate the file position

  /* Locate the chunk containing the sample */
  prev_chunk         = mp4->sample_to_chunk[0].first_chunk;
  prev_chunk_samples = mp4->sample_to_chunk[0].samples_per_chunk;

  for (i = 1; i < mp4->num_sample_to_chunks; i++) {
    chunk = mp4->sample_to_chunk[i].first_chunk;
    range_samples = (chunk - prev_chunk) * prev_chunk_samples;

    DEBUG_TRACE("prev_chunk: %d, prev_chunk_samples: %d, chunk: %d, range_samples: %d\n",
      prev_chunk, prev_chunk_samples, chunk, range_samples);

    if (new_sample < total_samples + range_samples)
      break;

    total_samples += range_samples;
    prev_chunk = mp4->sample_to_chunk[i].first_chunk;
    prev_chunk_samples = mp4->sample_to_chunk[i].samples_per_chunk;
  }

  DEBUG_TRACE("prev_chunk: %d, prev_chunk_samples: %d, total_samples: %d\n", prev_chunk, prev_chunk_samples, total_samples);

  if (new_sample >= mp4->sample_to_chunk[0].samples_per_chunk) {
    // A stsc entry with no samples per chunk cannot be used as a divisor
    if (prev_chunk_samples == 0) {
      PerlIO_printf(PerlIO_stderr(), "find_frame: invalid stsc entry (0 samples per chunk)\n");
      ret = -1;
      goto out;
    }

    chunk = prev_chunk + (new_sample - total_samples) / prev_chunk_samples;
  }
  else {
    chunk = 1;
  }

  DEBUG_TRACE("chunk: %d\n", chunk);

  // chunk is used as a 1-based index into chunk_offset below
  if (chunk == 0) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: invalid chunk number 0\n");
    ret = -1;
    goto out;
  }

  /* Get sample of the first sample in the chunk */
  chunk_sample = total_samples + (chunk - prev_chunk) * prev_chunk_samples;

  DEBUG_TRACE("chunk_sample: %d\n", chunk_sample);

  /* Get offset in file */
  if (chunk > mp4->num_chunk_offsets) {
    file_offset = mp4->chunk_offset[mp4->num_chunk_offsets - 1];
  }
  else {
    file_offset = mp4->chunk_offset[chunk - 1];
  }

  DEBUG_TRACE("file_offset: %d\n", file_offset);

  if (chunk_sample > new_sample) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: sample out of range (%d > %d)\n", chunk_sample, new_sample);
    ret = -1;
    goto out;
  }

  // Move offset within the chunk to the correct sample range
  for (i = chunk_sample; i < new_sample; i++) {
    file_offset += mp4->sample_byte_size[i];
    skipped_samples++;
    DEBUG_TRACE("  file_offset + %d: %d\n", mp4->sample_byte_size[i], file_offset);
  }

  if (file_offset > mp4->audio_offset + mp4->audio_size) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: file offset out of range (%d > %lld)\n", file_offset, mp4->audio_offset + mp4->audio_size);
    ret = -1;
    goto out;
  }

  // Write new stsc box
  {
    int i;
    uint32_t stsc_entries = mp4->num_chunk_offsets - chunk + 1;
    uint32_t cur_samples_per_chunk = 0;
    struct stc *stsc;
    int32_t stsc_index = -1;
    uint32_t chunk_delta = 1;

    // j == 1 marks "first entry not written yet"
    j = 1;

    // Newz takes an element count, not a byte count
    Newz(0, stsc, stsc_entries, struct stc);

    for (i = chunk; i <= mp4->num_chunk_offsets; i++) {
      // Find the number of samples in chunk i
      uint32_t samples_in_chunk = _mp4_samples_in_chunk(mp4, i);

      if (cur_samples_per_chunk && cur_samples_per_chunk == samples_in_chunk) {
        // same as previous entry, combine together
        stsc_entries--;
      }
      else {
        stsc_index++;
        stsc[stsc_index].first_chunk = chunk_delta;

        if (j == 1) {
          // The first chunk may have less samples in it due to seeking within a chunk
          stsc[stsc_index].samples_per_chunk = samples_in_chunk - skipped_samples;
          cur_samples_per_chunk = samples_in_chunk - skipped_samples;
          j++;
        }
        else {
          stsc[stsc_index].samples_per_chunk = samples_in_chunk;
          cur_samples_per_chunk = samples_in_chunk;
        }
      }

      chunk_delta++;
    }

    DEBUG_TRACE("Writing new stsc (entries: %d)\n", stsc_entries);

    buffer_put_int(&tmp_buf, stsc_entries);

    for (i = 0; i < stsc_entries; i++) {
      DEBUG_TRACE("  first_chunk %d, samples_per_chunk %d\n", stsc[i].first_chunk, stsc[i].samples_per_chunk);

      buffer_put_int(&tmp_buf, stsc[i].first_chunk);
      buffer_put_int(&tmp_buf, stsc[i].samples_per_chunk);
      buffer_put_int(&tmp_buf, 1); // XXX sample description index, is this OK?
    }

    mp4->new_stsc = newSVpv("", 0);
    put_u32( tmp_size, buffer_len(&tmp_buf) + 12 );
    sv_catpvn( mp4->new_stsc, tmp_size, 4 );
    sv_catpvn( mp4->new_stsc, "stsc", 4 );
    sv_catpvn( mp4->new_stsc, "\0\0\0\0", 4 );
    sv_catpvn( mp4->new_stsc, (char *)buffer_ptr(&tmp_buf), buffer_len(&tmp_buf) );

    DEBUG_TRACE("Created new stsc\n");
    //buffer_dump(&tmp_buf, 0);
    buffer_clear(&tmp_buf);

    Safefree(stsc);
  }

  // Write new stsz box, num_sample_byte_sizes -= $new_sample, skip $new_sample items
  buffer_put_int(&tmp_buf, 0);
  buffer_put_int(&tmp_buf, mp4->num_sample_byte_sizes - new_sample);

  DEBUG_TRACE("Writing new stsz: %d items\n", mp4->num_sample_byte_sizes - new_sample);

  j = 1;
  for (i = new_sample; i < mp4->num_sample_byte_sizes; i++) {
    DEBUG_TRACE("  sample %d sample_byte_size %d\n", j++, mp4->sample_byte_size[i]);
    buffer_put_int(&tmp_buf, mp4->sample_byte_size[i]);
  }

  mp4->new_stsz = newSVpv("", 0);
  put_u32( tmp_size, buffer_len(&tmp_buf) + 12 );
  sv_catpvn( mp4->new_stsz, tmp_size, 4 );
  sv_catpvn( mp4->new_stsz, "stsz", 4 );
  sv_catpvn( mp4->new_stsz, "\0\0\0\0", 4 );
  sv_catpvn( mp4->new_stsz, (char *)buffer_ptr(&tmp_buf), buffer_len(&tmp_buf) );

  DEBUG_TRACE("Created new stsz\n");
  //buffer_dump(&tmp_buf, 0);
  buffer_clear(&tmp_buf);

  // Total up size of 4 new st* boxes
  // stco is calculated directly since we can't write it without offsets
  mp4->new_st_size
    = sv_len(mp4->new_stts)
    + sv_len(mp4->new_stsc)
    + sv_len(mp4->new_stsz)
    + 12 + ( 4 * (mp4->num_chunk_offsets - chunk + 2) ); // stco size

  DEBUG_TRACE("new_st_size: %d, old_st_size: %d\n", mp4->new_st_size, mp4->old_st_size);

  // Calculate offset for each chunk
  // audio_offset is only present when an mdat box was seen
  audio_offset_sv = my_hv_fetch(info, "audio_offset");
  if (!audio_offset_sv) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: no audio data found: %s\n", file);
    ret = -1;
    goto out;
  }

  chunk_offset = SvIV(*audio_offset_sv);
  chunk_offset -= ( mp4->old_st_size - mp4->new_st_size );
  chunk_offset += 8; // mdat size + fourcc

  DEBUG_TRACE("chunk_offset: %d\n", chunk_offset);

  // Write new stco box, num_chunk_offsets -= $chunk, skip $chunk items
  buffer_put_int(&tmp_buf, mp4->num_chunk_offsets - chunk + 1);

  DEBUG_TRACE("Writing new stco: %d items\n", mp4->num_chunk_offsets - chunk + 1);

  for (i = chunk - 1; i < mp4->num_chunk_offsets; i++) {
    if (i == chunk - 1) {
      // The first chunk offset is the start of mdat (chunk_offset)
      buffer_put_int( &tmp_buf, chunk_offset );
      DEBUG_TRACE( "  offset %d (orig %d)\n", chunk_offset, mp4->chunk_offset[i] );
    }
    else {
      // Later chunks keep their distance from the seek point
      buffer_put_int( &tmp_buf, mp4->chunk_offset[i] - file_offset + chunk_offset );
      DEBUG_TRACE( "  offset %d (orig %d)\n", mp4->chunk_offset[i] - file_offset + chunk_offset, mp4->chunk_offset[i] );
    }
  }

  mp4->new_stco = newSVpv("", 0);
  put_u32( tmp_size, buffer_len(&tmp_buf) + 12 );
  sv_catpvn( mp4->new_stco, tmp_size, 4 );
  sv_catpvn( mp4->new_stco, "stco", 4 );
  sv_catpvn( mp4->new_stco, "\0\0\0\0", 4 );
  sv_catpvn( mp4->new_stco, (char *)buffer_ptr(&tmp_buf), buffer_len(&tmp_buf) );

  DEBUG_TRACE("Created new stco\n");
  //buffer_dump(&tmp_buf, 0);
  buffer_clear(&tmp_buf);

  DEBUG_TRACE("real st size: %ld\n",
      sv_len(mp4->new_stts)
    + sv_len(mp4->new_stsc)
    + sv_len(mp4->new_stsz)
    + sv_len(mp4->new_stco)
  );

  // Make second pass through header, reducing size of all parent boxes by st* size difference
  // Copy all boxes, replacing st* boxes with new ones
  // A non-NULL seekhdr is what switches _mp4_read_box into copy mode
  mp4->seekhdr = newSVpv("", 0);

  PerlIO_seek(mp4->infile, 0, SEEK_SET);

  // XXX this is ugly, because we are reading a second time we have to reset
  // various things in the mp4 struct
  Newz(0, mp4->buf, 1, Buffer);
  buffer_init(mp4->buf, MP4_BLOCK_SIZE);

  mp4->audio_offset  = 0;
  mp4->current_track = 0;
  mp4->track_count   = 0;

  // free seek structs because we will be reading them a second time
  if (mp4->time_to_sample)   Safefree(mp4->time_to_sample);
  if (mp4->sample_to_chunk)  Safefree(mp4->sample_to_chunk);
  if (mp4->sample_byte_size) Safefree(mp4->sample_byte_size);
  if (mp4->chunk_offset)     Safefree(mp4->chunk_offset);

  mp4->time_to_sample   = NULL;
  mp4->sample_to_chunk  = NULL;
  mp4->sample_byte_size = NULL;
  mp4->chunk_offset     = NULL;

  while ( (box_size = _mp4_read_box(mp4)) > 0 ) {
    mp4->audio_offset += box_size;
    DEBUG_TRACE("seek pass 2: read box of size %d\n", box_size);

    if (mp4->audio_offset >= mp4->file_size)
      break;
  }

  my_hv_store( info, "seek_offset", newSVuv(file_offset) );
  my_hv_store( info, "seek_header", mp4->seekhdr );

  if (mp4->buf) {
    buffer_free(mp4->buf);
    Safefree(mp4->buf);
  }

out:
  // Don't leak
  SvREFCNT_dec(tags);

  if (mp4->new_stts) SvREFCNT_dec(mp4->new_stts);
  if (mp4->new_stsc) SvREFCNT_dec(mp4->new_stsc);
  if (mp4->new_stsz) SvREFCNT_dec(mp4->new_stsz);
  if (mp4->new_stco) SvREFCNT_dec(mp4->new_stco);

  // free seek structs
  if (mp4->time_to_sample)   Safefree(mp4->time_to_sample);
  if (mp4->sample_to_chunk)  Safefree(mp4->sample_to_chunk);
  if (mp4->sample_byte_size) Safefree(mp4->sample_byte_size);
  if (mp4->chunk_offset)     Safefree(mp4->chunk_offset);

  // free seek buffer
  buffer_free(&tmp_buf);

  Safefree(mp4);

  if (ret == -1) {
    my_hv_store( info, "seek_offset", newSViv(-1) );
  }

  return ret;
}
mp4info *
_mp4_parse(PerlIO *infile, char *file, HV *info, HV *tags, uint8_t seeking)
{
off_t file_size;
uint32_t box_size = 0;
mp4info *mp4;
Newz(0, mp4, sizeof(mp4info), mp4info);
Newz(0, mp4->buf, sizeof(Buffer), Buffer);
mp4->audio_offset = 0;
mp4->infile = infile;
mp4->file = file;
mp4->info = info;
mp4->tags = tags;
mp4->current_track = 0;
mp4->track_count = 0;
mp4->seen_moov = 0;
mp4->seeking = seeking ? 1 : 0;
mp4->time_to_sample = NULL;
mp4->sample_to_chunk = NULL;
mp4->sample_byte_size = NULL;
mp4->chunk_offset = NULL;
buffer_init(mp4->buf, MP4_BLOCK_SIZE);
file_size = _file_size(infile);
mp4->file_size = file_size;
my_hv_store( info, "file_size", newSVuv(file_size) );
// Create empty tracks array
my_hv_store( info, "tracks", newRV_noinc( (SV *)newAV() ) );
while ( (box_size = _mp4_read_box(mp4)) > 0 ) {
mp4->audio_offset += box_size;
DEBUG_TRACE("read box of size %d / audio_offset %llu\n", box_size, mp4->audio_offset);
if (mp4->audio_offset >= file_size)
break;
}
// XXX: if no ftyp was found, assume it is brand 'mp41'
// if no bitrate was found (i.e. ALAC), calculate based on file_size/song_length_ms
if ( !my_hv_exists(info, "avg_bitrate") ) {
SV **entry = my_hv_fetch(info, "song_length_ms");
if (entry) {
SV **audio_offset = my_hv_fetch(info, "audio_offset");
if (audio_offset) {
uint32_t song_length_ms = SvIV(*entry);
uint32_t bitrate = _bitrate(file_size - SvIV(*audio_offset), song_length_ms);
my_hv_store( info, "avg_bitrate", newSVuv(bitrate) );
mp4->bitrate = bitrate;
}
}
}
// DLNA detection, based on code from libdlna
if (!mp4->dlna_invalid && mp4->samplerate && mp4->bitrate && mp4->channels) {
switch (mp4->audio_object_type) {
case AAC_LC:
case AAC_LC_ER:
{
if (mp4->samplerate < 8000 || mp4->samplerate > 48000)
break;
if (mp4->channels <= 2) {
if (mp4->bitrate <= 192000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_ISO_192", 0) );
else if (mp4->bitrate <= 320000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_ISO_320", 0) );
else if (mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_ISO", 0) );
}
else if (mp4->channels <= 6) {
if (mp4->bitrate <= 1440000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_MULT5_ISO", 0) );
}
break;
}
case AAC_LTP:
case AAC_LTP_ER:
{
if (mp4->samplerate < 8000)
break;
if (mp4->samplerate <= 48000) {
if (mp4->channels <= 2 && mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_LTP_ISO", 0) );
}
else if (mp4->samplerate <= 96000) {
if (mp4->channels <= 6 && mp4->bitrate <= 2880000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_LTP_MULT5_ISO", 0) );
else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
my_hv_store( info, "dlna_profile", newSVpv("AAC_LTP_MULT7_ISO", 0) );
}
break;
}
case AAC_HE:
{
if (mp4->samplerate < 8000)
break;
if (mp4->samplerate <= 24000) {
if (mp4->channels > 2)
break;
if (mp4->bitrate <= 128000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_L2_ISO_128", 0) );
else if (mp4->bitrate <= 320000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_L2_ISO_320", 0) );
else if (mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_L2_ISO", 0) );
}
else if (mp4->samplerate <= 48000) {
if (mp4->channels <= 2 && mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_L3_ISO", 0) );
else if (mp4->channels <= 6 && mp4->bitrate <= 1440000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_MULT5_ISO", 0) );
else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_MULT7", 0) );
}
else if (mp4->samplerate <= 96000) {
if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
my_hv_store( info, "dlna_profile", newSVpv("HEAAC_MULT7", 0) );
}
break;
}
case AAC_PARAM_ER:
case AAC_PS:
{
if (mp4->samplerate < 8000)
break;
if (mp4->samplerate <= 24000) {
if (mp4->channels > 2)
break;
if (mp4->bitrate <= 128000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_L2_128", 0) );
else if (mp4->bitrate <= 320000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_L2_320", 0) );
else if (mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_L2", 0) );
}
else if (mp4->samplerate <= 48000) {
if (mp4->channels <= 2 && mp4->bitrate <= 576000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_L3", 0) );
else if (mp4->channels <= 6 && mp4->bitrate <= 1440000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_L4", 0) );
else if (mp4->channels <= 6 && mp4->bitrate <= 2880000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_MULT5", 0) );
else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_MULT7", 0) );
}
else if (mp4->samplerate <= 96000) {
if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
my_hv_store( info, "dlna_profile", newSVpv("HEAACv2_MULT7", 0) );
}
break;
}
case AAC_BSAC_ER:
{
if (mp4->samplerate < 16000 || mp4->samplerate > 48000)
break;
if (mp4->bitrate > 128000)
break;
if (mp4->channels <= 2)
my_hv_store( info, "dlna_profile", newSVpv("BSAC_ISO", 0) );
else if (mp4->channels <= 6)
my_hv_store( info, "dlna_profile", newSVpv("BSAC_MULT5_ISO", 0) );
break;
}
default:
break;
}
}
buffer_free(mp4->buf);
Safefree(mp4->buf);
return mp4;
}
// Read one box header from mp4->buf and dispatch on its fourcc.
//
// Sets mp4->hsize (header bytes), mp4->size (declared box size) and
// mp4->rsize (payload bytes remaining) before dispatching.
//
// When mp4->seekhdr is set (second pass of a seek), the box is also copied
// into seekhdr: parent containers get their size reduced by the st* size
// difference, stts/stsc/stsz/stco are replaced by the rebuilt versions, and
// other boxes are copied as-is.
//
// Returns the number of bytes the caller should advance by: the header size
// for pure containers, header plus fixed leading bytes for stsd/mp4a/meta, and
// the declared box size otherwise. Returns 0 when no more data is available
// or a box could not be parsed.
// NOTE(review): size is 64-bit but the return type is int, so very large
// boxes cannot be reported exactly - confirm callers tolerate this.
int
_mp4_read_box(mp4info *mp4)
{
  uint64_t size;  // total size of box
  char type[5];
  uint8_t skip = 0;

  mp4->rsize = 0; // remaining size in box

  if ( !_check_buf(mp4->infile, mp4->buf, 16, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  size = buffer_get_int(mp4->buf);

  // The fourcc is raw bytes, not a C string: copy all four even if one is NUL
  memcpy( type, buffer_ptr(mp4->buf), 4 );
  type[4] = '\0';
  buffer_consume(mp4->buf, 4);

  // Check for 64-bit size
  if (size == 1) {
    size = buffer_get_int64(mp4->buf);
    mp4->hsize = 16;
  }
  else if (size == 0) {
    // XXX: size extends to end of file
    mp4->hsize = 8;
  }
  else {
    mp4->hsize = 8;
  }

  if (size) {
    // A box can never be smaller than its own header; without this check
    // the subtraction below would go out of range
    if (size < mp4->hsize) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad box size): %s\n", mp4->file);
      return 0;
    }

    mp4->rsize = size - mp4->hsize;
  }

  mp4->size = size;

  DEBUG_TRACE("%s size %llu\n", type, size);

  if (mp4->seekhdr) {
    // Copy and adjust header if seeking
    char tmp_size[4];

    if (
         FOURCC_EQ(type, "moov")
      || FOURCC_EQ(type, "trak")
      || FOURCC_EQ(type, "mdia")
      || FOURCC_EQ(type, "minf")
      || FOURCC_EQ(type, "stbl")
    ) {
      // Container box, adjust size
      put_u32(tmp_size, size - (mp4->old_st_size - mp4->new_st_size));
      DEBUG_TRACE("  Box is parent of st*, changed size to %llu\n", size - (mp4->old_st_size - mp4->new_st_size));
      sv_catpvn( mp4->seekhdr, tmp_size, 4 );
      sv_catpvn( mp4->seekhdr, type, 4 );
    }
    // Replace st* boxes with our new versions
    else if ( FOURCC_EQ(type, "stts") ) {
      DEBUG_TRACE("adding new stts of size %ld\n", sv_len(mp4->new_stts));
      sv_catsv( mp4->seekhdr, mp4->new_stts );
    }
    else if ( FOURCC_EQ(type, "stsc") ) {
      DEBUG_TRACE("adding new stsc of size %ld\n", sv_len(mp4->new_stsc));
      sv_catsv( mp4->seekhdr, mp4->new_stsc );
    }
    else if ( FOURCC_EQ(type, "stsz") ) {
      DEBUG_TRACE("adding new stsz of size %ld\n", sv_len(mp4->new_stsz));
      sv_catsv( mp4->seekhdr, mp4->new_stsz );
    }
    else if ( FOURCC_EQ(type, "stco") ) {
      DEBUG_TRACE("adding new stco of size %ld\n", sv_len(mp4->new_stco));
      sv_catsv( mp4->seekhdr, mp4->new_stco );
    }
    else {
      // Normal box, copy it
      put_u32(tmp_size, size);
      sv_catpvn( mp4->seekhdr, tmp_size, 4 );
      sv_catpvn( mp4->seekhdr, type, 4 );

      // stsd is special and contains real bytes and is also a container
      if ( FOURCC_EQ(type, "stsd") ) {
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), 8 );
      }
      // mp4a is special, ugh
      else if ( FOURCC_EQ(type, "mp4a") ) {
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), 28 );
      }
      // and so is meta
      else if ( FOURCC_EQ(type, "meta") ) {
        // meta_size was recorded when this box was parsed on the first pass
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), mp4->meta_size );
      }
      // Copy contents unless it's a container
      else if (
           !FOURCC_EQ(type, "edts")
        && !FOURCC_EQ(type, "dinf")
        && !FOURCC_EQ(type, "udta")
        && !FOURCC_EQ(type, "mdat")
      ) {
        if ( !_check_buf(mp4->infile, mp4->buf, size - 8, MP4_BLOCK_SIZE) ) {
          return 0;
        }

        // XXX find a way to skip udta completely when rewriting seek header
        // to avoid useless copying of artwork.  Will require adjusting offsets
        // differently.
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), size - 8 );
      }
    }

    // XXX should probably return size here and avoid reading info a second time
    // or move the header copying code to somewhere else
  }

  if ( FOURCC_EQ(type, "ftyp") ) {
    if ( !_mp4_parse_ftyp(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad ftyp box): %s\n", mp4->file);
      return 0;
    }
  }
  else if (
       FOURCC_EQ(type, "moov")
    || FOURCC_EQ(type, "edts")
    || FOURCC_EQ(type, "mdia")
    || FOURCC_EQ(type, "minf")
    || FOURCC_EQ(type, "dinf")
    || FOURCC_EQ(type, "stbl")
    || FOURCC_EQ(type, "udta")
  ) {
    // These boxes are containers for nested boxes, return only the fact that
    // we read the header size of the container
    size = mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "trak") ) {
    // Also a container, but we need to increment track_count too
    size = mp4->hsize;
    mp4->track_count++;
  }
  else if ( FOURCC_EQ(type, "mvhd") ) {
    mp4->seen_moov = 1;

    if ( !_mp4_parse_mvhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mvhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "tkhd") ) {
    if ( !_mp4_parse_tkhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad tkhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "mdhd") ) {
    if ( !_mp4_parse_mdhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mdhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "hdlr") ) {
    if ( !_mp4_parse_hdlr(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad hdlr box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "stsd") ) {
    if ( !_mp4_parse_stsd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsd box): %s\n", mp4->file);
      return 0;
    }

    // stsd is a special real box + container, count only the real bytes (8)
    size = 8 + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "mp4a") ) {
    if ( !_mp4_parse_mp4a(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mp4a box): %s\n", mp4->file);
      return 0;
    }

    // mp4a is a special real box + container, count only the real bytes (28)
    size = 28 + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "alac") ) {
    if ( !_mp4_parse_alac(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad alac box): %s\n", mp4->file);
      return 0;
    }

    // skip rest (alac description)
    mp4->rsize -= 28;
    skip = 1;
  }
  else if ( FOURCC_EQ(type, "drms") ) {
    // Mark encoding
    HV *trackinfo = _mp4_get_current_trackinfo(mp4);

    my_hv_store( trackinfo, "encoding", newSVpvn("drms", 4) );

    // Skip rest
    skip = 1;
  }
  else if ( FOURCC_EQ(type, "esds") ) {
    if ( !_mp4_parse_esds(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad esds box): %s\n", mp4->file);
      return 0;
    }
  }
  // The four sample tables are only loaded when seeking in a single-track
  // file; their sizes are summed so parent boxes can be resized later
  else if ( FOURCC_EQ(type, "stts") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stts(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stts box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stsc") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stsc(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsc box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stsz") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stsz(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsz box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stco") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stco(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stco box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "meta") ) {
    uint8_t meta_size = _mp4_parse_meta(mp4);
    if ( !meta_size ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad meta box): %s\n", mp4->file);
      return 0;
    }

    mp4->meta_size = meta_size;

    // meta is a special real box + container, count only the real bytes
    size = meta_size + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "ilst") ) {
    if ( !_mp4_parse_ilst(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad ilst box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "mdat") ) {
    // Audio data here, there may be boxes after mdat, so we have to skip it
    skip = 1;

    // If we haven't seen moov yet, set a flag so we can print a warning
    // or handle it some other way
    if ( !mp4->seen_moov ) {
      my_hv_store( mp4->info, "leading_mdat", newSVuv(1) );
      mp4->dlna_invalid = 1; // DLNA 8.6.34.8, moov must be before mdat
    }

    // Record audio offset and length
    my_hv_store( mp4->info, "audio_offset", newSVuv(mp4->audio_offset) );
    my_hv_store( mp4->info, "audio_size", newSVuv(size) );
    mp4->audio_size = size;
  }
  else {
    DEBUG_TRACE("  Unhandled box, skipping\n");
    skip = 1;
  }

  if (skip) {
    _mp4_skip(mp4, mp4->rsize);
  }

  return size;
}
// Parse the ftyp box payload (mp4->rsize bytes).
//
// Stores "major_brand", "minor_version" and a "compatible_brands" array
// reference in mp4->info.
// Returns 1 on success; 0 when the payload cannot be buffered, is too short
// to hold the two fixed fields, or the brand list is not a multiple of 4 bytes.
uint8_t
_mp4_parse_ftyp(mp4info *mp4)
{
  AV *compatible_brands;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // major_brand (4) + minor_version (4) must be present, otherwise the
  // subtraction below would go out of range and the loop would over-read
  if (mp4->rsize < 8) {
    return 0;
  }

  my_hv_store( mp4->info, "major_brand", newSVpvn( buffer_ptr(mp4->buf), 4 ) );
  buffer_consume(mp4->buf, 4);

  my_hv_store( mp4->info, "minor_version", newSVuv( buffer_get_int(mp4->buf) ) );

  mp4->rsize -= 8;

  if (mp4->rsize % 4) {
    // invalid ftyp
    return 0;
  }

  // Allocate only once every failure path is behind us, so nothing leaks
  compatible_brands = newAV();

  // Everything left is a list of 4-byte brand codes
  while (mp4->rsize > 0) {
    av_push( compatible_brands, newSVpvn( buffer_ptr(mp4->buf), 4 ) );
    buffer_consume(mp4->buf, 4);
    mp4->rsize -= 4;
  }

  my_hv_store( mp4->info, "compatible_brands", newRV_noinc( (SV *)compatible_brands ) );

  return 1;
}
// Parse the mvhd (movie header) box.
//
// Stores "mv_timescale" and "song_length_ms" in mp4->info. Version 0 boxes
// carry 32-bit time fields and version 1 boxes 64-bit ones; any other version
// is rejected.
// Returns 1 on success, 0 when the payload cannot be buffered or the version
// is unknown.
uint8_t
_mp4_parse_mvhd(mp4info *mp4)
{
  uint32_t timescale;
  double duration;
  double length_ms = 0;
  uint8_t version;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  if (version == 0) { // 32-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 8);

    timescale = buffer_get_int(mp4->buf);
    duration  = buffer_get_int(mp4->buf) * 1.0;
  }
  else if (version == 1) { // 64-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 16);

    timescale = buffer_get_int(mp4->buf);
    duration  = buffer_get_int64(mp4->buf) * 1.0;
  }
  else {
    return 0;
  }

  // A zero timescale would divide by zero and feed inf/NaN into an unsigned
  // conversion; report a length of 0 instead
  if (timescale) {
    length_ms = (duration / timescale) * 1000;
  }

  my_hv_store( mp4->info, "mv_timescale", newSVuv(timescale) );
  my_hv_store( mp4->info, "song_length_ms", newSVuv(length_ms) );

  // Skip rest
  buffer_consume(mp4->buf, 80);

  return 1;
}
// Parse the tkhd (track header) box and append a new track hash to the
// "tracks" array in mp4->info.
//
// The track hash receives "id" and "duration" (scaled with "mv_timescale"
// from mp4->info), plus "width"/"height" when they are greater than zero.
// mp4->current_track is set to the track id.
// Returns 1 on success, 0 when the payload cannot be buffered or the version
// is unknown.
uint8_t
_mp4_parse_tkhd(mp4info *mp4)
{
  AV *tracks = (AV *)SvRV( *(my_hv_fetch(mp4->info, "tracks")) );
  HV *trackinfo;
  SV **timescale_sv;
  uint32_t id;
  uint32_t timescale = 0;
  double duration;
  double duration_ms = 0;
  double width;
  double height;
  uint8_t version;

  // mv_timescale only exists once an mvhd box has been parsed
  timescale_sv = my_hv_fetch(mp4->info, "mv_timescale");
  if (timescale_sv) {
    timescale = SvIV(*timescale_sv);
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  // XXX DLNA Requirement [8.6.34.5]: For the default audio track, "Track_enabled"
  // must be set to the value of 1 in the "flags" field of Track Header Box of the track.

  if (version == 0) { // 32-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 8);

    id = buffer_get_int(mp4->buf);

    // Skip reserved
    buffer_consume(mp4->buf, 4);

    duration = buffer_get_int(mp4->buf) * 1.0;
  }
  else if (version == 1) { // 64-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 16);

    id = buffer_get_int(mp4->buf);

    // Skip reserved
    buffer_consume(mp4->buf, 4);

    duration = buffer_get_int64(mp4->buf) * 1.0;
  }
  else {
    return 0;
  }

  // Without a usable timescale the duration cannot be scaled; report 0
  // rather than dividing by zero
  if (timescale) {
    duration_ms = (duration / timescale) * 1000;
  }

  // Created only after every failure path, so the hash cannot leak
  trackinfo = newHV();

  my_hv_store( trackinfo, "id", newSVuv(id) );
  my_hv_store( trackinfo, "duration", newSVuv(duration_ms) );

  // Skip reserved, layer, alternate_group, volume, reserved, matrix
  buffer_consume(mp4->buf, 52);

  // width/height are fixed-point 16.16
  width = buffer_get_short(mp4->buf);
  width += buffer_get_short(mp4->buf) / 65536.;
  if (width > 0) {
    my_hv_store( trackinfo, "width", newSVnv(width) );
  }

  height = buffer_get_short(mp4->buf);
  height += buffer_get_short(mp4->buf) / 65536.;
  if (height > 0) {
    my_hv_store( trackinfo, "height", newSVnv(height) );
  }

  av_push( tracks, newRV_noinc( (SV *)trackinfo ) );

  // Remember the current track we're dealing with
  mp4->current_track = id;

  return 1;
}
// Parse the mdhd (media header) box.
//
// Stores the media timescale as "samplerate" in mp4->info and in
// mp4->samplerate. "song_length_ms" is only stored when it is not already
// present in mp4->info; otherwise the duration field is skipped.
// Returns 1 on success, 0 when the payload cannot be buffered or the version
// is neither 0 (32-bit fields) nor 1 (64-bit fields).
uint8_t
_mp4_parse_mdhd(mp4info *mp4)
{
  uint32_t timescale;
  uint8_t version;
  uint8_t wide;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  if (version > 1) {
    return 0;
  }

  // Version 1 widens ctime, mtime and duration from 32 to 64 bits
  wide = (version == 1);

  // Skip ctime and mtime
  buffer_consume(mp4->buf, wide ? 16 : 8);

  timescale = buffer_get_int(mp4->buf);
  my_hv_store( mp4->info, "samplerate", newSVuv(timescale) );

  // Skip duration, if have song_length_ms from mvhd
  if ( my_hv_exists( mp4->info, "song_length_ms" ) ) {
    buffer_consume(mp4->buf, wide ? 8 : 4);
  }
  else {
    double duration;
    double length_ms = 0;

    if (wide) {
      duration = buffer_get_int64(mp4->buf) * 1.0;
    }
    else {
      duration = buffer_get_int(mp4->buf) * 1.0;
    }

    // A zero timescale would divide by zero; report a length of 0 instead
    if (timescale) {
      length_ms = (duration / timescale) * 1000;
    }

    my_hv_store( mp4->info, "song_length_ms", newSVuv(length_ms) );
  }

  mp4->samplerate = timescale;

  // Skip rest
  buffer_consume(mp4->buf, 4);

  return 1;
}
// Parse the hdlr (handler reference) box for the current track.
//
// Stores "handler_type" (4 raw bytes) and "handler_name" (UTF-8 decoded text
// up to the first NUL, never past the end of the box) in the track hash.
// Returns 1 on success; 0 when there is no current track, the payload cannot
// be buffered, or the box is shorter than its fixed fields.
uint8_t
_mp4_parse_hdlr(mp4info *mp4)
{
  HV *trackinfo = _mp4_get_current_trackinfo(mp4);
  SV *handler_name;
  const char *name;
  uint32_t name_max;
  uint32_t name_len = 0;

  if (!trackinfo) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // version/flags + pre_defined (8), handler_type (4), reserved (12)
  if (mp4->rsize < 24) {
    return 0;
  }

  // Skip version, flags, pre_defined
  buffer_consume(mp4->buf, 8);

  my_hv_store( trackinfo, "handler_type", newSVpvn( buffer_ptr(mp4->buf), 4 ) );
  buffer_consume(mp4->buf, 4);

  // Skip reserved
  buffer_consume(mp4->buf, 12);

  // The name is whatever remains of the box. Stop at the first NUL, but never
  // scan beyond the box in case the file omitted the terminator.
  name_max = (uint32_t)(mp4->rsize - 24);
  name = (const char *)buffer_ptr(mp4->buf);
  while (name_len < name_max && name[name_len] != '\0') {
    name_len++;
  }

  handler_name = newSVpvn(name, name_len);
  sv_utf8_decode(handler_name);
  my_hv_store( trackinfo, "handler_name", handler_name );

  buffer_consume(mp4->buf, mp4->rsize - 24);

  return 1;
}
// Consume the fixed 8-byte start of the stsd box (version/flags and
// entry_count). Neither field is used.
// Returns 1 on success, 0 when 8 bytes cannot be buffered.
uint8_t
_mp4_parse_stsd(mp4info *mp4)
{
  if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  // entry_count is read only to move past it
  (void) buffer_get_int(mp4->buf);

  return 1;
}
// Parse the fixed 28-byte start of an mp4a sample entry for the current track.
//
// Stores "encoding" ("mp4a"), "channels" and "bits_per_sample" in the track
// hash and records the channel count in mp4->channels.
// Returns 1 on success; 0 when there is no current track or 28 bytes cannot
// be buffered.
uint8_t
_mp4_parse_mp4a(mp4info *mp4)
{
  HV *trackinfo = _mp4_get_current_trackinfo(mp4);

  // Same guard as _mp4_parse_hdlr: there may be no track to attach data to
  if (!trackinfo) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, 28, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  my_hv_store( trackinfo, "encoding", newSVpvn("mp4a", 4) );

  // Skip reserved
  buffer_consume(mp4->buf, 16);

  mp4->channels = buffer_get_short(mp4->buf);
  my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) );
  my_hv_store( trackinfo, "bits_per_sample", newSVuv( buffer_get_short(mp4->buf) ) );

  // Skip reserved
  buffer_consume(mp4->buf, 4);

  // Skip bogus samplerate
  buffer_consume(mp4->buf, 2);

  // Skip reserved
  buffer_consume(mp4->buf, 2);

  return 1;
}
uint8_t
_mp4_parse_esds(mp4info *mp4)
{
HV *trackinfo = _mp4_get_current_trackinfo(mp4);
uint32_t len = 0;
uint32_t avg_bitrate;
if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
return 0;
}
// Skip version/flags
buffer_consume(mp4->buf, 4);
// Public docs on esds are hard to find, this is based on faad
// and http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt
// verify ES_DescrTag
if (buffer_get_char(mp4->buf) == 0x03) {
// read length
if ( _mp4_descr_length(mp4->buf) < 5 + 15 ) {
return 0;
}
// skip 3 bytes
buffer_consume(mp4->buf, 3);
}
else {
// skip 2 bytes
buffer_consume(mp4->buf, 2);
}
// verify DecoderConfigDescrTab
if (buffer_get_char(mp4->buf) != 0x04) {
return 0;
}
// read length
if ( _mp4_descr_length(mp4->buf) < 13 ) {
return 0;
}
// XXX: map to string
my_hv_store( trackinfo, "audio_type", newSVuv( buffer_get_char(mp4->buf) ) );
buffer_consume(mp4->buf, 4);
my_hv_store( trackinfo, "max_bitrate", newSVuv( buffer_get_int(mp4->buf) ) );
avg_bitrate = buffer_get_int(mp4->buf);
if (avg_bitrate) {
if ( my_hv_exists(mp4->info, "avg_bitrate") ) {
// If there are multiple tracks, just add up the bitrates
avg_bitrate += SvIV(*(my_hv_fetch(mp4->info, "avg_bitrate")));
}
my_hv_store( mp4->info, "avg_bitrate", newSVuv(avg_bitrate) );
mp4->bitrate = avg_bitrate;
}
// verify DecSpecificInfoTag
if (buffer_get_char(mp4->buf) != 0x05) {
return 0;
}
// Read audio object type
// 5 bits, if 0x1F, read 6 more bits
len = _mp4_descr_length(mp4->buf);
if (len > 0) {
uint32_t aot;
len *= 8; // count the number of bits left
aot = buffer_get_bits(mp4->buf, 5);
len -= 5;
if ( aot == 0x1F ) {
aot = 32 + buffer_get_bits(mp4->buf, 6);
len -= 6;
}
// samplerate: 4 bits
// if 0xF, samplerate is next 24 bits
// else lookup in samplerate table
{
uint32_t samplerate = buffer_get_bits(mp4->buf, 4);
len -= 4;
if (samplerate == 0xF) { // XXX need test file with 24-bit samplerate field
samplerate = buffer_get_bits(mp4->buf, 24);
len -= 24;
}
else {
samplerate = samplerate_table[samplerate];
}
// Channel configuration (4 bits)
// XXX This is sometimes wrong (1 when it should be 2)
mp4->channels = buffer_get_bits(mp4->buf, 4);
my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) );
len -= 4;
if (aot == AAC_SLS) {
// Read some SLS-specific config
// bits per sample (3 bits) { 8, 16, 20, 24 }
uint8_t bps = buffer_get_bits(mp4->buf, 3);
len -= 3;
my_hv_store( trackinfo, "bits_per_sample", newSVuv( bps_table[bps] ) );
}
else if (aot == AAC_HE || aot == AAC_PS) {
// Read extended samplerate info
samplerate = buffer_get_bits(mp4->buf, 4);
len -= 4;
if (samplerate == 0xF) { // XXX need test file with 24-bit samplerate field
samplerate = buffer_get_bits(mp4->buf, 24);
len -= 24;
}
else {
samplerate = samplerate_table[samplerate];
}
}
my_hv_store( trackinfo, "samplerate", newSVuv(samplerate) );
mp4->samplerate = samplerate;
}
my_hv_store( trackinfo, "audio_object_type", newSVuv(aot) );
mp4->audio_object_type = aot;
// Skip rest of box
buffer_get_bits(mp4->buf, len);
}
// verify SL config descriptor type tag
if (buffer_get_char(mp4->buf) != 0x06) {
return 0;
}
_mp4_descr_length(mp4->buf);
// verify SL value
if (buffer_get_char(mp4->buf) != 0x02) {
return 0;
}
return 1;
}
// Parse the 28-byte fixed part of an 'alac' sample entry for the current
// track. Stores "encoding" ("alac"), "channels" and "bits_per_sample" in the
// track's info hash and records the channel count in mp4->channels.
//
// Returns 1 on success, 0 if there is no trackinfo for the current track or
// 28 bytes could not be buffered.
uint8_t
_mp4_parse_alac(mp4info *mp4)
{
  HV *trackinfo = _mp4_get_current_trackinfo(mp4);

  // _mp4_get_current_trackinfo returns NULL when no track matches; storing
  // into a NULL hash must be avoided (same guard as _mp4_parse_hdlr)
  if (!trackinfo) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, 28, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  my_hv_store( trackinfo, "encoding", newSVpvn("alac", 4) );

  // Skip reserved
  buffer_consume(mp4->buf, 16);

  mp4->channels = buffer_get_short(mp4->buf);
  my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) );
  my_hv_store( trackinfo, "bits_per_sample", newSVuv( buffer_get_short(mp4->buf) ) );

  // Skip reserved
  buffer_consume(mp4->buf, 4);

  // Skip bogus samplerate
  buffer_consume(mp4->buf, 2);

  // Skip reserved
  buffer_consume(mp4->buf, 2);

  return 1;
}
// Parse an 'stts' (time-to-sample) box into mp4->time_to_sample /
// mp4->num_time_to_samples.
//
// Payload layout: version/flags (4), entry count (4), then per entry a
// 32-bit sample_count and a 32-bit sample_duration.
//
// Returns 1 on success. Returns 0 if the payload is too short for its
// header, could not be buffered, declares more entries than fit in the
// payload, or the table could not be allocated.
uint8_t
_mp4_parse_stts(mp4info *mp4)
{
  uint32_t i;
  uint32_t count;

  // Need version/flags + entry count
  if (mp4->rsize < 8) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  count = buffer_get_int(mp4->buf);
  DEBUG_TRACE(" num_time_to_samples %d\n", count);

  // Only rsize bytes were requested from _check_buf, so a count that needs
  // more than the remaining payload (8 bytes per entry) cannot be read
  if ( count > (mp4->rsize - 8) / 8 ) {
    return 0;
  }

  mp4->num_time_to_samples = count;

  // New() takes an element count and multiplies by sizeof(type) itself
  New(0,
    mp4->time_to_sample,
    mp4->num_time_to_samples,
    struct tts
  );

  if ( !mp4->time_to_sample ) {
    PerlIO_printf(PerlIO_stderr(), "Unable to parse stts: too large\n");
    return 0;
  }

  for (i = 0; i < mp4->num_time_to_samples; i++) {
    mp4->time_to_sample[i].sample_count = buffer_get_int(mp4->buf);
    mp4->time_to_sample[i].sample_duration = buffer_get_int(mp4->buf);

    DEBUG_TRACE(
      " sample_count %d sample_duration %d\n",
      mp4->time_to_sample[i].sample_count,
      mp4->time_to_sample[i].sample_duration
    );
  }

  return 1;
}
// Parse an 'stsc' (sample-to-chunk) box into mp4->sample_to_chunk /
// mp4->num_sample_to_chunks.
//
// Payload layout: version/flags (4), entry count (4), then per entry a
// 32-bit first_chunk, a 32-bit samples_per_chunk and a 32-bit sample
// description index (skipped).
//
// Returns 1 on success. Returns 0 if the payload is too short for its
// header, could not be buffered, declares more entries than fit in the
// payload, or the table could not be allocated.
uint8_t
_mp4_parse_stsc(mp4info *mp4)
{
  uint32_t i;
  uint32_t count;

  // Need version/flags + entry count
  if (mp4->rsize < 8) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  count = buffer_get_int(mp4->buf);
  DEBUG_TRACE(" num_sample_to_chunks %d\n", count);

  // Only rsize bytes were requested from _check_buf, so a count that needs
  // more than the remaining payload (12 bytes per entry) cannot be read
  if ( count > (mp4->rsize - 8) / 12 ) {
    return 0;
  }

  mp4->num_sample_to_chunks = count;

  // New() takes an element count and multiplies by sizeof(type) itself
  New(0,
    mp4->sample_to_chunk,
    mp4->num_sample_to_chunks,
    struct stc
  );

  if ( !mp4->sample_to_chunk ) {
    PerlIO_printf(PerlIO_stderr(), "Unable to parse stsc: too large\n");
    return 0;
  }

  for (i = 0; i < mp4->num_sample_to_chunks; i++) {
    mp4->sample_to_chunk[i].first_chunk = buffer_get_int(mp4->buf);
    mp4->sample_to_chunk[i].samples_per_chunk = buffer_get_int(mp4->buf);

    // Skip sample desc index
    buffer_consume(mp4->buf, 4);

    DEBUG_TRACE(" first_chunk %d samples_per_chunk %d\n",
      mp4->sample_to_chunk[i].first_chunk,
      mp4->sample_to_chunk[i].samples_per_chunk
    );
  }

  return 1;
}
// Parse an 'stsz' (sample size) box into mp4->sample_byte_size /
// mp4->num_sample_byte_sizes.
//
// Payload layout: version/flags (4), fixed sample size (4), sample count
// (4), then one 32-bit size per sample. When the fixed sample size is
// non-zero no table is built and the count is skipped. Sizes are stored as
// 16-bit values, so any entry above 0xffff makes the parse fail.
//
// Returns 1 on success. Returns 0 if the payload is too short for its
// header, could not be buffered, declares more entries than fit in the
// payload, the table could not be allocated, or an entry exceeds 0xffff.
uint8_t
_mp4_parse_stsz(mp4info *mp4)
{
  uint32_t i;
  uint32_t count;

  // Need version/flags + sample size + sample count
  if (mp4->rsize < 12) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  // Check sample size is 0
  if ( buffer_get_int(mp4->buf) != 0 ) {
    DEBUG_TRACE(" stsz uses fixed sample size\n");
    buffer_consume(mp4->buf, 4);
    return 1;
  }

  count = buffer_get_int(mp4->buf);
  DEBUG_TRACE(" num_sample_byte_sizes %d\n", count);

  // Only rsize bytes were requested from _check_buf, so a count that needs
  // more than the remaining payload (4 bytes per entry) cannot be read
  if ( count > (mp4->rsize - 12) / 4 ) {
    return 0;
  }

  mp4->num_sample_byte_sizes = count;

  // New() takes an element count and multiplies by sizeof(type) itself
  New(0,
    mp4->sample_byte_size,
    mp4->num_sample_byte_sizes,
    uint16_t
  );

  if ( !mp4->sample_byte_size ) {
    PerlIO_printf(PerlIO_stderr(), "Unable to parse stsz: too large\n");
    return 0;
  }

  for (i = 0; i < mp4->num_sample_byte_sizes; i++) {
    uint32_t v = buffer_get_int(mp4->buf);

    // Table entries are 16-bit; larger samples cannot be represented
    if (v > 0x0000ffff) {
      DEBUG_TRACE("stsz[%d] > 65 kB (%ld)\n", (int)i, (long)v);
      return 0;
    }

    mp4->sample_byte_size[i] = v;

    //DEBUG_TRACE(" sample_byte_size %d\n", v);
  }

  return 1;
}
// Parse an 'stco' (chunk offset) box into mp4->chunk_offset /
// mp4->num_chunk_offsets.
//
// Payload layout: version/flags (4), entry count (4), then one 32-bit
// offset per chunk.
//
// Returns 1 on success. Returns 0 if the payload is too short for its
// header, could not be buffered, declares more entries than fit in the
// payload, or the table could not be allocated.
uint8_t
_mp4_parse_stco(mp4info *mp4)
{
  uint32_t i;
  uint32_t count;

  // Need version/flags + entry count
  if (mp4->rsize < 8) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  count = buffer_get_int(mp4->buf);
  DEBUG_TRACE(" num_chunk_offsets %d\n", count);

  // Only rsize bytes were requested from _check_buf, so a count that needs
  // more than the remaining payload (4 bytes per entry) cannot be read
  if ( count > (mp4->rsize - 8) / 4 ) {
    return 0;
  }

  mp4->num_chunk_offsets = count;

  // New() takes an element count and multiplies by sizeof(type) itself
  New(0,
    mp4->chunk_offset,
    mp4->num_chunk_offsets,
    uint32_t
  );

  if ( !mp4->chunk_offset ) {
    PerlIO_printf(PerlIO_stderr(), "Unable to parse stco: too large\n");
    return 0;
  }

  for (i = 0; i < mp4->num_chunk_offsets; i++) {
    mp4->chunk_offset[i] = buffer_get_int(mp4->buf);

    //DEBUG_TRACE(" chunk_offset %d\n", mp4->chunk_offset[i]);
  }

  return 1;
}
// Consume the start of a 'meta' box: its version/flags and the 'hdlr' box
// that must immediately follow.
//
// Returns the number of bytes consumed (4 + hdlr_size), or 0 if the data
// could not be buffered, the first child is not 'hdlr', or the hdlr size is
// smaller than its own 8-byte header.
//
// NOTE(review): the return type is uint8_t, so a consumed count of 256 or
// more is truncated (and a multiple of 256 reads as failure). The signature
// is kept as-is because callers depend on it.
uint8_t
_mp4_parse_meta(mp4info *mp4)
{
  uint32_t hdlr_size;
  char type[5];

  if ( !_check_buf(mp4->infile, mp4->buf, 12, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  // Parse/skip meta version of hdlr
  hdlr_size = buffer_get_int(mp4->buf);
  strncpy( type, (char *)buffer_ptr(mp4->buf), 4 );
  type[4] = '\0';
  buffer_consume(mp4->buf, 4);

  if ( !FOURCC_EQ(type, "hdlr") ) {
    return 0;
  }

  // hdlr_size counts the 8 header bytes just consumed; anything smaller
  // would make "hdlr_size - 8" wrap around
  if (hdlr_size < 8) {
    return 0;
  }

  // Skip rest of hdlr
  if ( !_check_buf(mp4->infile, mp4->buf, hdlr_size - 8, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  buffer_consume(mp4->buf, hdlr_size - 8);

  return 12 + hdlr_size - 8;
}
// Parse the items of an 'ilst' box until mp4->rsize reaches zero.
//
// Each item is a box (32-bit size + 4-char key). Items keyed "----" are
// handed to _mp4_parse_ilst_custom; every other item is expected to contain
// a 'data' box, which is handed to _mp4_parse_ilst_data under the
// upper-cased key. Bytes of an item beyond its first data box are skipped.
//
// Returns 1 when all items were consumed. Returns 0 if data could not be
// buffered, an item size is smaller than its header or larger than the
// remaining ilst, the child box is not 'data', or a sub-parser fails.
uint8_t
_mp4_parse_ilst(mp4info *mp4)
{
  while (mp4->rsize) {
    uint32_t size;
    char key[5];

    if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) {
      return 0;
    }

    DEBUG_TRACE(" ilst rsize %llu\n", mp4->rsize);

    // Read Apple annotation box
    size = buffer_get_int(mp4->buf);
    strncpy( key, (char *)buffer_ptr(mp4->buf), 4 );
    key[4] = '\0';
    buffer_consume(mp4->buf, 4);

    DEBUG_TRACE(" %s size %d\n", key, size);

    // The size includes the 8 header bytes just read and must fit in what is
    // left of ilst; otherwise "size - 8" and "rsize -= size" would wrap
    if ( size < 8 || size > mp4->rsize ) {
      return 0;
    }

    // Note: extra _check_buf calls in this function and other ilst functions
    // are to avoid reading in the full size of ilst in the case of large artwork

    upcase(key);

    if ( FOURCC_EQ(key, "----") ) {
      // user-specified key/value pair
      if ( !_mp4_parse_ilst_custom(mp4, size - 8) ) {
        return 0;
      }
    }
    else if ( size < 16 ) {
      // Too small to contain a data box header; skip whatever payload exists
      // so the next item is read from the right position
      _mp4_skip(mp4, size - 8);
    }
    else {
      uint32_t bsize;

      // Ensure we have 8 bytes
      if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) {
        return 0;
      }

      // Verify data box
      bsize = buffer_get_int(mp4->buf);

      DEBUG_TRACE(" box size %d\n", bsize);

      // Sanity check for bad data size: the data box must hold its 8-byte
      // header plus the 8-byte flags/reserved prefix that
      // _mp4_parse_ilst_data reads, and must fit inside the item
      if ( bsize >= 16 && bsize <= size - 8 ) {
        SV *skey;

        char *bptr = buffer_ptr(mp4->buf);
        if ( !FOURCC_EQ(bptr, "data") ) {
          return 0;
        }

        buffer_consume(mp4->buf, 4);

        skey = newSVpv(key, 0);

        if ( !_mp4_parse_ilst_data(mp4, bsize - 8, skey) ) {
          SvREFCNT_dec(skey);
          return 0;
        }

        SvREFCNT_dec(skey);

        // XXX: bug 14476, files with multiple COVR images aren't handled here, just skipped for now
        if ( bsize < size - 8 ) {
          DEBUG_TRACE(" skipping rest of box, %d\n", size - 8 - bsize );
          _mp4_skip(mp4, size - 8 - bsize);
        }
      }
      else {
        DEBUG_TRACE(" invalid data size %d, skipping value\n", bsize);
        // 12 bytes of this item (header + bsize) are already consumed
        _mp4_skip(mp4, size - 12);
      }
    }

    mp4->rsize -= size;
  }

  return 1;
}
// Parse the payload of an ilst 'data' box and store it in mp4->tags.
//
// size is the payload length: 4 bytes of version/flags, 4 reserved bytes,
// then size - 8 bytes of value. key is the (upper-cased) tag name.
//
// - COVR with AUDIO_SCAN_NO_ARTWORK set: the image is skipped; the tag value
//   becomes the image length and "COVR_offset" is stored alongside.
// - flags 0 or 21: TRKN/DISK are stored as "num/total" (or just num), GNRE
//   is mapped through _id3_genre_index, anything else becomes an integer for
//   1/2/4/8-byte values or a raw string otherwise.
// - other flags: the value is stored as UTF-8 text; a leading 0xA9 byte is
//   stripped from the key (only on this text path).
//
// If the tag already exists, values are collected into an array reference.
//
// Returns 1 on success, 0 if size is smaller than the 8-byte prefix or the
// payload could not be buffered.
uint8_t
_mp4_parse_ilst_data(mp4info *mp4, uint32_t size, SV *key)
{
  uint32_t flags;
  uint32_t dsize;
  unsigned char *ckey;
  SV *value;

  // Every "size - 8" below relies on the flags/reserved prefix being present
  if (size < 8) {
    return 0;
  }

  dsize = size - 8;
  ckey = (unsigned char *)SvPVX(key);

  if ( FOURCC_EQ(ckey, "COVR") && _env_true("AUDIO_SCAN_NO_ARTWORK") ) {
    // Skip artwork if requested and avoid the memory cost
    value = newSVuv(dsize);

    my_hv_store( mp4->tags, "COVR_offset", newSVuv(mp4->audio_offset + (mp4->size - mp4->rsize) + 24) );

    _mp4_skip(mp4, size);
  }
  else {
    // Read the full ilst value
    if ( !_check_buf(mp4->infile, mp4->buf, size, MP4_BLOCK_SIZE) ) {
      return 0;
    }

    // Version(0) + Flags
    flags = buffer_get_int(mp4->buf);

    // Skip reserved
    buffer_consume(mp4->buf, 4);

    DEBUG_TRACE(" flags %d\n", flags);

    if ( !flags || flags == 21 ) {
      if ( FOURCC_EQ( SvPVX(key), "TRKN" ) || FOURCC_EQ( SvPVX(key), "DISK" ) ) {
        // Special case trkn, disk (pair of 16-bit ints)
        uint16_t num = 0;
        uint16_t total = 0;
        uint32_t used = 0;

        // Only read the fields that actually fit in the value, so a short
        // value can neither over-read nor make the padding length wrap
        if (dsize >= 4) {
          buffer_consume(mp4->buf, 2); // padding
          num = buffer_get_short(mp4->buf);
          used = 4;

          // Total may not always be present
          if (dsize >= 6) {
            total = buffer_get_short(mp4->buf);
            used = 6;
          }
        }

        buffer_consume(mp4->buf, dsize - used); // optional padding

        DEBUG_TRACE(" %d/%d\n", num, total);

        if (total) {
          my_hv_store_ent( mp4->tags, key, newSVpvf( "%d/%d", num, total ) );
        }
        else if (num) {
          my_hv_store_ent( mp4->tags, key, newSVuv(num) );
        }

        return 1;
      }
      else if ( FOURCC_EQ( SvPVX(key), "GNRE" ) ) {
        // Special case genre, 16-bit int as id3 genre code
        char const *genre_string;
        uint16_t genre_num = 0;

        if (dsize >= 2) {
          genre_num = buffer_get_short(mp4->buf);

          // Consume any surplus so the caller stays aligned with the box
          if (dsize > 2) {
            buffer_consume(mp4->buf, dsize - 2);
          }
        }
        else {
          buffer_consume(mp4->buf, dsize);
        }

        if (genre_num > 0 && genre_num < NGENRES + 1) {
          genre_string = _id3_genre_index(genre_num - 1);
          my_hv_store_ent( mp4->tags, key, newSVpv( genre_string, 0 ) );
        }

        return 1;
      }
      else {
        // Other binary type, try to guess type based on size
        if (dsize == 1) {
          value = newSVuv( buffer_get_char(mp4->buf) );
        }
        else if (dsize == 2) {
          value = newSVuv( buffer_get_short(mp4->buf) );
        }
        else if (dsize == 4) {
          value = newSVuv( buffer_get_int(mp4->buf) );
        }
        else if (dsize == 8) {
          value = newSVuv( buffer_get_int64(mp4->buf) );
        }
        else {
          value = newSVpvn( buffer_ptr(mp4->buf), dsize );
          buffer_consume(mp4->buf, dsize);
        }
      }
    }
    else { // text data
      value = newSVpvn( buffer_ptr(mp4->buf), dsize );
      sv_utf8_decode(value);

      // strip copyright symbol 0xA9 out of key
      if ( ckey[0] == 0xA9 ) {
        ckey++;
      }

      DEBUG_TRACE(" %s = %s\n", ckey, SvPVX(value));

      buffer_consume(mp4->buf, dsize);
    }
  }

  // if key exists, create array
  if ( my_hv_exists( mp4->tags, (char *)ckey ) ) {
    SV **entry = my_hv_fetch( mp4->tags, (char *)ckey );
    if (entry != NULL) {
      if ( SvROK(*entry) && SvTYPE(SvRV(*entry)) == SVt_PVAV ) {
        av_push( (AV *)SvRV(*entry), value );
      }
      else {
        // A non-array entry, convert to array.
        AV *ref = newAV();
        av_push( ref, newSVsv(*entry) );
        av_push( ref, value );
        my_hv_store( mp4->tags, (char *)ckey, newRV_noinc( (SV*)ref ) );
      }
    }
  }
  else {
    my_hv_store( mp4->tags, (char *)ckey, value );
  }

  return 1;
}
// Parse the children of a "----" (user-defined) ilst item; size is the
// number of payload bytes that belong to the item.
//
// A 'name' child sets the tag key (upper-cased); each 'data' child is then
// stored under that key via _mp4_parse_ilst_data. Any other child (such as
// 'mean') is skipped.
//
// Returns 1 when all size bytes were consumed. Returns 0 if data could not
// be buffered, a child size is smaller than its header or larger than what
// is left of the item, a 'data' child appears before any 'name', or the
// data parser fails. The key SV is released on every exit path.
uint8_t
_mp4_parse_ilst_custom(mp4info *mp4, uint32_t size)
{
  SV *key = NULL;
  uint8_t ok = 0;

  while (size) {
    char type[5];
    uint32_t bsize;

    // Ensure we have 8 bytes to get the size and type
    if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) {
      goto out;
    }

    // Read box
    bsize = buffer_get_int(mp4->buf);
    strncpy( type, (char *)buffer_ptr(mp4->buf), 4 );
    type[4] = '\0';
    buffer_consume(mp4->buf, 4);

    DEBUG_TRACE(" %s size %d\n", type, bsize);

    // bsize includes the 8 header bytes just read and must fit in the item;
    // otherwise "bsize - 8" and "size -= bsize" would wrap around
    if ( bsize < 8 || bsize > size ) {
      goto out;
    }

    if ( FOURCC_EQ(type, "name") ) {
      // A name box carries 4 padding bytes after its header
      if ( bsize < 12 ) {
        goto out;
      }

      // Ensure we have the rest of the box (header is already consumed)
      if ( !_check_buf(mp4->infile, mp4->buf, bsize - 8, MP4_BLOCK_SIZE) ) {
        goto out;
      }

      buffer_consume(mp4->buf, 4); // padding

      // Drop a key left over from an earlier name box in the same item
      SvREFCNT_dec(key);

      key = newSVpvn( buffer_ptr(mp4->buf), bsize - 12);
      sv_utf8_decode(key);
      upcase(SvPVX(key));
      buffer_consume(mp4->buf, bsize - 12);

      DEBUG_TRACE(" %s\n", SvPVX(key));
    }
    else if ( FOURCC_EQ(type, "data") ) {
      if (!key) {
        // No key yet, data is out of order
        goto out;
      }

      if ( !_mp4_parse_ilst_data(mp4, bsize - 8, key) ) {
        goto out;
      }
    }
    else {
      // skip (mean, or other boxes)
      if ( !_check_buf(mp4->infile, mp4->buf, bsize - 8, MP4_BLOCK_SIZE) ) {
        goto out;
      }

      buffer_consume(mp4->buf, bsize - 8);
    }

    size -= bsize;
  }

  ok = 1;

out:
  // key may still be NULL here when no name box was seen
  SvREFCNT_dec(key);

  return ok;
}
// Look up the per-track hash whose "id" equals mp4->current_track.
//
// The tracks are taken from the array referenced by the "tracks" entry of
// mp4->info. Returns NULL when that entry is missing or no track matches.
HV *
_mp4_get_current_trackinfo(mp4info *mp4)
{
  AV *track_list;
  int idx = 0;
  SV **tracks_ref = my_hv_fetch(mp4->info, "tracks");

  if (tracks_ref == NULL) {
    return NULL;
  }

  track_list = (AV *)SvRV(*tracks_ref);

  // Walk every slot of the array, skipping empty slots and tracks
  // that carry no "id"
  while ( av_len(track_list) >= 0 && idx <= av_len(track_list) ) {
    SV **slot = av_fetch(track_list, idx, 0);

    idx++;

    if (slot != NULL) {
      HV *candidate = (HV *)SvRV(*slot);
      SV **track_id = my_hv_fetch( candidate, "id" );

      if ( track_id != NULL && SvIV(*track_id) == mp4->current_track ) {
        return candidate;
      }
    }
  }

  return NULL;
}
// Decode a descriptor length from buf: up to four bytes, each contributing
// its low 7 bits (most significant group first). A set high bit means
// another byte follows; reading stops after the fourth byte regardless.
uint32_t
_mp4_descr_length(Buffer *buf)
{
  uint32_t value = 0;
  uint8_t count;

  for (count = 0; count < 4; count++) {
    uint8_t byte = buffer_get_char(buf);

    value = (value << 7) | (byte & 0x7f);

    if ( !(byte & 0x80) ) {
      break;
    }
  }

  return value;
}
// Advance the parse position by size bytes.
//
// When the buffer holds at least size bytes they are consumed from it.
// Otherwise the file is seeked forward by the part not covered by the
// buffer and the buffer is emptied.
void
_mp4_skip(mp4info *mp4, uint32_t size)
{
  if ( buffer_len(mp4->buf) < size ) {
    // Not enough buffered: jump over the remainder in the file itself
    PerlIO_seek(mp4->infile, size - buffer_len(mp4->buf), SEEK_CUR);
    buffer_clear(mp4->buf);

    DEBUG_TRACE(" seeked past %d bytes to %d\n", size, (int)PerlIO_tell(mp4->infile));
    return;
  }

  buffer_consume(mp4->buf, size);

  DEBUG_TRACE(" skipped buffer data size %d\n", size);
}
// Return samples_per_chunk for the given chunk number.
//
// The sample-to-chunk table is searched from the last entry backwards for
// the first entry whose first_chunk is <= chunk. If no entry qualifies, the
// first entry's value is returned. Returns 0 when the table is empty or was
// never allocated, instead of reading element 0 of a missing table.
uint32_t
_mp4_samples_in_chunk(mp4info *mp4, uint32_t chunk)
{
  uint32_t i;

  if ( !mp4->num_sample_to_chunks || !mp4->sample_to_chunk ) {
    return 0;
  }

  // Count down with an unsigned index; i is decremented before each use
  for (i = mp4->num_sample_to_chunks; i-- > 0; ) {
    if (mp4->sample_to_chunk[i].first_chunk <= chunk) {
      return mp4->sample_to_chunk[i].samples_per_chunk;
    }
  }

  return mp4->sample_to_chunk[0].samples_per_chunk;
}
// Sum the sample_count of every time-to-sample entry.
uint32_t
_mp4_total_samples(mp4info *mp4)
{
  uint32_t sum = 0;
  uint32_t idx = 0;

  while (idx < mp4->num_time_to_samples) {
    sum += mp4->time_to_sample[idx].sample_count;
    idx++;
  }

  return sum;
}
// Return the sample_duration of the time-to-sample run that contains the
// given sample index, or 0 when the index lies beyond all runs.
uint32_t
_mp4_get_sample_duration(mp4info *mp4, uint32_t sample)
{
  uint32_t run_end = 0; // index one past the last sample of the current run
  uint32_t idx;

  for (idx = 0; idx < mp4->num_time_to_samples; idx++) {
    run_end += mp4->time_to_sample[idx].sample_count;

    if (sample < run_end) {
      return mp4->time_to_sample[idx].sample_duration;
    }
  }

  return 0;
}