The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Audio::Wav::Read;

use strict;
eval { require warnings; }; #it's ok if we can't load warnings

use FileHandle;

use vars qw( $VERSION );
$VERSION = '0.14';

=head1 NAME

Audio::Wav::Read - Module for reading Microsoft WAV files.

=head1 SYNOPSIS

    use Audio::Wav;

    my $wav = new Audio::Wav;
    my $read = $wav -> read( 'filename.wav' );
#OR
    my $read = Audio::Wav -> read( 'filename.wav' );

    my $details = $read -> details();

=head1 DESCRIPTION

Reads Microsoft Wav files.

=head1 SEE ALSO

L<Audio::Wav>

L<Audio::Wav::Write>

=head1 NOTES

This module shouldn't be used directly, a blessed object can be returned from L<Audio::Wav>.

=head1 METHODS

=cut

sub new {
    my $class = shift;
    my $file = shift;
    my $tools = shift;
    $file =~ s#//#/#g;
    my $size = -s $file;

    my $handle = (ref $file eq 'GLOB') ? $file : new FileHandle "<$file";

    my $self = {
        'real_size' => $size,
        'file'      => $file,
        'handle'    => $handle,
        'tools'     => $tools,
    };

    bless $self, $class; 

    unless ( defined $handle ) {
        $self -> _error( "unable to open file ($!)" );
        return $self;
    }

    binmode $handle; 

    if( $Audio::Wav::_has_inline ) {
        local $/ = undef;
        my $c_string = <DATA>; 
        Inline->import(C => $c_string);
    } else {
        #TODO: do we have a reference to $tools here if using shortcuts?
        if( $tools && $tools -> is_debug() ) {
            warn "can't load Inline, using slow pure perl reads\n";
        }
    }

    $self -> {data} = $self -> _read_file();
    my $details = $self -> details();
    $self -> _init_read_sub();
    $self -> {pos} = $details -> {data_start};
    $self -> move_to();
    return $self; 
}

# just in case there are any memory leaks
sub DESTROY {
    my $self = shift;
    return unless $self;
    if ( exists $self->{handle} && defined $self->{handle} ) {
        $self->{handle}->close();
    }
    if ( exists $self->{tools} ) {
        delete $self->{tools};
    }
}

=head2 file_name

Returns the file name.

    my $file = $read -> file_name();

=cut

sub file_name {
    my $self = shift;
    return $self -> {file};
}

=head2 get_info

Returns information contained within the wav file.

    my $info = $read -> get_info();

Returns a reference to a hash containing;
(for example, a file marked up for use in Audio::Mix)

    {
        'keywords' => 'bpm:126 key:a',
        'name'     => 'Mission Venice',
        'artist'   => 'Nightmares on Wax'
    };

=cut

sub get_info {
    my $self = shift;
    return unless exists $self -> {data} -> {info};
    return $self -> {data} -> {info};
}

=head2 get_cues

Returns the cuepoints marked within the wav file.

    my $cues = $read -> get_cues();

Returns a reference to a hash containing;
(for example, a file marked up for use in Audio::Mix)
(position is sample position)

    {
        1 => {
            label    => 'sig',
            position => 764343,
            note     => 'first',
        },
        2 => {
            label    => 'fade_in',
            position => 1661774,
            note     => 'trig',
        },
        3 => {
            label    => 'sig',
            position => 18033735,
            note     => 'last',
        },
        4 => {
            label    => 'fade_out',
            position => 17145150,
            note     => 'trig',
        },
        5 => {
            label    => 'end',
            position => 18271676,
        }
    }

=cut

sub get_cues {
    my $self = shift;
    return unless exists $self -> {data} -> {cue};
    my $data = $self -> {data};
    my $cues = $data -> {cue};
    my $output = {};
    foreach my $id ( keys %{$cues} ) {
        my $pos = $cues -> {$id} -> {position};
        my $record = { 'position' => $pos };
        $record -> {label} = $data -> {labl} -> {$id} if ( exists $data -> {labl} -> {$id} );
        $record -> {note} = $data -> {note} -> {$id} if ( exists $data -> {note} -> {$id} );
        $output -> {$id} = $record;
    }
    return $output; 
}

=head2 read_raw

Reads raw packed bytes from the current audio data position in the file.

    my $data = $self -> read_raw( $byte_length );

=cut

sub read_raw {
    my $self = shift;
    my $len = shift;
    my $data_finish = $self -> {data} -> {data_finish};
    if ( $self -> {pos} + $len > $data_finish ) {
        $len = $data_finish - $self -> {pos};
    }
    return $self -> _read_raw( $len );
}

=head2 read_raw_samples

Reads raw packed samples from the current audio data position in the file.

    my $data = $self -> read_raw_samples( $samples );

=cut

sub read_raw_samples {
    my $self = shift;
    my $len = shift;
    $len *= $self -> {data} -> {block_align};
    return $self -> read_raw( $len );
}

sub _read_raw {
    my $self = shift;
    my $len = shift;
    my $data;
    return unless $len && $len > 0;
    $self -> {pos} += read $self -> {handle}, $data, $len;
    return $data; 
}

=head2 read

Returns the current audio data position sample across all channels.

    my @channels = $self -> read();

Returns an array of unpacked samples.
Each element is a channel i.e ( left, right ).
The numbers will be in the range;

    where $samp_max = ( 2 ** bits_per_sample ) / 2
    -$samp_max to +$samp_max 

=cut

# read is generated by _init_read_sub
sub read { die "ERROR: can't call read without first calling _init_read_sub"; };

sub _init_read_sub {
    my $self = shift;
    my $handle      = $self -> {handle};
    my $details     = $self -> {data};
    my $channels    = $details -> {channels};
    my $block       = $details -> {block_align};
    my $read_op;

    #TODO: we try to do something if we have bits_per_sample != multiple of 8?
    if ( $details -> {bits_sample} <= 8 ) {
        # Data in .wav-files with <= 8 bits is unsigned. >8 bits is signed
        my $offset = 2 ** ($details -> {bits_sample}-1);
        $read_op = q[ return map $_ - ] . $offset .
                   q[, unpack( 'C'.$channels, $val ); ];
    } elsif ( $details -> {bits_sample} == 16 ) {
        # 16 bits could be handled by general case below, but this is faster
        if ( $self -> {tools} -> is_big_endian() ) {
            $read_op = q[ return
                unpack 's' . $channels,        # 3. unpack native as signed short
                pack   'S' . $channels,        # 2. pack native unsigned short
                unpack 'v' . $channels, $val;  # 1. unpack little-endian unsigned short
            ];
        } else {
            $read_op = q[ return unpack( 's' . $channels, $val ); ];
        }
    } elsif ( $details -> {bits_sample} <= 32 ) {
        my $bytes  = $details -> {block_align} / $channels;
        my $fill   = 4 - $bytes;
        my $limit  = 2 ** ($details -> {bits_sample}-1);
        my $offset = 2 **  $details -> {bits_sample};
#warn "b: $bytes, f: $fill";    
        $read_op = q[ return 
            map    {$_ & ] . $limit . q[ ?           # 4. If sign bit is set
                    $_ - ] . $offset . q[ : $_}      #    convert to negative number
            unpack 'V*',                             # 3. unpack as little-endian unsigned long
            pack   "(a] . $bytes.'x'.$fill . q[)*",  # 2. fill with \0 to 4-byte-blocks and repack
            unpack "(a] . $bytes . q[)*", $val;      # 1. unpack to elements sized "$bytes"-bytes
         ];
#        $sub = sub 
#               { return  map    {$_ & $limit  ?          # 4. If sign bit is set
#                                 $_ - $offset : $_}      #    convert to negative number
#                         unpack 'V*',                    # 3. unpack as little-endian unsigned long
#                         pack   "(a${bytes}x${fill})*",  # 2. fill with \0 to 4-byte-blocks and repack
#                         unpack "(a$bytes)*", shift()    # 1. unpack to elements sized "$bytes"-bytes
#               };
    } else {
        $self->_error("Unpacking elements with more than 32 ($details->{bits_sample}) bits per sample not supported!");
    }

    $self -> {read_sub_string} = q[
        sub {
            my $val;
            $self -> {pos} += read( $handle, $val, $block );
            return unless defined $val;
            ] . $read_op . q[
        };
    ];
    if( $Audio::Wav::_has_inline ) {
        init( $handle, $details->{bits_sample}/8, $channels,
            $self -> {tools} -> is_big_endian() ? 1 : 0);
        *read = \&read_c;
    } else {
        my $read_sub = eval $self -> {read_sub_string};
        die "eval of read_sub failed: $@\n" if($@);
        $self -> {read_sub} = $read_sub; #in case any legacy code peaked at that
        *read = \&$read_sub;
    }
#warn $self -> {read_sub_string};
}

=head2 position

Returns the current audio data position (as byte offset).

    my $byte_offset = $read -> position();

=cut

sub position {
    my $self = shift;
    return $self -> {pos} - $self -> {data} -> {data_start};
}

=head2 position_samples

Returns the current audio data position (in samples).

    my $samples = $read -> position_samples();

=cut

sub position_samples {
    my $self = shift;
    return ( $self -> {pos} - $self -> {data} -> {data_start} ) / $self -> {data} -> {block_align};
}

=head2 move_to

Moves the current audio data position to byte offset.

    $read -> move_to( $byte_offset );

=cut

sub move_to {
    my $self = shift;
    my $pos = shift;
    my $data_start = $self -> {data} -> {data_start};
    if ( $pos ) {
	$pos = 0 if $pos < 0;
    } else {
	$pos = 0;
    }
    $pos += $data_start;
    if ( $pos > $self -> {pos} ) {
        my $max_pos = $self -> reread_length() + $data_start;
        $pos = $max_pos if $pos > $max_pos;
    }
    if ( seek $self -> {handle}, $pos, 0 ) {
	$self -> {pos} = $pos;
	return 1;
    } else {
	return $self -> _error( "can't move to position '$pos'" );
    }
}

=head2 move_to_sample

Moves the current audio data position to sample offset.

    $read -> move_to_sample( $sample_offset );

=cut

sub move_to_sample {
    my $self = shift;
    my $pos = shift;
    return $self -> move_to() unless defined $pos ;
    return $self -> move_to( $pos * $self -> {data} -> {block_align} );
}

=head2 length

Returns the number of bytes of audio data in the file.

    my $audio_bytes = $read -> length();

=cut

sub length {
    my $self = shift;
    return $self -> {data} -> {data_length};
}

=head2 length_samples

Returns the number of samples of audio data in the file.

    my $audio_samples = $read -> length_samples();

=cut

sub length_samples {
    my $self = shift;
    my $data = $self -> {data};
    return $data -> {data_length} / $data -> {block_align};
}

=head2 length_seconds

Returns the number of seconds of audio data in the file.

    my $audio_seconds = $read -> length_seconds();

=cut

sub length_seconds {
    my $self = shift;
    my $data = $self -> {data};
    return $data -> {data_length} / $data -> {bytes_sec};
}

=head2 details

Returns a reference to a hash of lots of details about the file.
Too many to list here, try it with Data::Dumper.....

    use Data::Dumper;
    my $details = $read -> details();
    print Data::Dumper->Dump([ $details ]);

=cut

sub details {
    my $self = shift;
    return $self -> {data};
}

=head2 reread_length

Rereads the length of the file in case it is being written to
as we are reading it.

    my $new_data_length = $read -> reread_length();

=cut

sub reread_length {
    my $self = shift;
    my $handle = $self -> {handle};
    my $old_pos = $self -> {pos};
    my $data = $self -> {data};
    my $data_start = $data -> {data_start};
    seek $handle, $data_start - 4, 0;
    my $new_length = $self -> _read_long();
    seek $handle, $old_pos, 0;
    $data -> {data_length} = $new_length;
    return $new_length; 
}

#########

sub _read_file {
    my $self = shift;
    my $handle = $self -> {handle};
    my %details;
    my $type = $self -> _read_raw( 4 );
    my $length = $self -> _read_long( );
    my $subtype = $self -> _read_raw( 4 );
    my $tools = $self -> {tools};
    my $old_cooledit = $tools -> is_oldcooledithack();
    my $debug = $tools -> is_debug();

    $details{total_length} = $length;

    unless ( $type eq 'RIFF' && $subtype eq 'WAVE' ) {
        return $self -> _error( "doesn't seem to be a wav file" );
    }

    my $walkover;  # for fixing cooledit 96 data-chunk bug

    while ( ! eof $handle && $self -> {pos} < $length ) {
        my $head;
        if ( $walkover ) {
            # rectify cooledit 96 data-chunk bug
            $head = $walkover . $self -> _read_raw( 3 );
            $walkover = undef;
            print "debug: CoolEdit 96 data-chunk bug detected!\n" if $debug;
        } else {
            $head = $self -> _read_raw( 4 );
        }
        my $chunk_len = $self -> _read_long();
        printf "debug: head: '$head' at %6d (%6d bytes)\n", $self->{pos}, $chunk_len if $debug;
        if ( $head eq 'fmt ' ) {
            my $format = $self -> _read_fmt( $chunk_len );
            my $comp = delete $format -> {format};
            if ( $comp == 65534 ) {
                $format -> {'wave-ex'} = 1;
            } elsif ( $comp != 1 ) {
                return $self -> _error( "seems to be compressed, I can't handle anything other than uncompressed PCM" );
            } else {
                $format -> {'wave-ex'} = 0;
            }
            %details = ( %details, %{$format} );
            next;
        } elsif ( $head eq 'cue ' ) {
            $details{cue} = $self -> _read_cue( $chunk_len, \%details );
            next;
        } elsif ( $head eq 'smpl' ) {
            $details{sampler} = $self -> _read_sampler( $chunk_len );
            next;
        } elsif ( $head eq 'LIST' ) {
            my $list = $self -> _read_list( $chunk_len, \%details );
            next;
        } elsif ( $head eq 'DISP' ) {
            $details{display} = $self -> _read_disp( $chunk_len );
            next;
        } elsif ( $head eq 'data' ) {
            $details{data_start} = $self -> {pos};
            $details{data_length} = $chunk_len;
        } else {
            $head =~ s/[^\w]+//g;
            $self -> _error( "ignored unknown block type: $head at $self->{pos} for $chunk_len", 'warn' );
        }

        seek $handle, $chunk_len, 1;
        $self -> {pos} += $chunk_len;

        # read padding
        if ($chunk_len % 2) {
            my $pad = $self->_read_raw(1);
            if ( ($pad =~ /\w/) && $old_cooledit && ($head eq 'data') ) {
                # Oh no, this file was written by cooledit 96...
                # This is not a pad byte but the first letter of the next head.
               $walkover = $pad;
            }
        }

        #unless ( $old_cooledit ) {
        #    $chunk_len += 1 if $chunk_len % 2; # padding
        #}
        #seek $handle, $chunk_len, 1;
        #$self -> {pos} += $chunk_len;

    }

    if ( exists $details{data_start} ) {
        $details{length} = $details{data_length} / $details{bytes_sec};
        $details{data_finish} = $details{data_start} + $details{data_length};
    } else {
        $details{data_start} = 0;
        $details{data_length} = 0;
        $details{length} = 0;
        $details{data_finish} = 0;
    }
    return \%details; 
}


sub _read_list {
    my $self = shift;
    my $length = shift;
    my $details = shift;
    my $note = $self -> _read_raw( 4 );
    my $pos = 4;

    if ( $note eq 'adtl' ) {
        my %allowed = map { $_ => 1 } qw( ltxt note labl );
        while ( $pos < $length ) {
            my $head = $self -> _read_raw( 4 );
            $pos += 4;
            if ( $head eq 'ltxt' ) {
                my $record = $self -> _decode_block( [ 1 .. 6 ] );
                $pos += 24;
            } else {
                my $bits = $self -> _read_long();
                $pos += $bits + 4;

                if ( $head eq 'labl' || $head eq 'note' ) {
                    my $id = $self -> _read_long();
                    my $text = $self -> _read_raw( $bits - 4 );
                    $text =~ s/\0+$//;
                    $details -> {$head} -> {$id} = $text; 
                } else {
                    my $unknown = $self -> _read_raw ( $bits ); # skip unknown chunk
                }
                if ($bits % 2) { # eat padding
                    my $padding = $self -> _read_raw(1);
                    $pos++;
                }
            }
        }
        # if it's a broken file and we've read too much then go back
        if ( $pos > $length ) {
            seek $self->{handle}, $length-$pos, 1;
        }
    }
    elsif ( $note eq 'INFO' ) {
        my %allowed = $self -> {tools} -> get_info_fields();
        while ( $pos < $length ) {
            my $head = $self -> _read_raw( 4 );
            $pos += 4;
            my $bits = $self -> _read_long();
            $pos += $bits + 4;
            my $text = $self -> _read_raw( $bits );
            if ( $allowed{$head} ) {
                $text =~ s/\0+$//;
                $details -> {info} -> { $allowed{$head} } = $text;
            }
            if ($bits % 2) { # eat padding
                my $padding = $self -> _read_raw(1);
                $pos++;
            }
        }
    } else {
        my $data = $self -> _read_raw( $length - 4 );
    }
}

sub _read_cue {
    my $self = shift;
    my $length = shift;
    my $details = shift;
    my $cues = $self -> _read_long();
    my @fields = qw( id position chunk cstart bstart offset );
    my @plain = qw( chunk );
    my $output;
    for ( 1 .. $cues ) {
        my $record = $self -> _decode_block( \@fields, \@plain );
        my $id = delete $record -> {id};
        $output -> {$id} = $record;
    }
    return $output; 
}

sub _read_disp {
    my $self = shift;
    my $length = shift;
    my $type = $self -> _read_long();
    my $data = $self -> _read_raw( $length - 4 + ($length%2) );
    $data =~ s/\0+$//;
    return [ $type, $data ];
}

sub _read_sampler {
    my $self = shift;
    my $length = shift;
    my %sampler_fields = $self -> {tools} -> get_sampler_fields();

    my $record = $self -> _decode_block( $sampler_fields{fields} );

    for my $id ( 1 .. $record -> {sample_loops} ) {
        push @{ $record -> {loop} }, $self -> _decode_block( $sampler_fields{loop} );
    }

    $record -> {sample_specific_data} = _read_raw( $record -> {sample_data} );

    my $read_bytes =
        9 * 4                                   # sampler info
        + 6 * 4 * $record -> {sample_loops}   # loops
        + $record -> {sample_data};           # specific data


    # read any junk
    if ($read_bytes < $length ) {
        my $junk = $self->_read_raw( $length - $read_bytes );
    }

    if ( $length % 2 ) {
        my $pad = $self -> _read_raw( 1 );
    }

    # temporary nasty hack to gooble the last bogus 12 bytes
    #my $extra = $self -> _decode_block( $sampler_fields{extra} );

    return $record; 
}


sub _decode_block {
    my $self = shift;
    my $fields = shift;
    my $plain = shift;
    my %plain;
    if ( $plain ) {
        foreach my $field ( @{$plain} ) {
            for my $id ( 0 .. $#{$fields} ) {
                next unless $fields -> [$id] eq $field;
                $plain{$id} = 1;
            }
        }
    }
    my $no_fields = scalar @{$fields};
    my %record;
    for my $id ( 0 .. $#{$fields} ) {
        if ( exists $plain{$id} ) {
            $record{ $fields -> [$id] } = $self -> _read_raw( 4 );
        } else {
            $record{ $fields -> [$id] } = $self -> _read_long();
        }
    }
    return \%record; 
}

sub _read_fmt {
    my $self = shift;
    my $length = shift;
    my $data = $self -> _read_raw( $length );
    my $types = $self -> {tools} -> get_wav_pack();
    my $pack_str = '';
    my $fields = $types -> {order};
    foreach my $type ( @{$fields} ) {
        $pack_str .= $types -> {types} -> {$type};
    }
    my @data = unpack $pack_str, $data;
    my %record;
    for my $id ( 0 .. $#{$fields} ) {
        $record{ $fields -> [$id] } = $data[$id];
    }
    return { %record };
}

sub _read_long {
    my $self = shift;
    my $data = $self -> _read_raw( 4 );
    return unpack 'V', $data; 
}

sub _error {
    my ($self, @args) = @_;
    return $self -> {tools} -> error( $self -> {file}, @args );
}

=head1 AUTHORS

    Nick Peskett (see http://www.peskett.co.uk/ for contact details).
    Brian Szymanski <ski-cpan@allafrica.com> (0.07-0.14)
    Wolfram humann (pureperl 24 and 32 bit read support in 0.09)
    Kurt George Gjerde <kurt.gjerde@media.uib.no>. (0.02-0.03)

=cut

1;

__DATA__

#ifdef WIN32
  // Note: if it becomes a problem that Visual Studio 6 and
  // Embedded Visual C++ 4 dont realize that char has the same
  // size as int8_t, check for #if (_MSC_VER < 1300) and use
  // signed __int8, unsigned __int16, etc. as in:
  // http://msinttypes.googlecode.com/svn/trunk/stdint.h
  typedef signed char       int8_t;
  typedef signed short      int16_t;
  typedef signed int        int32_t;
  typedef unsigned char     uint8_t;
  typedef unsigned short    uint16_t;
  typedef unsigned int      uint32_t;
#endif

//NOTE: 16, 32 bit audio do *NOT* work on big-endian platforms yet!
//verified formats (output is identical output to pureperl):
// 1 channel signed   16 little endian
// 2 channel signed   16 little endian
// 1 channel unsigned  8 little endian
// 2 channel unsigned  8 little endian
//verified "looks right" on these formats:
// 1 channel signed   32 little endian
// 2 channel signed   32 little endian
// 1 channel signed   24 little endian
// 2 channel signed   24 little endian

//maximum number of channels per audio stream
#define MAX_CHANNELS 10
//maximum number of bytes per sample (in one channel)
#define MAX_SAMPLE 4

FILE *handle;
int sample_size;
int channels;
int big_end;
int is_signed;
char buf[MAX_SAMPLE];
SV* retvals[MAX_CHANNELS];

void init(FILE *fh, int ss, int ch, int be) {
    int i;
    handle = fh;
    sample_size = ss;
    channels = ch;
    big_end = be;
    is_signed = (ss != 1); //TODO: is this really right?
    for(i=0; i<MAX_CHANNELS; i++) {
        retvals[i] = newSV(0);
    }
}

void read_c(void *self) {
    int samples[MAX_CHANNELS];
    int nread;
    int i, s;

    Inline_Stack_Vars;
    Inline_Stack_Reset;

    for(i=0; i<channels; i++) {
        // having fread in the loop is probably slightly less efficient,
        // but it avoids byte alignment problems and fread is buffered,
        // so it "shouldn't be a problem" (tm). more info:
        // http://www.eventhelix.com/RealtimeMantra/ByteAlignmentAndOrdering.htm
        nread = fread( buf, sample_size, 1, handle );
        if( !nread ) {
            if( feof( handle ) && i ) {
                perror("got EOF mid-sample!");
            } else if( ferror( handle ) ) {
                perror("io error");
            }
            break;
        }
        switch(sample_size) {
            case 4:
                if(big_end) {
                    s = buf[0]; buf[0] = buf[3]; buf[3] = s;
                    s = buf[1]; buf[1] = buf[2]; buf[2] = s;
                }
                s = is_signed ?
                    *((int32_t *)buf) :
                    *((uint32_t *)buf) - 0x7fffffff - 1;
                break;
            case 3:
                //TODO: test this!
                if(big_end) { s = buf[0]; buf[0] = buf[2]; buf[2] = s; }
                s = *((uint32_t *)buf);
                if(big_end) { s = (s & 0xffffff00) >> 8; }
                else        { s = s & 0x00ffffff; }
                //make negative via 2s compliment if data is signed
                //and the sign bit is set
                if ( is_signed ) {
                    if ( s & 0x00800000 ) {
                        s = -((~s & 0x00ffffff)+1);
                    }
                } else {
                    //we *always* return signed data
                    s += -0x800000;
                }
                break;
            case 2: 
                if(big_end) { s = buf[0]; buf[0] = buf[1]; buf[1] = s; }
                s = is_signed ?
                    *((int16_t *)buf) :
                    *((uint16_t *)buf) + -0x8000;
                break;
            case 1:
                //note: Audio::Wav *always* returns signed data
                s = is_signed ?
                    *((int8_t *)buf) :
                    *((uint8_t *)buf) + -0x80;
                break;
        }
        sv_setiv(retvals[i], s);
        Inline_Stack_Push(retvals[i]);
    }
    Inline_Stack_Done;
}