The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Python::Serialise::Pickle;

use strict;
use Data::Dumper;
use IO::File;
use vars qw($VERSION);

$VERSION   = '0.01';



my %_set_handlers = (
	'NUMBER' => \&_set_num,
	'STRING' => \&_set_string,
	'HASH'   => \&_set_dict,
	'ARRAY'  => \&_set_list,
);


my %_get_handlers = ( 
		'I' => \&_get_num,
		'L' => \&_get_num,
		'F' => \&_get_num,
		'S' => \&_get_string,
		'N' => \&_get_none,
		'l' => \&_get_list,
		'd' => \&_get_dict,
		'c' => \&_get_raw,
		'p' => \&_get_id,
		'i' => \&_get_raw,
		'(' => \&_get_compound,
);


=head1 NAME

Python::Serialise::Pickle - a file for reading and writing pickled Python files

=head1 SYNOPSIS


    use Python::Serialise::Pickle;

    my $pr = Python::Serialise::Pickle->new("file/for/reading");
    while (my $data = $pr->load()) {
          print Dumper $data;
    }

    my $pw = Python::Serialise::Pickle->new(">file/for/writing");
    
    $pw->dump(['a' 'list']);
    $pw->dump("a string");
    $pw->dump(42);
    $pw->dump({'a'=>'hash'});

    $pw->close();  

=head1 DESCRIPTION

Pickling is a method of serialising files in Python (another method, 
Marshalling, is also available).

This module is an attempt to write a pure Perl implementation of the algorithm.


=head1 METHODS

=head2 new <filename>

Open a file for reading or writing. Can take any arguments that C<IO::File> can.

=cut 

sub new {
	my $class = shift;
	my $file  = shift || die "You must pass a file\n";

	## FIXME error here
	my $fh    = IO::File->new($file) || die "Couldn't open file\n";
	my $self = { _fh => $fh };

	return bless $self, $class;
	
}

=head2 load

Returns the next data structure from the pickle file or undef.

=cut

sub load {
	my $self = shift;
	$self->{_cur_id} = 0;

	print "LOAD\n";

	my $id = $self->_get_char();
	return undef if (!defined $id or $id eq "");

	my $sub = $_get_handlers{$id}    || die "We have no handler to deal with '$id'\n";
	return $self->$sub();

}


=head2 dump <data structure>

Takes a ref to an array or a hash or a number or string and pickles it. 

Structures may be nested.

=cut

sub dump {
	my $self = shift;
	my $val  = shift;



	my $sub = $_set_handlers{$self->_type($val)};
	
	my $line = $self->$sub($val);
	$line .= ".";
	
	$self->_write($line);
	return $line;

}


sub _backup {
	my $self = shift;
	print "BACKUP\n";
	$self->{_fh}->seek(-1,1);
}


sub _get_char
{
	my $self = shift;
	$self->{_fh}->read(my $data, 1);
	print "C=$data\n";

	return $data;
}

sub _get_line {
	my $self = shift;
	my $line = "";

	while (1) {
		my $char = $self->_get_char();
		last unless defined $char;
		last if $char eq "\n";
		$line .= $char;
	}

	return $line;
}

sub _write {
	my $self = shift;
	my $val  = shift;

	$self->{_fh}->write($val);
}


sub _get_num 
{
	my $self = shift;
	my %opts = @_;

	my $num  = $self->_get_line();

	unless (defined $opts{'ignore_end_marker'} && $opts{'ignore_end_marker'} == 1) {
		$self->_get_char();
	}


	return $num;	
}

sub _set_num 
{
	my $self = shift;
	my $num  = shift;
	my %opts = @_;


	my $return;
	if (int $num != $num) {
		$return =  "F$num\n";
	} else {
		$return = "I$num\n";
	}

	 $return .= $opts{'terminator'} if ($opts{'terminator'});

	return $return;
}


sub _get_string 
{
	my $self = shift;
	my %opts = @_;


	my $string = $self->_get_line();
	$string =~ s!^(['"])(.*)['"]$!$2!;	
	$string =~ s!"!\\"!g;
	eval "\$string = \"$string\";";

	my $id     = $self->_get_id();
	die "No id!\n" unless defined $id;

	unless (defined $opts{'ignore_end_marker'} && $opts{'ignore_end_marker'} == 1) {
		$self->_get_char();
	}


	return $string;
	

}

sub _set_string
{
	my $self = shift;
	my $string = shift;
	my %opts = @_;

	# escape some control chars
	$string =~ s{
                (.)
            }{
                (ord($1)<33 || ord($1)>126)?sprintf '\\%.3o',ord($1):$1
            }sxeg;
	
	my $return = "S";
	if ($string =~ /^'.+'$/) {
		$return .= "\"$string\"\n";
	} else {
		$return .= "'$string'\n";
	}
	


	$return .= $self->_set_id();
	$return .= $opts{'terminator'} if ($opts{'terminator'});


	return $return;

}


sub _get_id {
	my $self = shift;
	my %opts = @_;

	my $char = $self->_get_char();
	die "Got $char - was expecting 'p' for id"  unless $char eq 'p';
	return $self->_get_line();
}

sub _set_id {
	my $self = shift;
	
	my $id = $self->{_cur_id}++;
	return "p$id\n";
}

sub _get_list {
	my $self = shift;
	my %opts = @_;

	print "GET LIST\n";

	my $oid   = $self->_get_id();
	my @vals;

	while (1) {	
		my $id = $self->_get_char();
		if ($id eq '.' || $id eq 's' || $id eq 'a') {
			last;
		}
		if ($id eq 'g') {
			my $tmp = $self->_get_line();
			last;
		}	

		if ($id eq 't') {
			$self->_backup;
			last;
		}

		
		my $sub = $_get_handlers{$id};
		print "ID=$id\n";
		$opts{'ignore_end_marker'}=0;
		die "No handler for '$id' in get_list ",(join ",",@vals),""  unless defined $sub;
		push @vals, $self->$sub(%opts);
		

	}	
	print "END LIST\n";
	return \@vals;
}

sub _set_list {
	my $self = shift;
	my $list = shift;
	my %opts = @_;
	
	my $terminator = $opts{'terminator'} || "";

	my $return = "";
	$return   .= "(" unless ($opts{ignore_compound});
	$return   .= "l";
	$return   .= $self->_set_id();	

	$opts{'terminator'} = 'a';

	foreach my $val (@$list) {
		my $sub = $_set_handlers{$self->_type($val)};
                die "No handler to set '$val'" unless defined $sub;
                $return .= $self->$sub($val, %opts);

	}

	$return .= $terminator;
	return $return;

}



sub _get_compound {
        my $self = shift;
	my %opts = @_;


	my $id = $self->_get_char();
	
	if ($id eq 'l') { 
		$self->_get_list(%opts);
	} elsif ($id eq 'd') { 
		$self->_get_dict(%opts);
	} else {
		$self->_backup();
		$self->_get_tuple(%opts);
	}
}


sub _get_tuple {
	my $self = shift;
	my %opts = @_;
	my $last_id = $opts{'last_tuple_marker'} || ".";

	my @vals;

	print "GET TUPLE\n";	

        while (1) {
                my $id = $self->_get_char();
	   	if ($id eq '.' || $id eq 'a' || $id eq 's') {
                        last;
                }

		if ($id eq 'g') {
			$self->_get_line();
			next;
		}
		
		if ($id eq 't') {
			$self->_get_id();
			print "Getting ID\n";
			if ($opts{'ignore_end_marker'}) {	
				last;
			} else {
				next;
			}
		}
	        
                my $sub = $_get_handlers{$id};
                die "No handler for type '$id'" unless defined $sub;
                push @vals, $self->$sub(ignore_end_marker=>1);
                
        }
	print "END TUPLE\n";
        return \@vals;
}


sub _get_dict
{
	my $self = shift;
	my %opts = @_;
	my %dict;

	print "GET DICT\n";
	print "IGNORE END MARKER = ",$opts{'ignore_end_marker'},"\n";
	#$opts{'ignore_end_marker'}=1;

		
	my $id = $self->_get_id();



	while (1) {
                my $key_id = $self->_get_char();
		
		


		if ($key_id eq '.')  {
			$self->_backup() if $opts{'ignore_end_marker'};
			last;
		}
		last if ($key_id eq 's' || $key_id eq 'a');
			
		if ($key_id eq 'g') {
			my $tmp = $self->_get_line();
                        next;
		}


                my $key_sub = $_get_handlers{$key_id};
                die "No handler for key '$key_id'" unless defined $key_sub;
		
		my $key = $self->$key_sub( ignore_end_marker => 1);
		
		print "GOT KEY\n";		
			
                my $val_id = $self->_get_char();
                my $val_sub = $_get_handlers{$val_id};
                die "No handler for value '$val_id'" unless defined $val_sub;
		my $val =  $self->$val_sub(%opts);

		$dict{$key} = $val;		



	}
	print "END DICT\n";
	

	return \%dict;

}

sub _set_dict 
{
	my $self = shift;
	my $hash  = shift;
        my %opts = @_;

	my $return = "";
        $return   .= "(";
        $return   .= "d";
        $return   .= $self->_set_id();
	

	$opts{'ignore_compound'}   = 0;
	$opts{'ignore_end_marker'} = 1;
	$opts{'terminator'}        = "";

	foreach my $key (keys %{$hash}) {
		my $val = $hash->{$key};
		
		my $keysub = $_set_handlers{$self->_type($key)};
                die "No handler for setting key '$key'" unless defined $keysub;
                $return .= $self->$keysub($key, %opts);

		my $valsub = $_set_handlers{$self->_type($val)};
                die "No handler for setting val '$val'" unless defined $valsub;
                $return .= $self->$valsub($val, %opts);
				

		$return .= "s";
	}	

	return $return;

}





sub _get_none {
	my $self = shift;
	return $self->_get_raw;

}

sub _get_raw {
	my $self = shift;
	$self->_backup;


	my $val = "";
	while (1) {
		my $char = $self->_get_char();	
		last if ($char eq ".");
		$val .= $char;
		$val .= $self->_get_line();
	}
	return $val;
		
}

sub _type {
	my $self = shift;
	my $val = shift;

	my $ref = ref $val;

	return $ref if defined $ref && $ref ne "";

	return "NUMBER" if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/);
	return "STRING";

}


=head2 close

Closes the current file.

=cut 

sub close {
	my $self = shift;
	$self->{_fh}->close();
}

sub DESTROY {
	my $self = shift;
	$self->close();
}


=head1 BUGS

Almost certainly lots and lots.

=over 4

=item Serialised objects

At the moment we don't deal with serialised objects very well. 
Should probably just take or return a Python::Serialise::Pickle::Object 
object. 

=item The 'None' object

Similar to Perl's undef but an object. At the moment we deal with it badly 
because if we returned undef then that would signify the end of the Pickle file.

Should probably be returned as a special object or something.

=item Longs

There's no testing for longs

=item Unicode

Ditto

=item Some nested dictionaries

Dictionaries are the Python equivalent of hashes. This module can deal with most nested 
dictionaries but, for some reason, this one :

	a={'a':['two',{'goof':'foo', 'a':[1,2,3]}]}

causes it to fail.

Chnaging it slightly starts it working again.

=item Bad reading of specs

This is entirely my fault

=back

=head1 ALTERNATIVES

You could always dump the data structure out as YAML in Python
and then read it back in with YAML in Perl.

=head1 AUTHOR

Simon Wistow <simon@thegestalt.org>

=head1 COPYRIGHT

(c) 2003 Simon Wistow

Distributed under the same terms as Perl itself.

This software is under no warranty and will probably ruin your life,
kill your friends, burn your house and bring about the apocalypse.

=head1 SEE ALSO

http://www.python.org, L<YAML>, L<IO::File> and the RESOURCES file in 
this distribution.

=cut

1;