The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Palm::PalmDoc;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter);
@EXPORT = qw();
$VERSION = '0.13';

# Palm::PalmDoc Constructor

sub new {
 my $proto = shift;
 my $class = ref($proto) || $proto;
 my $self = {};
 $self->{TITLE} = "PalmDoc Document";
 $self->{INFILE} = undef;
 $self->{OUTFILE} = undef;
 $self->{INFILEH} = undef;
 $self->{OUTFILEH} = undef;
 $self->{BODY} = undef;
 $self->{COMPRESS} = 0;
 $self->{BLOCK_SIZE} = [];
 $self->{IGNORENL} = 0;
 bless($self,$class);
 if (@_) 
 { my $ref = shift;
   my %params = ();
   if (ref $ref eq 'ARRAY')
   { %params = @{$ref}; }
   if (ref $ref eq 'HASH')
   { %params = %{$ref}; }
   if (ref $ref eq '') 
   { unshift @_,$ref;
     if (!(@_ % 2)) 
     { %params = @_; }
   }
   foreach (keys %params) { my $tkey = uc $_; my $tvalue = $params{$_}; delete $params{$_}; $params{$tkey} = $tvalue; } 
   $self->infile($params{INFILE}) if exists $params{INFILE};
   $self->outfile($params{OUTFILE}) if exists $params{OUTFILE};
   $self->title($params{TITLE}) if exists $params{TITLE};
   $self->compression($params{COMPRESS}) if exists $params{COMPRESS};
   $self->ignorenl($params{IGNORENL}) if exists $params{IGNORENL};
   $self->body($params{BODY}) if exists $params{BODY};
   $self->compressed(0);
 }
 return $self;
}

sub body {
 my $self = shift;
 if (@_) { 
 $self->{BODY} = shift;
 if ($self->ignorenl) 
    { my @body = split(/\n/, $self->{BODY});
      my $sep = "";
      $self->{BODY} = "";
      foreach (@body) 
      { if (/^\s*$/)
        { $self->{BODY} .= "\n";
          $sep = "";
        } else 
        { $self->{BODY} .= "$sep$_";
          $sep = " ";
        }
      }
     if ($sep eq " ") 
     { $self->{BODY} .= "\n"; }
   }
 $self->length(CORE::length $self->{BODY});
 if ($self->compression && !$self->compressed) { $self->compressed(1); $self->{BODY} = $self->compr_text($self->{BODY}); }
 }
 return($self->{BODY});
}

sub length {
 my $self = shift;
 if (@_) { $self->{LENGTH} = shift; }
 return($self->{LENGTH});
}

sub title {
 my $self = shift;
 if (@_) { $self->{TITLE} = shift; }
 return($self->{TITLE});
}

sub compression {
 my $self = shift;
 if (@_) { $self->{COMPRESS} = shift @_ ? 1 : 0; }
 return($self->{COMPRESS});
}

sub compressed {
 my $self = shift;
 if (@_) { $self->{COMPRESSED} = shift @_ ? 1 : 0; }
 return($self->{COMPRESSED});
}

sub ignorenl {
 my $self = shift;
 if (@_) { $self->{IGNORENL} = shift @_ ? 1 : 0; }
 return($self->{IGNORENL});
}


sub infile {
 my $self = shift;
 if (@_) 
 { $self->{INFILE} = shift; 
   $self->{INFILE} =~ s/([;\`'\\\|"*~<>^\(\)\[\]\{\}\$\n\r\0\t\s])//g;
 }
 return($self->{INFILE});
}

sub outfile {
 my $self = shift;
 if (@_) { 
 $self->{OUTFILE} = shift; 
 $self->{OUTFILE} =~ s/([;\`'\\\|"*~<>^\(\)\[\]\{\}\$\n\r\0\t\s])//g; 
 }
 return($self->{OUTFILE});
}

sub parse_from_file {
 my $self = shift;
 $self->infile(shift) if @_;
 $self->outfile(shift) if @_;
}

sub parse_from_filehandle {
 my $self = shift;
 ($self->{INFILEH},$self->{OUTFILEH}) = @_;
 $self->{INFILEH} ||= \*STDIN;
 $self->{OUTFILEH} ||= \*STDOUT;
}

sub read_text {
 my $self = shift;
 if ($self->infile) 
 { open (IN, "<".$self->infile) || die "Can't open ".$self->infile.": $!\n";
   { local $/ = undef;
     $self->body(<IN>);
   }
   close (IN);
   if ($self->{INFILEH} && !$self->infile)
   { local $/ = undef;
     $self->body(<INFILEH>);
   }
   $self->{INFILEH} and close($self->{INFILEH}) || die "Can't close input filehandle after reading: $!";
   if ($self->compression && !$self->compressed) { $self->compressed(1); $self->body($self->compr_text($self->body)); }
   return ($self->body);
 } else { return(0); }
}

sub write_text {
 my $self = shift;
 if ($self->body) 
 { if ($self->outfile) 
   { open (OUT,">".$self->outfile) || die "Can't open ".$self->outfile.": $!\n";
     binmode(OUT);
     print OUT $self->pdb_header(),$self->body;	
     close (OUT);
   }
   if ($self->{OUTFILEH} && !$self->outfile) 
   { binmode($self->{OUTFILEH});
     my $foo = $self->{OUTFILEH};
     print $foo $self->pdb_header,$self->body;
     $self->{OUTFILEH} and close($self->{OUTFILEH}) || die "Can't close output filehandle after reading: $!";
   }
   return (1); 
  } else { return(0); }
}

sub pdb_header {
my $self = shift;
my $COUNT_BITS = 3;
my $DISP_BITS = 11;
my $DOC_CREATOR = "REAd";
my $DOC_TYPE = "TEXt";
my $RECORD_SIZE_MAX = 4096;	# 4k record size
my $dmDBNameLength = 32;	# 31 chars + 1 null

my $pdb_rec_offset;		# PDB record offset
my $header_buff = "";		# Temporary buffer to build the headers in.
my $x;
my $y;
my $pdb_header_size = 78;
my $pdb_attributes = 0;
my $pdb_version = 0;
my $pdb_create_time = 0x11111111;			# Palm Desktop demands
my $pdb_modify_time = 0x11111111;			# a timestamp.
my $pdb_backup_time = 0;
my $pdb_modificationNumber = 0;
my $pdb_appInfoID = 0;
my $pdb_sortInfoID = 0;
my $pdb_type = $DOC_TYPE;
my $pdb_creator = $DOC_CREATOR;
my $pdb_id_seed = 0;
my $pdb_id_nextRecordList = 0;
my $pdb_numRecords = (int ($self->length / 4096)) + 2;	# +1 for record 0
							# +1 for fractional part
						
my $pdb_header = pack("a32nnNNNNNNa4a4NNn",substr($self->title,0,31)."\0",$pdb_attributes,
					 $pdb_version,$pdb_create_time,
					 $pdb_modify_time,$pdb_backup_time,
					 $pdb_modificationNumber,$pdb_appInfoID,
					 $pdb_sortInfoID,$pdb_type,$pdb_creator,
					 $pdb_id_seed,$pdb_id_nextRecordList,
					 $pdb_numRecords);

if ( (CORE::length $pdb_header) != 78) { die "pdb_header malformed\n"; }

my $doc_header_size = 16;
my $doc_version = 1 + $self->compression;
my $reserved1 = 0;
my $doc_doc_size = $self->length;
my $doc_rec_size = 4096;
my $doc_num_recs = (int ($self->length / 4096)) + 1;	
my $doc_reserved2 = 0;

my $doc_header = pack("nnNnnN",$doc_version,$reserved1,$doc_doc_size,
			     $doc_num_recs,$doc_rec_size,$doc_reserved2);

if ( (CORE::length $doc_header) != 16) { die "doc_header malformed\n"; }

my $pdb_rec_header_size = 8;
my $pdb_rec_attributes = 0x40;		# We'll fake this, 0x40 = 'dirty'
my $pdb_rec_uniqueID = 0x3D0;		# Simple increment

my $pdb_rec_header_template = "Nccn";

	$pdb_rec_offset = $pdb_header_size + 
			  (($pdb_numRecords)* $pdb_rec_header_size) + 2;

	$header_buff = $pdb_header . pack($pdb_rec_header_template,
					  $pdb_rec_offset, $pdb_rec_attributes,
					  ord('a'),$pdb_rec_uniqueID );
	$pdb_rec_offset += $doc_header_size;	# Add offset for doc_header

	for ($x = 0; $x < $pdb_numRecords - 1; $x++) {	

#		if ($x > 0 ) 
#			{ $self->{BLOCK_SIZE}[$x] = $RECORD_SIZE_MAX; }
			
		$pdb_rec_offset += $self->{BLOCK_SIZE}[$x];
		++$pdb_rec_uniqueID;
		$header_buff .=	pack($pdb_rec_header_template,$pdb_rec_offset,
				     $pdb_rec_attributes,ord('a'),$pdb_rec_uniqueID);
	}
	
	$header_buff .= 0x00 . 0x00;

	$header_buff .= $doc_header;	

return ($header_buff);
}

sub compr_text {
my $self = shift;
my $total_compr_size = 0;		# Final compressed text size
my $compr_buff = "";			# Temporary output buffer
my $numrecords = (int($self->{LENGTH} / 4096) +1);	# Number of blocks to compress.
my $x;
my $y;
my $block_offset;
my $block;			# Contains the current 4096 byte block of text
my $block_len;			# Length of current block
my $index;			# Current scan position in block
my $byte;			# Char at index (for space + char compression)
my $byte2;			# Char at index+1
my $test;			# Potentially compressible text for 
				# LZ77 compression.

my $frag_size;			# Current size of above
my $frag_size2;			# Spare for lazy byte compression	
my $test2;			# spare for above
my $test3; 			# second spare				
my $pos;			# Position (in $block) of reference text 
				# for $test
				# to compress against.

my $pos2;			# spare for above
my $pos3;			# second spare
my $back;			# $index - pos
my $mask;			# Bitwise mask to do LZ77 'magic'
my $compr_ratio;		# Compression ratio
my $done;				
my $comp_block_offset = 0;	# The $compr_buff index
				# block begins.
my $FRAG_MAX = 10;		# Max LZ77 fragment size
my $FRAG_MIN = 3;		# Min LZ77 fragment size
my $LAZY_BYTE_FRAG = $FRAG_MAX + $FRAG_MIN - 1;

		
$self->{BLOCK_SIZE}[0] = 0;	# Record 0 is already written and 
				# is not compressed.
for ($x = 1; $x <= $numrecords; $x++) {

	$block_offset = ($x - 1) * 4096;
	$block = substr($_[0],$block_offset, 4096);
	if ($x >= $numrecords) {			# Last block
		$block = substr($block,0,($self->{LENGTH} % 4096));

	}
		
$block_len = CORE::length($block);	

$index = 0;

while ( $index < $block_len ) {

	$byte = substr($block,$index,1);	# Char at $index
	if ($byte =~ /[\200-\377]/) {   # is high bit set?

		$y = 1;			# found at least one!

		while ( (substr($block,$index + $y ,1)  =~ 
			      /[\200-\377]/) &&
			($y < 8) ) {

			++$y;		# If found, increment counter
				 	
		}			

		$compr_buff .= chr($y); # Write escape code
		$compr_buff .= substr($block,$index,$y); # Write text
		$index += $y;		# Increment the index		

	 } else { 			# Real compression routines

	$frag_size = $FRAG_MIN;		# We don't care about anything less

	$test = substr($block,$index,$frag_size); # pull the current fragment
	$pos = rindex($block, $test, $index - 1); # check against the buffer

	if ( ($pos > 0) &&		 	
	     ($index - $pos <= 2047) && 	# Inside our 2047 byte window
	     ( $index < $block_len - $frag_size) ) { 

		for ($y = 4; $y <= $FRAG_MAX; $y++ ) { 
			++$frag_size ;
			$test2 = substr($block,$index,$frag_size);
			$pos2 = rindex($block, $test2, $index - 1);
			if (($pos2 > 0) && 
			    ($index - $pos2 <= 2047) && 
			    ($index < $block_len - $frag_size) ) { 
						# found a match!
				$pos = $pos2;
				$test = $test2;
			} else {		# no match, go back
				--$frag_size;
				last;
				
			}
			 
		}
						# Sanity check		
		if ($frag_size > $FRAG_MAX) 
		  { die "frag_size too big!!!: $frag_size\n"; }	
		  
	   $frag_size2 = $frag_size + 2;
	   $test2 = substr($block,$index + 1, $frag_size2);
	   $pos2 = rindex($block, $test2, $index - 1);
	   if (($pos2 > 0) && 
		    ($index - $pos2 <= 2047) && 
		    ($index < $block_len - $frag_size2) ) { 

		   for ($y = $frag_size2;$y <= $LAZY_BYTE_FRAG; 
		        $y++ ) { 		# Look for more
			++$frag_size2;
			$test2 = substr($block,$index + 1, $frag_size2);
			$pos2 = rindex($block, $test2, $index - 1);
			if (($pos2 > 0) && 
			    ($index - $pos2 <= 2047) && 
			    ($index < $block_len - $frag_size2) ) { 
							# found a match!

			} else {			# no match, go back
				--$frag_size2;
			        last;
				
			}			    		       
		   }
		  if ($frag_size2 < $LAZY_BYTE_FRAG)  {	

		       $pos = 0;		
		       $compr_buff .= substr($block,$index,1);	
		       ++$index; 
		  }
	    }	  		
		
	   if ($pos > 0) {		# Did we abort the compression?
		
	      $back = $index - $pos;
	      $mask = 0x8000 | int($frag_size - 3);

	      $compr_buff .= pack("n",int($back << 3) | $mask);
	      $index += $frag_size;
	   }
	   
	} else {

		$byte = substr($block,$index,1);	# Char at $index
		$byte2 = substr($block,$index + 1,1);	# next char as well
		if ( ($byte eq " ") && 
		     ($byte2 =~ /[\100-\176]/ ) && 
		     ($index <= $block_len - 1)) {
		       					# Got a space + char
						
							# Set the high bit
							# and add to output 
							# buffer.
	         		$compr_buff .= pack("C", ord ($byte2) | 0x80 );
				$index += 2;		# Compressed 2 bytes
	
		} else {
			$compr_buff .= $byte;		# No compression
		     	++$index; 
		}
	}
}
}

$self->{BLOCK_SIZE}[$x] = (CORE::length ($compr_buff)) - $total_compr_size;
$total_compr_size = CORE::length ($compr_buff);

}

return ($compr_buff);	
}


1;
__END__

=head1 NAME

Palm::PalmDoc - Perl extension for PalmDoc format

=head1 SYNOPSIS

  # Example 1
  use Palm::PalmDoc;
  my $doc = Palm::PalmDoc->new({INFILE=>"foo.txt",OUTFILE=>"foo.pdb",TITLE=>"foo bar",COMPRESS=>1});
  $doc->read_text();
  $doc->write_text();

  # Example 2
  use Palm::PalmDoc;
  my $doc = Palm::PalmDoc->new({OUTFILE=>"foo.pdb",TITLE=>"foo bar"});
  $doc->compression(1);
  $doc->body("Foo Bar"x100);
  $doc->write_text();

  # Example 3
  use Palm::PalmDoc;
  my $doc = Palm::PalmDoc->new(INFILE=>"README");
  $doc->compression(1); #Compression is off by default
  $doc->read_text();
  open(F,">readme.pdb") || die $!;
  print F $doc->pdb_header,$doc->body;
  close(F);

  # Example 4
  use Palm::PalmDoc;
  my $doc = Palm::PalmDoc->new();
  $doc->parse_from_file("README");
  open(F,">readme.pdb") || die $!;
  $doc->parse_from_filehandle("",\*F);
  $doc->compression(1); #Compression is off by default
  $doc->read_text();
  $doc->write_text();


=head1 DESCRIPTION

This module can format ASCII text into a PalmDoc PDB file.

Palm::PalmDoc provides the following functions :

=over 3

=item new(@params)

The constructor of Palm::PalmDoc. This function can accept parameters used to 
generate the PalmDoc file. Parameters accepted are INFILE, OUTFILE, TITLE 
and BODY. They need to be passed in hash context (or a list/array mimicking 
a hash). A reference to a hash is also accepted, as well as a reference to 
an array.

  my $doc = Palm::PalmDoc->new({INFILE=>"foo.txt",OUTFILE=>"foo.pdb"});

is same as 

  my $doc = Palm::PalmDoc->new(INFILE=>"foo.txt",OUTFILE=>"foo.pdb");

or as 

  my $doc = Palm::PalmDoc->new("INFILE","foo.txt","OUTFILE","foo.pdb");

Keys are always uppercased (even though they may not be passed as such). 
Possible keys are:

=back

=over 3

=item INFILE

=back

  The input filename

=over 3

=item OUTFILE

=back

  The output filename

=over 3

=item TITLE

=back

  The document title

=over 3

=item BODY

=back

  The document body

=over 3

=item COMPRESS

=back

  Boolean to indicate compression

=over 3

=item IGNORENL

=back

  Boolean to indicate to ingoring newlines.

=over 3

=item body($body)

=back

This is a plain getter/setter function except that it also sets the required 
length. The same action can be performed by setting the appropriate hash 
key/value pair in the constructor or by using the read_text function.

  $doc->body("Foo Bar"x100);

=over 3

=item title($title)

=back

This is a plain getter/setter function for the title. The same action can be 
performed by setting the appropriate hash key/value pair in the constructor.

  $doc->title("Foo Bar Baz");


=over 3

=item infile($filename)

=back

This is a plain getter/setter function for the Input filename. The same 
action can be performed by setting the appropriate hash key/value pair in 
the constructor. If both an input file and an input filehandle are defined,
the input file is used.

  $doc->infile("foo.txt");

=over 3

=item outfile($filename)

=back

This is a plain getter/setter function for the Output filename.	The same 
action can be performed by setting the appropriate hash key/value pair in 
the constructor. If both an output file and an output filehandle are defined,
the output file is used.

  $doc->outfile("foo.pdb");


=over 3

=item parse_from_file($inputfile,$outputfile)

=back

parse_from_file uses infile() and outfile() to set the filenames. If both an 
input file and an input filehandle are defined, the input file is used. Same
applies for output file and output filehandle.


  $doc->parse_from_file("foo.txt","foo.pdb");

=over 3

=item parse_from_filehandle($inputfilehandle,$outputfilehandle)

=back

parse_from_filehandle takes filehandle as arguments. When no input filehandle 
is defined STDIN is used. When no output filehandle is defined STDOUT is used.
If both an input file and an input filehandle are defined, the input file is 
used. Same applies for output file and output filehandle.

  $doc->parse_from_filehandle(\*FOO,\*BAR);

=over 3

=item read_text()

=back

This function uses the inputfile property to read the body from a file. It 
also sets the required length. This function returns the text read if 
successfull or a false if not successfull.	

  $doc->read_text();

=over 3

=item write_text()

=back

This function uses the outputfile property to write the header and body to a 
file. The headers are generated by the pdb_header function. This function 
returns true if successfull or false if not successfull.

  $doc->write_text();

=over 3

=item pdb_header()

=back

This function generates the correct PDB headers for the body and length. 
You only need to use this function if you're writing the body to a file 
manually since write_text() already used pdb_header. This function returns 
the generated header which should precede the converted body. Writing to an
already opened filehandle can be done with parse_from_filehandle too.

  use Palm::PalmDoc;
  my $doc = Palm::PalmDoc->new();
  $doc->body("Foo Bar"x1000);
  $doc->title("Foo Bar Baz");
  open(FOO,">foo.pdb") || die $!;
  print FOO $doc->pdb_header(),$doc->body();
  close(FOO);

=over 3

=item compression($boolean)

=back

This function toggles the compression. By default compression is off.
The same action can be performed by setting the appropriate hash 
key/value pair in the constructor.

  $doc->compression(1); #Turn PalmDoc Compression on

=over 3

=item ignorenl($boolean)

=back

This function toggles the ignoring the newlines. By default newlines are not 
ignored. The same action can be performed by setting the appropriate hash 
key/value pair in the constructor. Credit for this functionality goes to 
Josef Moellers.

  $doc->ignorenl(1); #Ignore newlines


=head1 THANK YOU!!!!

A HUGE thanks goes to Josef Moellers for fixing 2 BIG bugs in the code.

Thanks also to Scott Wiersdorf for adding warning cleanness.

Waves to Steve Swantz for pointing me to the typos in the POD and README.

John G. Smith for pointing out that titles can't be longer than 31 chars and providing fix for it.

=head1 TODO

Since my primary goal was to port the core, most of the features present in
Bibelot are not included. 

=head1 DISCLAIMER

MOST of this code is borrowed from Bibelot (http://www.sourceforge.net/projects/bibelot/).
This code is released under GPL (GNU Public License). More information can be 
found on http://www.gnu.org/copyleft/gpl.html

=head1 VERSION

This is Palm::PalmDoc 0.12.

=head1 AUTHOR

Hendrik Van Belleghem (beatnik@quickndirty.org)

=head1 SEE ALSO

Bibelot - http://www.sourceforge.net/projects/bibelot/

GNU & GPL - http://www.gnu.org/copyleft/gpl.html

=cut