package Palm::PalmDoc;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw();
$VERSION = '0.13';
# Palm::PalmDoc Constructor
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
$self->{TITLE} = "PalmDoc Document";
$self->{INFILE} = undef;
$self->{OUTFILE} = undef;
$self->{INFILEH} = undef;
$self->{OUTFILEH} = undef;
$self->{BODY} = undef;
$self->{COMPRESS} = 0;
$self->{BLOCK_SIZE} = [];
$self->{IGNORENL} = 0;
bless($self,$class);
if (@_)
{ my $ref = shift;
my %params = ();
if (ref $ref eq 'ARRAY')
{ %params = @{$ref}; }
if (ref $ref eq 'HASH')
{ %params = %{$ref}; }
if (ref $ref eq '')
{ unshift @_,$ref;
if (!(@_ % 2))
{ %params = @_; }
}
foreach (keys %params) { my $tkey = uc $_; my $tvalue = $params{$_}; delete $params{$_}; $params{$tkey} = $tvalue; }
$self->infile($params{INFILE}) if exists $params{INFILE};
$self->outfile($params{OUTFILE}) if exists $params{OUTFILE};
$self->title($params{TITLE}) if exists $params{TITLE};
$self->compression($params{COMPRESS}) if exists $params{COMPRESS};
$self->ignorenl($params{IGNORENL}) if exists $params{IGNORENL};
$self->body($params{BODY}) if exists $params{BODY};
$self->compressed(0);
}
return $self;
}
sub body {
my $self = shift;
if (@_) {
$self->{BODY} = shift;
if ($self->ignorenl)
{ my @body = split(/\n/, $self->{BODY});
my $sep = "";
$self->{BODY} = "";
foreach (@body)
{ if (/^\s*$/)
{ $self->{BODY} .= "\n";
$sep = "";
} else
{ $self->{BODY} .= "$sep$_";
$sep = " ";
}
}
if ($sep eq " ")
{ $self->{BODY} .= "\n"; }
}
$self->length(CORE::length $self->{BODY});
if ($self->compression && !$self->compressed) { $self->compressed(1); $self->{BODY} = $self->compr_text($self->{BODY}); }
}
return($self->{BODY});
}
sub length {
my $self = shift;
if (@_) { $self->{LENGTH} = shift; }
return($self->{LENGTH});
}
sub title {
my $self = shift;
if (@_) { $self->{TITLE} = shift; }
return($self->{TITLE});
}
sub compression {
my $self = shift;
if (@_) { $self->{COMPRESS} = shift @_ ? 1 : 0; }
return($self->{COMPRESS});
}
sub compressed {
my $self = shift;
if (@_) { $self->{COMPRESSED} = shift @_ ? 1 : 0; }
return($self->{COMPRESSED});
}
sub ignorenl {
my $self = shift;
if (@_) { $self->{IGNORENL} = shift @_ ? 1 : 0; }
return($self->{IGNORENL});
}
sub infile {
my $self = shift;
if (@_)
{ $self->{INFILE} = shift;
$self->{INFILE} =~ s/([;\`'\\\|"*~<>^\(\)\[\]\{\}\$\n\r\0\t\s])//g;
}
return($self->{INFILE});
}
sub outfile {
my $self = shift;
if (@_) {
$self->{OUTFILE} = shift;
$self->{OUTFILE} =~ s/([;\`'\\\|"*~<>^\(\)\[\]\{\}\$\n\r\0\t\s])//g;
}
return($self->{OUTFILE});
}
sub parse_from_file {
my $self = shift;
$self->infile(shift) if @_;
$self->outfile(shift) if @_;
}
sub parse_from_filehandle {
my $self = shift;
($self->{INFILEH},$self->{OUTFILEH}) = @_;
$self->{INFILEH} ||= \*STDIN;
$self->{OUTFILEH} ||= \*STDOUT;
}
sub read_text {
my $self = shift;
if ($self->infile)
{ open (IN, "<".$self->infile) || die "Can't open ".$self->infile.": $!\n";
{ local $/ = undef;
$self->body(<IN>);
}
close (IN);
if ($self->{INFILEH} && !$self->infile)
{ local $/ = undef;
$self->body(<INFILEH>);
}
$self->{INFILEH} and close($self->{INFILEH}) || die "Can't close input filehandle after reading: $!";
if ($self->compression && !$self->compressed) { $self->compressed(1); $self->body($self->compr_text($self->body)); }
return ($self->body);
} else { return(0); }
}
sub write_text {
my $self = shift;
if ($self->body)
{ if ($self->outfile)
{ open (OUT,">".$self->outfile) || die "Can't open ".$self->outfile.": $!\n";
binmode(OUT);
print OUT $self->pdb_header(),$self->body;
close (OUT);
}
if ($self->{OUTFILEH} && !$self->outfile)
{ binmode($self->{OUTFILEH});
my $foo = $self->{OUTFILEH};
print $foo $self->pdb_header,$self->body;
$self->{OUTFILEH} and close($self->{OUTFILEH}) || die "Can't close output filehandle after reading: $!";
}
return (1);
} else { return(0); }
}
sub pdb_header {
my $self = shift;
my $COUNT_BITS = 3;
my $DISP_BITS = 11;
my $DOC_CREATOR = "REAd";
my $DOC_TYPE = "TEXt";
my $RECORD_SIZE_MAX = 4096; # 4k record size
my $dmDBNameLength = 32; # 31 chars + 1 null
my $pdb_rec_offset; # PDB record offset
my $header_buff = ""; # Temporary buffer to build the headers in.
my $x;
my $y;
my $pdb_header_size = 78;
my $pdb_attributes = 0;
my $pdb_version = 0;
my $pdb_create_time = 0x11111111; # Palm Desktop demands
my $pdb_modify_time = 0x11111111; # a timestamp.
my $pdb_backup_time = 0;
my $pdb_modificationNumber = 0;
my $pdb_appInfoID = 0;
my $pdb_sortInfoID = 0;
my $pdb_type = $DOC_TYPE;
my $pdb_creator = $DOC_CREATOR;
my $pdb_id_seed = 0;
my $pdb_id_nextRecordList = 0;
my $pdb_numRecords = (int ($self->length / 4096)) + 2; # +1 for record 0
# +1 for fractional part
my $pdb_header = pack("a32nnNNNNNNa4a4NNn",substr($self->title,0,31)."\0",$pdb_attributes,
$pdb_version,$pdb_create_time,
$pdb_modify_time,$pdb_backup_time,
$pdb_modificationNumber,$pdb_appInfoID,
$pdb_sortInfoID,$pdb_type,$pdb_creator,
$pdb_id_seed,$pdb_id_nextRecordList,
$pdb_numRecords);
if ( (CORE::length $pdb_header) != 78) { die "pdb_header malformed\n"; }
my $doc_header_size = 16;
my $doc_version = 1 + $self->compression;
my $reserved1 = 0;
my $doc_doc_size = $self->length;
my $doc_rec_size = 4096;
my $doc_num_recs = (int ($self->length / 4096)) + 1;
my $doc_reserved2 = 0;
my $doc_header = pack("nnNnnN",$doc_version,$reserved1,$doc_doc_size,
$doc_num_recs,$doc_rec_size,$doc_reserved2);
if ( (CORE::length $doc_header) != 16) { die "doc_header malformed\n"; }
my $pdb_rec_header_size = 8;
my $pdb_rec_attributes = 0x40; # We'll fake this, 0x40 = 'dirty'
my $pdb_rec_uniqueID = 0x3D0; # Simple increment
my $pdb_rec_header_template = "Nccn";
$pdb_rec_offset = $pdb_header_size +
(($pdb_numRecords)* $pdb_rec_header_size) + 2;
$header_buff = $pdb_header . pack($pdb_rec_header_template,
$pdb_rec_offset, $pdb_rec_attributes,
ord('a'),$pdb_rec_uniqueID );
$pdb_rec_offset += $doc_header_size; # Add offset for doc_header
for ($x = 0; $x < $pdb_numRecords - 1; $x++) {
# if ($x > 0 )
# { $self->{BLOCK_SIZE}[$x] = $RECORD_SIZE_MAX; }
$pdb_rec_offset += $self->{BLOCK_SIZE}[$x];
++$pdb_rec_uniqueID;
$header_buff .= pack($pdb_rec_header_template,$pdb_rec_offset,
$pdb_rec_attributes,ord('a'),$pdb_rec_uniqueID);
}
$header_buff .= 0x00 . 0x00;
$header_buff .= $doc_header;
return ($header_buff);
}
sub compr_text {
my $self = shift;
my $total_compr_size = 0; # Final compressed text size
my $compr_buff = ""; # Temporary output buffer
my $numrecords = (int($self->{LENGTH} / 4096) +1); # Number of blocks to compress.
my $x;
my $y;
my $block_offset;
my $block; # Contains the current 4096 byte block of text
my $block_len; # Length of current block
my $index; # Current scan position in block
my $byte; # Char at index (for space + char compression)
my $byte2; # Char at index+1
my $test; # Potentially compressible text for
# LZ77 compression.
my $frag_size; # Current size of above
my $frag_size2; # Spare for lazy byte compression
my $test2; # spare for above
my $test3; # second spare
my $pos; # Position (in $block) of reference text
# for $test
# to compress against.
my $pos2; # spare for above
my $pos3; # second spare
my $back; # $index - pos
my $mask; # Bitwise mask to do LZ77 'magic'
my $compr_ratio; # Compression ratio
my $done;
my $comp_block_offset = 0; # The $compr_buff index
# block begins.
my $FRAG_MAX = 10; # Max LZ77 fragment size
my $FRAG_MIN = 3; # Min LZ77 fragment size
my $LAZY_BYTE_FRAG = $FRAG_MAX + $FRAG_MIN - 1;
$self->{BLOCK_SIZE}[0] = 0; # Record 0 is already written and
# is not compressed.
for ($x = 1; $x <= $numrecords; $x++) {
$block_offset = ($x - 1) * 4096;
$block = substr($_[0],$block_offset, 4096);
if ($x >= $numrecords) { # Last block
$block = substr($block,0,($self->{LENGTH} % 4096));
}
$block_len = CORE::length($block);
$index = 0;
while ( $index < $block_len ) {
$byte = substr($block,$index,1); # Char at $index
if ($byte =~ /[\200-\377]/) { # is high bit set?
$y = 1; # found at least one!
while ( (substr($block,$index + $y ,1) =~
/[\200-\377]/) &&
($y < 8) ) {
++$y; # If found, increment counter
}
$compr_buff .= chr($y); # Write escape code
$compr_buff .= substr($block,$index,$y); # Write text
$index += $y; # Increment the index
} else { # Real compression routines
$frag_size = $FRAG_MIN; # We don't care about anything less
$test = substr($block,$index,$frag_size); # pull the current fragment
$pos = rindex($block, $test, $index - 1); # check against the buffer
if ( ($pos > 0) &&
($index - $pos <= 2047) && # Inside our 2047 byte window
( $index < $block_len - $frag_size) ) {
for ($y = 4; $y <= $FRAG_MAX; $y++ ) {
++$frag_size ;
$test2 = substr($block,$index,$frag_size);
$pos2 = rindex($block, $test2, $index - 1);
if (($pos2 > 0) &&
($index - $pos2 <= 2047) &&
($index < $block_len - $frag_size) ) {
# found a match!
$pos = $pos2;
$test = $test2;
} else { # no match, go back
--$frag_size;
last;
}
}
# Sanity check
if ($frag_size > $FRAG_MAX)
{ die "frag_size too big!!!: $frag_size\n"; }
$frag_size2 = $frag_size + 2;
$test2 = substr($block,$index + 1, $frag_size2);
$pos2 = rindex($block, $test2, $index - 1);
if (($pos2 > 0) &&
($index - $pos2 <= 2047) &&
($index < $block_len - $frag_size2) ) {
for ($y = $frag_size2;$y <= $LAZY_BYTE_FRAG;
$y++ ) { # Look for more
++$frag_size2;
$test2 = substr($block,$index + 1, $frag_size2);
$pos2 = rindex($block, $test2, $index - 1);
if (($pos2 > 0) &&
($index - $pos2 <= 2047) &&
($index < $block_len - $frag_size2) ) {
# found a match!
} else { # no match, go back
--$frag_size2;
last;
}
}
if ($frag_size2 < $LAZY_BYTE_FRAG) {
$pos = 0;
$compr_buff .= substr($block,$index,1);
++$index;
}
}
if ($pos > 0) { # Did we abort the compression?
$back = $index - $pos;
$mask = 0x8000 | int($frag_size - 3);
$compr_buff .= pack("n",int($back << 3) | $mask);
$index += $frag_size;
}
} else {
$byte = substr($block,$index,1); # Char at $index
$byte2 = substr($block,$index + 1,1); # next char as well
if ( ($byte eq " ") &&
($byte2 =~ /[\100-\176]/ ) &&
($index <= $block_len - 1)) {
# Got a space + char
# Set the high bit
# and add to output
# buffer.
$compr_buff .= pack("C", ord ($byte2) | 0x80 );
$index += 2; # Compressed 2 bytes
} else {
$compr_buff .= $byte; # No compression
++$index;
}
}
}
}
$self->{BLOCK_SIZE}[$x] = (CORE::length ($compr_buff)) - $total_compr_size;
$total_compr_size = CORE::length ($compr_buff);
}
return ($compr_buff);
}
1;
__END__
=head1 NAME
Palm::PalmDoc - Perl extension for PalmDoc format
=head1 SYNOPSIS
# Example 1
use Palm::PalmDoc;
my $doc = Palm::PalmDoc->new({INFILE=>"foo.txt",OUTFILE=>"foo.pdb",TITLE=>"foo bar",COMPRESS=>1});
$doc->read_text();
$doc->write_text();
# Example 2
use Palm::PalmDoc;
my $doc = Palm::PalmDoc->new({OUTFILE=>"foo.pdb",TITLE=>"foo bar"});
$doc->compression(1);
$doc->body("Foo Bar"x100);
$doc->write_text();
# Example 3
use Palm::PalmDoc;
my $doc = Palm::PalmDoc->new(INFILE=>"README");
$doc->compression(1); #Compression is off by default
$doc->read_text();
open(F,">readme.pdb") || die $!;
print F $doc->pdb_header,$doc->body;
close(F);
# Example 4
use Palm::PalmDoc;
my $doc = Palm::PalmDoc->new();
$doc->parse_from_file("README");
open(F,">readme.pdb") || die $!;
$doc->parse_from_filehandle("",\*F);
$doc->compression(1); #Compression is off by default
$doc->read_text();
$doc->write_text();
=head1 DESCRIPTION
This module can format ASCII text into a PalmDoc PDB file.
Palm::PalmDoc provides the following functions :
=over 3
=item new(@params)
The constructor of Palm::PalmDoc. This function can accept parameters used to
generate the PalmDoc file. Parameters accepted are INFILE, OUTFILE, TITLE
and BODY. They need to be passed in hash context (or a list/array mimicking
a hash). A reference to a hash is also accepted, as well as a reference to
an array.
my $doc = Palm::PalmDoc->new({INFILE=>"foo.txt",OUTFILE=>"foo.pdb"});
is same as
my $doc = Palm::PalmDoc->new(INFILE=>"foo.txt",OUTFILE=>"foo.pdb");
or as
my $doc = Palm::PalmDoc->new("INFILE","foo.txt","OUTFILE","foo.pdb");
Keys are always uppercased (even though they may not be passed as such).
Possible keys are:
=back
=over 3
=item INFILE
=back
The input filename
=over 3
=item OUTFILE
=back
The output filename
=over 3
=item TITLE
=back
The document title
=over 3
=item BODY
=back
The document body
=over 3
=item COMPRESS
=back
Boolean to indicate compression
=over 3
=item IGNORENL
=back
Boolean to indicate to ingoring newlines.
=over 3
=item body($body)
=back
This is a plain getter/setter function except that it also sets the required
length. The same action can be performed by setting the appropriate hash
key/value pair in the constructor or by using the read_text function.
$doc->body("Foo Bar"x100);
=over 3
=item title($title)
=back
This is a plain getter/setter function for the title. The same action can be
performed by setting the appropriate hash key/value pair in the constructor.
$doc->title("Foo Bar Baz");
=over 3
=item infile($filename)
=back
This is a plain getter/setter function for the Input filename. The same
action can be performed by setting the appropriate hash key/value pair in
the constructor. If both an input file and an input filehandle are defined,
the input file is used.
$doc->infile("foo.txt");
=over 3
=item outfile($filename)
=back
This is a plain getter/setter function for the Output filename. The same
action can be performed by setting the appropriate hash key/value pair in
the constructor. If both an output file and an output filehandle are defined,
the output file is used.
$doc->outfile("foo.pdb");
=over 3
=item parse_from_file($inputfile,$outputfile)
=back
parse_from_file uses infile() and outfile() to set the filenames. If both an
input file and an input filehandle are defined, the input file is used. Same
applies for output file and output filehandle.
$doc->parse_from_file("foo.txt","foo.pdb");
=over 3
=item parse_from_filehandle($inputfilehandle,$outputfilehandle)
=back
parse_from_filehandle takes filehandle as arguments. When no input filehandle
is defined STDIN is used. When no output filehandle is defined STDOUT is used.
If both an input file and an input filehandle are defined, the input file is
used. Same applies for output file and output filehandle.
$doc->parse_from_filehandle(\*FOO,\*BAR);
=over 3
=item read_text()
=back
This function uses the inputfile property to read the body from a file. It
also sets the required length. This function returns the text read if
successfull or a false if not successfull.
$doc->read_text();
=over 3
=item write_text()
=back
This function uses the outputfile property to write the header and body to a
file. The headers are generated by the pdb_header function. This function
returns true if successfull or false if not successfull.
$doc->write_text();
=over 3
=item pdb_header()
=back
This function generates the correct PDB headers for the body and length.
You only need to use this function if you're writing the body to a file
manually since write_text() already used pdb_header. This function returns
the generated header which should precede the converted body. Writing to an
already opened filehandle can be done with parse_from_filehandle too.
use Palm::PalmDoc;
my $doc = Palm::PalmDoc->new();
$doc->body("Foo Bar"x1000);
$doc->title("Foo Bar Baz");
open(FOO,">foo.pdb") || die $!;
print FOO $doc->pdb_header(),$doc->body();
close(FOO);
=over 3
=item compression($boolean)
=back
This function toggles the compression. By default compression is off.
The same action can be performed by setting the appropriate hash
key/value pair in the constructor.
$doc->compression(1); #Turn PalmDoc Compression on
=over 3
=item ignorenl($boolean)
=back
This function toggles the ignoring the newlines. By default newlines are not
ignored. The same action can be performed by setting the appropriate hash
key/value pair in the constructor. Credit for this functionality goes to
Josef Moellers.
$doc->ignorenl(1); #Ignore newlines
=head1 THANK YOU!!!!
A HUGE thanks goes to Josef Moellers for fixing 2 BIG bugs in the code.
Thanks also to Scott Wiersdorf for adding warning cleanness.
Waves to Steve Swantz for pointing me to the typos in the POD and README.
John G. Smith for pointing out that titles can't be longer than 31 chars and providing fix for it.
=head1 TODO
Since my primary goal was to port the core, most of the features present in
Bibelot are not included.
=head1 DISCLAIMER
MOST of this code is borrowed from Bibelot (http://www.sourceforge.net/projects/bibelot/).
This code is released under GPL (GNU Public License). More information can be
found on http://www.gnu.org/copyleft/gpl.html
=head1 VERSION
This is Palm::PalmDoc 0.12.
=head1 AUTHOR
Hendrik Van Belleghem (beatnik@quickndirty.org)
=head1 SEE ALSO
Bibelot - http://www.sourceforge.net/projects/bibelot/
GNU & GPL - http://www.gnu.org/copyleft/gpl.html
=cut