The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
##########################################################################
## All portions of this code are copyright (c) 2003,2004 nethype GmbH   ##
##########################################################################
## Using, reading, modifying or copying this code requires a LICENSE    ##
## from nethype GmbH, Franz-Werfel-Str. 11, 74078 Heilbronn,            ##
## Germany. If you happen to have questions, feel free to contact us at ##
## license@nethype.de.                                                  ##
##########################################################################

=head1 NAME

PApp::Recode - convert bytes from one charset to another

=head1 SYNOPSIS

 use PApp::Recode;
 # not auto-imported into .papp-files

 $converter = to_utf8 PApp::Recode "iso-8859-1";
 $converter->("string");

=head1 DESCRIPTION

This module creates conversion functions that enable you to convert text
data from one character set (and/or encoding) to another.

=cut

package PApp::Recode;

use Convert::Scalar ();

BEGIN {
   $VERSION = 2.0;

   require XSLoader;
   XSLoader::load 'PApp::Recode', $VERSION;
}

=head2 Functions

=over 4

=item charset_valid $charset

Returns a boolean indicating wether the named charset is valid on this
system (where "valid" is defined as "can be converted from/to UTF-8 using
this module").

=cut

# cache the results, some frequently used charsets are always supported
my %charset_valid = ( "iso-8859-1" => 1, "utf-8" => 1, "ascii" => 1 );

sub charset_valid {
   unless (exists $charset_valid{$_[0]}) {
      $charset_valid{$cs} = eval {
         PApp::Recode::Pconv::open($cs, "utf-8");
         PApp::Recode::Pconv::open("utf-8", $cs);
         1;
      };
   }
   $charset_valid{$cs};
}

=back

=cut

=head2 The PApp::Recode Class

This class can be used to convert textual data between various encodings.

=over 4

=item $converter = new PApp::Recode "destination-charset", "source-charset" [, \&fallback]

Returns a new conversion function (a code reference) that converts its
argument from the source character set into the destination character set
each time it is called (it does remember state, though. A call without
arguments resets the state).

Perl's internal UTF-8-flag is ignored on input and not set on output.

Example: create a converter that converts UTF-8 into ascii, html-escaping
any non-ascii characters:

   new PApp::Recode "ascii", "utf-8", sub { sprintf "&#x%x;", $_[0] };

=item $converter = to_utf8 PApp::Recode "source-character-set" [, \&fallback]

Similar to a call to C<new> with the first argument equal to "utf-8". The
returned conversion function will, however, forcefully set perl's UTF-8
flag on the returned scalar.

=item $converter = utf8_to PApp::Recode "destination-character-set" [, \&fallback]

Similar to a call to C<new> with the second argument equal to "utf-8". The
returned conversion function will, however, upgrade its argument to UTF-8.

=cut

sub new($$$;$) {
   my $self = shift;
   my ($to, $from, $fb) = @_;
   my $pconv = PApp::Recode::Pconv::open($to, $from, $fb);
   $pconv && sub {
      unshift @_, $pconv;
      &PApp::Recode::Pconv::convert;
   };
}

sub to_utf8($$;$) {
   my $self = shift;
   my $converter = $self->new("utf-8", $_[0], $_[1]);
   sub {
      Convert::Scalar::utf8_on &$converter;
   };
}

sub utf8_to($$;$) {
   my $self = shift;
   my $converter = $self->new($_[0], "utf-8", $_[1]);
   sub {
      Convert::Scalar::utf8_upgrade($_[0]) if @_;
      &$converter;
   };
}

=back

=head2 The PApp::Recode::Pconv Class

This is the class that actually implements character conversion. It should not be used directly.

=cut

=over 4

=item new PApp::Recode::Pconv tocode, fromcode [, fallback]

Create a new encoding convert that encodes from C<fromcode> to C<tocode>,
optionally invoking the C<fallback> code reference for every character
that is not representable in the target encoding.

=item PApp::Recode::Pconv::open tocode, fromcode [, fallback]

Convenience function that calls C<new> and returns the converter.

=item $pconv->convert($string [, reset])

Convert the given string. If C<reset> is true, the internal multibyte
state is reset before conversion.

=item $pconv->reset

Reset the multibyte state.

=item $pconv->convert_fresh ($string)

Same as C<convert> with C<reset> set to true.

=cut

=back

=head1 SEE ALSO

L<PApp>.

=head1 AUTHOR

 Marc Lehmann <schmorp@schmorp.de>
 http://home.schmorp.de/

=cut

1;