The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*- perl -*-

package Encode::MIME::EncWords;
require 5.007003;

use strict;
use warnings;
use Carp qw(croak carp);
use MIME::EncWords;

our $VERSION = '0.03';

# Default of options
my $Config = {
    Charset => 'UTF-8',
    # Encoding => specified by each subclass.
    # Folding => fixes to "\n".
    # Replacement => given by encode()/decode().
    # others => derived from MIME::EncWords:
    map { ($_ => $MIME::EncWords::Config->{$_}) }
	qw(Detect7bit Field Mapping MaxLineLen Minimal)
};

$Encode::Encoding{'MIME-EncWords'} = bless {
    Encoding => 'A',
    Name     => 'MIME-EncWords',
} => __PACKAGE__;

$Encode::Encoding{'MIME-EncWords-B'} = bless {
    Encoding => 'B',
    Name     => 'MIME-EncWords-B',
} => __PACKAGE__;

$Encode::Encoding{'MIME-EncWords-Q'} = bless {
    Encoding => 'Q',
    Name     => 'MIME-EncWords-Q',
} => __PACKAGE__;

$Encode::Encoding{'MIME-EncWords-ISO_2022_JP'} = bless {
    Charset  => 'ISO-2022-JP',
    Encoding => 'B',
    Name     => 'MIME-EncWords-ISO_2022_JP',
} => __PACKAGE__;

use base qw(Encode::Encoding);

sub needs_lines { 1 }
sub perlio_ok   { 0 }

sub decode($$;$) {
    my ($obj, $str, $chk) = @_;

    my %opts = map { ($_ => ($obj->{$_} || $Config->{$_})) }
        qw(Detect7bit Mapping);
    $chk = 0 if ref $chk; # coderef not supported.
    my $repl = ($chk & 4) ? ($chk & ~4 | 1) : $chk;

    local $@;
    my $skip = 0; # for RETURN_ON_ERR
    my $ret = undef;
    pos($str) = 0;
    foreach my $line (
	$str =~ m{ \G (.*?) (?:\r\n|[\r\n]) (?![ \t]) }cgsx,
	substr($str, pos($str))
    ) {
	if (defined $ret) {
	    $ret .= "\n" unless $skip;
	} else {
	    $ret = '';
	}
	if ($skip) {
	    $_[1] .= "\n";
	    $_[1] .= $line;
	    next;
	}
	next unless length $line;

	my @words = MIME::EncWords::decode_mimewords($line, %opts);
	if ($@) { # broken MIME encoding.
	    croak $@ if $chk & 1;   # DIE_ON_ERR
	    carp $@ if $chk & 2;    # WARN_ON_ERR
	    if ($chk & 4) {         # RETURN_ON_ERR
		$_[1] = $line;
		$skip = 1;
		next;
	    }
	}
	for (my $i = 0; $i <= $#words; $i++) {
	    my $word = $words[$i];
	    my $cset = MIME::Charset->new(($word->[1] || 'US-ASCII'),
					  Mapping => $opts{Mapping});
	    if (! $cset->decoder) { # unknown charset or ``8BIT''.
		$@ = 'Unknown charset "'.$cset->as_string.'"';
		croak $@ if $chk & 1;
		carp $@ if $chk & 2;
		if ($chk & 4) {
		    # already decoded... re-encoding
		    $_[1] =
			MIME::EncWords::encode_mimewords([splice @words, $i],
							 Encoding => 'B',
							 Folding => '',
							 MaxLineLen => -1);
		    $skip = 1;
		    last;
		}
		$ret .= Encode::decode("ISO-8859-1", $word->[0], 0); #FIXME

		next;
	    }
	    eval {
		$ret .= $cset->decode($word->[0], $repl);
	    };
	    if ($@) {
		$@ =~ s/ at .+? line \d+[.\n]*$//; 
		croak $@ if $chk & 1;
		carp $@ if $chk & 2;
		if ($chk & 4) {
		    # already decoded... re-encoding
		    $_[1] =
			MIME::EncWords::encode_mimewords([splice @words, $i],
							 Encoding => 'B',
							 Folding => '',
							 MaxLineLen => -1);
		    $skip = 1;
		    last;
		}
	    }
	}
    }

    if ($chk & 4) { # RETURN_ON_ERR
	$_[1] = '' unless $skip;
    } elsif ($chk) { # ! LEAVE_SRC
	$_[1] = $ret unless $chk & 8;
    }
    return $ret;
}

sub encode($$;$) {
    my ($obj, $str, $chk) = @_;

    my %opts = map { ($_ => ($obj->{$_} || $Config->{$_})) }
        qw(Charset Detect7bit Encoding Field Mapping MaxLineLen Minimal);
    $opts{Charset} ||= 'UTF-8';
    $opts{Folding} = "\n";
    $chk = 0 if ref $chk; # coderef not supported.
    my $repl = ($chk & 4) ? ($chk & ~4 | 1) : $chk;

    $str = Encode::decode('ISO-8859-1', $str)
        if ! Encode::is_utf8($str) and $str =~ /[^\x00-\x7F]/;

    local $@;
    my $skip = 0; # for RETURN_ON_ERR
    my $ret = undef;
    pos($str) = 0;
    foreach my $line (
        $str =~ m{ \G (.*?) (?:\r\n|[\r\n]) (?![ \t]) }cgsx,
        substr($str, pos($str))
    ) {
	if (defined $ret) {
	    $ret .= "\n" unless $skip;
	} else {
	    $ret = '';
	}
	if ($skip) {
	    $_[1] .= "\n";
	    $_[1] .= $line;
	    next;
	}
	next unless length $line;

	eval {
	    $ret .= MIME::EncWords::encode_mimewords($line, %opts,
						     Replacement => $repl);
	};
	if ($@) {
	    $@ =~ s/ at .+? line \d+[.\n]*$//;
	    croak $@ if $chk & 1;   # DIE_ON_ERR
	    carp $@ if $chk & 2;    # WARN_ON_ERR
	    if ($chk & 4) {         # RETURN_ON_ERR
		$_[1] = $line;
		$skip = 1;
		next;
	    }
	}
    }

    if ($chk & 4) { # RETURN_ON_ERR
	$_[1] = '' unless $skip;
    } elsif ($chk) { # ! LEAVE_SRC
	$_[1] = '' unless $chk & 8; # FIXME:spec?
    } 
    return $ret;
}

sub config {
    my $klass = shift if scalar @_ % 2;
    my %opts = @_;
    foreach my $key (keys %opts) {
        croak "Unknown config option: $key" unless exists $Config->{$key};
        $Config->{$key} = $opts{$key};
    }
}

1;
__END__

=head1 NAME

Encode::MIME::EncWords -- MIME 'B' and 'Q' header encoding (alternative)

=head1 SYNOPSIS

    use Encode::MIME::EncWords;
    use Encode qw/encode decode/;
    
    # decode header:
    $utf8   = decode('MIME-EncWords', $header);
    
    # encode header with default charset, UTF-8:
    $header = encode('MIME-EncWords', $utf8);
    
    # encode header with another charset:
    Encode::MIME::EncWords->config(Charset => 'GB2312');
    $header = encode('MIME-EncWords', $utf8);

=head1 ABSTRACT

This module implements MIME header encoding described in RFC 2047.
There are three variant encoding names and one shorthand special to a
charset:

  Encoding name              Result of encode()     Comment
  -------------------------------------------------------------------
  MIME-EncWords              (auto-detect B or Q)
  MIME-EncWords-B            =?XXXX?B?...?=         Default is UTF-8.
  MIME-EncWords-Q            =?XXXX?Q?...?=                ,,
  MIME-EncWords-ISO_2022_JP  =?ISO-2022-JP?B?...?=

All encodings generate the same result by decode().

=head1 DESCRIPTION

This module is intended to be an alternative of C<MIME-*> encodings
provided by L<Encode::MIME::Header> core module.
To find out how to use this module in detail, see L<Encode>.

=head2 Module specific feature

=over 4

=item config(KEY => VALUE, ...);

I<Class method.>
Set options by KEY => VALUE pairs.
Following options are available.

=over 4

=item Charset

[encode] Name of character set by which data elements will be converted.
Default is C<"UTF-8">.
On C<MIME-EncWords-ISO_2022_JP> it is fixed to C<"ISO-2022-JP">.

=item Detect7bit

[decode/encode] Try to detect 7-bit charset on unencoded portions.
Default is C<"YES">.

=item Field

[encode] Name of the header field which will be considered on the first line
of encoded result in its length.
Default is C<undef>.

=item Mapping

[decode/encode] Specify mappings actually used for charset names.
Default is C<"EXTENDED">.

=item MaxLineLen

[encode] Maximum line length excluding newline.
Default is C<76>.

=item Minimal

[encode] Whether to do minimal encoding or not.
Default is C<"YES">.

=back

For more details about options see L<MIME::EncWords>.

=back

=head1 CAVEAT

=over 4

=item *

The encoding modules for MIME header encoding are not the
magic porridge pot to cook complex header fields properly.

To decode address header fields (From:, To:, ...), at first parse
mailbox-list; then decode each element by encoding module.
To encode them, at first encode each element by encoding module; then
construct mailbox-list of encoded elements.
To construct or parse mailbox-list, some modules such as L<Mail::Address>
may be used.

=item *

Lines are delimited with LF (C<"\n">).
RFC5322 states that lines in Internet messages are delimited with
CRLF (C<"\r\n">).

=back

=head1 BUGS

Please report bugs or buggy behaviors to developer.

CPAN Request Tracker:
L<http://rt.cpan.org/Public/Dist/Display.html?Name=MIME-EncWords>.

=head1 VERSION

Consult C<$VERSION> variable.

B<This is experimental release>.
Features might be changed in the near future.

Development versions of this package may be found at
L<http://hatuka.nezumi.nu/repos/MIME-EncWords/>.

=head1 SEE ALSO

L<Encode>, L<Encode::MIME::Header>, L<MIME::EncWords>.

RFC 2047 I<MIME (Multipurpose Internet Mail Extensions) Part Three:
Message Header Extensions for Non-ASCII Text>.

=head1 AUTHOR

Hatuka*nezumi - IKEDA Soji <hatuka(at)nezumi.nu>

=head1 COPYRIGHT

Copyright (C) 2011 Hatuka*nezumi - IKEDA Soji.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut