cpan/Encode/lib/Encode/MIME/Header.pm

package Encode::MIME::Header;
use strict;
use warnings;
no warnings 'redefine';

our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;

my %seed = (
    decode_b => '1',    # decodes 'B' encoding ?
    decode_q => '1',    # decodes 'Q' encoding ?
    encode   => 'B',    # encode with 'B' or 'Q' ?
    bpl      => 75,     # bytes per line
);

$Encode::Encoding{'MIME-Header'} =
  bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;

$Encode::Encoding{'MIME-B'} = bless {
    %seed,
    decode_q => 0,
    Name     => 'MIME-B',
} => __PACKAGE__;

$Encode::Encoding{'MIME-Q'} = bless {
    %seed,
    decode_q => 1,
    encode   => 'Q',
    Name     => 'MIME-Q',
} => __PACKAGE__;

use parent qw(Encode::Encoding);

sub needs_lines { 1 }
sub perlio_ok   { 0 }

sub decode($$;$) {
    use utf8;
    my ( $obj, $str, $chk ) = @_;

    # zap spaces between encoded words
    $str =~ s/\?=\s+=\?/\?==\?/gos;

    # multi-line header to single line
    $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;

    1 while ( $str =~
        s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)(.*?)\?=\1(.*?\?=)/$1$2$3/ )
      ;    # Concat consecutive QP encoded mime headers
           # Fixes breaking inside multi-byte characters

    $str =~ s{
        =\?              # begin encoded word
        ([-0-9A-Za-z_]+) # charset (encoding)
        (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
        \?([QqBb])\?     # delimiter
        (.*?)            # Base64-encodede contents
        \?=              # end encoded word
    }{
        if      (uc($2) eq 'B'){
            $obj->{decode_b} or croak qq(MIME "B" unsupported);
            decode_b($1, $3, $chk);
        } elsif (uc($2) eq 'Q'){
            $obj->{decode_q} or croak qq(MIME "Q" unsupported);
            decode_q($1, $3, $chk);
        } else {
            croak qq(MIME "$2" encoding is nonexistent!);
        }
    }egox;
    $_[1] = $str if $chk;
    return $str;
}

sub decode_b {
    my $enc  = shift;
    my $d    = find_encoding($enc) or croak qq(Unknown encoding "$enc");
    my $db64 = decode_base64(shift);
    my $chk  = shift;
    return $d->name eq 'utf8'
      ? Encode::decode_utf8($db64)
      : $d->decode( $db64, $chk || Encode::FB_PERLQQ );
}

sub decode_q {
    my ( $enc, $q, $chk ) = @_;
    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
    $q =~ s/_/ /go;
    $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
    return $d->name eq 'utf8'
      ? Encode::decode_utf8($q)
      : $d->decode( $q, $chk || Encode::FB_PERLQQ );
}

my $especials =
  join( '|' => map { quotemeta( chr($_) ) }
      unpack( "C*", qq{()<>,;:"'/[]?=} ) );

my $re_encoded_word = qr{
    =\?                # begin encoded word
    (?:[-0-9A-Za-z_]+) # charset (encoding)
    (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
    \?(?:[QqBb])\?     # delimiter
    (?:.*?)            # Base64-encodede contents
    \?=                # end encoded word
}xo;

my $re_especials = qr{$re_encoded_word|$especials}xo;

sub encode($$;$) {
    my ( $obj, $str, $chk ) = @_;
    my @line = ();
    for my $line ( split /\r\n|[\r\n]/o, $str ) {
        my ( @word, @subline );
        for my $word ( split /($re_especials)/o, $line ) {
            if (   $word =~ /[^\x00-\x7f]/o
                or $word =~ /^$re_encoded_word$/o )
            {
                push @word, $obj->_encode($word);
            }
            else {
                push @word, $word;
            }
        }
        my $subline = '';
        for my $word (@word) {
            use bytes ();
            if ( bytes::length($subline) + bytes::length($word) >
                $obj->{bpl} - 1 )
            {
                push @subline, $subline;
                $subline = '';
            }
            $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/);
            $subline .= $word;
        }
        length($subline) and push @subline, $subline;
        push @line, join( "\n " => @subline );
    }
    $_[1] = '' if $chk;
    return join( "\n", @line );
}

use constant HEAD   => '=?UTF-8?';
use constant TAIL   => '?=';
use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };

sub _encode {
    my ( $o, $str ) = @_;
    my $enc  = $o->{encode};
    my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );

    # to coerce a floating-point arithmetics, the following contains
    # .0 in numbers -- dankogai
    $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
    my @result = ();
    my $chunk  = '';
    while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
        use bytes ();
        if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
            push @result, SINGLE->{$enc}($chunk);
            $chunk = '';
        }
        $chunk .= $chr;
    }
    length($chunk) and push @result, SINGLE->{$enc}($chunk);
    return @result;
}

sub _encode_b {
    HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
}

sub _encode_q {
    my $chunk = shift;
    $chunk = encode_utf8($chunk);
    $chunk =~ s{
	   ([^0-9A-Za-z])
       }{
            join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
       }egox;
    return HEAD . 'Q?' . $chunk . TAIL;
}

1;
__END__

=head1 NAME

Encode::MIME::Header -- MIME 'B' and 'Q' header encoding

=head1 SYNOPSIS

    use Encode qw/encode decode/;
    $utf8   = decode('MIME-Header', $header);
    $header = encode('MIME-Header', $utf8);

=head1 ABSTRACT

This module implements RFC 2047 Mime Header Encoding.  There are 3
variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
difference is described below

              decode()          encode()
  ----------------------------------------------
  MIME-Header Both B and Q      =?UTF-8?B?....?=
  MIME-B      B only; Q croaks  =?UTF-8?B?....?=
  MIME-Q      Q only; B croaks  =?UTF-8?Q?....?=

=head1 DESCRIPTION

When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
is extracted and decoded for I<X> encoding (B for Base64, Q for
Quoted-Printable). Then the decoded chunk is fed to
decode(I<encoding>).  So long as I<encoding> is supported by Encode,
any source encoding is fine.

When you encode, it just encodes UTF-8 string with I<X> encoding then
quoted with =?UTF-8?I<X>?....?= .  The parts that RFC 2047 forbids to
encode are left as is and long lines are folded within 76 bytes per
line.

=head1 BUGS

It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
and =?ISO-8859-1?= but that makes the implementation too complicated.
These days major mail agents all support =?UTF-8? so I think it is
just good enough.

Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
Makamaka.  Thre are still too many MUAs especially cellular phone
handsets which does not grok UTF-8.

=head1 SEE ALSO

L<Encode>

RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
locations.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)