The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (C) 2013 MURATA Yasuhisa
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

package MIME::EcoEncode::Fold;

use 5.008005;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT_OK = qw($VERSION);
our @EXPORT = qw(mime_eco_fold);
our $VERSION = '0.95';

our $LF;   # line feed
our $BPL;  # bytes per line
our $UTF8;
our $REG_W;

our $SPL;

sub mime_eco_fold {
    my $str = shift;

    return '' unless defined $str;
    return '' if $str eq '';

    my $charset = shift || 'UTF-8';
    my $cs;

    if ($charset =~ /^([-0-9A-Za-z_]+)$/i) {
	$cs = lc($1);
    }
    else { # invalid option
	return undef;
    }

    our $LF  = shift || "\n "; # line feed
    our $BPL = shift || 990;   # bytes per line
    our $UTF8 = 1;
    our $REG_W = qr/(.)/;

    $LF =~ /([^\x0d\x0a]*)$/;
    our $SPL = length($1);

    my $jp = 0;

    if ($cs ne 'utf-8') {
	$UTF8 = 0;
	if ($cs eq 'iso-2022-jp') {
	    $jp = 1;
	}
	elsif ($cs eq 'shift_jis') {
	    # range of 2nd byte : [\x40-\x7e\x80-\xfc]
	    $REG_W = qr/([\x81-\x9f\xe0-\xfc]?.)/;
	}
	elsif ($cs eq 'gb2312') { # Simplified Chinese
	    # range of 2nd byte : [\xa1-\xfe]
	    $REG_W = qr/([\xa1-\xfe]?.)/;
	}
	elsif ($cs eq 'euc-kr') { # Korean
	    # range of 2nd byte : [\xa1-\xfe]
	    $REG_W = qr/([\xa1-\xfe]?.)/;
	}
	elsif ($cs eq 'big5') { # Traditional Chinese
	    # range of 2nd byte : [\x40-\x7e\xa1-\xfe]
	    $REG_W = qr/([\x81-\xfe]?.)/;
	}
	else { # Single Byte (Latin, Cyrillic, ...)
	    ;
	}
    }

    my $result = '';
    my $refsub = $jp ? \&line_fold_jp : \&line_fold;
    my $odd = 0;

    for my $line (split /(\x0d?\x0a|\x0d)/, $str) {
        if ($odd) {
            $result .= $line;
            $odd = 0;
        }
        else {
            $result .= &$refsub($line);
            $odd = 1;
        }
    }
    return $result;
}


sub line_fold {
    my $str = shift;

    return '' if $str eq '';

    my $str_len = length($str);

    our $BPL;

    return $str if $str_len <= $BPL;

    our $LF;
    our $UTF8;
    our $REG_W;
    our $SPL;

    my $w = '';
    my $w_len;
    my $w_bak = '';
    my $result = '';
    my $max_len = $BPL;

    my ($chunk, $chunk_len) = ('', 0);
    my $str_pos = 0;

    utf8::decode($str) if $UTF8; # UTF8 flag on

    while ($str =~ /$REG_W/g) {
        $w = $1;
	utf8::encode($w) if $UTF8; # UTF8 flag off
        $w_len = length($w); # size of one character
        if ($chunk_len + $w_len > $max_len) {
            $result .= $chunk . "$LF";
            $str_pos += $chunk_len;
            $max_len = $BPL - $w_len - $SPL;
            if ($str_len - $str_pos <= $max_len) {
		utf8::encode($str) if $UTF8; # UTF8 flag off
                $chunk = substr($str, $str_pos);
                last;
            }
            $chunk = $w;
            $chunk_len = $w_len;
        }
        else {
            $chunk .= $w;
            $chunk_len += $w_len;
        }
    }
    return $result . $chunk;
}


sub line_fold_jp {
    my $str = shift;

    return '' if $str eq '';

    our $BPL;

    return $str if length($str) <= $BPL;

    our $LF;
    our $SPL;

    my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9
    my $k_in_bak = -1;
    my $k_out;
    my $ec;
    my $w1;
    my $w1_bak = '';
    my $w = '';
    my $w_len;
    my $w_bak = '';
    my $result = '';
    my $max_len = $BPL;

    while ($str =~ /(\e(..)|.)/g) {
        ($w1, $ec) = ($1, $2);
        $w .= $w1;
        if (defined $ec) {
            $w1_bak = $w1;
            if ($ec eq '(B') {
                $k_in = 0;
            }
            elsif ($ec eq '$B') {
                $k_in = 1;
            }
            else {
                $k_in = 9;
            }
            next;
        }
        else {
            if ($k_in == 1) {
                $k_in = 2;
                next;
            }
            elsif ($k_in == 2) {
                $k_in = 1;
            }
        }
        $k_out = $k_in ? 3 : 0; # 3 is "\e\(B"
        if (pos($str) + $k_out > $max_len) {
            $w_len = length($w);
            if ($k_in_bak) {
                $result .= $w_bak .
                    substr($str, 0, pos($str) - $w_len, "") . "\e\(B$LF";
                if ($k_in) {
                    if ($k_in_bak == $k_in) {
                        $w = $w1_bak . $w;
                    }
                }
                else {
                    $w = $w1;
                }
            }
            else {
                $result .= $w_bak .
                    substr($str, 0, pos($str) - $w_len, "") . "$LF";
            }
            substr($str, 0, $w_len, "");
            $max_len = $BPL - length($w) - $SPL;
            if (length($str) <= $max_len) {
                return $result . $w . $str;
            }
            $w_bak = $w;
        }
        $k_in_bak = $k_in;
        $w = '';
    }
    return $result . $w_bak . $str; # impossible
}

1;