The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
#
# unifmt - Unicode text formatter
# 
# Tom Christiansen <tchrist@perl.com>
# Tue Apr 19 12:47:41 MDT 2011

use utf8;
use strict;
use 5.10.1;
use autodie;

# delay fatal warnings till runtime; otherwise screws up compiler msgs
use warnings; # qw[ FATAL all   ];
use open        qw[ :utf8 :std  ];
use charnames   qw[ :full       ];

use Carp qw[ carp croak confess cluck ];
use Getopt::Long qw[ GetOptions ];
use Pod::Usage;

use File::Basename;

use Unicode::GCString;                  # UAX#29
use Unicode::LineBreak qw(:all);        # UAX#14-C2

##
## ** THIS IS AN UNSUPPORTED, PRE-RELEASE VERSION ONLY **
##
our $VERSION = "0.2";

our %Opt;
our $TABS     =  8;
our $COLUMNS  = 72;

our $Formatter;

sub wrap_line(_) {
    my($text) = @_;
    $Formatter->config(Newline => ("\n" . " " x 4));
    say $Formatter->break($text);
}

sub wrap_paragraph(_) {
    my ($text) = @_;
    $Formatter->config(Newline => "\n");

    for (split /\R{2,}/, $text) {
	s/^\s+//gm;
	s/\p{Dash}\K\h*\n//g;
        s/(?:(?![\N{NO-BREAK SPACE}\t])\p{White_Space})+/ /g;
        s/^\s+//;
        s/\s+$//;
        say $Formatter->break($_);
    }

}

sub tabbed_sizing {
    my ($self, $cols, $pre, $spc, $str) = @_;
    my $spcstr = $spc.$str;
    while ($spcstr =~ s/^( *)(\t+)//) {
        $cols += length($1);
        $cols += length($2) * $TABS - $cols % $TABS;
    }
    $cols += $self->strsize(0, '', '', $spcstr);
    return $cols;
};


sub usage($) {
    my $_ = $_[0];
    s/^(\p{Ll})/\u$1/;
    s/$/./ if /^\pL/ && !/\.$/;
    pod2usage("$0: [USAGE] $_");
}

sub highlander_options {
    my @optlist  = map { "--$_"} @_;
    my $commlist = join(", " => @optlist);
    usage("specify no more than one of: $commlist");
} 

sub load_options {
    Getopt::Long::Configure qw[ 
	bundling 
	auto_version 
	pass_through
	permute
    ];

    $0 = basename($0);  # shorten up warnings/errors

    my @options = (

        # standard options
        qw[
	    version|v
            help|?|h
            man|m
            debug|d

	    tabs|t=i
	    columns|width|c|w=i

	    lines
	    paragraphs|p
        ],

    );

    GetOptions(\%Opt => @options) || pod2usage(2);
    pod2usage(0)                                 if $Opt{help};
    pod2usage(-exitstatus => 0, -verbose => 2)   if $Opt{man};

    if (@ARGV && $ARGV[0] =~ /^-([1-9][0-9]*)$/) {
	shift @ARGV;
	my $newcols = $1;
	usage("you already specified columns") if $Opt{columns};
	$Opt{columns} = $newcols;
    } 

    $TABS    = $Opt{tabs}    if $Opt{tabs};
    $COLUMNS = $Opt{columns} if $Opt{columns};

}


sub main { 
    load_options();

    my $wrapper;

    if ($Opt{lines}) {
	highlander_options(qw[lines paragraphs]) if $Opt{paragraphs};
	$wrapper = \&wrap_line;
    } 
    elsif ($Opt{paragraphs}) {
	highlander_options(qw[lines paragraphs]) if $Opt{lines};
	$wrapper = \&wrap_paragraph;
    } 
    else {
	$Opt{paragraphs} = 1;
	$wrapper = \&wrap_paragraph;
    } 

    local $SIG{__DIE__} = sub {
	confess "Untrapped fatal exception: @_" unless $^S;
    };
    local $SIG{__WARN__} = sub { 
	confess "Unexpected fatalized warning: @_";
    };

    local $SIG{PIPE} = sub { exit };
    local $/ = q();

    $Formatter = new Unicode::LineBreak (
    # makes for fewer linebreaks on this dataset:
	Context => "NONEASTASIAN",      # EASTASIAN, NONEATSIAN
	ColumnsMax => $COLUMNS,
	ColumnsMin => 8,
	Format => "SIMPLE",             # SIMPLE, NEWLINE, TRIM
	SizingMethod    => \&tabbed_sizing,  # for tab handling
	TailorLB => [
	    ord("\t") => LB_SP,
	    LEFT_QUOTES()  => LB_OP,
	    RIGHT_QUOTES() => LB_CL,
	],
    );

    while (<>) {
	print $wrapper->($_);
    } 

    close(STDOUT) || die "$0: couldn't close STDOUT: $!";
}

main();
exit();


#################################################################
#################################################################

__END__

=head1 NAME

unifmt - wrap input paragraphs using the ULA

=head1 SYNOPSIS

unifmt [-t tabwidth] [-width | -w columns] [input_files ...]

Standard options:

    --help  -h
    --man   -m
    --debug -d

Program options:

    --tabs  NUM  -t NUM    set tab columns to NUM (default 8)
    --width NUM  -w NUM    set column width to NUM (default 72)

    --paragraphs -p        wrap each paragraph (default)
    --lines		   wrap each line

=head1 DESCRIPTION

TO BE WRITTEN: DESCRIPTION

=head1 EXAMPLES

TO BE WRITTEN: EXAMPLES

=head1 ERRORS

TO BE WRITTEN: ERRORS

=head1 ENVIRONMENT

TO BE WRITTEN: ENVIRONMENT

=head1 FILES

TO BE WRITTEN: FILES

=head1 PROGRAMS

Calls no other programs.

=head1 BUGS

TO BE WRITTEN: BUGS

=head1 SEE ALSO

=over

=item [UAX #11]

A. Freytag (2008-2009).
I<Unicode Standard Annex #11: East Asian Width>, Revision 17-19.
L<http://unicode.org/reports/tr11/>.

=item [UAX #14]

A. Freytag and A. Heninger (2008-2010).
I<Unicode Standard Annex #14: Unicode Line Breaking Algorithm>, Revision 22-26.
L<http://unicode.org/reports/tr14/>.

=item Standard Manpages

L<fmt(1)> 

=item Perl Modules

L<Text::Autoformat>, L<Text::LineFold>, L<Text::Wrap>,
L<Unicode::LineBreak>, and L<Unicode::GCString>.

=head1 AUTHOR

Tom Christiansen <tchrist@perl.com>

=head1 COPYRIGHT AND LICENCE

Copyright 2011 Tom Christiansen.

This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.