The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Text::Shorten;

use warnings;
use strict;
use Hash::SafeKeys;
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw(shorten_scalar shorten_array shorten_hash);
our %EXPORT_TAGS = (all => [ @EXPORT_OK ]);
our $VERSION = '0.06';

our $DOTDOTDOT = '...';
our $DOTDOTDOT_LENGTH = length($DOTDOTDOT);
our $DEFAULT_ARRAY_ELEM_SEPARATOR_LENGTH = 1;
our $DEFAULT_HASH_ELEM_SEPARATOR_LENGTH = 1;
our $DEFAULT_HASH_KEYVALUE_SEPARATOR_LENGTH = 2;
our $HASHREPR_SORTKEYS;

sub shorten_scalar {
    my ($scalar, $maxlen) = @_;

    return $scalar if length($scalar) < $maxlen;

    if ($scalar =~ /^['"]./                         # "']/
	&& substr($scalar,0,1) eq substr($scalar,-1)) {

	return substr($scalar,0,$maxlen-$DOTDOTDOT_LENGTH-1)
	    . $DOTDOTDOT . substr($scalar,-1);

    }

    my ($sign,$d1,$d,$d2,$e,$exp) =
	$scalar =~ /^\s* ([+-]?)
                     (\d*)
		     (\.?)
		     (\d*)
		     ([Ee]?)
		     ([+-]?\d+)? \s*$/x;

    {
	no warnings 'uninitialized';
	if ("$d1$d2" eq "") {
	    # not a number.

	    return substr($scalar, 0, $maxlen-$DOTDOTDOT_LENGTH) . $DOTDOTDOT;
	}
    }

    # the rest of this function is strategies for making a number shorter.
    # We may lose precision but we won't need to use $DOTDOTDOT

    $sign ||= '';
    $d1 ||= '0';
    $d2 = '' if not defined $d2;
    $e ||= '';
    $exp = '' if not defined $exp;
    my $E = $e || 'e';

    $exp  =~ s/^\+//             if length("$sign$d1$d$d2$e$exp") > $maxlen;
    $d1   =~ s/^0+(.)/$1/        if length("$sign$d1$d$d2$e$exp") > $maxlen;
    $d2   =~ s/(.)0+$/$1/        if length("$sign$d1$d2$e$exp") > $maxlen;
    $exp  =~ s/^(\-?)0+(.)/$1$2/ if length("$sign$d1$d$d2$exp") > $maxlen;
    $sign =~ s/^\+//             if length("$sign$d1$d$d2$e$exp") > $maxlen;

    my $lastc = 0;
    while (length("$sign$d1$d$d2$e$exp") > $maxlen) {

	if ($d && $d2 eq '') {       # 1.E23 => 1E23
	    $d = '';
	    $d1 ||= '0';
	    next;
	}

	if ($e && ($exp eq '' || $exp==0)) {         # 4.567E0 => 4.567
	    $e = $exp = '';
	    next;
	}

	if (($d1 eq '' || $d1 == 0) && $d2 ne '') {
	    $d1=0;
	    while ($d1 == 0 && $d2 ne '') {        # 0.123 => 1.23,  
		                                   # 0.0004E-5 => 0.004E-6
		$d1 = substr($d2,0,1);
		$d2 = substr($d2,1);
		$e ||= $E;
		$exp--;
	    }
	    next;
	}

	if ($e eq '' && $d2 ne '') {
	    # start truncating digits
	    my $c = chop $d2;
	    if ($c > 5 || ($c == 5 && $lastc)) {
		my $new_d2 = $d2;
		$lastc = 0;
		if (length($new_d2) > length($d2)) {
		    $d1++;
		    $d2 = substr($new_d2,1);
		}
	    } else {
		$lastc = 1;
	    }
	    next;
	}

	if ($d eq '' && $d2 eq '') {
	    # start truncating digits and increment exp
	    my $c = chop $d1;
	    if ($c > 5 || ($c==5 && $lastc)) {
		$lastc = 0;
		$d1++;
	    } else {
		$lastc = 1;
	    }
	    $e ||= $E;
	    $exp++;
	    next;
	}

	if ($d2 ne '') {
	    # start truncating digits
	    my $c = chop $d2;
	    if ($c > 5 || ($c == 5 && $lastc)) {
		my $new_d2 = $d2;
		$lastc = 0;
		if (length($new_d2) > length($d2)) {
		    $d1++;
		    $d2 = substr($new_d2,1);
		}
	    } else {
		$lastc = 1;
	    }
	    next;
	}
    }

    $d = '' if $d2 eq '';
    while ($e && $d && $d1 ne '' && $d1 > 9) {        # 98.7E6 => 9.87E7
	$d2 = chop($d1) . $d2;
	$d = '.';
	$exp++;
    }
    $d = '' if $d2 eq '';
    return "$sign$d1$d$d2$e$exp";
}

# XXX - do we want to make it configurable whether shorten_array
#       always returns at least one full element?

sub shorten_array {
    my ($array, $maxlen, $seplen, @key) = @_;
    if (!defined($seplen)) {
	$seplen = $DEFAULT_ARRAY_ELEM_SEPARATOR_LENGTH;
    } elsif ($seplen =~ /\D/) {
	$seplen = length($seplen);
    }
    my $dotslen = $seplen + $DOTDOTDOT_LENGTH;

    my $n = $#{$array} + 1;
    @key = (0) if @key == 0;
    @key = sort {$a <=> $b} grep { $_ >= 0 && $_ < $n } @key;
    my @inc = (0) x $n;

    my $len = $n > 0 ? $dotslen - 1 : 0;
    if (@key > 1 || (@key == 1 && $key[0] != 0)) {

	my @prio = (0) x $n;
	# "prioritize" elements for display, giving preference to
	#     key items
	#     items between key items
	#     the first item
	#     <xxx>the last item</xxx>
	if ($n > 0) {
	    $prio[$_]  = 8 for @key;
	    $prio[$_] += 4 for $key[0]..$key[-1];
	}
	$prio[0] += 2;

	my $insert_fails = 0;
	for my $i ( sort { $prio[$b] <=> $prio[$a] || $a <=> $b } 0..$n-1) {
	    last if $prio[$i] < 8 && $len > $maxlen;

	    # what are the consequences of including $array->[$i] in the output?
	    #
	    # if none of $array->[$i]'s neighbors are excluded:
	    #      then we lose $dotslen and add length of $array->[$i]
	    #      [   a , ... , c ] ==> [ a , b , c ]
	    #
	    # if one of $array->[$i]'s neighbors are excluded:
	    #      then we add $array->[$i]
	    #      [   a , ... ] => [ a , b , ... ]
	    #      [   ... , c ] => [ ... , b , c ]
	    #      [  edge ... ] => [ edge a , ... ]
	    #
	    # if two of $array->[$i]'s neighbors are excluded
	    #      then we gain $dotslen + $array->[$i]
	    #      [ ... ] => [ ... , a , ... ]

	    my $excl = ($i>0 && !$inc[$i-1]) + ($i<$n-1 && !$inc[$i+1]) + 0;
	    my $dlen = defined($array->[$i])&&length($array->[$i])
		+ $seplen + $dotslen * ($excl - 1);
	    if ($prio[$i] >= 8 || $len + $dlen <= $maxlen) {
		$inc[$i] = 1;
		$len += $dlen;
		$insert_fails = 0;
	    } else {

		# for very large arrays, don't keep trying to squeeze
		# that last element in when there is a low probability
		# that it will work ...

		last if ++$insert_fails > 100;
	    }
	}

    } else {

	# don't need to sort, don't need to check $prio[$i]
	my $insert_fails = 0;
	for my $i (0 .. $n-1) {
	    last if $len > $maxlen;
	    my $excl = ($i>0 && !$inc[$i-1]) + ($i<$n-1 && !$inc[$i+1]) + 0;
	    my $dlen = defined($array->[$i])&&length($array->[$i])
		+ $seplen + $dotslen * ($excl - 1);
	    if ($len + $dlen <= $maxlen) {
		$inc[$i] = 1;
		$len += $dlen;
	    } else {
		last if ++$insert_fails > 100;
	    }
	}
    }

    # construct array, including elements in @inc
    my @result = ();
    my @explicit = grep $inc[$_], 0..$#inc;
    my $i = 0;
    while (@explicit) {
	if ($i < $explicit[0]) {
	    push @result, $DOTDOTDOT;
	}
	$i = shift @explicit;
	push @result, $array->[$i];
	$i++;
    }
    if ($i < $n) {
	push @result, $DOTDOTDOT;
    }
    return @result;
}

sub shorten_hash {
    my ($hash, $maxlen, $sep1, $sep2, @key) = @_;

    if (!defined($sep1)) {
	$sep1 = $DEFAULT_HASH_ELEM_SEPARATOR_LENGTH;
    } elsif ($sep1 =~ /\D/) {
	$sep1 = length($sep1);
    }
    if (!defined($sep2)) {
	$sep2 = $DEFAULT_HASH_KEYVALUE_SEPARATOR_LENGTH;
    } elsif ($sep2 =~ /\D/) {
	$sep2 = length($sep2);
    }

    my $total_len = -$sep1;
    my @safekeys = safekeys %$hash;
    for my $k (@safekeys) {
	$total_len += length($k) + length($hash->{$k}) + $sep1 + $sep2;
	last if $total_len > $maxlen;
    }
    if ($total_len <= $maxlen) {
	# ok to include all elements
	return map { [ $_ , $hash->{$_} ] } @safekeys;
    }

    my @r = ();
    my @hashkeys = ();
    my $hashkey = {};

    if (@key > 0) {
	my %key = map { $_ => 1 } @key;
	if (100 > @safekeys) {
	    @hashkeys = sort {
		($key{$b}||0) <=> ($key{$a}||0) 
		    || $a cmp $b
	    } @safekeys;
	} else {
	    @hashkeys = @key = grep { defined $key{$_} } @key;
	    push @hashkeys, grep { !defined $key{$_} } @safekeys;
	}
    } else {
	if (100 > @safekeys) {
	    @hashkeys = sort @safekeys;
	} else {
	    $hashkey = $hash;
	}
    }

    my $len = 3;
    my @sk = safekeys %$hashkey;
    if ($HASHREPR_SORTKEYS) {
	@sk = sort @sk;
    }

    foreach my $key (@hashkeys, @sk) {
	my $dlen = $sep1 + $sep2 + length($key) + length($hash->{$key});
	last if $len + $dlen > $maxlen
	    && @r > 0;      # always include at least one key-value pair
	push @r, [ $key, $hash->{$key} ];
	$len += $dlen;
    }
    return @r, [ $DOTDOTDOT ];
}

1;

__END__

=head1 NAME

Text::Shorten - Abbreviate long output

=head1 VERSION

0.06

=head1 SYNOPSIS

    use Text::Shorten ':all';
    $short_string = shorten_scalar($really_long_string, 40);
    @short_array = shorten_array(\@really_long_array, 80);
    @short_hash = shorten_hash(\%really_large_hash, 80);

=head1 DESCRIPTION

C<Text::Shorten> creates small strings and small arrays from
larger values for display purposes. The output will include
the string C<"..."> to indicate that the value being displayed
is abbreviated.

=head1 FUNCTIONS


=head2 shorten_scalar($SCALAR, $MAXLEN)

Returns a representation of C<$SCALAR> that is no longer than
C<$MAXLEN> characters. It usually works by chopping characters
off the end of the string and replacing them with the
abbreviation indicator C<"...">.

    $m = join('foo','A'..'Z');
    print shorten_scalar($m, 20); # => "AfooBfooCfooDfooE..."
    print shorten_scalar($m, 10); # => "AfooBfo..."

If C<$SCALAR> looks like a number,
then this method will use scientific notation and reduce the
precision of the number to fit it into the alloted space.

    $m = "123456789" x 9;
    print shorten_scalar($m, 20); # => "12345678912345679e64"
    print shorten_scalar($m, 10); # => "1234568e74"

The output of this function is not guaranteed to make sense
when C<$MAXLEN> is small, say, less than 10.


=head2 shorten_array($ARRAYREF, $MAXLEN [,$SEPARATOR=1 [,@KEY=()]])

Returns a list that is representative of the list in C<$ARRAYREF>
and which will be no longer than C<$MAXLEN> characters when it
is displayed.

  $m = [ 'aa' .. 'zz' ];
  print join',', shorten_array($m,20);    # "aa,ab,ac,ad,ae,..."

The default assumption is that displayed array elements
will be separated with a comma or other single-character delimiter.
Specify C<$SEPARATOR> as either a delimiting-string or as a number
representing the length of the delimiter to override this.

  $m = [ 'aa' .. 'zz' ];
  $s = ", ";
  print join $s, shorten_array($m,20,2);  # "aa, ab, ac, ad, ..."
  print join $s, shorten_array($m,20,$s); # "aa, ab, ac, ad, ..."

C<@KEY> is a (possibly empty) set of array indices for array
elements that must be returned in the output. All of the array
indices in C<@KEY> will be included in the output, even if that
makes the output length exceed C<$MAXLEN>,

  $m = [ 'aa' .. 'zz' ];
  $s = ',';
  print join $s, shorten_array($m,20,1,77);     # "aa,ab,ac,...,cz,..."
  print join $s, shorten_array($m,20,1,76..78); # "aa,...,cy,cz,da,..."

The output of this function is not guaranteed to make sense
when C<$MAXLEN> is small (say, less than 15).


=head2 shorten_hash($HASHREF, $MAXLEN [, $SEP1=1, $SEP2=2 [, @KEY=() ]])

Returns a list suitable for displaying representative elements
of the hashtable in C<$HASHREF> such that the displayed length
will be no longer than C<$MAXLEN> characters.

The return value is a list of list references. Each element of
the return value contains a key-value pair to be displayed,
except for the last element of the list, which may either contain
a key-value pair, or a list reference containing the single
token C<...>. This token indicates that some key-value pairs are
not to be displayed. Here is an example of how one print the output
of C<shorten_hash>, using C<":"> as a key-value pair separator,
and C<"; "> to separate different elements of the hash.

  $m = { foo => 'bar', 123 => 456 };
  @m = shorten_hash($m,20);

  # to display shortened results, do something like this
  print "{", join("; ", map { join ":", @$_ } @m), "}";
  # the above line prints "{abc:def; 123:456}"

It is assumed that each key-value pair in the output will be separated
by a single-character delimiter, and that the hash keys will be separated
from their associated hash values by a two-character delimiter.
Specify a delimiter or delimiter length to C<$SEP1> and C<$SEP2>
to override these assumptions.

C<@KEY> is a (possibly empty) set of hash keys that must be returned
in the output. The key-value pair for any valid key that is specified
in C<@KEY> will be included in the output, even if that would make
the output length exceed C<$MAXLEN>.

The output of this function is not guaranteed to make sense
when C<$MAXLEN> is small (say, less than 20).

=head1 EXPORT

Nothing by default. The three functions C<shorten_scalar>,
C<shorten_array>, and C<shorten_hash> may be exported individually,
or they may be exported as a group with the tag C<:all>.

=head1 AUTHOR

Marty O'Brien, C<< <mob at cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to 
C<bug-text-shorten at rt.cpan.org>, or through the web interface at 
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Shorten>.  I will be 
notified, and then you'll automatically be notified of progress on your bug
 as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Text::Shorten


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Text-Shorten>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Text-Shorten>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Text-Shorten>

=item * Search CPAN

L<http://search.cpan.org/dist/Text-Shorten/>

=back


=head1 ACKNOWLEDGEMENTS

L<Dumpvalue> / C<dumpvars.pl>.


=head1 LICENSE AND COPYRIGHT

Copyright 2010-2013 Marty O'Brien.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.

=cut