lib/Text/ANSI/Util.pm - metacpan.org

package Text::ANSI::Util;

use 5.010001;
use locale;
use strict;
use utf8;
use warnings;

use List::Util qw(min max);
use Text::WideChar::Util 0.10 qw(mbswidth mbtrunc);

require Exporter;
our @ISA       = qw(Exporter);
our @EXPORT_OK = qw(
                       ta_add_color_resets
                       ta_detect
                       ta_extract_codes
                       ta_highlight
                       ta_highlight_all
                       ta_length
                       ta_length_height
                       ta_mbpad
                       ta_mbswidth
                       ta_mbswidth_height
                       ta_mbtrunc
                       ta_mbwrap
                       ta_pad
                       ta_split_codes
                       ta_split_codes_single
                       ta_strip
                       ta_trunc
                       ta_wrap
               );

our $VERSION = '0.16'; # VERSION

# used to find/strip escape codes from string
our $re       = qr/
                      #\e\[ (?: (\d+) ((?:;[^;]+?)*) )? ([\x40-\x7e])
                      # without captures
                      \e\[ (?: \d+ (?:;[^;]+?)* )? [\x40-\x7e]
                  /osx;

sub ta_detect {
    my $text = shift;
    $text =~ $re ? 1:0;
}

sub ta_length {
    my $text = shift;
    length(ta_strip($text));
}

sub _ta_length_height {
    my ($is_mb, $text) = @_;
    my $num_lines = 0;
    my @lens;
    for my $e (split /(\r?\n)/, ta_strip($text)) {
        if ($e =~ /\n/) {
            $num_lines++;
            next;
        }
        $num_lines = 1 if $num_lines == 0;
        push @lens, $is_mb ? mbswidth($e) : length($e);
    }
    [max(@lens) // 0, $num_lines];
}

sub ta_length_height {
    _ta_length_height(0, @_);
}

sub ta_mbswidth_height {
    _ta_length_height(1, @_);
}

sub ta_strip {
    my $text = shift;
    $text =~ s/$re//go;
    $text;
}

sub ta_extract_codes {
    my $text = shift;
    my $res = "";
    $res .= $1 while $text =~ /((?:$re)+)/go;
    $res;
}

sub ta_split_codes {
    my $text = shift;
    return split(/((?:$re)+)/o, $text);
}

sub ta_split_codes_single {
    my $text = shift;
    return split(/($re)/o, $text);
}

# same like _ta_mbswidth, but without handling multiline text
sub _ta_mbswidth0 {
    my $text = shift;
    mbswidth(ta_strip($text));
}

sub ta_mbswidth {
    my $text = shift;
    ta_mbswidth_height($text)->[0];
}

sub _ta_wrap {
    my ($is_mb, $text, $width, $opts) = @_;
    $width //= 80;
    $opts  //= {};

    # basically similar to Text::WideChar::Util's algorithm. we adjust for
    # dealing with ANSI codes by splitting codes first (to easily do color
    # resets/replays), then grouping into words and paras, then doing wrapping.

    my @termst; # store term type, 's' (spaces), 'w' (word), 'c' (cjk word) or
                # 'p' (parabreak)
    my @terms;  # store the text (w/ codes); for ws, only store the codes
    my @pterms; # store the plaintext ver, but only for ws to check parabreak
    my @termsw; # store width of each term, only for non-ws
    my @termsc; # store color replay code
    {
        my @ch = ta_split_codes_single($text);
        my $crcode = ""; # code for color replay to be put at the start of line
        my $term      = '';
        my $pterm     = '';
        my $prev_type = '';
        while (my ($pt, $c) = splice(@ch, 0, 2)) {
            #use Data::Dump; print "D:chunk: "; dd [$pt, $c];

            # split into (CJK and non-CJK) words and spaces.

            my @s; # (WORD1, TYPE, ...) where type is 's' for space, 'c' for
                   # CJK word, or 'w' for non-CJK word
            while ($pt =~ /($Text::WideChar::Util::re_cjk+)|(\S+)|(\s+)/gox) {
                if ($1) {
                    push @s, $1, 'c';
                } elsif ($3) {
                    push @s, $3, 's';
                } else {
                    my $pt2 = $2;
                    while ($pt2 =~ /($Text::WideChar::Util::re_cjk_class+)|
                                    ($Text::WideChar::Util::re_cjk_negclass+)/gox) {
                        if ($1) {
                            push @s, $1, 'c';
                        } else {
                            push @s, $2, 'w';
                        }
                    }
                }
            }

            #use Data::Dump; say "D:s=",Data::Dump::dump(\@s);

            my $only_code = 1 if !@s;
            while (1) {
                my ($s, $s_type) = splice @s, 0, 2;
                $s_type //= '';
                last unless $only_code || defined($s);
                # empty text, only code
                if ($only_code) {
                    $s = "";
                    $term .= $c if defined $c;
                }
                #say "D:s=[$s]  prev_type=$prev_type \@ch=",~~@ch,"  \@s=",~~@s;

                if ($s_type && $s_type ne 's') {
                    if ($prev_type eq 's') {
                        #say "D:found word, completed previous ws [$term]";
                        push @termst, 's';
                        push @terms , $term;
                        push @pterms, $pterm;
                        push @termsw, undef;
                        push @termsc, $crcode;
                        # start new word
                        $pterm = ''; $term = '';
                    } elsif ($prev_type && $prev_type ne $s_type) {
                        #say "D:found a ".($s_type eq 'c' ? 'CJK':'non-CJK')." word, completed previous ".($prev_type eq 'c' ? 'CJK':'non-CJK')." word [$term]";
                        push @termst, $prev_type;
                        push @terms , $term;
                        push @pterms, $pterm;
                        push @termsw, $is_mb ? mbswidth($pterm):length($pterm);
                        push @termsc, $crcode;
                        # start new word
                        $pterm = ''; $term = '';
                    }
                    $pterm .= $s;
                    $term  .= $s; $term .= $c if defined($c) && !@s;
                    if (!@s && !@ch) {
                        #say "D:complete word because this is the last token";
                        push @termst, $s_type;
                        push @terms , $term;
                        push @pterms, "";
                        push @termsw, $is_mb ? mbswidth($pterm):length($pterm);
                        push @termsc, $crcode;
                    }
                } elsif (length($s)) {
                    if ($prev_type ne 's') {
                        #say "D:found ws, completed previous word [$term]";
                        push @termst, $prev_type;
                        push @terms , $term;
                        push @pterms, "";
                        push @termsw, $is_mb ? mbswidth($pterm):length($pterm);
                        push @termsc, $crcode;
                        # start new ws
                        $pterm = ''; $term = '';
                    }
                    $pterm .= $s;
                    $term  .= $c if defined($c) && !@s;
                    if (!@s && !@ch) {
                        #say "D:complete ws because this is the last token";
                        push @termst, 's';
                        push @terms , $term;
                        push @pterms, $pterm;
                        push @termsw, undef;
                        push @termsc, $crcode;
                    }
                }
                $prev_type = $s_type;

                if (!@s) {
                    if (defined($c) && $c =~ /m\z/) {
                        if ($c eq "\e[0m") {
                            #say "D:found color reset, emptying crcode";
                            $crcode = "";
                        } elsif ($c =~ /m\z/) {
                            #say "D:adding to crcode";
                            $crcode .= $c;
                        }
                    }
                    last if $only_code;
                }

            } # splice @s
        } # splice @ch
    }

    # mark parabreaks
    {
        my $i = 0;
        while ($i < @pterms) {
            if ($termst[$i] eq 's') {
                if ($pterms[$i] =~ /[ \t]*(\n(?:[ \t]*\n)+)([ \t]*)/) {
                    #say "D:found parabreak";
                    $pterms[$i] = $1;
                    $termst[$i] = 'p';
                    if ($i < @pterms-1) {
                        # stick color code to the beginning of next para
                        $terms [$i+1] = $terms[$i] . $terms [$i+1];
                        $terms [$i] = "";
                    }
                    if (length $2) {
                        #say "D:found space after parabreak, splitting";
                        splice @termst, $i+1, 0, "s";
                        splice @terms , $i+1, 0, "";
                        splice @pterms, $i+1, 0, $2;
                        splice @termsw, $i+1, 0, undef;
                        splice @termsc, $i+1, 0, $termsc[$i];
                        $i += 2;
                        next;
                    }
                }
            }
            $i++;
        }
    }

    #use Data::Dump::Color; my @d; for (0..$#terms) { push @d, {type=>$termst[$_], term=>$terms[$_], pterm=>$pterms[$_], termc=>$termsc[$_], termw=>$termsw[$_], } } dd \@d;
    #return;

    #use Data::Dump; say "D:termst=".Data::Dump::dump(\@termst);
    #use Data::Dump; say "D:terms =".Data::Dump::dump(\@terms);
    #use Data::Dump; say "D:pterms=".Data::Dump::dump(\@pterms);
    #use Data::Dump; say "D:termsw=".Data::Dump::dump(\@termsw);
    #use Data::Dump; say "D:termsc=".Data::Dump::dump(\@termsc);

    my ($maxww, $minww);

    # now we perform wrapping

    my @res;
    {
        my $tw = $opts->{tab_width} // 8;
        die "Please specify a positive tab width" unless $tw > 0;
        my $optfli  = $opts->{flindent};
        my $optfliw = Text::WideChar::Util::_get_indent_width($is_mb, $optfli, $tw) if defined $optfli;
        my $optsli  = $opts->{slindent};
        my $optsliw = Text::WideChar::Util::_get_indent_width($is_mb, $optsli, $tw) if defined $optsli;
        my $pad = $opts->{pad};
        my $x = 0;
        my $y = 0;
        my ($fli, $sli, $fliw, $sliw);
        my $is_parastart = 1;
        my $line_has_word = 0;
        my ($termt, $prev_t);
      TERM:
        for my $i (0..$#terms) {
            $prev_t = $termt if $i;
            $termt = $termst[$i];
            my $term  = $terms[$i];
            my $pterm = $pterms[$i];
            my $termw = $termsw[$i];
            my $crcode = $i > 0 ? $termsc[$i-1] : "";
            #say "D:term=[", ($termt eq 'w' ? $term : $pterm), "] ($termt)";

            # end of paragraph
            if ($termt eq 'p') {
                my $numnl = 0;
                $numnl++ while $pterm =~ /\n/g;
                for (1..$numnl) {
                    push @res, "\e[0m" if $crcode && $_ == 1;
                    push @res, " " x ($width-$x) if $pad;
                    push @res, "\n";
                    $x = 0;
                    $y++;
                }
                $line_has_word = 0;
                $x = 0;
                $is_parastart = 1;
                next TERM;
            }

            if ($is_parastart) {
                # this is the start of paragraph, determine indents
                if (defined $optfli) {
                    $fli  = $optfli;
                    $fliw = $optfliw;
                } else {
                    if ($termt eq 's') {
                        $fli  = $pterm;
                        $fliw = Text::WideChar::Util::_get_indent_width($is_mb, $fli, $tw);
                    } else {
                        $fli  = "";
                        $fliw = 0;
                    }
                    #say "D:deduced fli from text [$fli] ($fliw)";
                    my $j = $i;
                    $sli = undef;
                    while ($j < @terms && $termst[$j] ne 'p') {
                        if ($termst[$j] eq 's') {
                            if ($pterms[$j] =~ /\n([ \t]+)/) {
                                $sli  = $1;
                                $sliw = Text::WideChar::Util::_get_indent_width($is_mb, $sli, $tw);
                                last;
                            }
                        }
                        $j++;
                    }
                    if (!defined($sli)) {
                        $sli  = "";
                        $sliw = 0;
                    }
                    #say "D:deduced sli from text [$sli] ($sliw)";
                    die "Subsequent indent must be less than width" if $sliw >= $width;
                }

                #say "D:inserting the fli [$fli] ($fliw)";
                push @res, $fli;
                $x += $fliw;
            } # parastart

            $is_parastart = 0;

            if ($termt eq 's') {
                # just print the codes
                push @res, $term;

                # maintain terminating newline
                if ($pterm =~ /\n/ && $i == $#terms) {
                    push @res, "\e[0m" if $crcode;
                    push @res, " " x ($width-$x) if $pad;
                    push @res, "\n";
                    $line_has_word = 0;
                }
            }

            if ($termt ne 's') {
                # we need to chop long words
                my @words;
                my @wordsw;
                my @wordst; # c if cjk, w if not
                my @wordswsb; # whether there are ws before the word
                my $j = 0;
                my $c = ""; # see below for explanation
                while (1) {
                    $j++;
                    # most words shouldn't be that long. and we don't need to
                    # truncate long CJK word first here because it will get
                    # truncated later.
                    if ($termw <= $width-$sliw || $termt eq 'c') {
                        push @words   , $c . $term;
                        push @wordsw  , $termw;
                        push @wordst  , $termt;
                        push @wordswsb, ($prev_t && $prev_t eq 's')?1:0;
                        last;
                    }
                    #use Data::Dump; print "D:truncating long word "; dd $term;
                    my $res = $is_mb ? ta_mbtrunc($term, $width-$sliw, 1) :
                        ta_trunc($term, $width-$sliw, 1);

                    my ($tword, $twordw);
                    if ($j == 1) {
                        $tword  = $res->[0];
                        $twordw = $res->[1];
                    } else {
                        # since ta_{,mb}trunc() adds the codes until the end of
                        # the word, to avoid messing colors, for the second word
                        # and so on we need to replay colors by prefixing with:
                        # \e[0m (reset) + $crcode + (all the codes from the
                        # start of the long word up until the truncated
                        # position, stored in $c).
                        #
                        # there might be faster way, but it is expected that
                        # long words are not that common.
                        $tword  = ($crcode ? "\e[0m" . $crcode : "") .
                            $c . $res->[0];
                        $twordw = $res->[1];
                    }
                    $c .= ta_extract_codes(substr($term, 0, $res->[2]));
                    #use Data::Dump; print "D:truncated word is "; dd $tword;

                    push @words   , $tword;
                    push @wordsw  , $twordw;
                    push @wordst  , $termt;
                    push @wordswsb, $j == 1 ? (($prev_t && $prev_t eq 's')?1:0) : 0;
                    $term  = substr($term, $res->[2]);
                    $termw = $is_mb ? _ta_mbswidth0($term) : ta_length($term);
                }

                #use Data::Dump; print "D:words="; dd \@words; print "D:wordsw="; dd \@wordsw; print "D:wordswsb="; dd \@wordswsb;

                # the core of the wrapping algo
                for my $word (@words) {
                    my $wordw = shift @wordsw;
                    my $wordt = shift @wordst;
                    my $ws_before = shift @wordswsb;
                    #say "D:x=$x word=$word wordw=$wordw wordt=$wordt ws_before=$ws_before line_has_word=$line_has_word width=$width";

                    $maxww = $wordw if !defined($maxww) || $maxww < $wordw;
                    $minww = $wordw if !defined($minww) || $minww > $wordw;

                    if ($x + ($line_has_word ? 1:0) + $wordw <= $width) {
                        if ($line_has_word && $ws_before) {
                            push @res, " ";
                            $x++;
                        }
                        push @res, $word;
                        $x += $wordw;
                    } else {
                        # line break
                        while (1) {
                            if ($wordt eq 'c') {
                                # a CJK word can be line-broken
                                my $res;
                                if ($ws_before) {
                                    $res = ta_mbtrunc($word, $width-$x-1, 1);
                                    push @res, " ", $res->[0];
                                } else {
                                    $res = ta_mbtrunc($word, $width-$x, 1);
                                    push @res, $res->[0];
                                }
                                #say "D:truncated CJK word: $word (".length($word)."), ".($width-$x)." -> $res->[0] (".length($res->[0]).") & $res->[1], remaining=$res->[3] (".length($res->[3]).")";
                                $word = $res->[3];
                                $wordw = _ta_mbswidth0($res->[3]);
                            } else {
                                push @res, "\e[0m" if $crcode;
                            }
                            push @res, " " x ($width-$x) if $pad;
                            push @res, "\n";
                            $y++;
                            push @res, $crcode;
                            push @res, $sli;

                            if ($sliw + $wordw <= $width) {
                                push @res, $word;
                                $x = $sliw + $wordw;
                                last;
                            } else {
                                # word still too long, break again
                                $x = $sliw;
                            }
                        }
                    }
                    $line_has_word++;
                }

            }
        } # for term
        push @res, " " x ($width-$x) if $line_has_word && $pad;
    }

    if ($opts->{return_stats}) {
        return [join("", @res), {
            max_word_width => $maxww,
            min_word_width => $minww,
        }];
    } else {
        return join("", @res);
    }
}

sub ta_wrap {
    _ta_wrap(0, @_);
}

sub ta_mbwrap {
    _ta_wrap(1, @_);
}

sub _ta_pad {
    my ($is_mb, $text, $width, $which, $padchar, $is_trunc) = @_;
    if ($which) {
        $which = substr($which, 0, 1);
    } else {
        $which = "r";
    }
    $padchar //= " ";

    my $w = $is_mb ? _ta_mbswidth0($text) : ta_length($text);
    if ($is_trunc && $w > $width) {
        my $res = $is_mb ?
            ta_mbtrunc($text, $width, 1) : ta_trunc($text, $width, 1);
        $text = $res->[0] . ($padchar x ($width-$res->[1]));
    } else {
        if ($which eq 'l') {
            $text = ($padchar x ($width-$w)) . $text;
        } elsif ($which eq 'c') {
            my $n = int(($width-$w)/2);
            $text = ($padchar x $n) . $text . ($padchar x ($width-$w-$n));
        } else {
            $text .= ($padchar x ($width-$w));
        }
    }
    $text;
}

sub ta_pad {
    _ta_pad(0, @_);
}

sub ta_mbpad {
    _ta_pad(1, @_);
}

sub _ta_trunc {
    my ($is_mb, $text, $width, $return_extra) = @_;

    # return_extra (undocumented): if set to 1, will return [truncated_text,
    # visual width, length(chars) up to truncation point, rest of the text not
    # included]

    my $w = $is_mb ? _ta_mbswidth0($text) : ta_length($text);
    if ($w <= $width) {
        return $return_extra ? [$text, $w, length($text), ''] : $text;
    }
    my @p = ta_split_codes($text);
    my @res;
    my $append = 1; # whether we should add more text
    my $code4rest = '';
    my $rest = '';
    $w = 0;
    my $c = 0;
    #use Data::Dump; dd \@p;
    while (my ($t, $ansi) = splice @p, 0, 2) {
        #say "D: t=<$t>, \@p=", ~~@p, ", code4rest=<$code4rest>, rest=<$rest>";
        if ($append) {
            my $tw = $is_mb ? mbswidth($t) : length($t);
            #say "D: tw=$tw";
            if ($w+$tw <= $width) {
                push @res, $t;
                $w += $tw;
                $c += length($t);
                $append = 0 if $w == $width;
                #say "D:end1" unless $append;
            } else {
                my $tres = $is_mb ?
                    mbtrunc($t, $width-$w, 1) :
                        [substr($t, 0, $width-$w), $width-$w, $width-$w];
                #use Data::Dump; dd $tres;
                push @res, $tres->[0];
                $w += $tres->[1];
                $c += $tres->[2];
                $rest = substr($t, $tres->[2]);
                $append = 0;
                #say "D:end2";
            }
        } else {
            $rest .= $t;
        }
        if (defined $ansi) {
            push @res, $ansi;
            $c += length($ansi) if $append;
            $code4rest .= $ansi if $append;
            $rest .= $ansi unless $append;
        }
    }

    if ($return_extra) {
        return [join("", @res), $w, $c, $code4rest . $rest];
    } else {
        return join("", @res);
    }
}

sub ta_trunc {
    _ta_trunc(0, @_);
}

sub ta_mbtrunc {
    _ta_trunc(1, @_);
}

sub _ta_highlight {
    my ($is_all, $text, $needle, $color) = @_;

    # break into chunks
    my (@chptext, @chcode, @chsavedc); # chunk plain texts, codes, saved codes
    my $sc = "";
    my $plaintext = "";
    my @ch = ta_split_codes_single($text);
    while (my ($pt, $c) = splice(@ch, 0, 2)) {
        push @chptext , $pt;
        push @chcode  , $c;
        push @chsavedc, $sc;
        $plaintext .= $pt;
        if (defined($c) && $c =~ /m\z/) {
            if ($c eq "\e[0m") {
                $sc = "";
            } elsif ($c =~ /m\z/) {
                $sc .= $c;
            }
        }
    }
    #use Data::Dump; print "\@chptext: "; dd \@chptext; print "\@chcode: "; dd \@chcode; print "\@chsavedc: "; dd \@chsavedc;

    # gather a list of needles to highlight, with their positions
    my (@needle, @npos);
    if (ref($needle) eq 'Regexp') {
        my @m = $plaintext =~ /$needle/g;
        return $text unless @m;
        my $pos = 0;
        while ($pos < length($plaintext)) {
            my @pt;
            for (@m) {
                my $p = index($plaintext, $_, $pos);
                push @pt, [$p, $_] if $p >= 0;
            }
            last unless @pt;
            my $pmin = $pt[0][0];
            my $t = $pt[0][1];
            for (@pt) {
                if ($pmin > $_->[0] ||
                        $pmin==$_->[0] && length($t) < length($_->[1])) {
                    $pmin = $_->[0];
                    $t = $_->[1];
                }
            }
            push @needle, $t;
            push @npos  , $pmin;
            last unless $is_all;
            $pos = $pmin + length($t);
        }
    } else {
        my $pos = 0;
        while (1) {
            #say "D:finding '$needle' in '$plaintext' from pos '$pos'";
            my $p = index($plaintext, $needle, $pos);
            last if $p < 0;
            push @needle, $needle;
            push @npos  , $p;
            last unless $is_all;
            $pos = $p + length($needle);
            last if $pos >= length($plaintext);
        }
        return $text unless @needle;
    }
    #use Data::Dump; print "\@needle: "; dd \@needle; print "\@npos: "; dd \@npos;

    my @res;
    my $found = 1;
    my $pos = 0;
    my $i = 0;
    my $curneed = shift @needle;
    my $npos    = shift @npos;
  CHUNK:
    while (1) {
        last if $i >= @chptext;
        my $pos2  = $pos+length($chptext[$i])-1;
        my $npos2 = $npos+length($curneed)-1;
        #say "D: chunk=[$chptext[$i]], npos=$npos, npos2=$npos2, pos=$pos, pos2=$pos2";
        if ($pos > $npos2 || $pos2 < $npos || !$found) {
            #say "D:inserting chunk: [$chptext[$i]]";
            # no need to highlight
            push @res, $chptext[$i];
            push @res, $chcode[$i] if defined $chcode[$i];
            goto L1;
        }

        # there is chunk text at the left of needle?
        if ($pos < $npos) {
            my $pre = substr($chptext[$i], 0, $npos-$pos);
            #say "D:inserting pre=[$pre]";
            push @res, $pre;
        }

        my $npart = substr($curneed,
                           max(0, $pos-$npos),
                           min($pos2, $npos2)-max($pos, $npos)+1);
        if (length($npart)) {
            #say "D:inserting npart=[$npart]";
            push @res, $color, $npart;
            push @res, "\e[0m";
            #use Data::Dump; dd [$chsaved[$i], $chcode[$i]];
            push @res, $chsavedc[$i];
        }

        # is there chunk text at the right of needle?
        if ($npos2 <= $pos2) {
            #say "D:We have run past current needle [$curneed]";
            my $post = substr($chptext[$i], $npos2-$pos+1);

            if (@needle) {
                $curneed = shift @needle;
                $npos    = shift @npos;
                #say "D:Finding the next needle ($curneed) at pos $npos";
                $pos     = $npos2+1;
                $chptext[$i] = $post;
                $found = 1;
                redo CHUNK;
            } else {
                # we're done finding needle
                $found = 0;
            }

            if (!$found) {
                #say "D:inserting post=[$post]";
                push @res, $post;
                push @res, $chcode[$i] if defined $chcode[$i];
            }
        }

      L1:
        $pos = $pos2+1;
        $i++;
    }

    join "", @res;
}

sub ta_highlight {
    _ta_highlight(0, @_);
}

sub ta_highlight_all {
    _ta_highlight(1, @_);
}

sub ta_add_color_resets {
    my (@text) = @_;

    my @res;
    my $i = 0;
    my $savedc = "";
    for my $text (@text) {
        $i++;
        my $newt = $i > 1 && !$savedc ? "\e[0m" : $savedc;

        # break into chunks
        my @ch = ta_split_codes_single($text);
        while (my ($t, $c) = splice(@ch, 0, 2)) {
            $newt .= $t;
            if (defined($c) && $c =~ /m\z/) {
                $newt .= $c;
                if ($c eq "\e[0m") {
                    $savedc = "";
                } elsif ($c =~ /m\z/) {
                    $savedc .= $c;
                }
            }
        }

        $newt .= "\e[0m" if $savedc && $i < @text;
        push @res, $newt;
    }

    @res;
}

1;
# ABSTRACT: Routines for text containing ANSI escape codes

__END__

=pod

=encoding UTF-8

=head1 NAME

Text::ANSI::Util - Routines for text containing ANSI escape codes

=head1 VERSION

This document describes version 0.16 of Text::ANSI::Util (from Perl distribution Text-ANSI-Util), released on 2015-01-03.

=head1 SYNOPSIS

 use Text::ANSI::Util qw(
     ta_add_color_resets
     ta_detect ta_highlight ta_highlight_all ta_length ta_mbpad ta_mbswidth
     ta_mbswidth_height ta_mbwrap ta_pad ta_split_codes ta_split_codes_single
     ta_strip ta_wrap);

 # detect whether text has ANSI escape codes?
 say ta_detect("red");       # => false
 say ta_detect("\e[31mred"); # => true

 # calculate length of text (excluding the ANSI escape codes)
 say ta_length("red");       # => 3
 say ta_length("\e[31mred"); # => 3

 # calculate visual width of text if printed on terminal (can handle Unicode
 # wide characters and exclude the ANSI escape codes)
 say ta_mbswidth("\e[31mred");  # => 3
 say ta_mbswidth("\e[31m红色"); # => 4

 # ditto, but also return the number of lines
 say ta_mbswidth_height("\e[31mred\n红色"); # => [4, 2]

 # strip ANSI escape codes
 say ta_strip("\e[31mred"); # => "red"

 # split codes (ANSI codes are always on the even positions)
 my @parts = ta_split_codes("\e[31mred"); # => ("", "\e[31m", "red")

 # wrap text to a certain column width, handle ANSI escape codes
 say ta_wrap("....", 40);

 # ditto, but handle wide characters
 say ta_mbwrap(...);

 # pad (left, right, center) text to a certain width
 say ta_pad("foo", 10);                          # => "foo       "
 say ta_pad("foo", 10, "left");                  # => "       foo"
 say ta_pad("foo\nbarbaz\n", 10, "center", "."); # => "...foo....\n..barbaz..\n"

 # ditto, but handle wide characters
 say ta_mbpad(...);

 # truncate text to a certain width while still passing ANSI escape codes
 use Term::ANSIColor;
 my $text = color("red")."red text".color("reset"); # => "\e[31mred text\e[0m"
 say ta_trunc($text, 5);                            # => "\e[31mred t\e[0m"

 # ditto, but handle wide characters
 say ta_mbtrunc(...);

 # highlight the first occurence of some string within text
 say ta_highlight("some text", "ome", "\e[7m\e[31m");

 # ditto, but highlight all occurrences
 say ta_highlight_all(...);

=head1 DESCRIPTION

This module provides routines for dealing with text containing ANSI escape codes
(mainly ANSI color codes).

Current caveats:

=over

=item * All codes are assumed to have zero width

This is true for color codes and some other codes, but there are also codes to
alter cursor positions which means they can have negative or undefined width.

=item * Single-character CSI (control sequence introducer) currently ignored

Only C<ESC+[> (two-character CSI) is currently parsed.

BTW, in ASCII terminals, single-character CSI is C<0x9b>. In UTF-8 terminals, it
is C<0xc2, 0x9b> (2 bytes).

=item * Private-mode- and trailing-intermediate character currently not parsed

=item * Only color reset code \e[0m is recognized

For simplicity, currently multiple SGR (select graphic rendition) parameters
inside a single ANSI escape code is not parsed. This means that color reset code
like C<\e[1;0m> or C<\e[31;47;0m> is not recognized, only C<\e[0m> is. I believe
this should not be a problem with most real-world text out there.

=back

=head1 FUNCTIONS

=head2 ta_detect($text) => BOOL

Return true if C<$text> contains ANSI escape codes, false otherwise.

=head2 ta_length($text) => INT

Count the number of characters in $text, while ignoring ANSI escape codes.
Equivalent to C<< length(ta_strip($text)) >>. See also: ta_mbswidth().

=head2 ta_length_height($text) => [INT, INT]

Like ta_length(), but also gives height (number of lines). For example, C<<
ta_length_height("foobar\nb\n") >> gives [6, 3].

=head2 ta_mbswidth($text) => INT

Return visual width of C<$text> (in number of columns) if printed on terminal.
Equivalent to C<< Text::WideChar::Util::mbswidth(ta_strip($text)) >>. This
function can be used e.g. in making sure that your text aligns vertically when
output to the terminal in tabular/table format.

Note that C<ta_mbswidth()> handles multiline text correctly, e.g.: C<<
ta_mbswidth("foo\nbarbaz") >> gives 6 instead of 3-1+8 = 8. It splits the input
text first against C<< /\r?\n/ >>.

=head2 ta_mbswidth_height($text) => [INT, INT]

Like C<ta_mbswidth()>, but also gives height (number of lines). For example, C<<
ta_mbswidth_height("西爪哇\nb\n") >> gives [6, 3].

=head2 ta_strip($text) => STR

Strip ANSI escape codes from C<$text>, returning the stripped text.

=head2 ta_extract_codes($text) => STR

This is the opposite of C<ta_strip()>, return only the ANSI codes in C<$text>.

=head2 ta_split_codes($text) => LIST

Split C<$text> to a list containing alternating ANSI escape codes and text. ANSI
escape codes are always on the second element, fourth, and so on. Example:

 ta_split_codes("");              # => ()
 ta_split_codes("a");             # => ("a")
 ta_split_codes("a\e[31m");       # => ("a", "\e[31m")
 ta_split_codes("\e[31ma");       # => ("", "\e[31m", "a")
 ta_split_codes("\e[31ma\e[0m");  # => ("", "\e[31m", "a", "\e[0m")
 ta_split_codes("\e[31ma\e[0mb"); # => ("", "\e[31m", "a", "\e[0m", "b")
 ta_split_codes("\e[31m\e[0mb");  # => ("", "\e[31m\e[0m", "b")

so you can do something like:

 my @parts = ta_split_codes($text);
 while (my ($text, $ansicode) = splice(@parts, 0, 2)) {
     ...
 }

=head2 ta_split_codes_single($text) => LIST

Like C<ta_split_codes()> but each ANSI escape code is split separately, instead
of grouped together. This routine is currently used internally e.g. for
C<ta_mbwrap()> and C<ta_highlight()> to trace color reset/replay codes.

=head2 ta_wrap($text, $width, \%opts) => STR

Like L<Text::WideChar::Util>'s wrap() except handles ANSI escape codes. Perform
color reset at the end of each line and a color replay at the start of
subsequent line so the text is safe for combining in a multicolumn/tabular
layout.

Options:

=over

=item * flindent => STR

First line indent. See Text::WideChar::Util for more details.

=item * slindent => STR

First line indent. See Text::WideChar::Util for more details.

=item * tab_width => INT (default: 8)

First line indent. See Text::WideChar::Util for more details.

=item * pad => BOOL (default: 0)

If set to true, will pad each line to C<$width>. This is convenient if you need
the lines padded, saves calls to ta_pad().

=item * return_stats => BOOL (default: 0)

If set to true, then instead of returning the wrapped string, function will
return C<< [$wrapped, $stats] >> where C<$stats> is a hash containing some
information like C<max_word_width>, C<min_word_width>.

=back

Performance: ~500/s on my Core i5 1.7GHz laptop for a ~1KB of text (with zero to
moderate amount of color codes). As a comparison, Text::WideChar::Util's wrap()
can do about 2000/s.

=head2 ta_mbwrap($text, $width, \%opts) => STR

Like ta_wrap(), but it uses ta_mbswidth() instead of ta_length(), so it can
handle wide characters.

Performance: ~300/s on my Core i5 1.7GHz laptop for a ~1KB of text (with zero to
moderate amount of color codes). As a comparison, Text::WideChar::Util's
mbwrap() can do about 650/s.

=head2 ta_add_color_resets(@text) => LIST

Make sure that a color reset command (add C<\e[0m>) to the end of each element
and a replay of all the color codes from the previous element, from the last
color reset) to the start of the next element, and so on. Return the new list.

This makes each element safe to be combined with other array of text into a
single line, e.g. in a multicolumn/tabular layout. An example:

Without color resets:

 my @col1 = split /\n/, "\e[31mred\nmerah\e[0m";
 my @col2 = split /\n/, "\e[32mgreen\e[1m\nhijau tebal\e[0m";

 printf "%s | %s\n", $col1[0], $col2[0];
 printf "%s | %s\n", $col1[1], $col2[1];

the printed output:

 \e[31mred | \e[32mgreen
 merah\e[0m | \e[1mhijau tebal\e[0m

The C<merah> text on the second line will become green because of the effect of
the last color command printed (C<\e[32m>). However, with ta_add_color_resets():

 my @col1 = ta_add_color_resets(split /\n/, "\e[31mred\nmerah\e[0m");
 my @col2 = ta_add_color_resets(split /\n/, "\e[32mgreen\e[1m\nhijau tebal\e[0m");

 printf "%s | %s\n", $col1[0], $col2[0];
 printf "%s | %s\n", $col1[1], $col2[1];

the printed output (C<< <...> >>) marks the code added by ta_add_color_resets():

 \e[31mred<\e[0m> | \e[32mgreen\e[1m<\e[0m>
 <\e[31m>merah\e[0m | <\e[32m\e[1m>hijau tebal\e[0m

All the cells are printed with the intended colors.

=head2 ta_pad($text, $width[, $which[, $padchar[, $truncate]]]) => STR

Return C<$text> padded with C<$padchar> to C<$width> columns. C<$which> is
either "r" or "right" for padding on the right (the default if not specified),
"l" or "left" for padding on the right, or "c" or "center" or "centre" for
left+right padding to center the text.

C<$padchar> is whitespace if not specified. It should be string having the width
of 1 column.

Does *not* handle multiline text; you can split text by C</\r?\n/> yourself.

=head2 ta_mbpad($text, $width[, $which[, $padchar[, $truncate]]]) => STR

Like ta_pad() but it uses ta_mbswidth() instead of ta_length(), so it can handle
wide characters.

=head2 ta_trunc($text, $width) => STR

Truncate C<$text> to C<$width> columns while still including all the ANSI escape
codes. This ensures that truncated text still reset colors, etc.

Does *not* handle multiline text; you can split text by C</\r?\n/> yourself.

=head2 ta_mbtrunc($text, $width) => STR

Like ta_trunc() but it uses ta_mbswidth() instead of ta_length(), so it can
handle wide characters.

=head2 ta_highlight($text, $needle, $color) => STR

Highlight the first occurence of C<$needle> in C<$text> with <$color>, taking
care not to mess up existing colors.

C<$needle> can be a string or a Regexp object.

Implementation note: to not mess up colors, we save up all color codes from the
last reset (C<\e[0m>) before inserting the highlight color + highlight text.
Then we issue C<\e[0m> and the saved up color code to return back to the color
state before the highlight is inserted. This is the same technique as described
in ta_add_color_resets().

=head2 ta_highlight_all($text, $needle, $color) => STR

Like ta_highlight(), but highlight all occurences instead of only the first.

=head1 FAQ

=head2 How do I truncate string based on number of characters?

You can simply use ta_trunc() even on text containing wide characters.
ta_trunc() uses Perl's length() which works on a per-character basis.

=head2 How do I highlight a string case-insensitively?

You can currently use a regex for the C<$needle> and use the C<i> modifier.
Example:

 use Term::ANSIColor;
 ta_highlight($text, qr/\b(foo)\b/i, color("bold red"));

=head1 SEE ALSO

L<Term::ANSIColor>

L<Text::ANSITable> uses this module. In fact, this module was first created
specifically for Text::ANSITable.

http://en.wikipedia.org/wiki/ANSI_escape_code

=head1 HOMEPAGE

Please visit the project's homepage at L<https://metacpan.org/release/Text-ANSI-Util>.

=head1 SOURCE

Source repository is at L<https://github.com/sharyanto/perl-Text-ANSI-Util>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Text-ANSI-Util>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

perlancar <perlancar@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by perlancar@cpan.org.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)