The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

# The standard fold(1), implemented in Perl
#   copyright 1999, Clinton A. Pierce
# Freely redistributable under the Perl Artistic License

# Severely hacked at by Tom Christiansen:
#   code reformatting & rearranging, simplification, beautification
#   added pragmata
#   "screaming" code
#   argument parsing
#   pod documentation

use strict;
use locale;             # for what a space is.

my(
    $Byte_Only,         # if they said -b, ignore tabs etc
    $Space_Break,       # they said -s, so look for white space breaks
    $Width,             # screen size
    $Tabstop,           # tab stops are every Tabstop
);

END {
    close STDOUT            || die "$0: can't close stdout: $!\n";
    $? = 1 if $? == 255;    # from die
} 

$Tabstop  =  8;   # sane tab stops
$Width    = 80;   # default screen size

sub usage {    
    warn "$0: @_\n" if @_;
    die <<USAGE
$0 [-bs] [-w I<width> | -<width>] args
    -s         split lines on whitespace where possible
    -b         count bytes, not characters
    -w WIDTH   maximum length of lines on output
    -WIDTH     maximum length of lines on output (archaic form)
USAGE
}

# do this by hand, because we don't like $opt_80, $opt_132, etc.
# and we want to check for dups.  --tchrist
OPTION:
while (@ARGV && $ARGV[0] =~ /^-(.+)/ && (shift, ($_ = $1), 1)) {

    next OPTION unless length;

    if (s/^b//) { 
        warn "-b flag already set" if $Byte_Only++;
        redo OPTION;
    }

    if (s/^s//) { 
        warn "-s flag already set" if $Space_Break++;
        redo OPTION;
    }

    # historical practice makes -72 and -w 72 the same
    if (s/^(\d.*)// || s/^w(.*)//) { 
        $Width = $1 || shift; 
        next OPTION;
    }

    usage("unexpected option: -$_");
} 

unless ($Width && $Width =~ /^\d+$/) { 
    usage("illegal width value `$Width'");
}
if ($Space_Break && $Width < $Tabstop) {
    usage("width must be greater than $Tabstop with the -s option");
}

unshift(@ARGV, '-') unless @ARGV;
for (@ARGV) {
    fold_file($_);
}

exit;

########

# If we are not in byte-only mode, we have to calculate
# the new column based on the spec.  This is superslow.
sub adj_col {
    my($col, $char) = @_;

    die "XXX: called while byte count set" if $Byte_Only;

    # algorithm from BSD fold  --tchrist
    if    ($char eq "\b") { $col-- if $col } 
    elsif ($char eq "\r") { $col = 0; } 
    elsif ($char eq "\t") { $col += $Tabstop - ($col % $Tabstop) } 
    else                  { $col++ }

    return $col;
}

# run fold on a given file
sub fold_file { 
    my($filename) = @_;
    my($column, $char, $output);

    $column = 0; 
    open(INPUT, $filename) || die "Cannot open $filename: $!\n"; 

    # the following hack allows us to dispense with the 
    # slow getc() and the hairy adj_col() code because we
    # don't care about \t and \b anymore. This small adjustment
    # provides a screaming 3,000% speedup, so seems worth it!
    #   --tchrist

    if ($Byte_Only) { 
        my $soft  = "(.{0,$Width})(?=\b.)";   # XXX: \b != \s
        my $hard  = "(.{$Width})(?=.)";
        if ($Space_Break) {
            while (<INPUT>) {
              (s/$soft//o || s/$hard//o), print "$1\n" while length > $Width;
              print;
            }
        } else {
            s/$hard/$1\n/go, print while <INPUT>;   # SCREAM
        }
        close(INPUT) || die "can't close $filename: $!";
        return;
    }

CHAR:   # bytewise processing.  The horror! The horror!
    while (defined($char = getc(INPUT))) {
    
        if ($char eq "\n") {
            print $output, "\n";
            $output = "";
            $column = 0;
            next CHAR;
        }

ADJUST: {
        $column = adj_col($column, $char);
        if ($column > $Width) {     
            if ($Space_Break) {
                for (my $i = length($output); $i >= 0; $i--) {
                    if (substr($output, $i, 1) =~ /\s/) {
                        print substr($output, 0, $i+1), "\n";
                        $output = substr($output, $i+1);
                        for ($column = $i = 0; $i < length($output); $i++) {
                            $column = adj_col($column, substr($output, $i, 1));
                        }
                        redo ADJUST;
                    } 
                }
                print $output, "\n";
                $output = "";  
                $column = 0;
                redo ADJUST;
            } else {
                print "$output\n";
                $output = $char;
                $column = adj_col(0, $char);
            }
        } else {
            $output .= $char;
        }
      }  # ADJUST goto
    }
    close(INPUT) || die "can't close $filename: $!";
}

__END__

=head1 NAME

fold - wrap each input line to fit specified width

=head1 SYNOPSIS

B<fold> [B<-bs>] [B<-w> I<width>] [I<file> ...]

=head1 DESCRIPTION

The I<fold> command reads lines from the specified files (or standard
input if none are specified) and writes them to the standard output with
newlines inserted into lines longer than the specified column width.

The default column width is 80, but this may be overridden using the
B<-w> flag.  For historical reasons, the width may be specified directly,
as in C<fold -72>, omitting the B<-w>.

The B<-s> flag causes breaks to occur after whitespace rather than in
the middle of a word.  This produces a ragged right edge, but is much
nicer to look at.

The B<-b> flag makes the program ignore embedded backspaces, tabs, and
carriage returns when deciding where to split.  This makes it run about
thirty times faster.  You might want to get used to using B<-b>.

Current locale settings will be honored in determining what 
is meant by "whitespace" and "word characters".

=head1 BUGS

POSIX 1003.2 states that a newline will never be inserted
immediately before or after a backspace or a carriage return,
but this is not checked for.

=head1 SEE ALSO

expand(1), fmt(1)

=head1 AUTHORS

Clinton Pierce and Tom Christiansen.

This code is freely modifiable and freely redistributable under Perl's
Artistic License.