The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

=begin metadata

Name: pr
Description: convert text files for printing
Author: Clinton Pierce, clintp@geeksalad.org
License: perl

=end metadata

=cut


#
# pr -- print formatter
#
# Notes and ToDo's:
#   There are probably bugs in the option processing.
#   -e and -i are not implemented.  I didn't understand the description,
#       didn't see the point, and was tired of looking at it.
#   Some implementations of pr(1) (notably AIX's) have a column-smart folding
#       built in.  Not implemented here, but would be nice.
#
#   The version I tested against seemed to adjust page-length automagically
#   according to screen-length if output was to a tty.  Cute and usable, but
#   not implemented.
#   The BSD manual says that input formfeed characters cause a page-feed.
#       I could not reliably get this to happen with the BSD code to test,
#       so it's left unimplemented, but it shouldn't be hard--I just didn't
#       know what it should look like.
#

use Data::Dumper;
use FileHandle;

use strict;

my $length=66;		# Total page length   (Int)
my $trailer=1;		# 5 lines of header; 5 lines of trailer (Bool)
my $multimerge=0;		# side-by-side files  (Bool)
my $columns=1;		# number of columns   (Int)
my $pagewidth=0; 		# Default page width; to start  (Int)
my $offsetspaces=0;	# chars at beginning of line (Char/Bool)
my $doublespace=0;		# whether to double space
my $number=0;		# number the lines; how high?  (Int/Bool)
my $startpageno=1;		# starting page no
my $header="";		# optional header text (String/Bool)
my $formfeed=0;		# Use formfeeds instead of spaces. (Bool)
my $quietskip=0;		# Ignore unopened files (Bool)
my $column_sep="";		# specified column separator	(Bool/Char)
my $roundrobin=0;		# across not down.  STR   (Bool)

# Constants that are useful
my $trailerlength=5;
my $numberchar="\t";

my $curfile="";
my(@FINFO, @COLINFO);

#
# Process the arguments by hand because of pre-getopt nonsense like "-2",
# "-s-" and "-n+5".  Grr...
#
OPTION:
while (@ARGV && $ARGV[0] =~ /^-(.+)/ && (shift, ($_ = $1), 1)) {
    next OPTION unless length;

    # Lousy options
    if (s/^[ei]//) {
	shift;      # Skipped, accepted for comptability
	redo OPTION;
    }
    if (s/^s(.)//) {
	$column_sep=$1;
	redo OPTION;
    }
    if (s/^n(\D)?(\d+)//) {
	$number=$2;
	$numberchar=$1;
	$numberchar="\t" if (! $numberchar);
	redo OPTION;
    }


    # Simple flags
    if (s/^m//) { warn "-m flag already set" if $multimerge++; redo OPTION; }
    if (s/^s//) { warn "-a flag already set" if $roundrobin++; redo OPTION; }
    if (s/^d//) { warn "-d flag already set" if $doublespace++; redo OPTION; }
    if (s/(^F)//i) { warn "-$1 flag already set" if $formfeed++; redo OPTION; }
    if (s/^r//) { warn "-r flag already set" if $quietskip++; redo OPTION; }
    if (s/^t//) {
	warn "-t flag already set" unless ( $trailer);
	$trailer=0;
	redo OPTION;
    }

    # normal "-opt value" arguments
    if (s/^w//) {
	$pagewidth=shift;
	redo OPTION;
    }

    if (s/^o//) {
	warn "-o option already used" if $offsetspaces;
	$offsetspaces=shift;
	redo OPTION;
    }

    if (s/^l//) {
	$length=shift;
	redo OPTION;
    }

    if (s/^h//) {
	warn "-h option already used" if $header;
	$header=shift;
	redo OPTION;
    }

    # Accept -2, -3, etc...
    if (s/^(\d.*)//) {
        $columns = $1;
        next OPTION;
    }

    usage("unexpected option: -$_");
}

# One more option
if (@ARGV and $ARGV[0]=~/^\+(\d+)/) {
	$startpageno=$1;
	shift @ARGV;
}

if (! $column_sep) {
	$pagewidth=72;
} else {
	$pagewidth=512;
}


#
# Initialize file and column structures
#
my @files;
my $pageno=$startpageno;

foreach(1..$columns) {
	push(@COLINFO, &create_col);
}
foreach(@ARGV) {
	my $fh=new FileHandle "$_", "r";
	if (! $fh) {
		next if ($quietskip);
		die "$0: Can't open $_";
	}
	push(@FINFO, {  name  => $_,
			handle=> $fh,
			lineno=> 0,
			});
}
#
# MAIN
#
if ($roundrobin) {   # Across the columns fill, pagebreak on EOF or end
	foreach my $fstruct (@FINFO) {
NEXTREAD:	while(! $$fstruct{handle}->eof) {
			foreach my $col (@COLINFO) {
				if (! &fill_column_1($col, $fstruct)) {
					&printpage;
					next NEXTREAD;
				}
			}
		}
		&printpage;
	}
} elsif ($multimerge) {	# Down the columns, one file per column
	while (&stillhavefiles) {
		my $i=0;
		foreach my $col (@COLINFO) {
			my $fstruct=$FINFO[$i];
			if (! $$fstruct{handle}->eof) {
				fill_column($col, $fstruct);
			}
			$i++;
		}
		&printpage;
	}
} else {	# Down the columns fill, pagebreak on EOF or end.
NEXTFILE: foreach my $fstruct (@FINFO) {
		while(!$$fstruct{handle}->eof) {
			foreach my $col (@COLINFO) {
				if (! &fill_column($col, $fstruct)) {
					&printpage;
					next NEXTFILE;
				}
			}
			&printpage;
		}
	}
}

exit 0;

sub usage {
	print STDERR @_, "\n";
	print STDERR <<USAGE;
$0 [-columns] [+page} [-adFfrts] [-n[char][count]] [-schar] [-ei] [-w width]
	[-o count] [-l length] [-h text] files
$0 -m [+page} [-adFfrts] [-n[char][count]] [-schar] [-ei] [-w width]
	[-o count] [-l length] [-h text] files
USAGE
	exit 1;
}

#
# FUNCTIONS
#
sub create_col  {
	my($col)=@_;

	my $pagelen=$length-($trailerlength*2);
	if($doublespace) {
		$pagelen=($pagelen%2 == 0)?$pagelen/2:int($pagelen/2)+1;
	}
	return({ height => $pagelen,
		 oheight=> $pagelen,
		 cfile =>"",
		 text => [],
		});
}
sub stillhavefiles {
	my $eof=0;
	foreach my $fstruct (@FINFO) {
		if (! $$fstruct{handle}->eof) {
			$eof++;
		}
	}
	return $eof;
}

sub fill_column {
	my($col,$fstruct)=@_;	# Column structure, and a filehandle

	while( $$col{height} ){
		if (! $$fstruct{handle}->eof) {
			fill_column_1($col, $fstruct);
		} else {
			return;
		}
	}
	return 1;
}
sub fill_column_1 {
	my($col, $fstruct)=@_;

	return if (! $$col{height});
	my $foo=$$fstruct{handle};  # Couldn't read it directly.  Syntax?
	my $line=<$foo>;
	chomp $line;

	# BSD Manual LIES.  Formfeeds are treated strangely, but experimentation
	# shows they don't do much.  At least in /usr/xpgs/bin/pr under Solaris,
	# which claims to be POSIX, which should work the same as BSD, no?.
	$line=~s/\f//g;

	$$col{cfile}=$$fstruct{name};    # In multi-merge, this is useless.
	push(@{$$col{text}}, { text => $line, lineno => ++$$fstruct{lineno} });
	$$col{height}--;

	return 1;
}


sub print_header {
	my($col)=@_;		# the current column.
	return if (!$trailer);

	print "\n\n", scalar(localtime), " ";
	if ($header) {
		print "$header ";
	} else {
		if (! $multimerge) {
			if ($curfile ne $$col{cfile}) {
				$pageno=$startpageno;
				$curfile=$$col{cfile};
			}
			print $$col{cfile}, " ";
		}
	}
	print "Page ", $pageno++, "\n\n\n";
}

sub print_footer {
	return if (!$trailer);
	if ($formfeed) {print chr(12); } else { print "\n"x5;}
}

#
# Most of the horizontal output-options are exercised here.
#
sub printpage {

	# option -o does not factor here.
	my $colwidth=$pagewidth/scalar(@COLINFO);
	if ($number) {
		$colwidth-=(length($numberchar)+$number);
	}

	print_header($COLINFO[0]);
	foreach my $line (1..$COLINFO[0]{oheight}) {
		print "O"x${offsetspaces} if ($offsetspaces);
		foreach my $column (@COLINFO) {
			my $pfmt;
			my $numbering="";
			next if (! exists $$column{text}[$line-1]{lineno});
			if ($number) {
				$pfmt="%${number}s";
				$numbering=sprintf("$pfmt", $$column{text}[$line-1]{lineno});
				# Truncate off left hand side
				$numbering=substr($numbering, length($numbering)-$number, $number);
			}
			print $numbering;
			print $numberchar if ($number);

			if (!$column_sep) {
				$pfmt="%-${colwidth}s";
				printf("$pfmt", $$column{text}[$line-1]{text});
			} else {
				print $$column{text}[$line-1]{text}, $column_sep;
			}
		}
		print "\n"x($doublespace+1);
	}
	print_footer;

	# Re-create blank page.
	@COLINFO=();
	foreach(1..$columns) { push(@COLINFO, &create_col); }
}

=pod

=head1 NAME

pr - convert text files for printing

=head1 SYNOPSIS

C<pr [-columns] [+page} [-adFfrts] [-n[char][count]] [-schar] [-ei] [-w width]>
	C<[-o count] [-l length] [-h text] files>

C<pr -m [+page} [-adFfrts] [-n[char][count]] [-schar] [-ei] [-w width]>
	C<[-o count] [-l length] [-h text] files>

=head1 DESCRIPTION

I<pr> formats text into fixed-length pages with headers, multiple columns and
other options suitable for hardcopy output.

=head2 OPTIONS

I<pr> accepts the following option:

=over 4

=item -columns

Causes I<pr> to print text in columns down the page.  Note that I<pr> does not attempt
to fold lines which are too long to fit in their column and long lines and control characters
will affect output.  Column width decreases as the number of columns goes up.  I<-w>
should be used to make the page wider.

=item +page

Begin numbering pages at page number I<page>.

=item -a

Multiple columns are filled from left-to-right down the page.

=item -m

Each input file is given a column in the output, and printing continues until
all input files are exhausted.

=item -d

Output is double-spaced

=item -f

Formfeed characters are used instead of trailing blank lines to control
page length.  I<-F> is a synonym for I<-f>.

=item -r

Do not print a message if files cannot be opened

=item -t

Suppress the 5-line header and 5-line trailer on each page

=item -sB<char>

The single character B<char> will be used to separate multi-column output.
Note that the columns are no-longer fixed-width with this option.

=item -n[B<char>[B<count>]]

Precede each column with a line number.  B<count> is the desired width of
the line numbering (numbers too large are truncated).  B<char> can be used to
separate numbers from the output line, tab is used if B<char> is not specified.

=item -e

Not implemented in this version, accepted for comptability.

=item -i

Not implemented in this version, accepted for comptability.

=item -w B<width>

Width of the page, in characters.  Note that excessively long lines are not folded or
truncated--they simply keep printing.  This may foul column alignment.  Defaults to 72.

=item -l B<length>

Length of the page, in lines.  Defaults to 66.

=item -o B<count>

Offset each line with a left margin B<count> characters wide.  This is in addition to
the width specified with the I<-w> option.

=item -h B<text>

Use B<text> in the header for each file, instead of the file name

=back

=head1 BUGS

The I<-e> and I<-i> switches are not implemented in this version.

Input which contains carriage returns, or other ASCII control characters will
affect the alignment of columns, and may affect page-length counts.  This includes
overprinting and backspaces.

Excessively long lines will cause I<pr> to mis-align columns in multi-column
output.

=head1 AUTHOR

The Perl implementation of I<pr> was written by Clinton Pierce, I<clintp@geeksalad.org>.

=head1 COPYRIGHT and LICENSE

This program is Copyright 1999, by Clinton Pierce.

Freely redistributable under the Perl Artistic License.

=cut