The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
##############################################################################
# Tool for using regular expressions against the contents of files in a tar
# archive.  See 'ptargrep --help' for more documentation.
#

use strict;
use warnings;

use Pod::Usage   qw(pod2usage);
use Getopt::Long qw(GetOptions);
use Archive::Tar qw();
use File::Path   qw(mkpath);

my(%opt, $pattern);

if(!GetOptions(\%opt,
    'basename|b',
    'ignore-case|i',
    'list-only|l',
    'verbose|v',
    'help|?',
)) {
    pod2usage(-exitval => 1,  -verbose => 0);
}


pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};

pod2usage(-exitval => 1,  -verbose => 0,
    -message => "No pattern specified",
) unless @ARGV;
make_pattern( shift(@ARGV) );

pod2usage(-exitval => 1,  -verbose => 0,
    -message => "No tar files specified",
) unless @ARGV;

process_archive($_) foreach @ARGV;

exit 0;


sub make_pattern {
    my($pat) = @_;

    if($opt{'ignore-case'}) {
        $pattern = qr{(?im)$pat};
    }
    else {
        $pattern = qr{(?m)$pat};
    }
}


sub process_archive {
    my($filename) = @_;

    _log("Processing archive: $filename");
    my $next = Archive::Tar->iter($filename);
    while( my $f = $next->() ) {
        next unless $f->is_file;
        match_file($f) if $f->size > 0;
    }
}


sub match_file {
    my($f)   = @_;
    my $path = $f->name;
    my $prefix = $f->prefix;
    if (defined $prefix) {
        $path = File::Spec->catfile($prefix, $path);
    }

    _log("filename: %s  (%d bytes)", $path, $f->size);

    my $body = $f->get_content();
    if($body !~ $pattern) {
        _log("  no match");
        return;
    }

    if($opt{'list-only'}) {
        print $path, "\n";
        return;
    }

    save_file($path, $body);
}


sub save_file {
    my($path, $body) = @_;

    _log("  found match - extracting");
    my($fh);
    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
    if($dir and not $opt{basename}) {
        _log("  writing to $dir/$file");
        $dir =~ s{\A/}{./};
        mkpath($dir) unless -d $dir;
        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
    }
    else {
        _log("  writing to ./$file");
        open $fh, '>', $file or die "open($file): $!";
    }
    print $fh $body;
    close($fh);
}


sub _log {
    return unless $opt{verbose};
    my($format, @args) = @_;
    warn sprintf($format, @args) . "\n";
}


__END__

=head1 NAME

ptargrep - Apply pattern matching to the contents of files in a tar archive

=head1 SYNOPSIS

  ptargrep [options] <pattern> <tar file> ...

  Options:

   --basename|-b     ignore directory paths from archive
   --ignore-case|-i  do case-insensitive pattern matching
   --list-only|-l    list matching filenames rather than extracting matches
   --verbose|-v      write debugging message to STDERR
   --help|-?         detailed help message

=head1 DESCRIPTION

This utility allows you to apply pattern matching to B<the contents> of files
contained in a tar archive.  You might use this to identify all files in an
archive which contain lines matching the specified pattern and either print out
the pathnames or extract the files.

The pattern will be used as a Perl regular expression (as opposed to a simple
grep regex).

Multiple tar archive filenames can be specified - they will each be processed
in turn.

=head1 OPTIONS

=over 4

=item B<--basename> (alias -b)

When matching files are extracted, ignore the directory path from the archive
and write to the current directory using the basename of the file from the
archive.  Beware: if two matching files in the archive have the same basename,
the second file extracted will overwrite the first.

=item B<--ignore-case> (alias -i)

Make pattern matching case-insensitive.

=item B<--list-only> (alias -l)

Print the pathname of each matching file from the archive to STDOUT.  Without
this option, the default behaviour is to extract each matching file.

=item B<--verbose> (alias -v)

Log debugging info to STDERR.

=item B<--help> (alias -?)

Display this documentation.

=back

=head1 COPYRIGHT

Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut