The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package File::CodeSearch;

# Created on: 2009-08-07 18:32:44
# Create by:  Ivan Wills
# $Id$
# $Revision$, $HeadURL$, $Date$
# $Revision$, $Source$, $Date$

use Moose;
use warnings;
use version;
use autodie;
use English qw/ -no_match_vars /;
use IO::Handle;
use File::chdir;
use File::TypeCategories;
use Clone qw/clone/;
use Path::Tiny;

our $VERSION = version->new('0.7.2');

has regex => (
    is       => 'rw',
    isa      => 'File::CodeSearch::RegexBuilder',
    required => 1,
);
has files => (
    is      => 'rw',
    isa     => 'File::TypeCategories',
    default => sub { File::TypeCategories->new },
);
has recurse => (
    is      => 'rw',
    isa     => 'Bool',
    default => 1,
);
has breadth => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
has depth => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
has quiet => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
has suround_before => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);
has suround_after => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);
has limit => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);
has links => (
    is       => 'rw',
    isa      => 'HashRef',
    default  => sub{{}},
    init_arg => undef,
);
has found => (
    is       => 'ro',
    isa      => 'Int',
    default  => 0,
    writer   => '_found',
    init_arg => undef,
);

sub search {
    my ($self, $search, @dirs) = @_;

    for my $dir (@dirs) {
        $self->_find($search, $dir);
    }

    return;
}

sub _find {
    my ($self, $search, $dir, $parent) = @_;
    my @files;
    $dir =~ s{/$}{};

    # check if we have a directory and we can change into it
    return if !-d $dir || !-r $dir || !-x $dir;

    {
        local $CWD = $dir;
        opendir my $dirh, '.';
        @files = sort _alpha_num grep { $_ ne '.' && $_ ne '..' } readdir $dirh;

        if ($self->breadth) {
            @files = sort _breadth @files;
        }
        elsif ($self->depth) {
            @files = sort _depth @files;
        }
    }

    $dir = $dir eq '.' ? '' : "$dir/";

    FILE:
    for my $file (@files) {
        next FILE if !$self->files->file_ok("$dir$file");
        last FILE if $self->limit && $self->found >= $self->limit;

        if (-l "$dir$file") {
            next FILE if !$self->files->symlinks;

            my $real = path("$dir$file");
            $real = $real->realpath;
            $self->links->{$real} ||= 0;

            next FILE if $self->links->{$real}++;
        }
        if (-d "$dir$file") {
            if ($self->recurse) {
                $self->_find( $search, "$dir$file", $parent || $dir );
            }
        }
        else {
            $self->search_file( $search, "$dir$file", $parent || $dir );
        }
    }

    return;
}

sub _alpha_num {
    my $a1 = $a;
    my $b1 = $b;
    $a1 =~ s/(\d+)/sprintf "%5d", $1/exms;
    $b1 =~ s/(\d+)/sprintf "%5d", $1/exms;
    return $a1 cmp $b1;
}
sub _breadth {
    return
          -f $a && -d $b ? 1
        : -d $a && -f $b ? -1
        :                                0;
}
sub _depth {
    return
          -f $a && -d $b ? -1
        : -d $a && -f $b ? 1
        :                                0;
}

sub search_file {
    my ($self, $search, $file, $parent) = @_;

    open my $fh, '<', $file or $self->_message(file => $file, $OS_ERROR) and return;

    $self->regex->reset_file;
    $self->regex->current_file($file);
    my $before_max = $self->suround_before;
    my $after_max  = $self->suround_after;
    my @before;
    my @after;
    my @lines;
    my $found = undef;
    my %args = ( codesearch => $self, before => \@before, after => \@after, lines => \@lines, parent => $parent );
    my @sub_matches;
    my $post;

    LINE:
    while ( my $line = <$fh> ) {
        if ( $self->regex->isa('File::CodeSearch::Replacer') ) {
            push @lines, $line;
        }
        if (!defined $found) {
            push @before, $line;
            shift @before if @before > $before_max + 1;
        }
        elsif ($found) {
            push @after, $line;
            if (@after > $after_max) {
                undef $found;
            }
        }

        last LINE if @{$self->regex->sub_not_matches} && $self->regex->sub_not_match;

        next LINE if !$self->regex->match($line);

        pop @before;
        pop @after if $args{last_line_no} && $fh->input_line_number - $args{last_line_no} > $after_max - 1;

        if (@{$self->regex->sub_matches}) {
            push @sub_matches, clone [ $line, $file, $fh->input_line_number, %args ];
        }
        else {
            $self->_found( $self->found + 1 );
            $post = $search->($line, $file, $fh->input_line_number, %args);
            last LINE if $self->limit && $self->found >= $self->limit;
        }

        $args{last_line_no} = $fh->input_line_number;
        @after = ();
        $found = 1;
    }

    if ( @{$self->regex->sub_matches} && $self->regex->sub_match ) {
        SUB:
        for my $args (@sub_matches) {
            $self->_found( $self->found + 1 );
            $post = $search->( @$args );
            last SUB if $self->limit && $self->found >= $self->limit;
        }
    }

    # check if the line is an after match
    if (
        $post
        || (
            @after
            && (
                ! @{$self->regex->sub_matches}
                || $self->regex->sub_match
            )
        )
    ) {
        pop @after if $args{last_line_no} && $fh->input_line_number - $args{last_line_no} > $after_max - 1;
        @before = ();
        $self->_found( $self->found + 1 );
        $search->(undef, $file, $fh->input_line_number, %args);
    }

    return;
}

sub _message {
    my ($self, $type, $name, $error) = @_;

    if ( !$self->quiet ) {
        warn "Could not open the $type '$name': $error\n";
    }

    return 1;
}

1;

__END__

=head1 NAME

File::CodeSearch - Search file contents in code repositories

=head1 VERSION

This documentation refers to File::CodeSearch version 0.7.2.

=head1 SYNOPSIS

   use File::CodeSearch;

   # Simple usage
   code_search {
       my ($file, $line) = @_;
       # do stuff
   },
   @dirs;

   # More control
   my $cs = File::CodeSearch->new();
   $cs->code_search(sub {}, @dirs);

=head1 DESCRIPTION

=head1 SUBROUTINES/METHODS

=head2 C<new ( %params )>

B<Parameters>:

=over 4

=item C<regex> - L<File::CodeSearch::RegexBuilder>

This is the object that handles the testing of individual lines in a file
and must be created with the search options desired, note you can also use
the C<F::C::Highlighter> and C<F::C::Replacer> modules interchangeably with
C<F::C::RegexBuilder>.

=item C<files> - L<File::TypeCategories>

If you desire to limit files by file type, name, symlink status pass this
object, other wise a default object will be created.

=item C<recurse> - Bool

Set to false to not recurse into sub directories.

=item C<breadth> - Bool

Changes the search order to breadth first i.e. the searching will search all
the ordinary files in a directory before searching the directories. The
default is to search directories when they are found.

=item C<depth> - Bool

Changes the search order to depth first i.e. the searching will search all the
sub directories in a directory before searching the ordinary files. The
default is to search directories when they are found. If both C<breadth> and
C<depth> are both true C<breadth> will be used.

=item C<suround_before> - Int

Specifies the maximum number of lines before a match is found that should be
passed to the searching code reference.

=item C<suround_after> - Int

Specifies the maximum number of lines after a match is found that should be
passed to the searching code reference. B<Note> the after match lines are
passed to the next matched line in a file or to a call at the end of a file
with matches.

=item C<limit> - Int

Stops matching after C<limit> matches have been found across all files that
have been searched.

=back

B<Return>: C<File::CodeSearch> - new object

B<Description>: Creates & configure a C<File::CodeSearch> object.

=head2 C<search ( $search, @dirs )>

B<Arguments>:

=over 4

=item C<$search> - code ref

Subroutine to be executed each time a match in a file is found.

The subroutine should have accept parameters as

 $search->($line, $file, $line_number, %named);

=over 4

=item C<$line> - string

The line from the file that was matched by C<regex>. If searching with
C<after> set this may be undefined when called with the lines found after
the last match at the end of the file.

=item C<$file>

The file name that the line was found in (relative to the supplied directory

=item C<$line_number>

The line number in the said file

=item C<%named>

This contains all the other helpful values

=over 4

=item C<codesearch> - C<F::CodeSearch>

The object that is doing the searching.

=item C<before> - ArrayRef

An array of lines that were found before the matched line.

=item C<after> - ArrayRef

An array of lines that were found after the last matched line.

=item C<lines> - ArrayRef

An array of lines of the file. This is only present if C<regex> is a
C<F::C::Replacer> object.

=item C<parent> - path

The parent path from @path

=back

=back

=item C<@dir> - paths

An array of the directory paths to search through.

=back

B<Return>:

B<Description>:

=head2 C<search_file ( $search, $file, $parent )>

B<Param>:

=over 4

=item C<$search> - CodeRef

See C<search> above for details.

=item C<$file> - file

A file to search through line by line

=item C<$parent> - path

The directory from @dirs which the file was found in

=back

B<Description>: Searches an individual file for matches.

=head1 DIAGNOSTICS

=head1 CONFIGURATION AND ENVIRONMENT

=head1 DEPENDENCIES

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

There are no known bugs in this module.

Please report problems to Ivan Wills (ivan.wills@gmail.com).

Patches are welcome.

=head1 AUTHOR

Ivan Wills - (ivan.wills@gmail.com)

=head1 LICENSE AND COPYRIGHT

Copyright (c) 2009-2011 Ivan Wills (14 Mullion Close, Hornsby Heights, NSW Australia 2077).
All rights reserved.

This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself. See L<perlartistic>.  This program is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.

=cut