The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package File::CodeSearch::RegexBuilder;

# Created on: 2009-08-07 18:41:21
# Create by:  Ivan Wills
# $Id$
# $Revision$, $HeadURL$, $Date$
# $Revision$, $Source$, $Date$

use Moose;
use warnings;
use version;
use Carp;
use List::MoreUtils qw/any/;
use English qw/ -no_match_vars /;

our $VERSION = version->new('0.7.4');

has regex => (
    is  => 'rw',
);
has re => (
    is  => 'ro',
    isa => 'ArrayRef',
);
has whole => (
    is  => 'ro',
    isa => 'Bool',
);
has all => (
    is  => 'ro',
    isa => 'Bool',
);
has words => (
    is  => 'ro',
    isa => 'Bool',
);
has ignore_case => (
    is  => 'ro',
    isa => 'Bool',
);
has files => (
    is  => 'rw',
    isa => 'HashRef',
    default => sub{{}},
);
has current_file => (
    is  => 'rw',
);
has current_count => (
    is  => 'rw',
    isa => 'Int',
    default => 0,
);
has sub_matches => (
    is  => 'rw',
    isa => 'ArrayRef[Str]',
    default => sub{[]},
);
has sub_match => (
    is  => 'rw',
    isa => 'Bool',
);
has sub_not_matches => (
    is  => 'rw',
    isa => 'ArrayRef[Str]',
    default => sub{[]},
);
has sub_not_match => (
    is  => 'rw',
    isa => 'Bool',
);
has last => (
    is  => 'rw',
    isa => 'ArrayRef[Str]',
);
has lasts => (
    is  => 'rw',
    isa => 'HashRef[Str]',
    default => sub{{}},
);
has smart => (
    is  => 'rw',
    isa => 'Bool',
);

sub make_regex {
    my ($self) = @_;
    return $self->regex if ref $self->regex eq 'Regexp';

    my $re;
    my $words = $self->re;

    my $start = shift @{ $words };
    return $self->regex(qr//) if !defined $start;

    if (!any {$start eq $_} qw/n b ss/) {
        unshift @{ $words }, $start;
        undef $start;
    }

    if ($self->whole) {
        @{$words} = map { "\\b$_\\b" } @{$words};
    }

    if ($self->all) {
        if (@{ $words } == 2 ) {
            $re = "$words->[0].*$words->[1]|$words->[1].*$words->[0]";
        }
        else {
            $re = join ' ', @$words;
        }
    }
    elsif ( $self->words ) {
        $re = join '.*', @{ $words };
    }
    else {
        $re = join ' ', @{ $words };
    }

    if ($self->ignore_case) {
        $re = "(?i:$re)";
    }

    $re =
          !defined $start ? $re
        : $start eq 'n'   ? "function(?:&?\\s+|\\s+&?\\s*)$re|$re\\s+=\\s+function"
        : $start eq 'b'   ? "sub\\s+$re"
        :                   "class\\s+$re";

    return $self->regex(qr/$re/);
}

sub match {
    my ($self, $line) = @_;
    my $re = $self->make_regex;

    $self->check_sub_matches($line);
    $self->check_lasts($line);

    my ($match) = $line =~ /($re)/;

    if (defined $match) {
        $self->current_count( $self->current_count + 1 );
    }

    return $match;
}

sub check_sub_matches {
    my ($self, $line) = @_;
    my $matches = $self->sub_matches;
    my $match = 0;
    my $not_matches = $self->sub_not_matches;
    my $not_match = 0;

    return if $self->sub_match;
    return if $self->sub_not_match;

    for my $match_re (@$matches) {
        $match = $line =~ /$match_re/;
        last if $match;
    }

    $self->sub_match($match);

    for my $not_match_re (@$not_matches) {
        $not_match = $line =~ /$not_match_re/;
        last if $not_match;
    }

    $self->sub_not_match($not_match);

    return;
}

sub check_lasts {
    my ($self, $line) = @_;

    if ($self->last) {
        for my $last (@{ $self->last }) {
            my ($match) =
                  $last eq 'function' ? $line =~ /function \s+ (?: & \s*)? ([\w-]+)/xms
                : $last eq 'class'    ? $line =~ /class \s+ ([\w-]+)/xms
                : $last eq 'sub'      ? $line =~ /sub \s+ ([\w-]+)/xms
                :                       $line =~ /$last \s+ ([\w-]+)/xms;
            $self->lasts->{$last} = $match if $match;
        }
    }

    return;
}

sub get_last_found {
    my ($self) = @_;
    my $out    = '';

    return '' if ! %{$self->lasts};

    for my $last (sort keys %{$self->lasts} ) {
        $out .= "$last " . $self->lasts->{$last} . "\n";
    }

    return $out;
}

sub reset_file {
    my ($self, $file) = @_;
    if ( $self->current_count() && $self->current_file ) {
        $self->files->{$self->current_file} = $self->current_count;
    }

    $self->sub_match(0);
    $self->sub_not_match(0);
    $self->current_count(0);
    $self->current_file($file);
    $self->lasts({});

    return;
}


1;

__END__

=head1 NAME

File::CodeSearch::RegexBuilder - Takes in various options and builds a regular expression to check lines of a file

=head1 VERSION

This documentation refers to File::CodeSearch::RegexBuilder version 0.7.4.

=head1 SYNOPSIS

   use File::CodeSearch::RegexBuilder;

   # Brief but working code example(s) here showing the most common usage(s)
   # This section will be as far as many users bother reading, so make it as
   # educational and exemplary as possible.

=head1 DESCRIPTION

=head1 ATTRIBUTES

=over 4

=item C<regex>

The compiled regex

=item C<re (ArrayRef)>

The strings to compile the regular expression from

=item C<whole (Bool)>

Makes sure each element of C<re> is matched as a whole word

=item C<all (Bool)>

Makes sure that the elements of C<re> are matched in any order (currently only two elements supported)

=item C<words (Bool)>

Match each word separated by arbitrary number of characters (default separation is one space)

=item C<ignore_case (Bool)>

Ignore case in the final regex

=item C<files (HashRef)>

Stores a count of matches in each file

=item C<current_file>

Reference to the current file being searched

=item C<current_count (Int)>

The number of matches found in the currently searched file

=item C<sub_matches (ArrayRef[Str])>

Terms to search on that the file should also contain to be considered to have matched

=item C<sub_match (Bool)>

Stores if a sub match has been found

=item C<sub_not_matches (ArrayRef[Str])>

Terms to search on that the file should not contain to be considered to have matched

=item C<sub_not_match (Bool)>

Stores if a not sub match has been found

=item C<last (ArrayRef[Str])>

A list of types to keep track of for context of a match (eg the last function, class or sub)

=item C<lasts (HashRef[Str])>

The current state of requested "last" types

=item C<smart (Bool)>

Create smart regular expression

=back

=head1 SUBROUTINES/METHODS

=head2 C<make_regex ()>

=head2 C<match ($line)>

=head2 C<sub_matches ($line)>

=head2 C<reset_file ( $file )>

Resets file based counters and adds $file as the new file being processed

=head2 C<check_sub_matches ( $line )>

Checks that $line matches any specified sub matches

=head2 C<check_lasts ( $line )>

Checks if the line matches a block start signature eg checks if we are starting
a sub, function or class so that any matches in that block can be identified as
coming from there.

=head2 C<get_last_found ()>

Returns the last match block

=head1 DIAGNOSTICS

=head1 CONFIGURATION AND ENVIRONMENT

=head1 DEPENDENCIES

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

There are no known bugs in this module.

Please report problems to Ivan Wills (ivan.wills@gmail.com).

Patches are welcome.

=head1 AUTHOR

Ivan Wills - (ivan.wills@gmail.com)

=head1 LICENSE AND COPYRIGHT

Copyright (c) 2009 Ivan Wills (14 Mullion Close, Hornsby Heights, NSW Australia 2077).
All rights reserved.

This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself. See L<perlartistic>.  This program is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.

=cut