The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (C) 2016-2017 Guido Flohr <guido.flohr@cantanea.com>,
# all rights reserved.

# This file is distributed under the same terms and conditions as
# Perl itself.

# This next lines is here to make Dist::Zilla happy.
# ABSTRACT: Perl Globstar (double asterisk globbing) and utils

package File::Globstar;
$File::Globstar::VERSION = '0.5';
use strict;

use Locale::TextDomain qw(File-Globstar);
use File::Glob qw(bsd_glob);
use Scalar::Util 1.21 qw(reftype);
use File::Find;

use base 'Exporter';
use vars qw(@EXPORT_OK);
@EXPORT_OK = qw(globstar fnmatchstar translatestar quotestar pnmatchstar);

use constant RE_NONE => 0x0;
use constant RE_NEGATED => 0x1;
use constant RE_FULL_MATCH => 0x2;
use constant RE_DIRECTORY => 0x4;

# Remember what Scalar::Util::reftype() returns for a compiled regular
# expression.  It should normally be 'REGEXP' but with Perl 5.10 (or
# maybe older) this seems to be an empty string.  In this case, the
# check in pnmatchstar() whether it received a compiled regex will be
# rather weak ...
my $test_re = qr/./;
my $regex_type = reftype $test_re;

sub _globstar;
sub pnmatchstar;

sub empty($) {
    my ($what) = @_;

    return if defined $what && length $what;

    return 1;
}

sub _find_files($) {
    my ($directory) = @_;

    my $empty = empty $directory;
    $directory = '.' if $empty;

    my @hits;
    File::Find::find sub {
        return if -d $_;
        return if '.' eq substr $_, 0, 1;
        push @hits, $File::Find::name;
    }, $directory;

    if ($empty) {
        @hits = map { substr $_, 2 } @hits;
    }

    return @hits;
}

sub _find_directories($) {
    my ($directory) = @_;

    my $empty = empty $directory;
    $directory = '.' if $empty;

    my @hits;
    File::Find::find sub {
        return if !-d $_;
        return if '.' eq substr $_, 0, 1;
        push @hits, $File::Find::name;
    }, $directory;

    if ($empty) {
        @hits = map { substr $_, 2 } @hits;
    }

    return @hits;
}

sub _find_all($) {
    my ($directory) = @_;

    my $empty = empty $directory;
    $directory = '.' if $empty;

    my @hits;
    File::Find::find sub {
        return if '.' eq substr $_, 0, 1;
        push @hits, $File::Find::name;
    }, $directory;

    if ($empty) {
        @hits = map { substr $_, 2 } @hits;
    }

    return @hits;
}

sub _globstar($$;$) {
    my ($pattern, $directory, $flags) = @_;

    $directory = '' if !defined $directory;
    $pattern = $_ if !@_;

    if ('**' eq $pattern) {
        return _find_all $directory;
    } elsif ('**/' eq $pattern) {
        return map { $_ . '/' } _find_directories $directory;
    } elsif ($pattern =~ s{^\*\*/}{}) {
        my %found_files;
        foreach my $directory ('', _find_directories $directory) {
            foreach my $file (_globstar $pattern, $directory, $flags) {
                $found_files{$file} = 1;
            }
        }
        return keys %found_files;
    }

    my $current = $directory;
 
    # This is a quotemeta() that does not escape the slash and the
    # colon.  Escaped slashes confuse bsd_glob() and escaping colons
    # may make a full port to Windows harder.
    $current =~ s{([\x00-\x2e\x3b-\x40\x5b-\x60\x7b-\x7f])}{\\$1}g;
    if ($directory ne '' && '/' ne substr $directory, -1, 1) {
        $current .= '/';
    }
    while ($pattern =~ s/(.)//s) {
        if ($1 eq '\\') {
            $pattern =~ s/(..?)//s;
            $current .= $1;
        } elsif ('/' eq $1 && $pattern =~ s{^\*\*/}{}) {
            $current .= '/';

            # Expand until here.
            my @directories = bsd_glob $current, $flags;

            # And search in every subdirectory;
            my %found_dirs;
            foreach my $directory (@directories) {
                $found_dirs{$directory} = 1;
                foreach my $subdirectory (_find_directories $directory) {
                    $found_dirs{$subdirectory . '/'} = 1;
                }
            }

            if ('' eq $pattern) {
                my %found_subdirs;
                foreach my $directory (keys %found_dirs) {
                    $found_subdirs{$directory} = 1;
                    foreach my $subdirectory (_find_directories $directory) {
                        $found_subdirs{$subdirectory . '/'} = 1;
                    }
                }
                return keys %found_subdirs;
            }
            my %found_files;
            foreach my $directory (keys %found_dirs) {
                foreach my $hit (_globstar $pattern, $directory, $flags) {
                    $found_files{$hit} = 1;
                }
            }
            return keys %found_files;
        } elsif ('**' eq $pattern) {
            my %found_files;
            foreach my $directory (bsd_glob $current, $flags) {
                $found_files{$directory . '/'} = 1;
                foreach my $file (_find_all $directory) {
                    $found_files{$file} = 1;
                }
            }
            return keys %found_files;
        } else {
            $current .= $1;
        }
    }

    # Pattern without globstar.  Just return the normal expansion.
    return bsd_glob $current, $flags;
}

sub globstar {
    my ($pattern, $flags) = @_;

    # The double asterisk can only be used in place of a directory.
    # It is illegal everywhere else.
    my @parts = split /\//, $pattern;
    foreach my $part (@parts) {
        $part ne '**' and 0 <= index $part, '**' and return;
    }

    return _globstar $pattern, '', $flags;
}

sub quotestar {
    my ($string, $listmatch) = @_;

    $string =~ s/([\\\[\]*?])/\\$1/g;
    $string =~ s/^!/\\!/ if $listmatch;
    
    return $string;
}

sub _transpile_range($) {
    my ($range) = @_;

    # Strip-off enclosing brackets.
    $range = substr $range, 1, -2 + length $range;

    # Replace leading exclamation mark with caret.
    $range =~ s/^!/^/;

    # Backslashes escape inside Perl ranges but not in ours.  Escape them:
    $range =~ s/\\/\\\\/g;

    # Quote dots and equal sign to prevent Perl from interpreting 
    # equivalence and collating classes.
    $range =~ s/\./\\\./g;
    $range =~ s/\=/\\\=/g;
    
    return "[$range]";
}

sub translatestar {
    my ($pattern, %options) = @_;

    die __x("invalid pattern '{pattern}'\n", pattern => $pattern)
        if $pattern =~ m{^/+$};

    my $blessing = RE_NONE;

    if ($options{pathMode}) {
        $blessing |= RE_NEGATED if $pattern =~ s/^!//;
        $blessing |= RE_DIRECTORY if $pattern =~ s{/$}{};
        $blessing |= RE_FULL_MATCH if $pattern =~ m{/};
        $pattern =~ s{^/}{};
    }

    # xgettext doesn't parse Perl code in regexes.
    my $invalid_msg = __"invalid use of double asterisk";

    $pattern =~ s
                {
                    (.*?)               # Anything, followed by ...
                    (  
                       \\.              # escaped character
                    |                   # or
                       \A\*\*(?=/)      # leading **/
                    |                   # or
                       /\*\*(?=/|\z)    # /**/ or /** at end of string
                    |                   # or
                      \*\*.             # invalid
                    |                   # or
                      .\*\*             # invalid
                    |                   # or
                       \.               # a dot
                    |                   # or
                       \*               # an asterisk
                    |
                       \?               # a question mark
                    |
                       \[               # opening bracket
                       !?
                       \]?              # possible (literal) closing bracket
                       (?:
                       \\.              # escaped character
                       |
                       \[:[a-z]+:\]     # character class
                       |
                       [^\\\]]+         # non-backslash or closing bracket
                       )+
                       \]
                    )?
                }{
                    my $translated = quotemeta $1;
                    if ('\\' eq substr $2, 0, 1) {
                        $translated .= quotemeta substr $2, 1, 1;
                    } elsif ('**' eq $2) {
                        $translated .= '.*';
                    } elsif ('/**' eq $2) {
                        $translated .= '(?:/.*)?';
                    } elsif ('.' eq $2) {
                        $translated .= '\\.';
                    } elsif ('*' eq $2) {
                        $translated .= '[^/]*';
                    } elsif ('?' eq $2) {
                        $translated .= '[^/]';
                    } elsif ('[' eq substr $2, 0, 1) {
                        $translated .= _transpile_range $2;
                    } elsif (length $2) {
                        if ($2 =~ /\*\*/) {
                            die $invalid_msg;
                        }
                        die "should not happen: $2"; 
                    }
                    $translated;
                }gsex;

    my $re = $options{ignoreCase} ? qr/^$pattern$/i : qr/^$pattern$/;

    bless $re, $blessing;
}

sub fnmatchstar {
    my ($pattern, $string, %options) = @_;

    my $transpiled = eval { translatestar $pattern, %options };
    return if $@;

    $string =~ $transpiled or return;

    return 1;
}

sub pnmatchstar {
    my ($pattern, $string, %options) = @_;

    $options{isDirectory} = 1 if $string =~ s{/$}{};

    my $full_path = $string;

    # Check whether the regular expression is compiled.  
    # (ref $pattern) may be false here because it can be 0.
    my $reftype = reftype $pattern;
    unless (defined $reftype && $regex_type eq $reftype) {
        $pattern = eval { translatestar $pattern, %options, pathMode => 1 };
        return if $@;
    }

    my $flags = ref $pattern;
    $string =~ s{.*/}{} unless $flags & RE_FULL_MATCH;

    my $match = $string =~ $pattern;
    if ($flags & RE_DIRECTORY) {
        undef $match if !$options{isDirectory};        
    }

    my $negated = $flags & RE_NEGATED;

    if ($match) {
        if ($negated) {
            return;
       } else {
            return 1;
        }
    }

    if ($full_path =~ s{/[^/]*$}{}) {
        return pnmatchstar $pattern, $full_path, %options, isDirectory => 1;
    }

    return 1 if $negated;
    
    return;
}

1;