# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#========================================================================
#
# Badger::Utils
#
# DESCRIPTION
#   Module implementing various useful utility functions.
#
# AUTHOR
#   Andy Wardley   <abw@wardley.org>
#
#========================================================================

package Badger::Utils;

use strict;
use warnings;
use base 'Badger::Exporter';
use File::Path;
use Scalar::Util qw( blessed );
use Badger::Constants 'HASH PKG DELIMITER BLANK';
use Badger::Debug 
    import  => ':dump',
    default => 0;
use overload;
use constant {
    UTILS  => 'Badger::Utils',
    CLASS  => 0,
    FILE   => 1,
    LOADED => 2,
};

# Module version number.
our $VERSION  = 0.01;
#our $DEBUG    = 0 unless defined $DEBUG;
# $ERROR and $MESSAGES are not referenced by any subroutine in this file;
# presumably they are read by the Badger base classes - TODO confirm.
our $ERROR    = '';
# Code reference that odd_params() calls to emit its diagnostics.  It is a
# package variable so that it can be replaced with a capturing subroutine.
our $WARN     = sub { warn @_ };  # for testing - see t/core/utils.t
our $MESSAGES = { };
# Maps the name of a helper module to a whitespace-delimited list of the
# function names it provides.  _expand_helpers() inverts this table and 
# _export_fail() uses the result to load and export a function on demand.
our $HELPERS  = {       # keep this compact in case we don't need to use it
    'Digest::MD5'       => 'md5 md5_hex md5_base64',
    'Scalar::Util'      => 'blessed dualvar isweak readonly refaddr reftype 
                            tainted weaken isvstring looks_like_number 
                            set_prototype',
    'List::Util'        => 'first max maxstr min minstr reduce shuffle sum',
    'List::MoreUtils'   => 'any all none notall true false firstidx 
                            first_index lastidx last_index insert_after 
                            insert_after_string apply after after_incl before 
                            before_incl indexes firstval first_value lastval 
                            last_value each_array each_arrayref pairwise 
                            natatime mesh zip uniq minmax',
    'Hash::Util'        => 'lock_keys unlock_keys lock_value unlock_value
                            lock_hash unlock_hash hash_seed',
    'Badger::Timestamp' => 'TS Timestamp Now',
    'Badger::Logic'     => 'LOGIC Logic',
};
# Cache of the inverted $HELPERS table: function name => [class, file, loaded]
our $DELEGATES;         # fill this from $HELPERS on demand
# Default length of the string returned by random_name().
our $RANDOM_NAME_LENGTH = 32;
# Default line width used by wrap() when no width argument is given.
our $TEXT_WRAP_WIDTH    = 78;


# Declare the symbols defined in this module that may be imported by name.
# NOTE(review): export_any() is inherited from Badger::Exporter, which is not
# visible here - presumably it marks these as optional exports; confirm.
__PACKAGE__->export_any(qw(
    UTILS blessed is_object numlike textlike params self_params plural 
    odd_params xprintf dotid random_name camel_case CamelCase wrap
    permute_fragments
));

# Register _export_fail() as the handler for requested symbols that are not 
# listed above.  It tries to satisfy them from the modules named in $HELPERS.
__PACKAGE__->export_fail(\&_export_fail);

# looks_like_number() is such a mouthful.  I prefer numlike() to go with textlike()
*numlike = \&Scalar::Util::looks_like_number;

# it would be too confusing not to have this alias
*CamelCase = \&camel_case;


# Export failure hook: called with the exporting class, the target package
# and the name of a requested symbol that this module does not define itself.
# If the symbol is provided by one of the modules listed in $HELPERS then
# that module is loaded and the function is exported into the target package.
# Returns 1 if the symbol was exported, 0 if it is not a known helper.
sub _export_fail {
    my ($class, $target, $symbol, $more_symbols) = @_;

    # build the symbol => [class, file, loaded] lookup table on first use
    $DELEGATES ||= _expand_helpers($HELPERS);
    my $helper = $DELEGATES->{ $symbol } || return 0;

    # load the providing module and record that in the LOADED slot so that
    # the flag tested here actually reflects what has been done
    unless ($helper->[LOADED]) {
        require $helper->[FILE];
        $helper->[LOADED] = 1;
    }

    # export a reference to the fully qualified function, e.g. List::Util::max
    $class->export_symbol($target, $symbol, \&{ $helper->[CLASS].PKG.$symbol });
    return 1;
}

# Inverts a table of { module => 'a b c' } into a lookup table keyed by 
# function name: { a => [module, file, 0], b => [...], c => [...] }.  The 
# three slots of each entry are indexed by the CLASS, FILE and LOADED 
# constants; the last is a "loaded" flag which starts out false.
sub _expand_helpers {
    my $helpers = shift;
    my %delegates;

    for my $module (keys %$helpers) {               # e.g. Scalar::Util
        my $path = module_file($module);            # e.g. Scalar/Util.pm
        for my $symbol (split(DELIMITER, $helpers->{ $module })) {
            $delegates{ $symbol } = [$module, $path, 0];
        }
    }

    return \%delegates;
}
        
# is_object($class, $object) returns a true value if $object is a blessed
# reference that isa $class, and a false value otherwise.
sub is_object($$) {
    my ($class, $object) = @_;
    return blessed($object) && $object->isa($class);
}

# textlike($item) returns true if $item can be used as text: either it is
# not a reference at all, or it is an object whose class overloads the '""'
# stringification operator.
sub textlike($) {
    my $item = shift;
    return !ref($item)
        || ( blessed($item) && overload::Method($item, '""') );
}

# Returns a hash reference of named parameters.  If the first argument is
# a hash reference then it is returned as is, otherwise the argument list is
# folded into a new hash.
sub params {
    # When DEBUG is enabled we trap the warning that Perl raises if an odd 
    # number of arguments are folded into the hash below and report it via 
    # odd_params() along with a copy of the arguments that we received.  This
    # typically happens when a value on the rhs is an empty list, 
    # e.g. $obj->foo( x => this_returns_empty_list() )
    my @received = @_;
    local $SIG{__WARN__} = sub {
        odd_params(@received);
    } if DEBUG;

    return shift(@_)
        if @_ && ref $_[0] eq HASH;

    return { @_ };
}

# Like params() but for methods: the first argument ($self) is removed from
# the argument list and returned ahead of the hash reference of parameters.
sub self_params {
    # report an odd number of arguments via odd_params() when DEBUG is set
    my @received = @_;
    local $SIG{__WARN__} = sub {
        odd_params(@received);
    } if DEBUG;

    my $self = shift;
    my $params;

    if (@_ && ref $_[0] eq HASH) {
        $params = shift;            # already a hash reference
    }
    else {
        $params = { @_ };           # fold named parameters into a hash
    }

    return ($self, $params);
}

# Reports, via the $WARN code reference, that a subroutine was called with
# an odd number of arguments.  The arguments are listed (with <undef> shown
# in place of undefined values) and followed by a backtrace of the callers.
sub odd_params {
    # two frames up from here is the subroutine that received the arguments
    my $culprit = (caller(2))[3];
    my $shown   = join(', ', map { defined $_ ? $_ : '<undef>' } @_);

    $WARN->(
        "$culprit() called with an odd number of arguments: ", 
        $shown,
        "\n"
    );

    # walk up the call stack from frame 3 onwards, numbering them from #1
    my $depth = 3;
    while (my ($pkg, $file, $line, $sub) = caller($depth)) {
        my $label = '#' . ($depth - 2);
        $WARN->(
            sprintf(
                "%4s: Called from %s in %s at line %s\n",
                $label, $sub, $file, $line
            )
        );
        $depth++;
    }
}
    

# Naive pluralisation of a singular English noun.  Returns the plural form,
# or the word unchanged if none of the rules below apply.
sub plural {
    my $noun = shift;

    # words ending in ss, sh, ch or x take 'es', e.g. class => classes
    return $noun . 'es'
        if $noun =~ /(ss|sh|ch|x)$/;

    # a trailing 'y' that doesn't follow a vowel becomes 'ies', 
    # e.g. party => parties
    return $noun . $1 . 'ies'
        if $noun =~ s/([^aeiou])y$//;

    # anything else ending in a word character other than 's' or a digit
    # simply has an 's' added, e.g. device => devices
    return $noun . 's'
        if $noun =~ /([^s\d\W])$/;

    return $noun;
}

# Converts a module name into the relative file path that require() expects,
# e.g. My::Module => My/Module.pm
sub module_file {
    (my $path = shift) =~ s{::}{/}g;
    return $path . '.pm';
}

# xprintf($format, @args) is a wrapper around sprintf() which adds a more
# readable syntax for positional parameters.  The following forms are
# rewritten in $format before it is passed to sprintf() along with @args:
#
#   <n>          => %n$s        argument n formatted as a string
#   <n:fmt>      => %n$fmt      argument n with a format, e.g. <2:.2f>
#   <n:%fmt>     => %n$fmt      as above; the leading '%' is optional
#   <n|text>                    text is only included if argument n is
#   <n:fmt|text>                defined, with each '?' in it replaced by 
#                               the placeholder for argument n
#
# Returns the formatted string.
sub xprintf {
    my $format = shift;
    my @args   = @_;
    $format =~ 
        s{ < (\d+) 
             (?: : %? ( [#\-\+ ]? [\w\.]+ ) )?
             (?: \| (.*?) )?
           > 
         }
         {   defined $3
                ? _xprintf_ifdef(\@args, $1, $2, $3)
                : '%' . $1 . '$' . ($2 || 's') 
        }egx;
    return sprintf($format, @_);
}

# Helper for xprintf() which handles the conditional <n|text> form.  $args
# is a reference to the argument list, $n is the 1-based argument number,
# $format is an optional sprintf() conversion (default: 's') and $text is
# the text to include.  If argument $n is defined then $text is returned with
# each '?' replaced by the positional placeholder for that argument, 
# otherwise an empty string is returned.
sub _xprintf_ifdef {
    my ($args, $n, $format, $text) = @_;

    return '' 
        unless defined $args->[$n-1];

    my $placeholder = '%' . $n . '$' . (defined $format ? $format : 's');
    $text =~ s/\?/$placeholder/g;
    return $text;
}

# Returns the canonical dotted, lower case form of the text passed as an
# argument.  Each sequence of non-word characters is replaced by a single 
# dot, e.g. Foo::Bar ==> foo.bar
sub dotid {
    (my $id = shift) =~ s/\W+/./g;
    return lc $id;
}

# Converts one or more underscore_separated strings into CamelCase.  Each
# argument is split on underscores, the first letter of every resulting word 
# is capitalised, and all of the words are joined into a single string.
sub camel_case {
    my @words;

    for my $text (@_) {
        push(@words, map { ucfirst } split('_', $text));
    }

    return join(BLANK, @words);
}

# Returns a random string of hexadecimal digits.  The first argument is the
# length required, defaulting to $RANDOM_NAME_LENGTH.  Any further arguments
# are added to the data from which the MD5 digests are generated.
sub random_name {
    my $length = shift || $RANDOM_NAME_LENGTH;
    require Digest::MD5;

    # each digest adds 32 hex characters, so keep going until we have enough
    my $name = '';
    $name .= Digest::MD5::md5_hex(time(), rand(), $$, { }, @_)
        until length($name) >= $length;

    return substr($name, 0, $length);
}

# Returns a reference to a list of the alternates in a fragment of text.
# Used by permute_fragments() to expand the contents of each (...) group.
sub alternates {
    my $text = shift;

    # no '|' means it's an optional fragment: (foo) is treated as (|foo)
    # and returned as ['', 'foo']
    return [ '', $text ]
        unless $text =~ /\|/;

    # alternates: (foo|bar) as ['foo', 'bar'].  The limit of -1 ensures that
    # empty trailing alternates are kept, e.g. (foo|) as ['foo', '']
    return [ split(qr<\|>, $text, -1) ];
}

# wrap($text, $width, $indent) splits $text into whitespace delimited words
# and rejoins them into lines of no more than $width characters (default: 
# $TEXT_WRAP_WIDTH).  The lines are joined by a newline followed by $indent 
# spaces (default: 0), so the indent applies to every line after the first 
# and is not counted against $width.  A word consisting of the two literal 
# characters '\n' forces a line break.  A single word that is longer than
# $width is placed on a line of its own rather than being split.
sub wrap {
    my $text   = shift;
    my $width  = shift || $TEXT_WRAP_WIDTH;
    my $indent = shift || 0;
    my (@lines, @line);
    my $total = 0;

    # split() returns an empty leading field if $text starts with whitespace
    for my $word (grep { length $_ } split(/\s+/, $text)) {
        if ($word eq '\n') {
            # forced line break: finish the current line, even if it's empty
            push(@lines, join(' ', @line));
            @line  = ();
            $total = 0;
            next;
        }
        # Start a new line if this word would take us past $width.  We only
        # do this when the current line has something in it so that an 
        # overlong word is always consumed instead of looping forever.
        if (@line && $total + length($word) > $width) {
            push(@lines, join(' ', @line));
            @line  = ();
            $total = 0;
        }
        push(@line, $word);
        $total += length($word) + 1;    # account for spaces joining words
    }
    push(@lines, join(' ', @line)) if @line;

    return join(
        "\n" . (' ' x $indent), 
        @lines
    );
}


# permute_fragments($text) expands the optional (a) and alternate (a|b|c) 
# fragments embedded in $text and returns every permutation, in order, with
# the rightmost fragment changing most often.  Returns a list in list 
# context or a reference to a list in scalar context.
sub permute_fragments {
    my $input = shift;
    my (@frags, @outputs);

    # Look up all the (a) optional fragments and (a|b|c) alternate fragments 
    # and replace them with %s.  This gives us an sprintf format that we can 
    # later use to re-fill the fragment slots.  Meanwhile create a list of 
    # @frags with each item corresponding to a (...) fragment which is 
    # represented by a list reference containing the alternates.  e.g. the 
    # input string 'Fo(o|p) Ba(r|z)' generates @frags as ( ['o','p'], 
    # ['r','z'] ), leaving $input set to 'Fo%s Ba%s'.  We treat (foo) as sugar
    # for (|foo), so that 'Template(X)' is permuted as ('Template', 
    # 'TemplateX'), for example.
    #
    # Any literal '%' outside of a fragment is escaped as '%%' in the same 
    # pass so that sprintf doesn't mistake it for a format directive.

    $input =~ 
        s{ 
            ( % ) | \( ( .*? ) \) 
        }{
            defined $1
                ? '%%'
                : do { push(@frags, alternates($2)); '%s' }
        }gex;

    # $input now contains one '%s' token for each fragment and @frags has the
    # same number of list refs in it.  To iterate across all permutations of 
    # the fragment values, we calculate the product P of the sizes of all the 
    # lists in @frags and loop from 0 to P-1.  Then we use a div and a mod to 
    # get the right value for each fragment, for each iteration.  We divide $n
    # by the product of all fragment lists to the right of the current 
    # fragment and mod it by the size of the current fragment list.  It's 
    # effectively counting with a different base for each column. e.g. 
    # consider 3 fragments with 7, 3, and 5 values respectively
    #   [7]            [3]           [5]         P = 7 * 3 * 5 = 105
    #   [n / 15 % 7]   [n / 5 % 3]   [n % 5]     for 0 <= n < P 
    #
    # When there are no fragments P is 1, so the loop runs exactly once and 
    # sprintf simply turns any '%%' back into '%'.

    my $product = 1; 
    $product *= @$_ for @frags;

    for my $n (0 .. $product - 1) {
        my $divisor = 1;
        my @args = reverse map {
            my $item = $_->[ $n / $divisor % @$_ ];
            $divisor *= @$_;
            $item;
        } reverse @frags;   # working backwards from right to left
        push(@outputs, sprintf($input, @args));
    }

    return wantarray
        ?  @outputs
        : \@outputs;
}

# Prints the arguments to STDERR and returns the result of the print.
sub _debug {
    my @message = @_;
    return print STDERR @message;
}

1;

__END__

=head1 NAME

Badger::Utils - various utility functions

=head1 SYNOPSIS

    use Badger::Utils 'blessed params';
    
    sub example {
        my $self   = shift;
        my $params = params(@_);
        
        if (blessed $self) {
            print "self is blessed\n";
        }
    }
    

=head1 DESCRIPTION

This module implements a number of utility functions.  It also provides 
access to all of the utility functions in L<Scalar::Util>, L<List::Util>,
L<List::MoreUtils>, L<Hash::Util> and L<Digest::MD5> as a convenience.

    use Badger::Utils 'blessed reftype first max any all lock_hash md5_hex';

The single line of code shown here will import C<blessed> and C<reftype> from
L<Scalar::Util>, C<first> and C<max> from L<List::Util>, C<any> and C<all>
from L<List::MoreUtils>, C<lock_hash> from L<Hash::Util>, and C<md5_hex> from 
L<Digest::MD5>.

These modules are loaded on demand so there's no overhead incurred if you
don't use them (other than a lookup table so we know where to find them).

=head1 EXPORTABLE FUNCTIONS

C<Badger::Utils> can automatically load and export functions defined in the
L<Scalar::Util>, L<List::Util>, L<List::MoreUtils>, L<Hash::Util> and
L<Digest::MD5> Perl modules.

It also does the same for functions and constants defined in the Badger 
modules L<Badger::Timestamp> (L<TS|Badger::Timestamp/TS>,
L<Timestamp()|Badger::Timestamp/Timestamp()> and
L<Now()|Badger::Timestamp/Now()>) and L<Badger::Logic>
(L<LOGIC|Badger::Logic/LOGIC> and L<Logic()|Badger::Logic/Logic()>).

For example:

    use Badger::Utils 'Now';
    print Now->year;            # prints the current year

The following exportable functions are also defined in C<Badger::Utils>

=head2 UTILS

Exports a C<UTILS> constant which contains the name of the C<Badger::Utils>
class.  

=head2 is_object($class,$object)

Returns true if the C<$object> is a blessed reference which isa C<$class>.

    use Badger::Filesystem 'FS';
    use Badger::Utils 'is_object';
    
    if (is_object( FS => $object )) {       # FS == Badger::Filesystem
        print $object, ' isa ', FS, "\n";
    }

=head2 textlike($item)

Returns true if C<$item> is a non-reference scalar or an object that
has an overloaded stringification operator.

    use Badger::Filesystem 'File';
    use Badger::Utils 'textlike';
    
    # Badger::Filesystem::File objects have overloaded string operator
    my $file = File('example.txt'); 
    print $file;                                # example.txt
    print textlike $file ? 'ok' : 'not ok';     # ok

=head2 numlike($item)

This is an alias to the C<looks_like_number()> function defined in 
L<Scalar::Util>.  

=head2 params(@args)

Method to coerce a list of named parameters to a hash array reference.  If the
first argument is a reference to a hash array then it is returned.  Otherwise
the arguments are folded into a hash reference.

    use Badger::Utils 'params';
    
    params({ a => 10 });            # { a => 10 }
    params( a => 10 );              # { a => 10 }

Pro Tip: If you're getting warnings about an "Odd number of elements in
anonymous hash" then try enabling debugging in C<Badger::Utils>. To do this,
add the following to the start of your program before you've loaded
C<Badger::Utils>:

    use Badger::Debug
        modules => 'Badger::Utils'

When debugging is enabled in C<Badger::Utils> you'll get a full stack 
backtrace showing you where the subroutine was called from.  e.g.

    Badger::Utils::self_params() called with an odd number of arguments: <undef>
    #1: Called from Foo::bar in /path/to/Foo/Bar.pm at line 210
    #2: Called from Wam::bam in /path/to/Wam/Bam.pm at line 420
    #3: Called from main in /path/to/your/script.pl at line 217

=head2 self_params(@args)

Similar to L<params()> but also expects a C<$self> reference at the start of
the argument list.

    use Badger::Utils 'self_params';
    
    sub example {
        my ($self, $params) = self_params(@_);
        # do something...
    }

If you enable debugging in C<Badger::Utils> then you'll get a stack backtrace
in the event of an odd number of parameters being passed to this function.
See L<params()> for further details.

=head2 odd_params(@_)

This is an internal function used by L<params()> and L<self_params()> to 
report any attempt to pass an odd number of arguments to either of them.
It can be enabled by setting C<$Badger::Utils::DEBUG> to a true value.

    use Badger::Utils 'params';
    $Badger::Utils::DEBUG = 1;
    
    my $hash = params( foo => 10, 20 );    # oops!

The above code will raise a warning showing the arguments passed and a 
stack backtrace, allowing you to easily track down and fix the offending
code.  Apart from obvious typos like the above, this is most likely to 
happen if you call a function or method that returns an empty list.  e.g.

    params(
        foo => 10,
        bar => get_the_bar_value(),
    );

If C<get_the_bar_value()> returns an empty list then you'll end up with an
odd number of elements being passed to C<params()>.  You can correct this
by providing C<undef> as an alternative value.  e.g.

    params(
        foo => 10,
        bar => get_the_bar_value() || undef,
    );

=head2 plural($noun)

The function makes a very naive attempt at pluralising the singular noun word
passed as an argument. 

If the C<$noun> word ends in C<ss>, C<sh>, C<ch> or C<x> then C<es> will be
added to the end of it.

    print plural('class');      # classes
    print plural('hash');       # hashes
    print plural('patch');      # patches 
    print plural('box');        # boxes 

If it ends in C<y> preceded by anything other than a vowel then the C<y>
will be replaced with C<ies>.

    print plural('party');      # parties

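In all other cases, C<s> will be added to the end of the word, unless the
word already ends in C<s>, a digit or a non-word character, in which case it
is returned unchanged.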

    print plural('device');     # devices

It will fail miserably on many common words.

    print plural('woman');      # womans     FAIL!
    print plural('child');      # childs     FAIL!
    print plural('foot');       # foots      FAIL!

This function should I<only> be used in cases where the singular noun is known
in advance and has a regular form that can be pluralised correctly by the
algorithm described above. For example, the L<Badger::Factory> module allows
you to specify C<$ITEM> and C<$ITEMS> package variable to provide the singular
and plural names of the items that the factory manages.

    our $ITEM  = 'person';
    our $ITEMS = 'people';

If the singular noun is sufficiently regular then the C<$ITEMS> can be 
omitted and the C<plural> function will be used.

    our $ITEM  = 'codec';       # $ITEMS defaults to 'codecs'

In this case we know that C<codec> will pluralise correctly to C<codecs> and
can safely leave C<$ITEMS> undefined.

For more robust pluralisation of English words, you should use the
L<Lingua::EN::Inflect> module by Damian Conway. For further information on the
difficulties of correctly pluralising English, and details of the
implementation of L<Lingua::EN::Inflect>, see Damian's paper "An Algorithmic
Approach to English Pluralization" at
L<http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html>

=head2 module_file($name)

Returns the module name passed as an argument as a relative filesystem path
suitable for feeding into C<require()>

    print module_file('My::Module');     # My/Module.pm

=head2 camel_case($string) / CamelCase($string)

Converts a lower case string where words are separated by underscores (e.g.
C<like_this_example>) into CamelCase where each word is capitalised and words
are joined together (e.g. C<LikeThisExample>).

According to Perl convention (and personal preference), we use the lower case
form wherever possible. However, Perl's convention also dictates that module
names should be in CamelCase.  This function performs that conversion.

=head2 wrap($text, $width, $indent)

Simple subroutine to wrap C<$text> to a fixed C<$width>, applying an optional
indent of C<$indent> spaces.  It uses a trivial algorithm which splits the 
text into words, then rejoins them as lines.  It has an additional hack to 
recognise the literal sequence '\n' as a magical word indicating a forced 
newline break.  It must be specified as a separate whitespace delimited word.

    print wrap('Foo \n Bar');

If anyone knows how to make L<Text::Wrap> handle this, or knows of a better
solution then please let me know.

=head2 dotid($text)

The function returns a lower case representation of the text passed as
an argument with all non-word character sequences replaced with dots.

    print dotid('Foo::Bar');            # foo.bar

=head2 xprintf($format,@args)

A wrapper around C<sprintf()> which provides some syntactic sugar for 
embedding positional parameters.

    xprintf('The <2> sat on the <1>', 'mat', 'cat');
    xprintf('The <1> costs <2:%.2f>', 'widget', 11.99);

=head2 random_name($length,@data)

Generates a random name of length C<$length> using any additional 
seeding data passed as C<@data>.  If C<$length> is undefined then the default
value in C<$RANDOM_NAME_LENGTH> (32) is used.

    my $name = random_name();
    my $name = random_name(64);

=head2 permute_fragments($text)

This function permutes any optional or alternate fragments embedded in 
parentheses. For example, C<Badger(X)> is permuted as (C<Badger>, C<BadgerX>)
and C<Badger(X|Y)> is permuted as (C<BadgerX>, C<BadgerY>).

    permute_fragments('Badger(X)');         # Badger, BadgerX
    permute_fragments('Badger(X|Y)');       # BadgerX, BadgerY

Multiple fragments may be embedded. They are expanded in order from left to
right, with the rightmost fragments changing most often.

    permute_fragments('A(1|2):B(3|4)')      # A1:B3, A1:B4, A2:B3, A2:B4

=head2 alternates($text)

This function is used internally by the L<permute_fragments()> function. It
returns a reference to a list containing the alternates split from C<$text>.

    alternates('foo|bar');          # returns ['foo','bar']
    alternates('foo');              # returns ['','foo']

If the C<$text> doesn't contain the C<|> character then it is assumed to be
an optional item.  A list reference is returned containing the empty string
as the first element and the original C<$text> string as the second.

=head1 AUTHOR

Andy Wardley L<http://wardley.org/>

=head1 COPYRIGHT

Copyright (C) 1996-2009 Andy Wardley.  All Rights Reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

# Local Variables:
# mode: perl
# perl-indent-level: 4
# indent-tabs-mode: nil
# End:
#
# vim: expandtab shiftwidth=4: