lib/Hash/MostUtils.pm - metacpan.org

use strict;
use warnings;
package Hash::MostUtils;
use base qw(Exporter);

use Carp qw(confess);
use Hash::MostUtils::leach qw(n_each leach);

our $VERSION = 1.05;

our @EXPORT_OK = qw(
  lvalues
  lkeys
  leach
  hash_slice_of
  hash_slice_by
  hashmap
  hashgrep
  hashapply
  n_each
  n_map
  n_grep
  n_apply
  reindex
  rekey
  revalue
);

# decrementing $| flips it between 0 and 1
sub lkeys   { local $|; return grep { $|-- == 0 } @_ }
sub lvalues { local $|; return grep { $|-- == 1 } @_ }

# I would put leach() here, but it was imported above

*hashmap = sub(&@) { unshift @_, 2; goto &n_map };
*hashgrep = sub(&@) { unshift @_, 2; goto &n_grep };
*hashapply = sub (&@) { unshift @_, 2; goto &n_apply };

# I would put n_each() here, but it was imported above

sub n_map ($&@) {
  # Usually I don't mutate @_. Here I deliberately modify @_ for the upcoming non-obvious goto-&NAME.
  my $n = shift;
  my $collector = sub { return $_[0]->() };
  unshift @_, $collector;

  # Using a "safe goto" allows n_map() to remove itself from the callstack, which allows _n_collect()
  # to see the correct caller.
  #
  # 'perldoc -f goto' for why this is a safe goto.
  goto &{_n_collect($n)};
}

sub n_grep ($&@) {
  my $n = shift;

  # the comments in n_map() apply here as well.

  my $collector = sub {
    my ($code, $vals, $aliases) = @_;
    return $code->() ? @$vals : ();
  };
  unshift @_, $collector;

  goto &{_n_collect($n)};
}

sub n_apply {
  my $n = shift;
  my $collector = sub {
    my ($code, $vals, $aliases) = @_;
    $code->();
    return map { $$_ } @$aliases;
  };
  unshift @_, $collector;

  goto &{_n_collect($n)};
}

sub _n_collect($) {
  my ($n) = @_;
  return sub(&@) {
    my $collector = shift;
    my $code = shift;
    if (@_ % $n != 0) {
      confess("your input is insane: can't evenly slice " . @_ . " elements into $n-sized chunks\n");
    }

    # these'll reserve some namespace back in the callpackage
    my @n = ('a' .. 'z');

    # stash old values back in callpackage *and* in main. If called from main::, this comes down to:
    #   local ${'main::a'}, ${'main::b'}, ${'main::c'}
    # when $n is 3.
    my $caller = caller;
    no strict 'refs';
    foreach ((@n[ 0 .. $n-1 ])) {
      local ${"$caller\::$_"};
      local ${"::$_"};
    }

    my @out;
    while (my @chunk = splice @_, 0, $n) {  # build up each set...
      my @aliases;
      foreach (0 .. $#chunk) {
        # ...assign values from @_ back to localized variables in $caller *and* in 'main::'.
        # Aliasing in main::  allows you to refer to variables $c and onwards as $::c.
        # Aliasing in $caller allows you to refer to variables $c and onwards as $whatever::package::c.
        ${"::$n[$_]"} = ${"$caller\::$n[$_]"} = $chunk[$_];

        # Keep a reference to $::a (etc.) and pass them in to the $collector; this allows $code to mutate
        # $::a (etc) and signal the changed values back to $collector.
        push @aliases, \${"::$n[$_]"};
      }
      push @out, $collector->($code, \@chunk, \@aliases);             # ...and apply $code.
    }

    return @out;
  };
}

sub hash_slice_of {
  my ($ref, @keys) = @_;
  return map { ($_ => $ref->{$_}) } @keys;
}

sub hash_slice_by {
  my ($obj, @methods) = @_;
  return map { ($_ => scalar($obj->$_)) } @methods;
}

sub rekey (&@) {
  my %map = shift()->();
  return n_map 2, sub { $map{$a} || $a => $b }, @_;
}

sub reindex (&@) {
  my %map = shift()->();
  @_[values %map] = delete @_[keys %map];
  return @_;
}

sub revalue (&@) {
  my %map = shift()->();
  return n_map 2, sub { $a => $map{$b} || $b }, @_;
}

1;

__END__

=head1 NAME

Hash::MostUtils - Yet another collection of tools for operating pairwise on lists.

=head1 SYNOPSIS

  my @found_and_transformed =
      hashmap { uc($b) => 100 + $a }
      hashgrep { $a < 100 && $b =~ /[aeiou]/i } (
          1 => 'cwm',
          2 => 'apple',
          100 => 'cherimoya',
      );

  my @keys = lkeys @found_and_transformed;
  my @vals = lvalues @found_and_transformed;
  foreach my $key (@keys) {
      my $value = shift @vals;
      print "$key => $val\n";
  }

  while (my ($key, $val) = leach @found_and_transformed) {
      print "$key => $val\n";
  }

=head1 EXPORTS

By default, none. On request, any of the following:

=head1 FUNCTIONS TO MAKE ARRAYS ACT LIKE HASHES

=head2 lkeys LIST

Return the "keys" of LIST. Perl's C<keys()> keyword only operates on hashes; lkeys() offers
an approximation of the same functionality for lists.

    my @evens = lkeys 1..10;

    my @keys  =
        lkeys                                     # give me back those keys (i.e. the letters)
        hashgrep { $b > 100 }                     # find key/value pairs where the value is > 100
        map { $_ => int(rand(1000)) } 'a'..'z';   # turn 'a'..'z' into key/value pairs with random values

The "keys" of a list are the even-positioned items. Note that in the case of an C<E<gt>empty slotE<lt>>
in a sparse array, the key will be C<undef>.

=head2 lvalues LIST

Return the "values" of LIST. Perl's C<values()> keyword only operates on hashes; lvalues() offers
an approximation of the same functionality for lists.

    my @odds = lkeys 1..10;

    my @values =
        lvalues                                  # give me back those values (i.e. the letters)
        hashgrep { $a > 100 }                    # look for key/value pairs where the key is > 100
        map { int(rand(1000)) => $_ } 'a'..'z';  # make 26 random keys from 1-1000, with fixed keys

The "values" of a list are the odd-positioned items. Note that in the case of an C<E<gt>empty slotE<lt>>
in a sparse array, the value will be C<undef>.

=head2 leach [ ARRAY | HASH | ARRAYREF | HASHREF ]

Iterate over an ARRAY, HASH, ARRAYREF, or HASHREF, returning successive "key/value" pairs. This behaves
functionally identically to Perl's built-in C<each> keyword; however, it is useful for arrays and array-
and hash-references. This function handles objects which are built around blessed array- and hash-references.

    my @array = (1..4);

    while (my ($k, $v) = leach @array) {
        print "$k => $v\n";
    }

    print "$_\n" for @array;

    __END__
    1 => 2
    3 => 4
    1
    2
    3
    4

Using C<leach> to gather key/value pairs from a collection is guaranteed to be non-destructive to that
collection. One pattern that's useful for iterating arrays and arrary references in pairs is to use C<splice>,
which has the possibly unintended side effect of destroying the subject collection:

    my @array = (1..4);

    while (my ($k, $v) = splice @array, 0, 2) {
        print "$k => $v\n";
    }

    print "$_\n" for @array;

    __END__
    1 => 2
    3 => 4

Note the distinction between saying that this function is

    leach ARRAY

rather than

    leach LIST

Perl does not allow this behavior:

    while (my ($k, $v) = leach 1..10) {                   # can't leach a list, only an array
        # do something with this key/value tuple
    }

But don't worry, Perl also doesn't allow for this behavior:

    while (my ($k, $v) = splice 1..10, 0, 2) {            # can't splice a list, only an array
        # do something with this key/value tuple
    }

=head1 FUNCTIONS TO OPERATE ON LISTS, ARRAYS, AND HASHES AS TUPLES

C<hashmap>, C<hashgrep>, and C<hashapply> all act like their corresponding C<map>, C<grep>, and
C<List::Utils::apply> but for one notable exception: whereas C<map>, C<grep>, and C<apply> all
eat items from the given list one-by-one and assign that current value to $_, C<hashmap>, C<hashgrep>,
and C<hashapply> all eat items from the given list two-by-two, and assigns them to $a and $b.

The names $a and $b were chosen because they're already in lexical scope in Perl due to C<sort>'s need
for them.

If you have a singular occurance of $a and $b within your program, you will probably see this warning
from Perl:

    Name 'main::a' used only once: possible typo at ...
    Name 'main::b' used only once: possible typo at ...

I've just gotten in the habit of adding:

    use strict;
    use warnings; no warnings 'once';

when I see that message.

=head2 hashmap BLOCK LIST

This acts similar to

    map BLOCK LIST

with the exception that C<map> eats items off of LIST one at a time, assigning the current value to $_;
whereas C<hashmap> eats items off of LIST two at a time, assigning the first value to $a and the second
value to $b.

    # naive transformation of this hash into (101 => 'A', 102 => 'B')
    my %hash = (
        a => 1,
        b => 2,
    );

    my %transformed =
        hashmap { $b + 100 => uc($a) }
        %hash;


Just like C<map>, your BLOCK will be called without any arguments. Like perl's keyword C<map>, this
function maintains the order of LIST.

C<hashmap> is simply a prototyped alias for n_map(2, CODEREF, LIST), so all of the documentation to
C<n_map> applies here.

=head2 hashgrep BLOCK LIST

This acts similar to

    grep BLOCK LIST

with the exception that C<grep> eats items off of LIST one at a time, assigning the current value to $_;
whereas C<hashgrep> eats items off of LIST two at a time, assigning the first value to $a and the second
value to $b.

    # lame object dumper
    my $object = Some::Class->new(...);

    my %dump =
        hashgrep { $a !~ /^_/ && ! ref($b) }   # hide private fields and internal data structures
        %$object;

Just like C<grep>, your BLOCK will be called without any arguments. Like perl's keyword C<grep>,
this function maintains the order of LIST.

C<hashgrep> is simply a prototyped alias for n_grep(2, CODEREF, LIST), so all of the documentation
to C<n_grep> applies here.

=head2 hashapply BLOCK LIST

This is similar to C<List::MoreUtils::apply>:

    apply BLOCK LIST

with the usual exception: C<apply> eats items off of LIST one at a time, assigning to $_; whereas
C<hashapply> eats items off of LIST two at a time, assigning the first value to $a and the second
value to $b.

Normal C<apply> can be written as map:

=over 4

my @words = qw(apple banana cherimoya);
my @clean1 = map { tr/aeiou//d; $_ } @words;  # @clean1 = @words = qw(ppl bnn chrmy);

@words = qw(apple banana cherimoya);
my @clean2 = apply { tr/aeiou//d } @words;    # @clean2 = qw(ppl bnn chrmy); @words = qw(apple banana cherimoya);

=back

Note that C<apply> does not transform the original data, whereas C<map> does. Similarly, C<hashapply> does
not transform the original data, whereas C<hashmap> might.

Note that C<apply> does not need to explicitly return $_, whereas C<map> does. Similarly, C<hashapply> does
not need to explicitly return a key/value tuple ($a, $b), whereas C<hashmap> does need to return something.

Like C<apply>, C<hashapply> will not transform the original LIST.

=head1 GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS

Each of the pairwise functions mentioned so far - C<leach>, C<hashmap>, C<hashgrep>, C<hashapply> - are
actually implemented in terms of more generic N-ary forms. This means that if you need to process a list
in sets of N, where N is E<gt> 2, you may use the n_* forms of these functions.

Variable naming becomes more interesting when moving beyond 2 items. Whereas $a and $b are always in
lexical scope, once you go to N of 3, you need to agree on some variable naming convention.

$a and $b work nicely for the first two elements of a list; so $c is the third, and $d the fourth, and
so on. One limitation of this naming scheme is that you may not easily go beyond N of 26 - but if you
find yourself needing that, you'll find the code simple to extend.

In order to prevent 'strict refs' from complaining about $c..$z, you'll need to address those variables a
bit differently:

    my @sets =
        n_map   6, sub { [$a, $b, $::c, $::d, $::e, $::f] },
        n_apply 3, sub { $_ *= 3 for $a, $b, $::c },
        n_grep  3, sub { $::c > 4 },
        (1..9);                             # @sets = ([12, 15, 18, 21, 24, 27]);

I personally find the transition between C<$b> and C<$::c> to be a bit jarring visually, so the one
time I wrote a line like the above I chose to write it as C<$::a> and C<$::b>.

    my @sets =
        n_map   6, sub { [$::a, $::b, $::c, $::d, $::e, $::f] },
        n_apply 3, sub { $_ *= 3 for $::a, $::b, $::c },
        n_grep  3, sub { $::c > 4 },
        (1..9);                             # @sets = ([12, 15, 18, 21, 24, 27]);

=head2 n_each N, LIST

Iterate over LIST, returning successive "key/values" sets.

    my @list = (1..9);

    while (my ($k, @v) = n_each 3, @list) {
        # do something with this $k and @v
    }

There's nothing that says your N needs to remain constant:

    my @list = (
        a => 1,
        b => 1, 2,
        c => 1, 2, 3,
        d => 1, 2, 3, 4,
    );

    my $n = 2;

    my %triangle;
    while (my ($k, @v) = n_each $n++, @list) {
        $triangle{$k} = \@v;
    }

    __END__
    %triangle = (
        a => [1],
        b => [1, 2],
        c => [1, 2, 3],
        d => [1, 2, 3, 4],
    );

There's probably something clever that you can do with this that I just don't understand. Please drop me
a line if you know what it is.

=head2 n_map N, CODEREF, LIST

C<map> CODEREF over LIST, operating in N-sized chunks. Within the context of CODEREF, values of LIST
will be selected and aliased. LIST must be evenly divisible by N.

See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names.

    my @transformed = n_map(
        3,
        sub { "$a, $b $::c!\n" },
        qw(goodnight sweet prince goodbye cruel world),
    );

    # @transformed = ("goodnight, sweet prince!\n", "goodbye, cruel world!");


If you are consistently n_map'ping by some N, then you might consider wrapping n_map so the call
syntax looks more like one of Perl's functional keywords:

    sub tri_map (&@) { unshift @_, 3; goto &n_map }

    my @transformed =
        tri_map { "$::a, $::b $::c!\n" }
        qw(goodnight sweet prince goodbye cruel world);

    # @transformed = ("goodnight, sweet prince!\n", "goodbye, cruel world!");

=head2 n_grep N, CODEREF, LIST

C<grep> for CODEREF over LIST, operating in N-sized chunks. Within the context of CODEREF, values
of LIST will be selected and aliased. LIST must be evenly divisible by N.

See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names.

    my @found = n_grep(
        3,
        sub { $a =~ /good/ && $::c =~ /prince/ },
        qw(goodnight sweet prince goodbye cruel world),
    );

    # @found = qw(goodnight sweet prince);

Just as with C<n_map>, writing a small bit of gloss to make your N of n_grep work in a functional
manner is simple, and makes your code more readable:

    sub tri_grep (&@) { unshift @_, 3; goto &n_grep }

    my @found =
        tri_grep { $::a =~ /good/ && $::c =~ /prince/ }
        qw(goodnight sweet prince goodbye cruel world);

    # @found = qw(goodnight sweet prince);

=head2 n_apply N, CODEREF, LIST

C<List::Utils::apply> CODEREF to LIST, operating in N-sized chunks. LIST must be evenly divisible by N.

See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names.

    my @uppercase = n_apply(
        3,
        sub { uc $::c }
        qw(goodnight sweet prince goodbye cruel world),
    );

    # @uppercase = qw(goodnight sweet PRINCE goodbye cruel WORLD);

Just as with C<n_map>, writing a small bit of gloss to make your N of n_apply work in a functional
manner is simple, and makes your code more readable:

    sub tri_apply (&@) { unshift @_, 3; goto &n_apply }

    my @uppercase =
        tri_apply { uc $::c }
        qw(goodnight sweet prince goodbye cruel world);

    # @uppercase = qw(goodnight sweet PRINCE goodbye cruel WORLD);

=head1 GRAB BAG

I like these functions, but they're decidedly different from everything up to this point. They
are mostly used to turn an existing hash reference or object into a smaller representation of
itself.

=head2 hash_slice_of HASHREF, LIST

Looks into HASHREF and extracts the key/value pairs of the keys named in LIST.
If a key in LIST is not present in HASHREF, returns undefined.

    my %hash = (1..10);

    my %slice = hash_slice_of \%hash, qw(5, 7, 9, 11);

    __END__
    %slice = (
        5 => 6,
        7 => 8,
        9 => 10,
        11 => undef,
    );

If you only want to get back key/value pairs for keys in LIST that exist in
HASHREF, just add a C<hashgrep>:

    my %hash = (1..10);

    my %slice =
        hashgrep { exists $hash{$a} }
        hash_slice_of \%hash, qw(5, 7, 9, 11);

    __END__
    %slice = (
        5 => 6,
        7 => 8,
        9 => 10,
    );

=head2 hash_slice_by OBJECT, LIST

Calls the methods named in LIST on OBJECT and returns a hash of the results.
If a method in LIST can not be performed on OBJECT, you will get the standard
"Can't call method ->... on object" error that Perl throws in this circumstance.

    my $object = ...;
    my %out = hash_slice_by $object, qw(foo bar baz);

    __END__
    %out = (
        foo => 'output of foo',
        bar => 'output of bar',
        baz => 'output of baz',
    );

Note that you may not use C<hash_slice_by> to pass arguments to the methods given
in LIST. Note too that your methods are invoked in scalar context.

=head2 rekey BLOCK HASH

Rename the keys in HASH by the mapping table provided by BLOCK. HASH may be a real
hash, or it may be an array that you are treating like a key/value store.

    my %hash = (crow => 'black', snow => 'white', libro => 'read all over');
    my %spanish = rekey { crow => 'corvino', snow => 'nieve' } %hash;

    __END__
    %spanish = (
        corvino => 'black',
        nieve   => 'white',
        libro   => 'read all over',
    );

=head2 revalue BLOCK HASH

Rename the values in HASH to the mapping table provided by BLOCK. HASH may be a real
hash, or it may be an array that you are treating like a key/value store.

    my @start = (apple => 'red', apple => 'green');
    my @translated = revalue { red => 'rojo', green => 'verde' } @start;

    __END__
    @translated = (
        apple => 'rojo',
        apple => 'verde',
    );

=head2 reindex BLOCK LIST

Reorder the values in LIST by the mapping table provided by BLOCK. LIST may be
either an array or a list. In general this function will not work on hashes.

    my @array = (1..5);
    my @reindexed = reindex { map { $_ => $_ + 1 } 0..$#array } @array;

    __END__
    @reindexed = (undef, 1..5);

=head1 ACKNOWLEDGEMENTS

The names and behaviors of most of these functions were initially developed at
AirWave Wireless, Inc. I've re-implemented them here.

This software would be trapped on my hard drive were it not for Logan Bell's encouragement to
release it. Separating the personal time I have put into this from the professional time afforded
by my employer, Shutterstock, Inc. would be very difficult. Thankfully I haven't needed to; when
I asked to share this, Dan McCormick simply said, "Go for it! Thanks for hacking."

=head1 COPYRIGHT AND LICENSE

    (c) 2013 by Belden Lyman

This library is free software: you may redistribute it and/or modify it under the same terms as Perl
itself; either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have
available.
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)