The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Catmandu::Fix::Bind::hashmap;

use Catmandu::Sane;

our $VERSION = '1.0301';

use Moo;
use Catmandu::Util qw(:is);
use namespace::clean;
use Catmandu::Fix::Has;

with 'Catmandu::Fix::Bind';

has exporter   => (fix_opt => 1);
has store      => (fix_opt => 1);
has uniq       => (fix_opt => 1, default => sub {0});
has count      => (fix_opt => 1);
has join       => (fix_opt => 1);
has extra_args => (fix_opt => 'collect');
has flag       => (is      => 'rw', default => sub {0});
has hash       => (is      => 'lazy');

sub _build_hash {
    +{};
}

sub add_to_hash {
    my ($self, $key, $val) = @_;
    if ($self->count) {
        $self->hash->{$key} += 1;
    }
    elsif ($self->uniq) {
        $self->hash->{$key}->{$val} = 1;
    }
    else {
        push @{$self->hash->{$key}}, $val;
    }
}

sub bind {
    my ($self, $data, $code, $name, $fixer) = @_;

    return if $self->flag;

    $data = $fixer->fix($data);

    my $key   = $data->{key};
    my $value = $data->{value};

    if (defined $key) {
        if (is_string($key)) {
            $self->add_to_hash($key, $value);
        }
        elsif (is_array_ref($key)) {
            for (@$key) {
                $self->add_to_hash($_, $value);
            }
        }
        else {
            warn "$key is not a string or array for $value";
        }
    }

    $self->flag(1);

    $data;
}

sub result {
    my ($self, $mvar) = @_;

    $self->flag(0);

    $mvar;
}

sub DESTROY {
    my ($self) = @_;
    my $h = $self->hash;
    my $e;
    my $args = $self->extra_args // {};

    if ($self->store) {
        $e = Catmandu->store($self->store, %$args);
    }
    elsif ($self->exporter) {
        $e = Catmandu->exporter($self->exporter, %$args);
    }
    else {
        $e = Catmandu->exporter('JSON', line_delimited => 1);
    }

    my $sorter = $self->count ? sub {$h->{$b} <=> $h->{$a}} : sub {$a cmp $b};

    my $id = 0;
    for (sort $sorter keys %$h) {
        my $v;

        if ($self->count) {
            $v = $h->{$_};
        }
        elsif ($self->uniq) {
            $v = [sort keys %{$h->{$_}}];
        }
        else {
            $v = $h->{$_};
        }

        if (is_array_ref($v) && $self->join) {
            $v = join $self->join, @$v;
        }

        $e->add({_id => $_, value => $v});
    }

    $e->commit;
}

1;

__END__

=pod

=head1 NAME

Catmandu::Fix::Bind::hashmap - a binder to add key/value pairs to an internal hashmap

=head1 SYNOPSIS

 # Find all ISBN in a stream
 do hashmap(exporter: JSON, join: ',')
    copy_field(isbn,key)
    copy_field(_id,value)
 end

 # will export to the YAML exporter a hash map containing all isbn occurrences in the stream

 { "_id": "ISBN1" , "value": "0121,12912,121" }
 { "_id": "ISBN2" , "value": "102012" }

 # Count the number of ISBN occurrences in a stream
 # File: count.fix:
 do hashmap(count: 1)
    copy_field(isbn,key)
 end

 # Use the Null exporter to suppress the normal output
 $ cat /tmp/data.json | catmandu convert JSON --fix count.fix to Null

=head1 DESCRIPTION

The hashmap binder will insert all key/value pairs given to a internal hashmap that can be exported
using an Catmandu::Exporter.

If the key is an ARRAY, then multiple key/value pairs will be inserted into the hashmap.

By default all the values will be added as an array to the hashmap. Every key will have one
or more values.

=head1 CONFIGURATION

=head2 exporter: EXPORTER

The name of an exporter to send the results to. Default: JSON  Extra parameters can be added:

    do hashmap(exporter: JSON, file:/tmp/data.json, count: 1)
      ...
    end

=head2 store: STORE

Send the output to a store instead of an exporter. Extra parameters can be added:

    do hashmap(store: MongoDB, database_name: test, bag: data, count: 1)
      ...
    end

=head2 uniq: 0|1

All the values for the a key will be unique.

=head2 join: CHAR

Join all the values of a key using a delimiter.

=head2 count: 0|1

Don't store the values only count the number of key occurrences.

=head1 SEE ALSO

L<Catmandu::Fix::Bind>

=cut