The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Catmandu::Fix::marc_map;

use Catmandu::Sane;
use Catmandu::Util qw(:is :data);
use Data::Dumper;
use Moo;

has path  => (is => 'ro', required => 1);
has key   => (is => 'ro', required => 1);
has mpath => (is => 'ro', required => 1);
has opts  => (is => 'ro');

around BUILDARGS => sub {
    my ($orig, $class, $mpath, $path, %opts) = @_;
    my ($p,$key) = parse_data_path($path) if defined $path && length $path;
    $orig->($class, path => $p, key => $key, mpath => $mpath, opts => \%opts);
};

sub fix {
    my ($self, $data) = @_;

    my $path  = $self->path;
    my $key   = $self->key;
    my $mpath = $self->mpath;
    my $opts  = $self->opts || {};
    $opts->{-join} = '' unless $opts->{-join};
    
    my $marc_pointer = $opts->{-record} || 'record';
    my $marc  = $data->{$marc_pointer}; 

    my $fields = &marc_field($marc,$mpath);

    return $data if !@{$fields};

    my $match = [ grep ref, data_at($path, $data, key => $key, create => 1)]->[0];

    for my $field (@$fields) {
       my $field_value = &marc_subfield($field,$mpath);

       next if &is_empty($field_value);

       $field_value = [$opts->{-value}] if defined $opts->{-value};
       $field_value = join $opts->{-join} , @$field_value if defined $opts->{-join};
       $field_value = &create_path($opts->{-in},$field_value) if defined $opts->{-in};
       $field_value = &path_substr($mpath,$field_value) unless index($mpath,'/') == -1;

       if (is_array_ref($match)) {
          if (is_integer($key)) {
             $match->[$key] = $field_value;
          }
          else {
             push @{$match}, $field_value;
          }
       }
       else {
          if (exists $match->{$key}) {
             $match->{$key} .= $opts->{-join} . $field_value;
          }
          else {
             $match->{$key} = $field_value;
          }
       } 
    }

    $data;
}

sub is_empty {
    my ($ref) = shift;
    for (@$ref) {
      return 0 if defined $_;
    }
    return 1;
}

sub path_substr {
    my ($path,$value) = @_;
    return $value unless is_string($value);
    if ($path =~ /\/(\d+)(-(\d+))?/) {
      my $from = $1;
      my $to   = defined $3 ? $3-$from+1 : 0;
      return substr($value,$from,$to);
    }
    return $value;
}

sub create_path {
    my ($path, $value) = @_;
    my ($p,$key,$guard) = parse_data_path($path);
    my $leaf  = {};
    my $match = [ grep ref, data_at($p, $leaf, key => $key, guard => $guard, create => 1) ]->[0];
    $match->{$key} = $value;
    $leaf;
}

# Parse a marc_path into parts
#  245[1,2]abd  - field=245, ind1=1, ind2=2, subfields = a,d,d
#  008/33-35    - field=008 from index 33 to 35
sub parse_marc_path {
    my $path = shift;

    if ($path =~ /(\S{3})(\[(.)?,?(.)?\])?([_a-z0-9]+)?(\/(\d+)(-(\d+))?)?/) {
        my $field    = $1;
        my $ind1     = $3;
        my $ind2     = $4;
        my $subfield = $5 ? "[$5]" : "[a-z0-9_]";
        my $from     = $7;
        my $to       = $9;
        return { field => $field , ind1 => $ind1 , ind2 => $ind2 , subfield => $subfield , from => $from , to => $to };
    }
    else {
        return {};
    }
}

# Given an Catmandu::Importer::MARC item return all the field value
# that match the MARC path $path
# Usage: marc_value($data,'245[a]',-join=>' ');
sub marc_value {
    my ($marc_item,$path,$opts) = @_;
    my $marc_path = &parse_marc_path($path);

    my $join    = $opts->{-join} || ' ';
    my @results = ();

    my $subfields = &marc_field($marc_item,$marc_path->{field});

    for my $arr (@$subfields) {
      my $res;
      my $matched = &marc_subfield($arr,$marc_path->{subfield});      
      if (@$matched) {
         $res = join $join , @$matched;
      }
      else {
         $res = undef;
      }
      push(@results, $res);
    }

    return \@results;
}

# Given a Catmandu::Importer::MARC item return for each matching field the
# array of subfields
# Usage: marc_field($data,'245');
sub marc_field {
    my ($marc_item,$path) = @_;
    my $marc_path = &parse_marc_path($path);
    my @results = ();

    my $field = $marc_path->{field};
    $field =~ s/\*/./g;

    for (@$marc_item) {
      my ($tag,$ind1,$ind2,@subfields) = @$_;
      unless ($tag =~ /^00/ || $tag eq 'LDR') {
        splice(@subfields,0,2);
      }
      push(@results,\@subfields) if $tag =~ /$field/;
    }

    return \@results;
}

# Given a subarray of Catmandu::Importer::MARC subfields return all
# the subfields that match the $subfield regex
# Usage: marc_subfield($subfields,'[a]');
sub marc_subfield {
    my ($subfields,$path) = @_;
    my $marc_path = &parse_marc_path($path);
    my $regex = $marc_path->{subfield};

    my @results = ();

    for (my $i = 0 ; $i < @$subfields ; $i += 2) {
      my $code = $subfields->[$i];
      my $val  = $subfields->[$i+1];
      push(@results,$val) if $code =~ /$regex/;   
    }
   
    return \@results;
}

1;

=head1 NAME

Catmandu::Fix::marc_map - copy marc values of one field to a new field

=head1 SYNOPSIS

    # Copy all 245 subfields into the my.title hash
    marc_map('245','my.title');

    # Copy the 245-$a$b$c subfields into the my.title hash
    marc_map('245abc','my.title');

    # Copy the 100 subfields into the my.authors array
    marc_map('100','my.authors.$append');
    
    # Add the 710 subfields into the my.authors array
    marc_map('710','my.authors.$append');

    # Copy the 600-$x subfields into the my.subjects array while packing each into a genre.text hash
    marc_map('600x','my.subjects.$append', -in => 'genre.text');

    # Copy the 008 characters 35-35 into the my.language hash
    marc_map('008_/35-35','my.language');

    # Copy all the 600 fields into a my.stringy hash joining them by '; '
    marc_map('600','my.stringy', -join => '; ');

    # When 024 field exists create the my.has024 hash with value 'found'
    marc_map('024','my.has024', -value => 'found');

    # Do the same examples now with the marc fields in 'record2'
    marc_map('245','my.title', -record => 'record2');

=cut