The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Catmandu::Fix::marc_spec;

use Catmandu::Sane;
use Catmandu::MARC;
use Moo;
use Catmandu::Fix::Has;

with 'Catmandu::Fix::Base';

our $VERSION = '1.171';

has spec          => ( fix_arg=> 1 );
has path          => ( fix_arg=> 1 );
has split         => ( fix_opt=> 1 );
has join          => ( fix_opt=> 1 );
has value         => ( fix_opt=> 1 );
has pluck         => ( fix_opt=> 1 );
has invert        => ( fix_opt=> 1 );
has nested_arrays => ( fix_opt=> 1 );

sub emit {
    my ( $self, $fixer ) = @_;
    my $path         = $fixer->split_path( $self->path );
    my $key          = $path->[-1];
    my $marc_obj     = Catmandu::MARC->instance;

    # Precompile the marc_path to gain some speed
    my $spec         = $marc_obj->parse_marc_spec( $self->spec );
    my $marc         = $fixer->capture($marc_obj);
    my $marc_spec    = $fixer->capture($spec);
    my $marc_opt     = $fixer->capture({
                            '-join'        => $self->join   // '' ,
                            '-split'       => $self->split  // 0 ,
                            '-pluck'       => $self->pluck  // 0 ,
                            '-nested_arrays' => $self->nested_arrays // 0 ,
                            '-invert'      => $self->invert // 0 ,
                            '-value'       => $self->value ,
                            '-force_array' => ($key =~ /^(\$.*|[0-9]+)$/) ? 1 : 0
                        });

    my $var          = $fixer->var;
    my $result       = $fixer->generate_var;
    my $current_value = $fixer->generate_var;

    my $perl = "";
    $perl .= $fixer->emit_declare_vars($current_value, "[]");
    $perl .=<<EOF;
if (defined(my ${result} = ${marc}->marc_spec(
            ${var},
            ${marc_spec},
            ${marc_opt})) ) {
    ${result} = ref(${result}) ? ${result} : [${result}];
    for ${current_value} (\@{${result}}) {
EOF

    $perl .= $fixer->emit_create_path(
            $var,
            $path,
            sub {
                my $var2 = shift;
                "${var2} = ${current_value}"
            }
    );

    $perl .=<<EOF;
    }
}
EOF
    $perl;
}

1;

__END__

=encoding utf-8

=head1 NAME

Catmandu::Fix::marc_spec - reference MARC values via
L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>

=head1 SYNOPSIS

In a fix file e.g. 'my.fix':

    # Assign value of MARC leader to my.ldr.all
    marc_spec('LDR', my.ldr.all)

    # Assign values of all subfields of field 245 as a joined string
    marc_spec('245', my.title.all)

    # If field 245 exists, set string 'the title' as the value of my.title.default
    marc_spec('245', my.title.default, value:'the title')

    # Assign values of all subfields of every field 650 to my.subjects.all
    # as a joined string
    marc_spec('650', my.subjects.all)

    # Same as above with joining characters '###'
    marc_spec('650', my.subjects.all, join:'###')

    # Same as above but added as an element to the array my.append.subjects
    marc_spec('650', my.append.subjects.$append, join:'###')

    # Every value of a subfield will be an array element
    marc_spec('650', my.split.subjects, split:1)

    # Assign values of all subfields of all fields having indicator 1 = 1
    # and indicator 2 = 0 to the my.fields.indicators10 array.
    marc_spec('..._10', my.fields.indicators10.$append)

    # Assign first four characters of leader to my.firstcharpos.ldr
    marc_spec('LDR/0-3', my.firstcharpos.ldr)

    # Assign last four characters of leader to my.lastcharpos.ldr
    marc_spec('LDR/#-3', my.lastcharpos.ldr)

    # Assign value of subfield a of field 245 to my.title.proper
    marc_spec('245$a', my.title.proper)

    # Assign first two characters of subfield a of field 245 to my.title.proper
    marc_spec('245$a/0-1', my.title.charpos)

    # Assign all subfields of second field 650 to my.second.subject
    marc_spec('650[1]', my.second.subject)

    # Assign values of all subfields of last field 650 to my.last.subject
    marc_spec('650[#]', my.last.subject)

    # Assign an array of values of all subfields of the first two fields 650
    # to my.two.split.subjects
    marc_spec('650[0-1]', my.two.split.subjects, split:1)

    # Assign a joined string of values of all subfields of the last two fields 650
    # to my.two.join.subjects
    marc_spec('650[#-1]', my.two.join.subjects, join:'###')


    # Assign value of first subfield a of all fields 020 to my.isbn.number
    marc_spec('020$a[0]', my.isbn.number)

    # Assign value of first subfield q of first field 020 to my.isbn.qual.one
    marc_spec('020[0]$q[0]', my.isbn.qual.none)

    # Assign values of subfield q and a in the order stated as an array
    # to  my.isbns.pluck.all
    # without option 'pluck:1' the elments will be in 'natural' order
    # see example below
    marc_spec('020$q$a', my.isbns.pluck.all, split:1, pluck:1)

    # Assign value of last subfield q and second subfield a
    # in 'natural' order of last field 020 as an array to my.isbn.qual.other
    marc_spec('020[#]$q[#]$a[1]', my.isbn.qual.other, split:1)

    # Assign first five characters of value of last subfield q and last character
    # of value of second subfield a in 'natural' order of all fields 020
    # as an array to  my.isbn.qual.substring.other
    marc_spec('020$q[#]/0-4$a[1]/#', my.isbn.qual.substring.other, split:1)

    # Assign values of of all other subfields than a of field 020
    # to my.isbn.other.subfields
    marc_spec('020$a' my.isbn.other.subfields, invert:1)

    # Assign value of subfield a of field 245 only, if subfield a of field 246
    # with value 1 for indicator1 exists
    marc_spec('245$a{246_1$a}', my.var.title)

And then on command line:

    catmandu convert MARC to YAML --fix my.fix < perl_books.mrc

See L<Catmandu Importers|http://librecat.org/Catmandu/#importers> and
L<Catmandu Fixes|http://librecat.org/Catmandu/#fixes> for a deeper
understanding of how L<Catmandu|http://librecat.org/> works.

=head1 DESCRIPTION

L<Catmandu::Fix::marc_spec|Catmandu::Fix::marc_spec> is a fix for the
famous L<Catmandu Framework|Catmandu>.

For the most part it behaves like
L<Catmandu::Fix::marc_map|Catmandu::Fix::marc_map> , but has a more fine
grained method to reference MARC data content.

See L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>
for documentation on the path syntax.

=head1 METHODS

=head2 marc_spec(MARCspec, JSON_PATH, OPT:VAL, OPT2:VAL,...)

First parameter must be a string, following the syntax of
L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>.
Do always use single quotes with this first parameter.

Second parameter is a string describing the variable or the variable path
to assign referenced values to
(see L<Catmandu Paths|http://librecat.org/Catmandu/#paths>).

You may use one of $first, $last, $prepend or $append to add
referenced data values to a specific position of an array
(see L<Catmandu Wildcards|http://librecat.org/Catmandu/#wildcards> and
mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>).

    # INPUT
    [245,1,0,"a","Cross-platform Perl /","c","Eric F. Johnson."]

    # CALL
    marc_spec('245', my.title.$append)

    # OUTPUT
    {
      my {
        title [
            [0] "Cross-platform Perl /Eric F. Johnson."
        ]
      }

    }

Third and every other parameters are optional and must
be in the form of key:value (see L</"OPTONS"> for a deeper
understanding of options).

=head1 OPTIONS

=head2 split: 0|1

If split is set to 1, every fixed fields value or every subfield will be
an array element.

    # INPUT
    [650," ",0,"a","Perl (Computer program language)"],
    [650," ",0,"a","Web servers."]

    # CALL
    marc_spec('650', my.subjects, split:1)

    # OUTPUT
    {
      my {
        subjects [
            [0] "Perl (Computer program language)",
            [1] "Web servers."
        ]
      }
    }

See split mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>.


=head2 nested_arrays: 0|1

Using the nested_array
option the output will be an array of array of strings (one array item for
each matched field, one array of strings for each matched subfield).

    # INPUT
    [650," ",0,"a","Perl (Computer program language)"],
    [650," ",0,"a","Web servers."]

    # CALL
    marc_spec('650', my.subjects, nested_arrays:1)

    # OUTPUT
    {
      my {
        subjects [
            [0] [
                [0] "Perl (Computer program language)"
            ]
            [1] [
                [0] "Web servers."
            ]
        ]
      }
    }

See nested_array mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>.

=head2 join: Str

If set, value of join will be used to join the referenced data content.
This will only have an effect if option split is undefined (not set or set to 0).

    # INPUT
    [650," ",0,"a","Perl (Computer program language)"],
    [650," ",0,"a","Web servers."]

    # CALL
    marc_spec('650', my.subjects, join:'###')

    # OUTPUT
    {
      my {
        subjects "Perl (Computer program language)###Web servers."
      }
    }

=head2 pluck: 0|1

This has only an effect on subfield values. By default subfield reference
happens in 'natural' order (first number 0 to 9 and then letters a to z).

    # INPUT
    ["020"," ", " ","a","0491001304","q","black leather"]

    # CALL
    marc_spec('020$q$a', my.isbn, split:1)

    # OUTPUT
    {
      my {
        isbn [
            [0] 0491001304,
            [1] "black leather"
        ]
      }
    }


If pluck is set to 1, values will be referenced by the order stated in the
MARCspec.

    # INPUT
    ["020"," ", " ","a","0491001304","q","black leather"]

    # CALL
    marc_spec('020$q$a', my.plucked.isbn, split:1, pluck:1)

    # OUTPUT
    {
      my {
        isbn [
            [0] "black leather",
            [1] 0491001304
        ]
      }
    }

=head2 value: Str

If set to a value, this value will be assigned to $var if MARCspec references
data content (if the field or subfield exists).

In case two or more subfields are referenced, the value will be assigned to $var if
at least one of them exists:

    # INPUT
    ["020"," ", " ","a","0491001304"]

    # CALL
    marc_spec('020$a$q', my.isbn, value:'one subfield exists')

    # OUTPUT
    {
      my {
        isbn "one subfield exists"
      }
    }

=head2 invert: 0|1

This has only an effect on subfields (values). If set to 1 it will invert the
last pattern for every subfield. E.g.

   # references all subfields but not subfield a and q
   marc_spec('020$a$q' my.other.subfields, invert:1)

   # references all subfields but not subfield a and not the last repetition
   # of subfield q
   marc_spec('020$a$q[#]' my.other.subfields, invert:1)

   # references all but not the last two characters of first subfield a
   marc_spec('020$a[0]/#-1' my.other.subfields, invert:1)

Invert will not work with subspecs.

=head1 INLINE

This Fix can be used inline in a Perl script:

    use Catmandu::Fix::marc_spec as => 'marc_spec';

    my $data = { record => [...] };

    $data = marc_spec($data,'245$a','title');

    print $data->{title} , "\n";

=head1 SEE ALSO

L<Catmandu::Fix>
L<Catmandu::Fix::marc_map>

=head1 AUTHOR

Carsten Klee E<lt>klee@cpan.orgE<gt>

=head1 CONTRIBUTORS

=over

=item * Johann Rolschewski, C<< <jorol at cpan> >>,

=item * Patrick Hochstenbach, C<< <patrick.hochstenbach at ugent.be> >>,

=item * Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>

=back

=head1 LICENSE AND COPYRIGHT

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 SEE ALSO

=over

=item * L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>

=item * L<Catmandu|http://librecat.org/>

=item * L<Catmandu Importers|http://librecat.org/Catmandu/#importers>

=item * L<Catmandu Importers|http://librecat.org/Catmandu/#importers>

=item * L<Catmandu Fixes|http://librecat.org/Catmandu/#fixes>

=item * L<Catmandu::MARC::Fix::marc_map|Catmandu::MARC::Fix::marc_map>

=item * L<Catmandu Paths|http://librecat.org/Catmandu/#paths>

=item * L<Catmandu Wildcards|http://librecat.org/Catmandu/#wildcards>

=item * L<MARC::Spec|MARC::Spec>

=item * L<Catmandu::Fix|Catmandu::Fix>

=item * L<Catmandu::MARC|Catmandu::MARC>

=back

=cut