The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

use warnings;
use strict;

package Gwybodaeth::Write;

use Gwybodaeth::Escape;

=head1 NAME

Write::Write - Main class for applying maps to data.

=head1 SYNOPSIS

    use base qw(Write);

=head1 DESCRIPTION

This class is intended to be subclassed thus has no public methods bar new().

=over
=cut

use Carp qw(croak);
use XML::Twig;

# Allow output to be in utf8
binmode( STDOUT, ':utf8' );
binmode( STDERR, ':utf8' );

=item new()

Returns an instance of whichever class has subclassed Gwybodaeth::Write.;

=cut
 
sub new {
    my $class = shift;
    my $self = { ids => {}, Data => ""};
    $self->{XML} = XML::Twig->new(pretty_print => 'nice');
    bless $self, $class;
    return $self;
}

# Check cleanliness of input data 
sub _check_data {
    my $self        = shift;
    my $triple_data = shift;
    my $data        = shift;
    my $data_type   = shift;      # data type of $data;
    
    # Check $triple_data is the correct data type.
    unless (ref($triple_data) eq 'ARRAY') { 
        croak "expected array ref as first argument";
    }

    my $triples = ${ $triple_data }[0];
    my $functions = ${ $triple_data }[1]; 

    # Check that both array elements are the correct data types in 
    # $triple_data.
    unless (eval{ $triples->isa('Gwybodaeth::Triples') }) {
        croak 'expected a Gwybodaeth::Triples object as first argument of array';
    }
    unless (ref($functions) eq 'HASH') {
        croak 'expected a hash ref as second argument of array';
    }
    
    # Check $data is in the correct data type.
    unless (ref($data) eq $data_type) {
        croak "expected $data_type in the second array ref";
    }
    return 1;
}

sub _print2str {
    my $self = shift;
    my $str = shift;

    $self->{Data} .= $str;

    return 1;
}

sub _extract_field {
    my $self = shift;
    my $data = shift;
    my $field = shift;


    # The object is a specific field
    if ($field =~ m/^\"     # string's first char is a double quote
                    Ex:
                    \$      # $ sign
                    (       # start variable scope
                    [\:\w]+ # one or more word or colon chars
                    \/?     # possible forward slash
                    [\:\w]* # zero or more word or colon chars
                    )       # end variable scope
                    (       # start option scope
                    (\^\^|\@)   # ^^ or @
                    .*      # zero or more of any non \n chars
                    )?      # end option scope and make the scope 
                            # non essential
                    \"$     # string's last cha is a double quote
                    /x) { 
        # Remeber that _get_field() is often subclassed
        # so we can't assume what form of data it returns.
        return $self->_get_field($data,$1,$2);
    }
    # The object is a concatination of fields 
    if ($field =~ m/^[\"\<] # string's first char is a double quote
                            # or an opening angle bracket
                    Ex:
                    .*\+    # zero or more non \n char followed by a plus
                    /x) {
        return $self->_cat_field($data, $field);
    }
    if ($field =~ m/^\$     # string's first char is a doller sign
                    (       # start scope
                    [\:\w]+ # one or more word or colon chars
                    \/?     # possible forward slash
                    [\:\w]* # zero or more word or colon chars
                    )       # end scope
                    $/x) {
        return $self->_get_field($data,$1);
    } 
    if ($field =~ m/^\<     # string's first char is an opening angle bracket
                    Ex:
                    \$      # $ sign
                    (       # start scope
                    [\:\w]+ # one or more word or colon chars
                    \/?     # possible forward slash
                    [\:\w]* # zero or more word or colon chars
                    )       # close scope
                    \>$     # string's last char is a closing angle bracket
                    /x) {
        return $self->_get_field($data,$1);
    } 
    if ( $field =~ m/\@Split/x) {
        return $self->_split_field($data, $field);
    }
    
    # If it doesn't match any of the above, allow it to be a bareword field
    return "$field";
}

# Concatinate fields
sub _cat_field {
    my $self = shift;
    my $data = shift;
    (my $field = shift) =~ s/
                            # any char followed by Ex:
                            .Ex://x;

    my $string = qq{};

    my @values = split /\+/x, $field;

    for my $val (@values) {
        # Extract ${num} variables from data
        if ($val =~ m/\$    # $ sign
                    (       # start variable scope
                    [\:\w]+ # one or more word or colon characters
                    )       # end variable scope
                    /x) {
            $string .= $self->_get_field($data,$1);
        }
        # Put a space; 
        elsif ($val =~ m/\'\s*\' # single quoted zero or more whitespace char
                        /x) {
            $string .= " ";
        } 
        # Print a literal
        else {
            $string .= $val;
        }
    }
    return $string;
}

# How to interpret the @Split grammar
sub _split_field {
    my($self, $data, $field) = @_;

    my @strings;
    
    if ($field =~ m/\@Split # Split grammar
                    \(      # open bracket
                    Ex:
                    \$      # $ sign
                    (       # start variable scope
                    \d+     # one or more numeric character
                    )       # end variable scope
                    ,
                    "(.)"   # doublpe quoted any non \n char - delimeter 
                    \)      # close bracket
                    /x) {
        my $delimeter = $2;

        @strings = split /$delimeter/x, $self->_get_field($data,$1);
        return \@strings;
    }

    return $field;
}

sub _write_meta_data {
    my $self = shift;

    my $namespace = Gwybodaeth::NamespaceManager->new();
    my $name_hash = $namespace->get_namespace_hash();
    my $base = $namespace->get_base();

    $self->_print2str("<?xml version=\"1.0\"?>\n<rdf:RDF\n");
    for my $keys (keys %{ $name_hash }) {
        (my $key = $keys) =~ s/
                              # string ends in a colon
                              :$//x;
        next if ($key eq "");
        $self->_print2str("xmlns:$key=\"" . $name_hash->{$keys} . "\"\n");
    }
    if (${ $base }) {
        $self->_print2str("xml:base=\"${ $base }\"\n");
    }
    $self->_print2str(">\n");
    
    return 1;
}

sub _write_triples {
    my ($self,@vars) = @_;
    return $self->_really_write_triples(@vars);
}

sub _really_write_triples {
    my ($self, $row, $triples, $id) = @_;

    for my $triple_key ( keys %{ $triples } ) {

        my $subject = $self->_if_parse($triple_key,$row);
        $self->_print2str("<".$subject);
        if ($id) {
            chomp(my $id_text = $self->_extract_field($row,$id));
            if (ref($id_text) eq 'ARRAY') {
                for my $obj (@{ $id_text }) {
                    $self->_print2str($self->_about_or_id($obj));
                }
            } else {
                $self->_print2str($self->_about_or_id($id_text));
            }
            $self->_print2str('"');
        } 
        $self->_print2str(">\n");

        my @verbs = @{ $triples->{$triple_key}{'predicate'} };
        for my $indx (0..$#verbs ) {
            $self->_get_verb_and_object(
                                $verbs[$indx],
                                $triples->{$triple_key}{'obj'}[$indx],
                                $row);
        }
        $self->_print2str("</".$subject.">\n");
    }
    return;
}

sub _get_verb_and_object {
    my($self, $verb, $object, $row) = @_;

    my $obj_text = "";
    unless ( eval{ $object->isa('Gwybodaeth::Triples') } ) {
        $obj_text = $self->_get_object($row, $object);
    }

    if (ref($obj_text) eq 'ARRAY') {
        for my $obj (@{ $obj_text }) {
            $self->_print_verb_and_object($verb, $obj, $row, $object);
        }
    } else {
        $self->_print_verb_and_object($verb, $obj_text, $row, $object);
    }
    return 1;
}

sub _print_verb_and_object {
    my ($self, $verb, $object, $row, $unparsed_obj) = @_;
    my $esc = Gwybodaeth::Escape->new();

    my $predicate = $self->_if_parse($verb,$row);
    my $obj="";
    $self->_print2str("<" . $predicate );

    if ( $unparsed_obj =~ m/\<  # opening angle bracket 
                            Ex:
                            \$  # $ sign
                            \w+ # one or more word chars
                            \/? # a possible forward slash
                            \w* # zero or more word chars
                            \>$ # string ends with a closing angle brackt
                            /x ) {
        # We have a reference
        $self->_print2str(' rdf:resource="#');
        my $parsed_obj = $self->_get_object($row,$unparsed_obj);
        if (ref($parsed_obj) eq 'ARRAY') {
            for my $obj (@{ $parsed_obj }) {
                $self->_print2str($esc->escape($obj));
            }
        } else {
            $obj = $esc->escape($parsed_obj);
            $self->_print2str($obj);
        }
        $self->_print2str("\"/>\n");
    } else {
        $self->_print2str(">");
        if (eval{$unparsed_obj->isa('Gwybodaeth::Triples')}) {
            $obj =  $esc->escape($self->_get_object($row,$unparsed_obj));
            $self->_print2str($obj);
        } else {
            $obj = $esc->escape($self->_get_object($row,$object));
            $self->_print2str($obj);
        }
        $self->_print2str("</" . $predicate . ">\n");
    }
    return 1;
}

sub _get_object {
    my($self, $row, $object) = @_;

    if (eval {$object->isa('Gwybodaeth::Triples')}) {
        $self->_write_triples($row, $object);
    } else {
        return $self->_extract_field($row, $object);
    }
    return "";
}

sub _about_or_id {
    my($self, $text) = @_;

    if ($text =~ /\s/x or $text =~ /[^A-Z]+ # one or more non capital letters/x)
    { 
        $self->_print2str(' rdf:about="#');
    } else {
        $self->_print2str(' rdf:ID="');
    }
    return $text;
}
sub _if_parse {
    my($self, $token, $row) = @_;

    if ($token =~ m/\@If
                    \(      # open bracket
                    (       # start question scope
                    .+      # one or more non \n char
                    )       # end question scope
                    \;
                    (       # start 'true' scope
                    .+      # one or more non \n char
                    )       # end 'true' scope
                    \;
                    (       # start 'false' scope
                    .+
                    )       # end 'false scope
                    \)      # close bracket
                    /ix) {
        my($question,$true,$false) = ($1, $2, $3);

        $true =~ s/\'//gx;
        $false =~ s/\'//gx;

        my @q_split = split q{=}, $question;

        $q_split[0] =~ s/\'//gx;
        $q_split[1] =~ s/\'//gx;

        my $ans = qq{};
        if ($token =~ m/\<  # opening angle bracket
                        Ex
                        \:  # a colon
                        (   # start scope
                        .+  # one or more non \n chars
                        \+  # a plus sign
                        )   # end scope
                        \@If/ix ) {
            ($ans .= $1) =~ s/\+//gx;
            $ans .= qq{:};
        }

        if ($q_split[0] =~ m/^\$    # first char of the string is a $
                            (\w+)   # one or more word characters scoped
                                    # as the field
                            /x) {
            $q_split[0] = $self->_get_field($row,$1);
        }

        # If the returned field is an ARRAY join the elements
        # into one scalar string.
        if (ref($q_split[0]) eq 'ARRAY') {
            $q_split[0] = join ' ', @{ $q_split[0] };
        }

        if ($q_split[0] eq $q_split[1]) {
            $ans .= $true;
        } else {
            $ans .= $false;
        }
        $token = $ans;
    }
    return $token;
}

# Structure the serialized data string into an XML::Twig object.
sub _structurize {
    my $self = shift;

    my $twig = $self->{XML};

    my $xml = $self->{Data};

    $twig->safe_parse($xml);

    return $self->_set_datatype($twig);
}

sub _set_datatype {
    my($self, $twig) = @_;

    my $elt = $twig->root;
    while( $elt = $elt->next_elt($twig->root) ) {
        if ($elt->text_only =~ m/(  # begin text scope
                                .+  # one or more of any non \n character
                                )   # end text scope
                                \^\^# matches ^^
                                (   # begin datatype scope
                                \w+ # one ore more word character
                                )   # end datatype scope
                                $   # end of string/x ) {
           $elt->set_text($1);
           $elt->set_att(
                 'rdf:datatype' => "http://www.w3.org/TR/xmlschema-2/#".$2
           );
        } 
        elsif ($elt->text_only =~ m/
                                (   # begin text scope
                                .+  # one or more of any non \n character
                                )   # end text scope
                                \@  # 'at' symbol
                                (   # begin lang scope
                                \w+ # one or more word characters
                                )   # end of lang scope
                                $   # end of string/x ) {
            $elt->set_text($1);
            $elt->set_att(
                    'xml:lang' => $2
            );
        }
    }

    return $twig;
}
1;
__END__
=back

=head1 AUTHOR

Iestyn Pryce, <imp25@cam.ac.uk>

=head1 ACKNOWLEDGEMENTS

I'd like to thank the Ensemble project (L<www.ensemble.ac.uk>) for funding me to work on this project in the summer of 2009.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2009 Iestyn Pryce <imp25@cam.ac.uk>

This library is free software; you can redistribute it and/or modify it under
the terms of the BSD license.