The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (C) 2004 Jörg Tiedemann  <joerg@stp.ling.uu.se>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#####################################################################
#
# Uplug::IO::PlugXML - bitext in PLUG XML
#
#####################################################################
# $Author$
# $Id$

package Uplug::IO::PlugXML;

use strict;
use vars qw(@ISA);
use vars qw(%StreamOptions);
use Uplug::IO::XML;
use Uplug::Data::Lang;
# use Uplug::IO::Any;

@ISA = qw( Uplug::IO::XML );

# stream options and their default values!!

%StreamOptions = ('DocRootTag' => 'PLUG',
		  'DocHeaderTag' => 'header',
		  'DocBodyTag' => 'corpus',
		  'root' => 'align',
		  'InternalEncoding' => 'iso-8859-1',
		  'DTDname' => 'PLUG',
		  'DTDsystemID' => 'plugXML.dtd',
		  'DefaultEncoding' => 'iso-8859-1',

#		  'encoding' => 'iso-8859-1',
#		  'DataStructure' => 'complex',
#		  'encoding' => $Uplug::IO::DEFAULTENCODING,
#		  'DTDpublicID' => '...'
		  );
$StreamOptions{DocRoot}{version}='1.0';

sub new{
    my $class=shift;
    my $self=$class->SUPER::new($class);
    &Uplug::IO::AddHash2Hash($self->{StreamOptions},\%StreamOptions);
    return $self;
}

sub read{
    my $self=shift;
    my ($data)=@_;

    if (not ref($data->{source})){$data->{source}=Uplug::Data::Lang->new();}
    if (not ref($data->{target})){$data->{target}=Uplug::Data::Lang->new();}

    if ($self->SUPER::read($data)){
	my @seg=$data->findNodes('seg');
	if (defined $seg[0]){
	    $seg[0]->setLabel('source');
	    $data->{source}->setRoot($seg[0]);
	}
	if (defined $seg[1]){
	    $seg[1]->setLabel('target');
	    $data->{target}->setRoot($seg[1]);
	}
	return 1;
    }
    return 0;
}

sub write{
    my $self=shift;
    my ($data)=@_;
    $self->Uplug::IO::write($data);
    my @seg=$data->getNodes('seg');
    if (not @seg){

	my ($source)=$data->getNodes('source');
	my ($target)=$data->getNodes('target');

	if (not ref($source)){return 0;}
	if (not ref($target)){return 0;}

	my $srcLang=$data->attribute($source,'lang');
	my $trgLang=$data->attribute($target,'lang');
	if (not $srcLang){$data->setAttribute($source,'lang','src');}
	if (not $trgLang){$data->setAttribute($target,'lang','trg');}

	$source->setTagName('seg');
	$target->setTagName('seg');

    }
    
    return $self->SUPER::write($data);
}


sub readFromHandle{
    my $self=shift;
    my $txt=$self->SUPER::readFromHandle(@_);
    if ($txt=~/\<align\s+id\=[^\"\']/){
	$txt=~s/(\<align\s+id\=)([^\"\'])/$1\'$2/;
    }
    if ($txt=~/\<\?xml/){
	if ($txt!~/encoding/){
	    $txt=~s/\?\>/ encoding=\"iso-8859-1\"\?\>/;
	}
    }
    my @ent=();
    while ($txt=~/\&([^\s\;]+)\;/g){
	if ($1 eq 'lt'){next;}
	if ($1 eq 'gt'){next;}
	if ($1 eq 'amp'){next;}
	push (@ent,$1);
    }
    foreach (@ent){
	print STDERR "replace \&$_\; with \[$_\]!\n";
	$txt=~s/\&$_\;/ [$_] /gs;
    }
    return $txt;
}