# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Block that normalizes some word forms (quotation marks, dashes) on a-nodes;
# the work is done node by node in process_anode.
package Treex::Block::W2A::EN::NormalizeForms;
# The package version is assigned inside BEGIN, i.e. at compile time.
BEGIN {
  $Treex::Block::W2A::EN::NormalizeForms::VERSION = '0.08171';
}
use utf8;    # this file contains literal non-ASCII characters in its regexes
use Moose;
use Treex::Core::Common;
# This class is a subclass of the generic Treex block.
extends 'Treex::Core::Block';

# Ordered rewrite rules applied to every form. Each entry pairs a pattern with
# the text that replaces all of its occurrences; the rules run top to bottom.
my @FORM_REWRITES = (
    [ qr/’/, q{'} ],
    [ qr/"/, q{''} ],
    [ qr/“/, q{``} ],
    [ qr/”/, q{''} ],
    [ qr/«/, q{``} ],
    [ qr/»/, q{''} ],
    [ qr/—/, q{--} ],
);

# Normalizes the form of a single a-node in place.
#
# Arguments: the block object (unused here) and the a-node to process.
# Side effect: the node's form is overwritten via set_form.
# Returns:     always 1.
sub process_anode {
    my ( $self, $anode ) = @_;

    my $normalized = $anode->form;

    # A form that is exactly one ASCII double quote is turned into an opening
    # quote (``) when the node has no previous node, when the previous form is
    # a single opening bracket, or when the previous node's no_space_after is
    # false. Otherwise it is left alone here and the rewrite rules below turn
    # it into a closing quote ('').
    if ( $normalized eq q{"} ) {
        my $left_neighbour = $anode->get_prev_node();
        my $opens_quotation
            = !$left_neighbour                      ? 1
            : $left_neighbour->form =~ /^[\(\[{<]$/ ? 1
            :                                         !$left_neighbour->no_space_after;
        $normalized = q{``} if $opens_quotation;
    }

    # Apply every rewrite rule globally, in table order.
    for my $rewrite (@FORM_REWRITES) {
        my ( $pattern, $replacement ) = @{$rewrite};
        $normalized =~ s/$pattern/$replacement/g;
    }

    $anode->set_form($normalized);

    return 1;
}

1;

__END__

=encoding utf-8

=head1 NAME

Treex::Block::W2A::EN::NormalizeForms - normalize some wordforms

=head1 VERSION

version 0.08171

=head1 DESCRIPTION

Some forms are normalized, for example all the quotation marks
get the normalized form `` or ''.

=head1 OVERRIDDEN METHODS

=head2 from C<Treex::Core::Block>

=over 4

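=item process_anode

Rewrites the form of the given a-node: typographic double quotation marks,
guillemets and the ASCII double quote become C<< `` >> or C<< '' >>, the right
single quotation mark becomes C<< ' >>, and the em dash becomes C<< -- >>.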

=back

=head1 AUTHOR

David Mareček <marecek@ufal.mff.cuni.cz>

=head1 COPYRIGHT AND LICENSE

Copyright © 2010 - 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague

This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.