# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Treex::Block::Read::BaseCoNLLReader;
$Treex::Block::Read::BaseCoNLLReader::VERSION = '0.13095';
use strict;
use warnings;
use Moose;
use Treex::Core::Common;
# Built on the generic text reader; this class defines its own
# next_document_text (below), which delimits documents by counting
# empty lines instead of raw input lines.
extends 'Treex::Block::Read::BaseTextReader';

# Return the raw text of the next document.
#
# When is_one_doc_per_file is true, the whole next file (as delivered by
# $self->from->next_file_text) is returned unchanged.
#
# Otherwise lines are pulled one at a time from $self->from and
# concatenated. Every line consisting solely of whitespace is counted;
# as soon as that count equals lines_per_doc the text gathered so far is
# returned, without the final blank line itself. Earlier blank lines
# stay in the text.
#
# Returns nothing (bare return) when no text was gathered and
# $self->from reports that there is no further file; otherwise returns
# the gathered string, which may be shorter than lines_per_doc sentences.
sub next_document_text {
    my ($self) = @_;

    if ( $self->is_one_doc_per_file ) {
        return $self->from->next_file_text();
    }

    my $document     = '';
    my $blanks_seen  = 0;

    # next_line yielding undef ends the loop
    # (presumably end of the current input -- confirm against the Files class).
    while ( defined( my $row = $self->from->next_line() ) ) {
        if ( $row =~ m/^\s*$/ ) {
            $blanks_seen++;

            # The closing blank line of the last sentence is dropped on purpose:
            # we return before it gets appended.
            return $document if $blanks_seen == $self->lines_per_doc;
        }
        $document .= $row;
    }

    # Input ran dry: signal "no more documents" only if nothing was read
    # and no further file is waiting.
    return if $document eq '' && !$self->from->has_next_file();
    return $document;
}


1;

__END__

=head1 NAME

Treex::Block::Read::BaseCoNLLReader

=head1 VERSION

version 0.13095

=head1 DESCRIPTION

Base class for reading CoNLL-like files (with one token per line, sentences
separated by empty lines).

=head1 ATTRIBUTES

=over

=item from

space- or comma-separated list of filenames

=item lines_per_doc

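number of sentences (!) per document; despite the attribute's name, the
reader counts the empty (or whitespace-only) lines that end sentences,
not raw input lines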

=back

=head1 AUTHORS

David Mareček <marecek@ufal.mff.cuni.cz>

Dan Zeman <zeman@ufal.mff.cuni.cz>

Ondřej Dušek <odusek@ufal.mff.cuni.cz>

=head1 COPYRIGHT AND LICENSE

Copyright © 2013 by Institute of Formal and Applied Linguistics, Charles University in Prague

This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.