The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.


#!/usr/bin/perl
# -*- encoding: utf-8; mode: cperl -*-
package POE::Filter::Regexp;
use strict;
use vars qw($VERSION);

$VERSION='1.0';

sub new {
    my ($class,$re)=@_;
    $re ||= qr/\n/;
    die "Param in new must be a Regexp but this is a ".ref($re) unless ref $re eq 'Regexp';
    return bless [
		  '', # raw unparsed data
		  $re,
		 ], $class;
}


sub get {
    my ($self, $stream) = @_;
    my @ret;
    while($_=shift @$stream){
 	$self->[0].=$_;
	$_=''; #yah we'r cl'r mems !!!111 
	my $p=0;
	while($self->[0]=~/$self->[1]/gm) {
	    next unless $-[0]; #begin of stream
	    push @ret, substr($self->[0], $p, $-[0]-$p);
	    $p=$-[0];
	}
	substr($self->[0], 0, $p)='';
    }
    $self->[0]=''.$self->[0]; #Clean holes in string.
    return \@ret;
}


1;
__END__

=head1 NAME

    POE::Filter::Regexp - Fast spliting input stream
    
=head1 SYNOPSYS

use POE::Filter::ErrorProof;
my $wheel = POE::Wheel::ReadWrite->new(
    Driver => POE::Driver::SysRW->new( BlockSize =>16777216),
        #16M  OMG !
    Filter => POE::Filter::Regexp->new(qr/^HEAD:/),
        #each new chunk begins from /\nHEAD:/
);  

=head1 DESCRIPTION

This POE::Filter developed for reading large ammount of data from large files.
On each iteration POE reads BlockSize octets from stream and put it to filter, 
filter returns array of sliced chunks in one bundle, and InputEvent called for 
each of it without runing POE's internal loop doing anything else.
This construction blocks io but incredible fast on log processing, when main 
task of system - is parsing logs.
  

=head1 AUTHOR

Vany Serezhkin E<lt>ivan@serezhkin.comE<gt>


=cut