The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WebSource::Extract::regexp;
use strict;
use WebSource::Parser;
use Carp;

our @ISA = ('WebSource::Module');

=head1 NAME

WebSource::Extract::regexp - Extract information using a regular expression

=head1 DESCRIPTION

The regexp B<Extract> operator allows to extract information based on a regular
expression.



=head1 METHODS

See WebSource::Module

=cut

sub _init_ {
  my $self = shift;
  $self->SUPER::_init_;
  my $wsd = $self->{wsdnode};
  if($wsd) {
    $self->{regexp} = $wsd->findvalue('regexp');
    my @keys = map { $_->textContent } $wsd->findnodes('map/key');
    $self->{keys} = \@keys;
    $self->{slot} = $wsd->getAttribute("apply-to");
  } 
  $self->{regexp} or croak "No regular expression given";
  return $self;
}

sub handle {
  my $self = shift;
  my $env = shift;
  my $re = $self->{regexp};
  my %meta = %$env;

  my $slot = $self->{slot} ? $self->{slot} : "data";
  $self->log(5,"Extracting with m/$re/ from $slot");
  my $str = $slot eq "data" ? $env->dataString : $env->{$slot};

  if(my @keys = @{$self->{keys}}) {
    if($str =~ m/$re/) {
      my %map;
      no strict "refs";
      for(my $i=0; $i<=$#keys && $i<9; $i++) {
        $map{$keys[$i]} = ${$i+1};
      }
      use strict "refs";
      $map{data} and $map{type} = "text/string";
      return WebSource::Envelope->new(
        %meta,
        %map,
      );
    } else {
      return ();
    }
  } else {
    # matching part is data
    if($str =~ m/$re/) {
      $slot eq "data" and $meta{type} = "text/string";
      return WebSource::Envelope->new(
        %meta,
        $slot => $&,
      );
    } else {
      return ();
    }
  }
}

=head1 SEE ALSO

WebSource

=cut

1;