The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (c) Philippe Verdret, 1995-1999

# Architecture:
#           Parse::ALex - Abstract Lexer
#                |
#           +----------+
#           |          |
#           |     Parse::Tokenizer
#           |        |     |
#        LexEvent   Lex  CLex  ...       - Concrete lexers

require 5.004;
use integer;
use strict qw(vars);
use strict qw(refs);
use strict qw(subs);

package Parse::ALex;
$Parse::ALex::VERSION = '2.21';
use Parse::Trace;
@Parse::ALex::ISA = qw(Parse::Trace); 

use Parse::Token;	
use Parse::Template;

				# Default values
my $trace = 0;			# if true enable the trace mode
my $hold = 0;			# if true enable data saving
my $skip = '[ \t]+';		# strings to skip
my $DEFAULT_STREAM = \*STDIN;	# Input Filehandle 
my $eoi = 0;			# 1 if end of imput 

# define constant, use a pseudo-hash???
my %_map;			# Define a mapping between element names and numbers
my @attributes = qw(STREAM FROM_STRING LEXER_SUB BUFFER PENDING_TOKEN 
		    LINE RECORD_LENGTH OFFSET POS
		    EOI SKIP HOLD HOLD_TEXT 
		    NAME IN_PKG
		    TEMPLATE
		    LEXER_STRING_SUB LEXER_STREAM_SUB LEXER_CLOSURE_ENV
		    LEXER_STRING_CODE LEXER_STREAM_CODE LEXER_CODE
		    STATE_MACHINE_CODE STATES STACK_STATES EXCLUSIVE_COND INCLUSIVE_COND
		    TRACE  
		    TOKEN_LIST);
my($STREAM, $FROM_STRING, $LEXER_SUB, $BUFFER, $PENDING_TOKEN, 
   $LINE, $RECORD_LENGTH, $OFFSET, $POS,
   $EOI, $SKIP, $HOLD, $HOLD_TEXT, 
   $NAME, $IN_PKG,
   $TEMPLATE, 
   $LEXER_STRING_SUB, $LEXER_STREAM_SUB, $LEXER_CLOSURE_ENV,
   $LEXER_STRING_CODE, $LEXER_STREAM_CODE, $LEXER_CODE,
   $STATE_MACHINE_CODE, $STATES, $STACK_STATES, $EXCLUSIVE_COND, $INCLUSIVE_COND,
   $TRACE, 
   $TOKEN_LIST
) = @_map{@attributes} = (0..$#attributes);

#sub EOI () { 9 }		# trial

sub _map { 
  shift;
  if (@_) {
    wantarray ? @_map{@_} : $_map{$_[0]};
  } else {
    @attributes;
  }
}

my $somevar = '';		
				# Create and instanciate a prototypical instance
my $lexer = __PACKAGE__->clone;	
sub prototype { $lexer or [] }

my $TOKEN_CLASS = 'Parse::Token'; # Root class for the token objects
sub tokenClass { 
  if (defined $_[1]) {
    no strict qw/refs/;
    ${"$TOKEN_CLASS" . "::PENDING_TOKEN"} = $PENDING_TOKEN; 
    $TOKEN_CLASS = $_[1];
  } else {
    $_[1] 
  }
}
my $DEFAULT_TOKEN = $TOKEN_CLASS->new('DEFAULT', '.*'); # default token
$lexer->tokenClass($TOKEN_CLASS);

$lexer->[$STREAM] = $DEFAULT_STREAM;
$lexer->[$FROM_STRING] = 0;	# 1 for a string
$lexer->[$BUFFER] = \$somevar;		# string to tokenize
$lexer->[$PENDING_TOKEN] = $DEFAULT_TOKEN;
$lexer->[$LINE] = \$somevar;	# number of the current record
$lexer->[$RECORD_LENGTH] = \$somevar;	# length of the current record
$lexer->[$OFFSET] = \$somevar;		# offset from the beginning of the analysed stream
$lexer->[$POS] = \$somevar;		# position in the current record
$lexer->[$EOI] = $eoi;
$lexer->[$SKIP] = $skip;	# a pattern to skip
$lexer->[$HOLD] = $hold;	# save or not what is consumed
$lexer->[$HOLD_TEXT] = '';	# saved string
$lexer->[$TEMPLATE] = new Parse::Template; # code template

				# lexer code: [HEADER, BODY, FOOTER]
$lexer->[$LEXER_STREAM_CODE] = [];	# cached subroutine definition
$lexer->[$LEXER_STRING_CODE] = [];	# cached subroutine definition
$lexer->[$LEXER_CODE] = [];		# definition of the current lexer
$lexer->[$LEXER_CLOSURE_ENV] = [];	# environnement of the lexer closure
$lexer->[$LEXER_SUB] = my $DEFAULT_LEXER_SUB = sub {
  $_[0]->genLex;		# lexer autogeneration
  &{$_[0]->[$LEXER_SUB]};	# lexer execution
};
$lexer->[$LEXER_STREAM_SUB] = sub {};	# cache for the stream lexer
$lexer->[$LEXER_STRING_SUB] = sub {};	# cache for the string lexer

				# State machine
$lexer->[$EXCLUSIVE_COND] = {};	# exclusive conditions
$lexer->[$INCLUSIVE_COND] = {};	# inclusive conditions
$lexer->[$STATE_MACHINE_CODE] = ''; # definition of the state machine
$lexer->[$STATES] = { 'INITIAL' => \$somevar };	# state machine
$lexer->[$STACK_STATES] = [];	# stack of states, not used
$lexer->[$TRACE] = $trace;
$lexer->[$TOKEN_LIST] = [];	# Token instances

sub reset {			# reset all lexer's state values
  my $self = shift;
  ${$self->[$LINE]} = 0;
  ${$self->[$RECORD_LENGTH]} = 0;
  ${$self->[$OFFSET]} = 0;
  ${$self->[$POS]} = 0;
  ${$self->[$BUFFER]} = ''; 
  $self->[$HOLD_TEXT] = '';
  $self->[$EOI] = 0; 
  $self->state('INITIAL');	# initialize the state machine
  if ($self->[$PENDING_TOKEN]) { 
    $self->[$PENDING_TOKEN]->setText();
    $self->[$PENDING_TOKEN] = 0;
  }
  $self;
}

sub eoi { 
  my $self = shift;
  $self->[$EOI];
} 
sub token {			# always return a Token object
  my $self = shift;
  $self->[$PENDING_TOKEN] or $DEFAULT_TOKEN 
} 
*getToken = \&token;
sub setToken {			# force the token
  my $self = shift;
  $self->[$PENDING_TOKEN] = $_[0];
}
sub setBuffer {			# not documented
  my $self = shift;
  ${$self->[$BUFFER]} = $_[0];
} 
sub getBuffer {			# not documented
  my $self = shift;
  ${$self->[$BUFFER]}; 
} 
sub buffer { 
  my $self = shift;
  if (defined $_[0]) {
    ${$self->[$BUFFER]} = $_[0] 
  } else {
    ${$self->[$BUFFER]};
  }
} 
sub flush {
  my $self = shift;
  my $tmp = $self->[$HOLD_TEXT];
  $self->[$HOLD_TEXT] = '';
  $tmp;
}
# returns or sets the number of the current record
sub line {	
  my $self = shift;
  if (@_) {
    ${$self->[$LINE]} = shift;
  } else {
    ${$self->[$LINE]};
  }
}
# return the length of the current record
# not documented
sub length {	
  my $self = shift;
  if (@_) {
    ${$self->[$RECORD_LENGTH]} = $_[0];
  } else {
    ${$self->[$RECORD_LENGTH]};
  }
}
# return the end position of last token from the stream beginning
sub offset {			
  my $self = shift;
  ${$self->[$OFFSET]};
}
# return the end position of the last token 
# in the current record
sub pos {		
  my $self = shift;
  if (defined $_[0]) {
    ${$self->[$POS]} = $_[0] 
  } else {
    ${$self->[$POS]};
  }
}
sub name {		      
  my $self = shift;
  if (defined $_[0]) {
    $self->[$NAME] = $_[0] 
  } else {
    $self->[$NAME];
  }
}
# not documented
sub inpkg {		
  my $self = shift;
#  if (ref $self) {
    if (defined $_[0]) {
      $self->[$IN_PKG] = $_[0] 
    } else {
      $self->[$IN_PKG];
    }
#  } else {
#    if (defined $_[0]) {
#      $inpkg = $_[0] 
#    } else {
#      $inpkg;
#    }
#  }
}
use constant TRACE_GEN => 0;
sub tokenList {
  my $self = shift;
  if ($^W and @{$self->[$TOKEN_LIST]} == 0) {
    require Carp;
    Carp::carp("no token defined");
  }
  @{$self->[$TOKEN_LIST]}; 
}
####
# Purpose: define the data input
# Parameters: possibilities
# 1. filehandle (\*FH, *FH or IO::File instance)
# 2. list of strings
# 3. <none> 
# Returns:  1. returns the lexer
#           2. returns the lexer
#           3. returns the lexer's filehandle if defined
#              or undef if not 
sub from {
  my $self = shift;
  my $debug = 0;
  
  # check for stream : check only ref($fh) insteaf of fileno()
  # Filehandles connected to memory objects via new features of
  # "open" may return undefined even though they are open.)
  my $fd = $_[0];
  
  print STDERR "arg: $fd\n" if $debug;
  if (ref($fd)) {		# From STREAM
    $self->[$STREAM] = $fd;
    print STDERR "From stream\n" if $debug;

    if (@{$self->[$LEXER_STREAM_CODE]}) { # Code already exists
      if ($self->[$FROM_STRING]) { # if STREAM definition isn't the current
		print STDERR "code already exists\n" if $debug;
		$self->[$LEXER_SUB] = $self->[$LEXER_STREAM_SUB];
		$self->_switchClosureEnv();
		$self->[$FROM_STRING] = 0;
      }
    } 
	else {			# code doesn't exist
      print STDERR "Analyze STREAM - code generation\n" if $debug;
      $self->[$FROM_STRING] = 0;
      #$self->[$LEXER_SUB] = $DEFAULT_LEXER_SUB;      # 
      $self->genLex;		# lexer generation
    }

    $self->reset;
    $self;
  } 
  elsif (defined $_[0]) {	# From STRING
    print STDERR "From string\n" if $debug;
    if (@{$self->[$LEXER_STRING_CODE]}) { # code already exists
      unless ($self->[$FROM_STRING]) {
		print STDERR "code already exists\n" if $debug;
		$self->[$LEXER_SUB] = $self->[$LEXER_STRING_SUB];
		$self->_switchClosureEnv();
		$self->[$FROM_STRING] = 1;
      }
    } 
	else {			# code doesn't exist
      print STDERR "Analyze STRING - code generation\n" if $debug;
      $self->[$FROM_STRING] = 1;
      # autogeneration doesn't work, 
      # cause the generation delete the buffer
      #$self->[$LEXER_SUB] = $DEFAULT_LEXER_SUB;      # 
      $self->genLex;		# lexer generation
    }

    $self->reset;
    my $buffer = join($", @_); # Data from a list
    ${$self->[$BUFFER]} = $buffer;
    ${$self->[$RECORD_LENGTH]} = CORE::length($buffer);
    $self;
  } 
  elsif ($self->[$STREAM]) {
    $self->[$STREAM];
  } 
  else {
    undef;
  }
}


sub readline {
  my $fh = $_[0]->[$STREAM];
  my $record = '';
  if (not defined($record = <$fh>)) {
    $_[0]->[$EOI] = 1;
  } else {
    ${$_[0]->[$LINE]}++;
  }
  $record;
}

sub isFromString { $_[0]->[$FROM_STRING] }
sub isTrace { $_[0]->[$TRACE] }

# could be improved
# Purpose: Toggle the trace mode
# todo: regenerate the lexer if needed
sub trace { 
  my $self = shift;
  my $class = ref($self);
  if ($class) {			# Object method
    if ($self->[$TRACE]) {
      $self->[$TRACE] = 0;
      print STDERR qq!trace OFF for a "$class" object\n!;
    } else {
      $self->[$TRACE] = 1;
      print STDERR qq!trace ON for a "$class" object\n!;
    }
  } else {			# Class method
    $self->prototype()->[$TRACE] = not $self->prototype->[$TRACE];
    $self->SUPER::trace(@_);
  }
}
sub isHold { $_[0]->[$HOLD] }
# hold(EXPR)
# hold
# Purpose: Toggle method, hold or not consumed strings
# Arguments: nothing or EXPR true/false
# Returns: value of the hold attribute

sub hold {			
  my $self = shift;
  if (ref $self) {		# Instance method
      $self->[$HOLD] = not $self->[$HOLD];

      # delete the code already generated
      @{$lexer->[$LEXER_STREAM_CODE]} = ();
      @{$lexer->[$LEXER_STRING_CODE]} = ();
      @{$lexer->[$LEXER_CODE]} = ();		
      $lexer->[$LEXER_SUB] = $DEFAULT_LEXER_SUB;

  } else {			# Class method
    $self->prototype()->[$HOLD] = not $self->prototype()->[$HOLD];
  }
}

# skip(EXPR)
# skip
# Purpose: return or set the value of the regexp used for consuming
#          inter-token strings. 
# Arguments: with EXPR change the regexp and regenerate the
#            lexical analyzer 
# Returns: see Purpose
sub skip {			
  my $self = shift;

  my $debug = 0;
  if (ref $self) {		# Instance method
    if (defined($_[0]) and $_[0] ne $self->[$SKIP]) {
      print STDERR "skip value: '$_[0]'\n" if $debug;
      $self->[$SKIP] = $_[0];

      # delete the code already generated
      @{$self->[$LEXER_STREAM_CODE]} = (); # or $self->[$LEXER_STREAM_CODE] = []
      @{$self->[$LEXER_STRING_CODE]} = ();
      @{$self->[$LEXER_CODE]} = ();		
      $self->[$LEXER_SUB] = $DEFAULT_LEXER_SUB;

    } else {
      $self->[$SKIP];
    }
  } else {			# Used as a Class method
    print STDERR "skip value: '$_[0]'\n" if $debug;

    defined $_[0] ?
      $self->prototype()->[$SKIP] = $_[0] : $self->prototype()->[$SKIP];
  }
}
sub defineTokens {
  my $self = shift;
  my @token = $TOKEN_CLASS->factory(@_);
  my $token;
  foreach $token (@token) {	
    $token->lexer($self);	# Attach each token to its lexer
    $token->inpkg($self->inpkg); # Define the package in which the token is defined
    $token->exportTo();		# export in the calling package
  }
  print STDERR @token + 0,  " tokens\n"  if TRACE_GEN;
  $self->[$TOKEN_LIST] = [@token];
}
# From => STRING|FILEHANDLE, Tokens => [], Skip => RE
sub configure {
  my $self = shift;
  my ($key, $value);
  while (@_ >= 2) {
    ($key, $value) = (shift, shift);
    if ($key =~ /^[Ff]rom$/) {
      $self->from($value);
    } elsif ($key =~ /^[Ss]kip$/) {
      $self->skip($value);
    } elsif ($key =~ /^[Tt]okens$/) {
      unless (ref $value eq 'ARRAY') {
	require Carp;
	Carp::croak "'Tokens' must be associated to an ARRAY reference";
      }
      $self->defineTokens($value);
    } else {
      last;
    }
  }
  $self;
}
# not documented
# Purpose: returns :
# - a copy of the prototypical lexer if used as a class method
# - a copy of the message receiver if used as an instance method
# naive implementation
sub clone {
  my $receiver = shift;
  my $class;
  if ($class = ref $receiver) {	# Instance method: clone the current instance
    bless [@{$receiver}], $class; 
  } else {			# Class method: clone the class prototype
    bless [@{$receiver->prototype}], $receiver; 
  }
}
# Purpose: create the lexical analyzer
# Arguments: list of tokens or token specifications
# Returns: a lex object
sub new {
  my $receiver = shift;
  my $class = (ref $receiver or $receiver);

  if ($class eq __PACKAGE__) {
    require Carp;
    Carp::croak "can't create an instance of '$class' abstract class"
  }

  my $self = $receiver->clone;
  $self->reset;
  $self->[$IN_PKG] = caller; 

  if (@_) {			
    $self->defineTokens(@_);
  }
  $self;
}

# sub lexerType {
#   my $self = shift;
#   if ($self->isa('Parse::Lex')) {
#     return 'Parse::Lex';
#   } elsif ($self->isa('Parse::CLex')) {
#     return 'Parse::CLex';
#   } else {
#     return ref $self || $self;
#   }
# }

# Put or fetch a template object
sub template {
  my $self = shift;
  if (defined $_[0]) {
    $self->[$TEMPLATE] = $_[0];
  } else {
    $self->[$TEMPLATE];
  }
}
sub getTemplate {
  my $self = shift;
  my $part = shift;
  $self->[$TEMPLATE]->{$part};
}
sub setTemplate {
  my $self = shift;
  my $part = shift;
  $self->[$TEMPLATE]->{$part} = shift;
}

# redefine this!!! don't copy, just reference
# the LEXER_STRING_CODE!!!
# and don't regenerate if code already exists
sub genCode {
  my $self = shift;
  print STDERR "genCode()\n" if TRACE_GEN;
  $self->genHeader();
  $self->genBody($self->tokenList); 
  $self->genFooter();
  if ($self->[$FROM_STRING]) {	# cache the already generated code
    $self->[$LEXER_STRING_CODE] = [@{$self->[$LEXER_CODE]}]; 
    $self->[$LEXER_STRING_SUB] = $self->[$LEXER_SUB];
  } else {
    $self->[$LEXER_STREAM_CODE] = [@{$self->[$LEXER_CODE]}]; 
    $self->[$LEXER_STREAM_SUB] = $self->[$LEXER_SUB];
  }
}
# Remark: not documented
sub genHeader {
  my $self = shift;
  my $template = $self->template;
  print STDERR "genHeader()\n" if TRACE_GEN;
				# build the template env
  $template->env(
		 'SKIP' => $self->[$SKIP],
		 'IS_HOLD' => $self->[$HOLD],
		 'HOLD_TEXT' => $HOLD_TEXT,
		 'EOI' => $EOI,	# array index
		 'TRACE' => $TRACE,
		 'IS_TRACE' => $self->[$TRACE],
		 'PENDING_TOKEN' => $PENDING_TOKEN, # array index
		); 

  if ($self->[$FROM_STRING]) {
    $self->[$LEXER_CODE]->[0] = $self->template->eval('HEADER_STRING_PART');
  } else {
    $self->[$LEXER_CODE]->[0] = $self->template->eval('HEADER_STREAM_PART');
  }
}
# Purpose: create the lexical analyzer
# Arguments: list of tokens
# Returns: a Lex object
# Remark: not documented
sub genBody {
  my $self = shift;
  print STDERR "genBody()\n"  if TRACE_GEN;

  my $token;
  my $body = '';
  my $debug = 0;
  print STDERR @_ + 0,  " tokens\n"  if TRACE_GEN;
  while (@_) {			# list of Token instances
    $body .= shift->genCode();
  }
  $self->[$LEXER_CODE]->[1] = $body;
}
# Remark: not documented
sub genFooter {
  my $self = shift;
  print STDERR "genFooter()\n" if TRACE_GEN;
  $self->[$LEXER_CODE]->[2] = $self->template->eval('FOOTER_PART');
}

# Purpose: Returns code of the current lexer
# Arguments: nothing
# Returns: code of the lexical analyzer
# Remark: not documented, doesn't return the state machine definition
sub getCode { 
  my $self = shift;
  my @code = @{$self->[$LEXER_CODE]};
  unless (@code) {
    $self->genCode;
    @code = @{$self->[$LEXER_CODE]}
  } 
  join '', @code;
}

# Not documented
# Purpose: set/get environnement of the lexer closure
# Arguments: see definition
# Returns: references to some internal object fields
# todo: test type and number of arguments
sub _closureEnv {
  my $self = shift;
  if (@_) {
    ($self->[$BUFFER], 
     $self->[$RECORD_LENGTH],
     $self->[$LINE], 
     $self->[$POS], 
     $self->[$OFFSET],
     $self->[$STATES],
    ) = @_;
  } else {
    ($self->[$BUFFER], 
     $self->[$RECORD_LENGTH],
     $self->[$LINE], 
     $self->[$POS], 
     $self->[$OFFSET],
     $self->[$STATES],
    )
  }
}
sub _saveClosureEnv {
  my $self = shift;
  @{$self->[$LEXER_CLOSURE_ENV]} = $self->_closureEnv();
}
sub _switchClosureEnv {
  my $self = shift;
  my @tmp = $self->_closureEnv();
  $self->_closureEnv(@{$self->[$LEXER_CLOSURE_ENV]});
  @{$self->[$LEXER_CLOSURE_ENV]} = @tmp;
}

# Purpose: Generate the lexical analyzer
# Arguments: 
# A Returns: the anonymous subroutine implementing the lexical analyzer
# Remark: not documented
sub genLex {
  my $self = shift;
  # optimization: unless @{$self->[$LEXER_CODE]};	
  # or delegate this behavior to getCode() ? 
  $self->genCode; 

  print STDERR "Lexer generation...\n"  if TRACE_GEN;

				# Closure environnement
  my $LEX_BUFFER = '';		# buffer to analyze
  my $LEX_LENGTH = 0;		# buffer length

#  my $LEX_BUFFER = ${$self->[$BUFFER]};	# buffer to analyze
#  my $LEX_LENGTH = ${$self->[$RECORD_LENGTH]}; # buffer length

  my $LEX_RECORD = 0;		# current record number
  my $LEX_POS = 0;		# current position in buffer
  my $LEX_OFFSET = 0;		# offset from the beginning
  my $LEX_TOKEN = '';		# token instance
  my %LEX_STATE = ();		# states

  $self->_saveClosureEnv();
  $self->_closureEnv(\(
		    $LEX_BUFFER,	
		    $LEX_LENGTH,	
		    $LEX_RECORD,		
		    $LEX_POS,	
		    $LEX_OFFSET,
		    %LEX_STATE,
		   ));		

  my $LEX_FHR = \$self->[$STREAM];
  my $stateMachine = $self->genStateMachine();
  my $analyzer = $self->getCode();
  eval qq!$stateMachine; \$self->[$LEXER_SUB] = sub $analyzer!;

  my $debug = 0;
  if ($@ or $debug) {	# can be useful ;-)
    my $line = 0;
    $stateMachine =~ s/^/sprintf("%3d ", $line++)/meg; # line numbers
    $analyzer =~ s/^/sprintf("%3d ", $line++)/meg;
    print STDERR "$stateMachine$analyzer\n";
    print STDERR "$@\n";
    die "\n" if $@;
  }
  $self->[$LEXER_SUB];
}

# Purpose: returns the lexical analyzer routine
# Arguments: nothing
# Returns: the anonymous sub implementing the lexical analyzer
sub getSub {
  my $self = shift;
  if (ref($self->[$LEXER_SUB]) eq 'CODE') {
    $self->[$LEXER_SUB];
  } else {
    $self->genLex();
  }
}
				# 
				# The State Machine
				# 
#package Parse::State;
sub inclusive {
  my $self = shift;
  if (ref $self) {		# instance method
    if (@_)  {
      $self->[$INCLUSIVE_COND]  = {@_};
    } else {
      $self->[$INCLUSIVE_COND];
    }
  } else {			# class method
    $self->prototype->inclusive(map { $_ => 1 } @_);
  }
}
sub exclusive {
  my $self = shift;
  if (ref $self) {		# instance method
    if (@_) {
      $self->[$EXCLUSIVE_COND]  = {@_};
    } else {
      $self->[$EXCLUSIVE_COND];
    }
  } else {			# class method
    $self->prototype->exclusive(map { $_ => 1 } @_);
  }
}
use constant GEN_CONDITION => 0;
sub genCondition {
  my $self = shift;
  my $specif = shift;
  return '' if $specif =~ /^ALL:/; # special condition

  my %exclusion = %{$self->exclusive};
  my %inclusion = %{$self->inclusive};
  return '' unless $specif or keys %exclusion;

  my $condition;
  my @condition;
  my $cond_group;
  my $cond_item;
  my @cond_group;
  if ($specif =~ /^(.+):/g) {	# Ex. A:B:C: or A,C: 
    my ($prefix) = ($1);
    foreach $cond_group (split /:/, $prefix) {
      foreach $cond_item (@cond_group = split /,/, $cond_group) {
	unless ($cond_item eq 'INITIAL' or 
		defined $exclusion{$cond_item} or 
		defined $inclusion{$cond_item}) {
	  require Carp;
	  Carp::croak "'$cond_item' condition not defined";
	}
	delete $exclusion{$cond_item};
	delete $inclusion{$cond_item};
      }
      push @condition, "(" . join(" or ", map { "\$$_" } @cond_group) . ")";
    }
    if (@condition == 1) {
      $condition = shift @condition;
    } else {
      $condition = "(" . join(" and ", @condition) . ")";
    }
  }
  my @tmp = ();
  if (@tmp = map { "\$$_" } keys(%exclusion)) {
    if ($condition) {
      $condition = "not (" . join(" or ", @tmp) . ") and $condition";
    } else {
      $condition = "not (" . join(" or ", @tmp) . ")";
    }
  } 
  print STDERR "genCondition(): $specif -> $condition\n" if GEN_CONDITION;
  $condition ne '' ? "$condition and" : '';
}
sub genStateMachine { 
  my $self = shift;
  my $somevar;

  my $stateDeclaration = 'my $INITIAL = 1;' .
    "\n" .
      q!$LEX_STATE{'INITIAL'} = \\$INITIAL;! . "\n";
  my $stateName = '';
  foreach $stateName (keys (%{$self->exclusive}), keys(%{$self->inclusive})) {
    $stateDeclaration .=
      q!my $! . "$stateName" . q! = 0; ! . 
	q!$LEX_STATE{'! . "$stateName" . q!'} = \\$! . "$stateName" . q!;!  . "\n";
  }
  $self->setStateMachine($stateDeclaration);
}
# not documented
sub setStateMachine {
  my $self = shift;
  $self->[$STATE_MACHINE_CODE] = shift;
}
# not documented
sub getStateMachine {
  my $self = shift;
  $self->[$STATE_MACHINE_CODE];
}
# not documented
sub getState {
  my $self = shift;
  my $state = shift;
  ${$self->[$STATES]->{$state}};
}
# not documented
sub setState {
  my $self = shift;
  my $state = shift;
  ${$self->[$STATES]->{$state}} = shift;
}
sub state {			# get/set state
  my $self = shift;
  my $state = shift;
  if (@_)  {
    ${$self->[$STATES]->{$state}} = shift;
  } else {
    ${$self->[$STATES]->{$state}};
  }
}
sub start {
  my $self = shift;
  my $state = shift;
  if ($state eq 'INITIAL') {
    $self->_restart() 
  } else {
    if (exists $self->[$EXCLUSIVE_COND]->{$state}) {
      $self->_restart;
    }
    ${$self->[$STATES]->{$state}} = 1;
  }
}
sub _restart {
  my $self = shift;
  my $state = shift;
  my $hashref = $self->[$STATES];
  foreach $state (keys %$hashref) {
    ${$hashref->{$state}} = 0;
  }
  ${$hashref->{'INITIAL'}} = 1;
}
sub end {
  my $self = shift;
  my $state = shift;
  ${$self->[$STATES]->{$state}} = 0;
}
#sub pushState {}
#sub popState {}
#sub topState {}

package Parse::Tokenizer;
@Parse::Tokenizer::ISA = qw/Parse::ALex/;

sub next { &{$_[0]->[$LEXER_SUB]} }
				# 
				# next() wrappers
				# 
# Purpose: Analyze all data in one call
# Arguments: string or stream to analyze
# Returns: self
# Todo: generate a specific lexer sub
sub parse {
  my $self = shift;
  unless (defined $_[0]) {
    require Carp;
    Carp::carp "no data to analyze";
  }
  $self->from($_[0]);
  my $next = $self->[$LEXER_SUB];
  &{$next}($self) until $self->[$EOI]; 
  # or:
  #  local *next = $self->[$SUB];
  #  &next($self) until $self->[$EOI]; 
  $self;
}
# Purpose: Analyze data in one call
# Arguments: string or stream to analyze
# Returns: list of token name and token text
# Todo: generate a specific lexer sub
sub analyze {
  my $self = shift;
  unless (defined $_[0]) {
    require Carp;
    Carp::carp "no data to analyze";
  }
  $self->from($_[0]);
  my $next = $self->[$LEXER_SUB]; 
  my $token = &{$next}($self);
  my @token = ($token->name, $token->text);
  until ($self->[$EOI]) {
    $token = &{$next}($self);
    push (@token, $token->name, $token->text);
  }
  @token;
}
# Remark: not documented
# Purpose: put the next token in a scalar reference
# Arguments: a scalar reference
# Returns: 1 if token isn't equal to the EOI token
sub nextis {			
  my $self = shift;
  unless (@_ == 1) {
    require Carp;
    Carp::croak "bad argument number";
  }
  if (ref $_[0]) {
    my $token = &{$self->[$LEXER_SUB]}($self);
    ${$_[0]} = $token;
    $token == $Parse::Token::EOI ? return 0 : return 1;
  } else {
    require Carp;
    Carp::croak "bad argument $_[0]";
  }
}
# Purpose: execute an action on each token
# Arguments: an anonymous sub to call on each token
# Returns: undef
sub every {			
  my $self = shift;
  my $do_on = shift;
  my $ref = ref($do_on);
  if (not $ref or $ref ne 'CODE') { 
    require Carp;
    Carp::croak "argument of the 'every' method must be an anonymous routine";
  }
  my $token = &{$self->[$LEXER_SUB]}($self);
  while (not $self->[$EOI]) {
    &{$do_on}($token);
    $token = &{$self->[$LEXER_SUB]}($self);
  }
  $self;
}
__PACKAGE__

__END__

=head1 NAME

C<Parse::ALex> - Generator of lexical analyzers - abstract class

=head1 SYNOPSIS

See the C<Parse::Lex> documentation.

=head1 DESCRIPTION

This is an abstract class used by C<Parse::Lex> and C<Parse::CLex>.

=head1 AUTHOR

Philippe Verdret.

=head1 COPYRIGHT

Copyright (c) 1999 Philippe Verdret. All rights reserved.  This module
is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.