The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Lisp::Reader;

use strict;
use vars qw($DEBUG $SYMBOLS_AS_STRINGS $NIL_AS_SYMBOL
            @EXPORT_OK $VERSION);

$VERSION = sprintf("%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/);

use Lisp::Symbol qw(symbol);

require Exporter;
*import = \&Exporter::import;
@EXPORT_OK = qw(lisp_read);


sub my_symbol
{
    ($_[0] eq "nil" && !$NIL_AS_SYMBOL) ?
      undef : 
      ($SYMBOLS_AS_STRINGS ? $_[0] : symbol($_[0]));
}

sub lisp_read
{
    local($_) = shift;
    my $one   = shift;
    my $level = shift || 0;
    my $indent = "  " x $level;

    my @stack;
    my $form = [];

    if ($DEBUG) {
	print "${indent}Parse";
	print "-one" if $one;
	print ": $_\n";
    }
    
    while (1) {
	if (/\G\s*;+([^\n]*)/gc) {
	    print "${indent}COMMENT $1\n" if $DEBUG;
	} elsif (/\G\s*([()\[\]])/gc) {
	    print "${indent}PARA $1\n" if $DEBUG;
	    if ($1 eq "(" or $1 eq "[") {
		my $prev = $form;
		push(@stack, $prev);
		push(@$prev, $form = []);
		bless $form, "Lisp::Vector" if $1 eq "[";
	    } else {
		last unless @stack;
		if (ref($form) eq "ARRAY" && @$form == 0) {
                    # () and nil is supposed to be the same thing
		    $stack[-1][-1] = my_symbol("nil");
		}
		$form = pop(@stack);
		last if $one && !@stack;
	    }
	} elsif (/\G\s*(
			[-+]?                  # optional sign
			(?:\d+(\.\d*)?         # 0 0. 0.0
			 |
			 \.\d+)                # .0
			([eE][-+]?\d+)?        # optional exponent
		       )
		 (?![^\s()\[\];])              # not followed by plain chars
		 /gcx)  
	{
	    print "${indent}NUMBER $1\n" if $DEBUG;
	    push(@$form, $1+0);
	    last if $one && !@stack;
	} elsif (/\G\s*\?((?:\\[A-Z]-)*(?:\\\^.|\\[0-7]{1,3}|\\.|.))/sgc) {
	    print "${indent}CHAR $1\n" if $DEBUG;
	    push(@$form, parse_char($1));
	    last if $one && !@stack;
	} elsif (/\G\s*
		 \"(                           # start quote
		    [^\"\\]*                   # unescaped
		    (?:\\.[^\"\\]*)*           # (escaped char + unescaped)*
		 )\"/gcxs)                     # end quote
	{
	    my $str = $1;

	    # Unescape
	    $str =~ s/\\\n//g;    # escaped newlines disappear
	    $str =~ s/((?:\\[A-Z]-)+.)/chr(parse_char($1,1))/ge;
	    $str =~ s/((?:\\[A-Z]-)*\\(?:\^.|[0-7]{1,3}|.))/
	              chr(parse_char($1,1))/ge;
	    print "${indent}STRING $str\n" if $DEBUG;
	    push(@$form, $str);
	    last if $one && !@stack;
	} elsif (/\G\s*\'/gc) {
	    print "${indent}QUOTE\n" if $DEBUG;
	    my $old_pos = pos($_);
	    my($subform, $pos) = lisp_read(substr($_, $old_pos), 1, $level+1);
	    pos($_) = $old_pos + $pos;
	    push(@$form, [my_symbol("quote"), $subform]);
	    last if $one && !@stack;
	} elsif (/\G\s*\./gc) {
	    print "${indent}DOT\n" if $DEBUG;
	    #XXX Should handle (a b . c) correctly and (a . b c) as error
	    bless $form, "Lisp::Cons";
	} elsif (/\G\s*\#/gc) {
	    die qq(invalid-read-syntax: "\#");
	} elsif (/\G\s*
                   (  [^\s()\[\];\\]*          # unescaped plain chars
                      (?:\\.[^\s()\[\];\\]*)*  # (escaped char + unescaped)*
                   )/gcsx
		 && length($1))
	{
	    # symbols can have space and parentesis embedded if they are
	    # escaped.
	    my $sym = $1;
	    $sym =~ s/\\(.)/$1/g; # unescape
	    print "${indent}SYMBOL $sym\n" if $DEBUG;
	    push(@$form, my_symbol($sym));
	    last if $one && !@stack;
	} elsif (/\G\s*(.)/gc) {
	    print "${indent}? $1\n";
	    die qq(invalid-read-syntax: "$1");
	} else {
	    last;
	}
    }

    if (@stack) {
	warn "Form terminated early";  # or should we die?
	$form = $stack[0];
    }

    if ($one) {
	die "More than one form parsed, this should never happen"
	  if @$form > 1;
	$form = $form->[0];
    }

    wantarray ? ($form, pos($_)) : $form;
}


sub parse_char
{
    my($char, $instring) = @_;
    my $ord = 0;
    my @mod;
    while ($char =~ s/^\\([A-Z])-//) {
	push(@mod, $1);
    }

    if (length($char) == 1) {
	$ord = ord($char);  # a plain one
    } elsif ($char =~ /^\\([0-7]+)$/) {
	$ord = oct($1);
    } elsif ($char =~ /^\\\^(.)$/) {
	$ord = ord(uc($1)) - ord("@");
	$ord += 128 if $ord < 0;
    } elsif ($char eq "\\t") {
	$ord = ord("\t");
    } elsif ($char eq "\\n") {
	$ord = ord("\n");
    } elsif ($char eq "\\a") {
	$ord = ord("\a");
    } elsif ($char eq "\\f") {
	$ord = ord("\f");
    } elsif ($char eq "\\r") {
	$ord = ord("\r");
    } elsif ($char eq "\\e") {
	$ord = ord("\e");
    } elsif ($char =~ /^\\(.)$/) {
	$ord = ord($1);
    } else {
	warn "Don't know how to handle character ($char)";
    }

    for (@mod) {
	if ($_ eq "C") {
	    $ord = ord(uc(chr($ord))) - ord("@");
	    $ord += 128 if $ord < 0;
	} elsif ($_ eq "M") {
	    $ord += $instring ? 2**7 : 2**27;
	} elsif ($_ eq "H") {
	    $ord += 2**24;
	} elsif ($_ eq "S") {
	    $ord += 2**23;
	} elsif ($_ eq "A") {
	    $ord += 2**22;
	} else {
	    warn "Unknown character modified ($_)";
	}
    }

    $ord;
}


1;