The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# $Id: gen_grammar.pl,v 2.1 2004-12-02 19:26:40 pajas Exp $

use strict;
use XML::LibXML;

if ($ARGV[0]=~q(^(-h|--help)?$)) {
  print <<EOF;
Generates RecDescent grammar from RecDescentXML source.

Usage: $0 <source.xml>

EOF
  exit;
}

my $parser=XML::LibXML->new();
$parser->load_ext_dtd(1);
$parser->validation(1);
$parser->complete_attributes(1);
my $doc=$parser->parse_file($ARGV[0]);

my $dom=$doc->getDocumentElement();
my ($rules)=$dom->findnodes('./rules');
my ($preamb)=$dom->findnodes('./preamb');
my ($postamb)=$dom->findnodes('./postamb');

print "# This file was automatically generated from $ARGV[0] on \n# ",scalar(localtime),"\n";

print get_text($preamb,1);

foreach my $r ($rules->findnodes('./rule[@inline!="yes" and production]')) {
  print "\n  ",$r->getAttribute('id'),":\n\t   ";
  print join("\n\t  |",create_productions($r)),"\n";
}
print get_text($postamb,1);
print "\n";
exit;

## ================================================

sub strip_space {
  my ($text)=@_;
  $text=~s/^\s*//;
  $text=~s/\s*$//;
  return $text;
}

sub get_text {
  my ($node,$no_strip)=@_;
  my $text="";
  foreach ($node->childNodes()) {
    if ($_->nodeType() == XML::LibXML::XML_ELEMENT_NODE &&
	$_->nodeName() eq 'lineinfo') {
#      $text.='0,0,0,$XML::XSH2::Functions::SCRIPT'
      $text.='$thisline,$thiscolumn,$thisoffset,$XML::XSH2::Functions::SCRIPT';
    } elsif ($_->nodeType() == XML::LibXML::XML_TEXT_NODE ||
	$_->nodeType() == XML::LibXML::XML_CDATA_SECTION_NODE) {
      $text.=$_->getData();
    }
  }
  return $no_strip ? $text : strip_space($text);
}

sub find_rule {
  my ($r)=@_;
  return $r->findnodes('id("'.$r->getAttribute('ref').'")');
}

sub create_productions {
  my ($rule)=@_;
  return map {
    $_->nodeName() eq 'production' ?
      create_rule_production($rule,$_) : create_productions(find_rule($_))
      }
    $rule->findnodes('production|ruleref');
}

sub has_sibling {
  my ($node)=@_;
  return 0 unless $node;
  my$value=
    $node->find(
  '(following-sibling::*[name()!="action" and name()!="directive"]
    or
    not(following-sibling::*) and
    (parent::production/parent::group/following-sibling::*[name()!="action"
                                                           and name()!="directive"])
   )');
  return $value;
#   $node=$node->nextSibling();
#   while ($node) {
#     return 1 if ($node->nodeType == XML::LibXML::XML_ELEMENT_NODE
# 		 and
# 		 $node->nodeName ne 'action'
# 		 and
# 		 $node->nodeName ne 'directive'
# 		);
#     $node=$node->nextSibling();
#   }
#   return 0;
}

sub create_rule_production {
  my ($rule,$prod)=@_;
   my $result;
  my $name;
  foreach my $item ($prod->childNodes()) {
    next unless $item->nodeType == XML::LibXML::XML_ELEMENT_NODE;
    $name=$item->nodeName();
    if ($name eq 'lookahead') {
      $result.=' ...' . ($item->getAttribute('negative') eq 'yes' ? '!' : '');
    } elsif ($name eq 'regexp') {
      $result.=" /".get_text($item)."/".$item->getAttribute('mod');
    } elsif ($name eq 'directive') {
      my $text=get_text($item);
      my $type=$item->getAttribute('type');
      $type='error?' if ($type eq 'error-if-committed');
      $result.=" <".$type;
      $result.=":$text" if ($text ne "");
      $result.=">";
    } elsif ($name eq 'ruleref') {
      $result.=" ".$item->getAttribute('ref');
      if ($item->getAttribute('rep') ne '') {
	$result.="(".$item->getAttribute('rep').")";
      }
      if ($item->getAttribute('arguments') ne '') {
	$result.="[".$item->getAttribute('arguments')."]";
      }
    } elsif ($name eq 'string') {
      $result.=" '".get_text($item)."'";
    } elsif ($name eq 'action') {
      $result.="\n\t\t{ ".get_text($item,1)." }\n  \t";
    } elsif ($name eq 'group') {
      $result.="\n\t  " unless $result eq "";
      $result.="("
	     . join("\n\t  |",create_productions($item))
             . "\n\t   )";
      if ($item->getAttribute('rep') ne '') {
	$result.="(".$item->getAttribute('rep').")";
      }
    } elsif ($name eq 'selfref') {
      $result.=' /('
	.join("|", map { $_->getAttribute('regexp') ne "" ?
			   $_->getAttribute('regexp') :
			   $_->getAttribute('name')
		       }
	      $rule->findnodes('ancestor-or-self::rule'),
	      grep {defined($_)} $rule->findnodes("./aliases/alias"))
	. ')'
	. (($item->getAttribute('sep') ne 'no' and has_sibling($item)) ? '\b/' : '/');
    } elsif ($name eq 'paramlist') {
      $result.="\n\t  (( ";
      my $count = 0;
      foreach my $param ($item->childNodes()) {
	next unless
	  $param->nodeType == XML::LibXML::XML_ELEMENT_NODE
	  and $param->nodeName eq 'param';
	$result.="\n\t   | " if $count++;
	my $name=$param->getAttribute('name');
	my $short=$param->getAttribute('short');
	my $arg=$param->getAttribute('argument');
	my $type=$param->getAttribute('type');
	my $prefix=$param->getAttribute('prefix');
	my $shortprefix=$param->getAttribute('shortprefix');
	$result.='/'.$prefix.$name;
	$result.='|'.$shortprefix.$short if $short ne "";
	$result.='/';
	if ($arg ne '') {
	  $result.=' '.$arg." { [ '$type/$name' => \$item[2] ] }";
	} else {
	  $result.=" { [ '/$name' => 1 ] }";
	}
      }
      $result.="\n\t   )(s?) { [ map { \@\$_ } \@{\$item[1]} ] }\n\t  )\n";
#(( /:fold\s/                { [":fold" => 1] }
#           | /:encoding\s/ expression { [":encoding" => $item[2]] }
#           )(s)
#           { [ map { @$_ } @{$item[1]} ] })
    }
  }
  
  return $result;
}