# WARNING: This file is no longer used by PCR itself;
# we're using examples/Grammar.grammar to generate Pugs::Grammar::Rule.pmc
# instead.
use v6-alpha;
use utf8;
# Perl6 implementation of the 'Rule' syntax
# author: Flavio S. Glock - fglock@gmail.com
=for compiling
The util/update-rule-pmc script is used to
generate lib/Pugs/Grammar/Rule.pmc from
this file.
This script does the following:
Use v6.pm to compile this file and
post-process it after compiling:
- remove all references to:
Data::Bind
- replace the header with:
package Pugs::Grammar::Rule;
use utf8;
no strict 'refs';
use Pugs::Runtime::Match;
use Pugs::Runtime::Regex;
The post-processing thing is done
by the Perl 5 script util/patch-rule-pmc.pl
=cut
grammar Pugs::Grammar::Rule;
#use Pugs::Runtime::Match;
our %rule_terms;
our %variables;
token pod_begin {
| \n =end \N*
| . \N* <.pod_begin>
}
token pod_other {
| \n =cut \N*
| . \N* <.pod_other>
}
token ws {
[
| \# \N*
| \n [ = [
| begin <.ws> END \N* .*
| begin <.pod_begin>
| kwid <.pod_other>
| pod <.pod_other>
| for <.pod_other>
| head1 <.pod_other>
]?
]?
| \s
]+
}
# regex ident can start with a number
token ident {
[ <.alnum> | _ | '::' ]+
}
token alnum {
<[0-9a-zA-Z]>
}
token alpha {
<[a-zA-Z]>
}
token digit {
<[0-9]>
}
# after '\\'
token special_char {
| ( c | C ) \[ ( [<alnum>|\s| ';' | '(' | ')' | '-' ]+) \]
# \c[LATIN LETTER A]
{ return { special_char => '\\' ~ $0 ~ $1 , } }
| [ x | X ] <xdigit>+
# \x0021 \X0021
{ return { special_char => '\\' ~ $/ , } }
| ( x | X ) \[ (<xdigit>+) \]
# \x[0021] \X[0021]
{ return { special_char => '\\' ~ $0 ~ $1 , } }
| [ o | O ] \d+
# \o0021 \O0021
{ return { special_char => '\\' ~ $/ , } }
| ( o | O ) \[ (\d+) \]
# \o[0021] \O[0021]
{ return { special_char => '\\' ~ $0 ~ $1 , } }
| .
# \e \E
{ return { special_char => '\\' ~ $/ , } }
}
token literal {
[
| \\ <special_char>
| <-[ \' ]>
]*
}
token double_quoted {
[
| \\ <special_char>
| <%Pugs::Grammar::Rule::variables>
| <-[ \" ]>
]*
}
token metasyntax {
[
| \\ <special_char>
| \' <.literal> \'
| \" <.double_quoted> \"
| \{ <.string_code> \}
| \< <.metasyntax> \>
| <-[ \> ]>
]+
}
token char_range {
[
| \\ <special_char>
| <-[ \] ]>
]+
}
token char_class {
| <.alpha>+
| \[ <.char_range> \]
}
token string_code {
# bootstrap "code"
[
| \\ <special_char>
| \' <.literal> \'
| \" <.double_quoted> \"
| \{ [ <.string_code> | '' ] \}
| \( [ <.string_code> | '' ] \)
| \< [ <.string_code> | '' ] \>
| [ <.ws> | \> | \= | \- ] \>
| <.ws>
| <-[ \} \) \> ]>
]+
}
token parsed_code {
# this subrule is overridden inside the perl6 compiler
<.string_code>
{ return '{' ~ $/ ~ '}' }
}
token named_capture_body {
| \( <rule> \) { return { capturing_group => $$<rule> ,} }
| \[ <rule> \] { return $$<rule> }
| \< <parse_metasyntax> { return $$<parse_metasyntax> }
| \' <.literal> \'
{ return { metasyntax => { metasyntax => ~ $$/ ,} } }
| { die "invalid alias syntax"; }
}
token parse_metasyntax {
$<modifier> := [ '!' | '?' | '.' | '' ]
[
'{' <parsed_code> '}>'
{ return { closure => {
closure => $$<parsed_code>,
modifier => $$<modifier>,
} } }
|
<char_class>
( <[+-]> <char_class> )+
\>
{
if ( $$<modifier> eq '!' ) {
return {
negate => {
char_class => [
'+' ~ $<char_class>,
@($/[0]), # TODO - stringify
] } }
}
return {
char_class => [
'+' ~ $<char_class>,
@($/[0]), # TODO - stringify
] }
}
|
<ident>
[
<.ws> <rule> \>
{
if ( $$<ident> eq 'before'
|| $$<ident> eq 'after'
) {
return { $$<ident> => { rule => $$<rule>, modifier => $$<modifier> } }
}
return { metasyntax => {
metasyntax => $$<ident>,
rule => $$<rule>,
modifier => $$<modifier>,
} }
}
|
':' <.ws>?
$<str> := [
[
| \\ <special_char>
| <%Pugs::Grammar::Rule::variables>
| <-[ \> ]>
]*
]
\>
{
if ( $$<ident> eq 'before'
|| $$<ident> eq 'after'
) {
return { $$<ident> => {
rule => { metasyntax => {
metasyntax => '\'' ~ $$<str> ~ '\''
} },
modifier => $$<modifier>,
} }
}
return { metasyntax => {
metasyntax => $$<ident>,
string => $$<str>,
modifier => $$<modifier>,
} }
}
|
\( <parsed_code> \) \>
{ return { call => {
method => $$<ident>,
params => $$<parsed_code>,
modifier => $$<modifier>,
} } }
]
|
<metasyntax> \>
{ return { metasyntax => {
metasyntax => ~$$<metasyntax>,
modifier => $$<modifier>,
} } }
]
}
%variables = (
'$<' => token {
<ident> \>
{ return { match_variable => '$' ~ $/<ident> ,} }
},
'$' => token {
<.digit>+
{ return { match_variable => '$' ~ $/ ,} }
|
\^?
[ <.alnum> | _ | \: \: ]+
{ return { variable => '$' ~ $/ ,} }
},
'@' => token {
<.digit>+
{ return { match_variable => '@' ~ $/ ,} }
|
\^?
[ <.alnum> | _ | \: \: ]+
{ return { variable => '@' ~ $/ ,} }
},
'%' => token {
<.digit>+
{ return { match_variable => '%' ~ $/ ,} }
|
\^?
[ <.alnum> | _ | \: \: ]+
{ return { variable => '%' ~ $/ ,} }
},
); # /%variables
%rule_terms = (
'{*}' => token {
# placeholder
{ return { metasyntax => { metasyntax => 'null' ,} } }
},
'\'' => token {
<.literal> \'
{ return { metasyntax => { metasyntax => '\'' ~ $$/ ,} } }
},
'(' => token {
<rule> \)
{ return { capturing_group => $$<rule> ,} }
},
'<(' => token {
<rule> ')>'
{ return { capture_as_result => $$<rule> ,} }
},
'<+' => token {
<char_class>
( <[+-]> <char_class> )*
\>
{ return {
char_class => [
'+' ~ $<char_class>,
@($/[0]), # TODO - stringify
] }
}
},
'<-' => token {
<char_class>
( <[+-]> <char_class> )*
\>
{ return {
char_class => [
'-' ~ $<char_class>,
@($/[0]), # TODO - stringify
] }
}
},
'<[' => token {
<char_range> \]
( <[+-]> <char_class> )*
\>
{ return {
char_class => [
'+[' ~ $<char_range> ~ ']',
@($/[0]), # TODO - stringify
] }
}
},
'<' => token {
<parse_metasyntax>
{ return $$<parse_metasyntax> }
},
'{' => token {
<parsed_code> \}
{ return { closure => {
closure => $$<parsed_code>,
modifier => 'plain',
} } }
},
'\\' => token {
<special_char>
{ return $$<special_char> }
},
'.' => token {
{ return { 'dot' => 1 ,} }
},
'[' => token {
<rule> \]
{ return $$<rule> }
},
':::' => token { { return { colon => ':::' ,} } },
':?' => token { { return { colon => ':?' ,} } },
':+' => token { { return { colon => ':+' ,} } },
'::' => token { { return { colon => '::' ,} } },
':' => token { { return { colon => ':' ,} } },
'$$' => token { { return { colon => '$$' ,} } },
'$' => token { { return { colon => '$' ,} } },
'^^' => token { { return { colon => '^^' ,} } },
'^' => token { { return { colon => '^' ,} } },
'>>' => token { { return { colon => '>>' ,} } },
'»' => token { { return { colon => '>>' ,} } },
'<<' => token { { return { colon => '<<' ,} } },
'«' => token { { return { colon => '<<' ,} } },
':i' => token {
<.ws> <rule>
{ return { modifier => { modifier => 'ignorecase', :$$<rule>, } } } },
':ignorecase' => token {
<.ws> <rule>
{ return { modifier => { modifier => 'ignorecase', :$$<rule>, } } } },
':s' => token {
<.ws> <rule>
{ return { modifier => 'sigspace', :$$<rule>, } } },
':sigspace' => token {
<.ws> <rule>
{ return { modifier => 'sigspace', :$$<rule>, } } },
':P5' => token {
<.ws> <rule>
{ return { modifier => 'Perl5', :$$<rule>, } } },
':Perl5' => token {
<.ws> <rule>
{ return { modifier => 'Perl5', :$$<rule>, } } },
':bytes' => token {
<.ws> <rule>
{ return { modifier => 'bytes', :$$<rule>, } } },
':codes' => token {
<.ws> <rule>
{ return { modifier => 'codes', :$$<rule>, } } },
':graphs' => token {
<.ws> <rule>
{ return { modifier => 'graphs', :$$<rule>, } } },
':langs' => token {
<.ws> <rule>
{ return { modifier => 'langs', :$$<rule>, } } },
); # /%rule_terms
token term {
| <%Pugs::Grammar::Rule::variables>
[ <.ws>? ':=' <.ws>? <named_capture_body>
{
return { named_capture => {
rule => $$<named_capture_body>,
ident => $$<Pugs::Grammar::Rule::variables>,
}, };
}
|
{
return $$<Pugs::Grammar::Rule::variables>
}
]
| <%Pugs::Grammar::Rule::rule_terms>
{
#print "term: ", Dumper( $_[0]->data );
return $$<Pugs::Grammar::Rule::rule_terms>
}
| <-[ \] \} \) \> \: \? \+ \* \| \& ]>
{
#print "constant: ", Dumper( $_[0]->data );
return { 'constant' => $$/ ,}
}
}
token quant {
| '**' <.ws>? \{ <parsed_code> \}
{ return { closure => $$<parsed_code> ,} }
| <[ \? \* \+ ]>?
}
token quantifier {
$<ws1> := (<.ws>?)
<!before <[ \} \] \) ]> >
<term>
$<ws2> := (<.ws>?)
<quant>
$<greedy> := (<[ \? \+ ]>?)
$<ws3> := (<.ws>?)
{
if
$$/{'quant'} eq ''
&& $$/{'greedy'} eq ''
&& $$/{'ws1'} eq ''
&& $$/{'ws2'} eq ''
&& $$/{'ws3'} eq ''
{
return $$/{'term'};
}
return {
quant => {
term => $$/{'term'},
quant => $$/{'quant'},
greedy => $$/{'greedy'},
ws1 => $$/{'ws1'},
ws2 => $$/{'ws2'},
ws3 => $$/{'ws3'},
} }
}
}
token concat {
<quantifier>+
{
use v5;
my @a = map { $_->() } @{ $::_V6_MATCH_->{'quantifier'} };
return { concat => \@a ,} if scalar @a > 1;
return $a[0];
use v6;
}
}
token conjunctive1 {
[ <.ws>? \& <!before \& > ]?
<concat>**{1}
[
\& <!before \& > <concat>
]*
{
use v5;
my @a = map { $$_ } @{ $::_V6_MATCH_->{'concat'} };
return { conjunctive1 => \@a ,} if scalar @a > 1;
return $a[0];
use v6;
}
}
token disjunctive1 {
[ <.ws>? \| <!before \| > ]?
<conjunctive1>**{1}
[
\| <!before \| > <conjunctive1>
]*
{
use v5;
my @a = map { $$_ } @{ $::_V6_MATCH_->{'conjunctive1'} };
return { alt1 => \@a ,} if scalar @a > 1;
return $a[0];
use v6;
}
}
token conjunctive {
[ <.ws>? \& \& ]?
<disjunctive1>**{1}
[
\& \& <disjunctive1>
]*
{
use v5;
my @a = map { $$_ } @{ $::_V6_MATCH_->{'disjunctive1'} };
return { conjunctive => \@a ,} if scalar @a > 1;
return $a[0];
use v6;
}
}
token rule {
[ <.ws>? \| \| ]?
<conjunctive>**{1}
[
\| \| <conjunctive>
]*
{
use v5;
my @a = map { $$_ } @{ $::_V6_MATCH_->{'conjunctive'} };
return { alt => \@a ,} if scalar @a > 1;
return $a[0];
use v6;
}
}
token named_regex {
( 'token' | 'regex' | 'rule' )
<.ws> <ident> <.ws>? '{'
<.ws>?
<rule>
'}' ';'?
{ return {
type => $$0,
name => $$<ident>,
ast => $$<rule>
};
}
}
# This is hacky, will do better later
token verbatim {
'%{' ( [ <!before '%}'> . ]* ) '%}'
{ return {
type => 'block',
value => $$0
};
}
}
token item {
| <verbatim> { return $$<verbatim>; }
| <named_regex> { return $$<named_regex>; }
}
token grammar {
<.ws>? 'grammar' <.ws> <ident> <.ws>? ';'
<.ws>?
[ <item> <.ws>? ]*
{ return { $$<ident> => $<item> } }
}
token spec {
<verbatim>?
<grammar>*
{ return {
block => $<verbatim>,
'grammar' => $<grammar> }
}
}