%{
# pascal.eyp
#
# Pascal grammar in Eyapp format, based originally on BNF given
# in "Standard Pascal -- User Reference Manual", by Doug Cooper.
# This in turn is the BNF given by the ANSI and ISO Pascal standards,
# and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal.
# The grammar has been massaged somewhat to make it LALR, and added
# the following extensions.
#
# constant expressions
# otherwise statement in a case
# productions to correctly match else's with if's
# beginnings of a separate compilation facility
#
%}
%strict
%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ
%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION
%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT
%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC
%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN
%token TO TYPE UNTIL UPARROW VAR WHILE WITH
%tree
%namingscheme {
#Receives a Parse::Eyapp object describing the grammar
my $self = shift;
#$self->tokennames();
# returns the handler that will give names
# to the right hand sides
\&give_rhs_name;
}
%%
file : program
| module
;
program : program_heading semicolon block DOT
;
program_heading : PROGRAM identifier
| PROGRAM identifier LPAREN identifier_list RPAREN
;
identifier_list : identifier_list comma identifier
| identifier
;
block : label_declaration_part
constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
statement_part
;
module : constant_definition_part
type_definition_part
variable_declaration_part
procedure_and_function_declaration_part
;
label_declaration_part : LABEL label_list semicolon
|
;
label_list : label_list comma label
| label
;
label : DIGSEQ
;
constant_definition_part : CONST constant_list
|
;
constant_list : constant_list constant_definition
| constant_definition
;
constant_definition : identifier EQUAL cexpression semicolon
;
/*constant : cexpression ; good stuff! */
cexpression : csimple_expression
| csimple_expression relop csimple_expression
;
csimple_expression : cterm
| csimple_expression addop cterm
;
cterm : cfactor
| cterm mulop cfactor
;
cfactor : sign cfactor
| cexponentiation
;
cexponentiation : cprimary
| cprimary STARSTAR cexponentiation
;
cprimary : identifier
| LPAREN cexpression RPAREN
| unsigned_constant
| NOT cprimary
;
constant : non_string
| sign non_string
| CHARACTER_STRING
;
sign : PLUS
| MINUS
;
non_string : DIGSEQ
| identifier
| REALNUMBER
;
type_definition_part : TYPE type_definition_list
|
;
type_definition_list : type_definition_list type_definition
| type_definition
;
type_definition : identifier EQUAL type_denoter semicolon
;
type_denoter : identifier
| new_type
;
new_type : new_ordinal_type
| new_structured_type
| new_pointer_type
;
new_ordinal_type : enumerated_type
| subrange_type
;
enumerated_type : LPAREN identifier_list RPAREN
;
subrange_type : constant DOTDOT constant
;
new_structured_type : structured_type
| PACKED structured_type
;
structured_type : array_type
| record_type
| set_type
| file_type
;
array_type : ARRAY LBRAC index_list RBRAC OF component_type
;
index_list : index_list comma index_type
| index_type
;
index_type : ordinal_type ;
ordinal_type : new_ordinal_type
| identifier
;
component_type : type_denoter ;
record_type : RECORD record_section_list END
| RECORD record_section_list semicolon variant_part END
| RECORD variant_part END
;
record_section_list : record_section_list semicolon record_section
| record_section
;
record_section : identifier_list COLON type_denoter
;
variant_part : CASE variant_selector OF variant_list semicolon
| CASE variant_selector OF variant_list
|
;
variant_selector : tag_field COLON tag_type
| tag_type
;
variant_list : variant_list semicolon variant
| variant
;
variant : case_constant_list COLON LPAREN record_section_list RPAREN
| case_constant_list COLON LPAREN record_section_list semicolon
variant_part RPAREN
| case_constant_list COLON LPAREN variant_part RPAREN
;
case_constant_list : case_constant_list comma case_constant
| case_constant
;
case_constant : constant
| constant DOTDOT constant
;
tag_field : identifier ;
tag_type : identifier ;
set_type : SET OF base_type
;
base_type : ordinal_type ;
file_type : PFILE OF component_type
;
new_pointer_type : UPARROW domain_type
;
domain_type : identifier ;
variable_declaration_part : VAR variable_declaration_list semicolon
|
;
variable_declaration_list :
variable_declaration_list semicolon variable_declaration
| variable_declaration
;
variable_declaration : identifier_list COLON type_denoter
;
procedure_and_function_declaration_part :
proc_or_func_declaration_list semicolon
|
;
proc_or_func_declaration_list :
proc_or_func_declaration_list semicolon proc_or_func_declaration
| proc_or_func_declaration
;
proc_or_func_declaration : procedure_declaration
| function_declaration
;
procedure_declaration : procedure_heading semicolon directive
| procedure_heading semicolon procedure_block
;
procedure_heading : procedure_identification
| procedure_identification formal_parameter_list
;
directive : FORWARD
| EXTERNAL
;
formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ;
formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section
| formal_parameter_section
;
formal_parameter_section : value_parameter_specification
| variable_parameter_specification
| procedural_parameter_specification
| functional_parameter_specification
;
value_parameter_specification : identifier_list COLON identifier
;
variable_parameter_specification : VAR identifier_list COLON identifier
;
procedural_parameter_specification : procedure_heading ;
functional_parameter_specification : function_heading ;
procedure_identification : PROCEDURE identifier ;
procedure_block : block ;
function_declaration : function_heading semicolon directive
| function_identification semicolon function_block
| function_heading semicolon function_block
;
function_heading : FUNCTION identifier COLON result_type
| FUNCTION identifier formal_parameter_list COLON result_type
;
result_type : identifier ;
function_identification : FUNCTION identifier ;
function_block : block ;
statement_part : compound_statement ;
compound_statement : PBEGIN statement_sequence END ;
statement_sequence : statement_sequence semicolon statement
| statement
;
statement : open_statement
| closed_statement
;
open_statement : label COLON non_labeled_open_statement
| non_labeled_open_statement
;
closed_statement : label COLON non_labeled_closed_statement
| non_labeled_closed_statement
;
non_labeled_closed_statement : assignment_statement
| procedure_statement
| goto_statement
| compound_statement
| case_statement
| repeat_statement
| closed_with_statement
| closed_if_statement
| closed_while_statement
| closed_for_statement
|
;
non_labeled_open_statement : open_with_statement
| open_if_statement
| open_while_statement
| open_for_statement
;
repeat_statement : REPEAT statement_sequence UNTIL boolean_expression
;
open_while_statement : WHILE boolean_expression DO open_statement
;
closed_while_statement : WHILE boolean_expression DO closed_statement
;
open_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO open_statement
;
closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction
final_value DO closed_statement
;
open_with_statement : WITH record_variable_list DO open_statement
;
closed_with_statement : WITH record_variable_list DO closed_statement
;
open_if_statement : IF boolean_expression THEN statement
| IF boolean_expression THEN closed_statement ELSE open_statement
;
closed_if_statement : IF boolean_expression THEN closed_statement
ELSE closed_statement
;
assignment_statement : variable_access ASSIGNMENT expression
;
variable_access : identifier
| indexed_variable
| field_designator
| variable_access UPARROW
;
indexed_variable : variable_access LBRAC index_expression_list RBRAC
;
index_expression_list : index_expression_list comma index_expression
| index_expression
;
index_expression : expression ;
field_designator : variable_access DOT identifier
;
procedure_statement : identifier params
| identifier
;
params : LPAREN actual_parameter_list RPAREN ;
actual_parameter_list : actual_parameter_list comma actual_parameter
| actual_parameter
;
#* this forces you to check all this to be sure that only write and
#* writeln use the 2nd and 3rd forms, you really can't do it easily in
#* the grammar, especially since write and writeln aren't reserved
actual_parameter : expression
| expression COLON expression
| expression COLON expression COLON expression
;
goto_statement : GOTO label
;
case_statement : CASE case_index OF case_list_element_list END
| CASE case_index OF case_list_element_list SEMICOLON END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement END
| CASE case_index OF case_list_element_list semicolon
otherwisepart statement SEMICOLON END
;
case_index : expression ;
case_list_element_list : case_list_element_list semicolon case_list_element
| case_list_element
;
case_list_element : case_constant_list COLON statement
;
otherwisepart : OTHERWISE
| OTHERWISE COLON
;
control_variable : identifier ;
initial_value : expression ;
direction : TO
| DOWNTO
;
final_value : expression ;
record_variable_list : record_variable_list comma variable_access
| variable_access
;
boolean_expression : expression ;
expression : simple_expression
| simple_expression relop simple_expression
;
simple_expression : term
| simple_expression addop term
;
term : factor
| term mulop factor
;
factor : sign factor
| exponentiation
;
exponentiation : primary
| primary STARSTAR exponentiation
;
primary : variable_access
| unsigned_constant
| function_designator
| set_constructor
| LPAREN expression RPAREN
| NOT primary
;
unsigned_constant : unsigned_number
| CHARACTER_STRING
| NIL
;
unsigned_number : unsigned_integer | unsigned_real ;
unsigned_integer : DIGSEQ
;
unsigned_real : REALNUMBER
;
/* functions with no params will be handled by plain identifier */
function_designator : identifier params
;
set_constructor : LBRAC member_designator_list RBRAC
| LBRAC RBRAC
;
member_designator_list : member_designator_list comma member_designator
| member_designator
;
member_designator : member_designator DOTDOT expression
| expression
;
addop: PLUS
| MINUS
| OR
;
mulop : STAR
| SLASH
| DIV
| MOD
| AND
;
relop : EQUAL
| NOTEQUAL
| LT
| GT
| LE
| GE
| IN
;
identifier : IDENTIFIER
;
semicolon : SEMICOLON
;
comma : COMMA
;
%%
use Carp;
use Getopt::Long;
my %keywords = (
AND => 'AND',
ARRAY => 'ARRAY',
CASE => 'CASE',
CONST => 'CONST',
DIV => 'DIV',
DO => 'DO',
DOWNTO => 'DOWNTO',
ELSE => 'ELSE',
END => 'END',
EXTERN => 'EXTERNAL',
EXTERNAL => 'EXTERNAL',
FOR => 'FOR',
FORWARD => 'FORWARD',
FUNCTION => 'FUNCTION',
GOTO => 'GOTO',
IF => 'IF',
IN => 'IN',
LABEL => 'LABEL',
MOD => 'MOD',
NIL => 'NIL',
NOT => 'NOT',
OF => 'OF',
OR => 'OR',
OTHERWISE => 'OTHERWISE',
PACKED => 'PACKED',
BEGIN => 'PBEGIN',
FILE => 'PFILE',
PROCEDURE => 'PROCEDURE',
PROGRAM => 'PROGRAM',
RECORD => 'RECORD',
REPEAT => 'REPEAT',
SET => 'SET',
THEN => 'THEN',
TO => 'TO',
TYPE => 'TYPE',
UNTIL => 'UNTIL',
VAR => 'VAR',
WHILE => 'WHILE',
WITH => 'WITH',
);
my %lexeme = (
':=' => 'ASSIGNMENT',
':' => 'COLON',
',' => 'COMMA',
'.' => 'DOT',
'..' => 'DOTDOT',
'=' => 'EQUAL',
'>=' => 'GE',
'>' => 'GT',
'[' => 'LBRAC',
'<=' => 'LE',
'(' => 'LPAREN',
'<' => 'LT',
'-' => 'MINUS',
'<>' => 'NOTEQUAL',
'+' => 'PLUS',
']' => 'RBRAC',
')' => 'RPAREN',
';' => 'SEMICOLON',
'/' => 'SLASH',
'*' => 'STAR',
'**' => 'STARSTAR',
'->' => 'UPARROW',
'^' => 'UPARROW',
);
my ($tokenbegin, $tokenend) = (1, 1);
sub _Lexer {
my($parser)=shift;
my $token;
for ($parser->{INPUT}) {
return('',undef) if !defined($_) or $_ eq '';
#Skip blanks and comments
s{\A
((?:
\s+ # any white space char
| \(\*.*?\*\) # (*.. *) comments
| \{.*?\} # { .. } comments
)+
)
}
{}xs
and do {
my($blanks)=$1;
#Maybe At EOF
return('', undef) if $_ eq '';
$tokenend += $blanks =~ tr/\n//;
};
$tokenbegin = $tokenend;
s{^([0-9]+(\.[0-9]+)?)}{} and do {
return ('DIGSEQ', [$1, $tokenbegin]) unless defined($2);
return ('REALNUMBER', [$1, $tokenbegin]);
};
s{^(\'(\\.|[^\\'])*\')}{}
and return('CHARACTER_STRING', [$1, $tokenbegin]);
s/^([a-zA-Z_][A-Za-z0-9_]*)//
and do {
my $word = uc($1);
my $r;
return ($r, [$r, $tokenbegin]) if defined($r = $keywords{$word});
return('IDENTIFIER',[$word, $tokenbegin]);
};
m/^(\S\S)/ and defined($token = $1) and exists($lexeme{$token})
and do {
s/..//;
return ($lexeme{$token}, [$token, $tokenbegin]);
}; # do
m/^(\S)/ and defined($token = $1) and exists($lexeme{$token})
and do {
s/.//;
return ($lexeme{$token}, [$token, $tokenbegin]);
}; # do
die "Unexpected character at $tokenbegin\n";
} # for
}
sub _Error {
my($token)=$_[0]->YYCurval;
my($what)= $token ? "input: '$token->[0]' in line $token->[1]" : "end of input";
my @expected = $_[0]->YYExpect();
my $expected = @expected? "Expected one of these tokens: '@expected'":"";
croak "Syntax error near $what. $expected\n";
}
sub Run {
my($self)=shift;
my $yydebug = shift || 0;
return $self->YYParse(
yylex => \&_Lexer,
yyerror => \&_Error,
yydebug => $yydebug, # 0x1F
);
}
sub uploadfile {
my $file = shift;
my $msg = shift;
my $input = '';
eval {
$input = Parse::Eyapp::Base::slurp_file($file)
};
if ($@) {
print $msg;
local $/ = undef;
$input = <STDIN>;
}
return $input;
}
sub main {
my $package = shift;
my $debug = 0;
my $file = '';
my $result = GetOptions (
"debug!" => \$debug,
"file=s" => \$file,
);
$debug = 0x1F if $debug;
$file = shift if !$file && @ARGV;
my $parser = $package->new();
my $prompt = "Expressions. Press CTRL-D (Unix) or CTRL-Z (Windows) to finish:\n";
$parser->{INPUT} = uploadfile($file, $prompt);
$Parse::Eyapp::Node::INDENT = 2;
print $parser->Run( $debug )->str,"\n";
}
sub TERMINAL::info {
$_[0]->{attr}[0]
};
__PACKAGE__->main unless caller();
=head1 NAME Pascal eyapp grammar
=head1 LIMITATIONS
A parameter declarations must be followed by an identifer.
A declaration like:
procedure one (i, j : integer; k : array [1..5] of real);
instead we can do:
type arrreal5 = array [1..5] of real;
procedure one (i, j : integer; k : arrreal5);
=cut