/*
* PIR.eyp
*
* Intermediate Code Compiler for Parrot.
*
* Copyright (C) 2002 Melvin Smith <melvin.smith@mindspring.com>
* Copyright (C) 2002-2009, Parrot Foundation.
*
* Grammar of the PIR language parser.
* Casiano: cleaned semantic actions to study the grammar with Eyapp. 2009
*/
%strict
/* We need precedence for a few tokens to resolve a couple of conflicts */
/* Commented tokens look as if they weren't used */
%nonassoc LOW_PREC
%nonassoc '\n'
%nonassoc <t> PARAM
%token HLL TK_LINE TK_FILE
%token GOTO ARG IF UNLESS PNULL SET_RETURN SET_YIELD
%token ADV_FLAT ADV_SLURPY ADV_OPTIONAL ADV_OPT_FLAG ADV_NAMED ADV_ARROW
#%token NEW
#%token ADV_INVOCANT
%token ADV_CALL_SIG
%token NAMESPACE
%token SUB LOCAL LEXICAL CONST ANNOTATE
#%token INC SYM DOT_METHOD DEC
%token GLOBAL_CONST
%token PLUS_ASSIGN MINUS_ASSIGN MUL_ASSIGN DIV_ASSIGN CONCAT_ASSIGN
%token BAND_ASSIGN BOR_ASSIGN BXOR_ASSIGN FDIV FDIV_ASSIGN MOD_ASSIGN
%token SHR_ASSIGN SHL_ASSIGN SHR_U_ASSIGN
%token SHIFT_LEFT SHIFT_RIGHT INTV FLOATV STRINGV PMCV LOG_XOR
%token RELOP_EQ RELOP_NE RELOP_GT RELOP_GTE RELOP_LT RELOP_LTE
%token RESULT RETURN TAILCALL YIELDT GET_RESULTS
%token POW SHIFT_RIGHT_U LOG_AND LOG_OR
%token COMMA ESUB
#%token DOTDOT ENDM
%token PCC_BEGIN PCC_END PCC_CALL PCC_SUB PCC_BEGIN_RETURN PCC_END_RETURN
%token PCC_BEGIN_YIELD PCC_END_YIELD NCI_CALL METH_CALL INVOCANT
%token MAIN LOAD INIT IMMEDIATE POSTCOMP METHOD ANON OUTER NEED_LEX
%token MULTI VTABLE_METHOD LOADLIB SUB_INSTANCE_OF SUBID
%token NS_ENTRY
%token UNIQUE_REG
%token LABEL
%token EMIT EOM
%token IREG NREG SREG PREG IDENTIFIER REG MACRO
%token STRINGC INTC FLOATC USTRINGC
%token PARROT_OP
%type type hll_def return_or_yield comma_or_goto opt_unique_reg
%type program
%type class_namespace
%type constdef sub emit pcc_ret pcc_yield
%type compilation_units compilation_unit pmc_const pragma
%type classname relop any_string assign_op bin_op un_op
%type labels _labels label statement sub_call
%type pcc_sub_call
%type sub_param sub_params pcc_arg pcc_result pcc_args pcc_results sub_param_type_def
%type pcc_returns pcc_yields pcc_return pcc_call arg arglist the_sub multi_type
%type argtype_list argtype paramtype_list paramtype
%type pcc_return_many
%type proto sub_proto sub_proto_list multi multi_types outer
%type vtable instanceof subid
%type method ns_entry_name
%type instruction assignment conditional_statement labeled_inst opt_label op_assign
%type if_statement unless_statement
%type func_assign get_results
%type opt_invocant
%type annotate_directive
%type target targetlist reg const var result pcc_set_yield
%type keylist keylist_force _keylist key maybe_ns
%type vars _vars var_or_i _var_or_i label_op sub_label_op sub_label_op_c
%type pasmcode pasmline pasm_inst
%type pasm_args
%type var_returns
%token VAR
%token LINECOMMENT
%token FILECOMMENT
%type id_list id_list_id
%nonassoc CONCAT DOT
%{
my $LETTER = qr{[a-zA-Z_\@]};
my $DIGIT = qr{[0-9]};
my $DIGITS = qr{(?:$DIGIT)+};
my $HEX = qr{0[xX][0-9A-Fa-f]+};
my $OCT = qr{0[oO][0-7]+};
my $BIN = qr{0[bB][01]+};
my $DOT = qr{[.]};
my $SIGN = qr{[-+]};
my $BIGINT = qr{(?:$SIGN)?$DIGITS"L"};
my $FLOATNUM = qr{(?:$SIGN)?(?:(?:$DIGITS$DOT(?:$DIGIT)*|$DOT$DIGITS)(?:[eE](?:$SIGN)?$DIGITS)?|$DIGITS[eE](?:$SIGN)?$DIGITS)};
my $LETTERDIGIT = qr{[a-zA-Z0-9_]};
my $LABELLETTERDIGIT = qr{(?:[a-zA-Z0-9_@])};
my $ID = qr{$LETTER(?:$LABELLETTERDIGIT)*};
my $DQ_STRING = qr{\"(?:\\.|[^"\\\n])*\"};
my $ENCCHAR = qr{$LETTER|$DIGIT|"-"};
my $ENCCHARS = qr{(?:$ENCCHAR)*};
my $ENC = qr{$LETTER$ENCCHARS":"};
my $UNICODE = qr{$ENC(?:$ENC)?$DQ_STRING};
my $SQ_STRING = qr{\'[^'\n]*\'};
my $STRINGCONSTANT = qr{$SQ_STRING|$DQ_STRING};
my $RANKSPEC = qr{\[[,]*\]};
my $EOL = qr{\r?\n};
my $WS = qr{[\t\f\r\x1a ]};
my $SP = qr{[ ]};
%}
%start program
%tree
%%
program:
compilation_units
;
compilation_units:
compilation_unit
| compilation_units compilation_unit
;
compilation_unit:
class_namespace
| constdef
| sub
| emit
| MACRO '\n'
| pragma
| location_directive
| '\n'
;
pragma:
hll_def '\n'
| LOADLIB STRINGC '\n'
;
location_directive:
TK_LINE INTC COMMA STRINGC '\n'
| TK_FILE STRINGC '\n'
;
annotate_directive:
ANNOTATE STRINGC COMMA const
;
hll_def:
HLL STRINGC
;
constdef:
CONST type IDENTIFIER '=' const
;
pmc_const:
CONST INTC var_or_i '=' any_string
| CONST STRINGC var_or_i '=' any_string
;
any_string:
STRINGC
| USTRINGC
;
pasmcode:
pasmline
| pasmcode pasmline
;
pasmline:
labels pasm_inst '\n'
| MACRO '\n'
| FILECOMMENT
| LINECOMMENT
| class_namespace
| pmc_const
| pragma
;
pasm_inst:
PARROT_OP pasm_args
| PCC_SUB sub_proto LABEL
| PNULL var
| LEXICAL STRINGC COMMA REG
| /* empty */
;
pasm_args:
vars
;
emit: /* EMIT and EOM tokens are used when compiling a .pasm file. */
EMIT opt_pasmcode EOM
;
opt_pasmcode:
/* empty */
| pasmcode
;
class_namespace:
NAMESPACE maybe_ns '\n'
;
maybe_ns:
'[' keylist ']'
| '[' ']'
;
sub:
SUB sub_label_op_c sub_proto '\n' sub_params sub_body ESUB
;
sub_params:
/* empty */ %prec LOW_PREC
| '\n'
| sub_params sub_param '\n'
;
sub_param:
PARAM sub_param_type_def
;
sub_param_type_def:
type IDENTIFIER paramtype_list
;
multi:
MULTI '(' multi_types ')'
;
outer:
OUTER '(' STRINGC ')'
| OUTER '(' IDENTIFIER ')'
;
vtable:
VTABLE_METHOD
| VTABLE_METHOD '(' STRINGC ')'
;
method:
METHOD
| METHOD '(' any_string ')'
;
ns_entry_name:
NS_ENTRY
| NS_ENTRY '(' any_string ')'
;
instanceof:
SUB_INSTANCE_OF '(' STRINGC ')'
;
subid:
SUBID
| SUBID '(' any_string ')'
;
multi_types:
/* empty */
| multi_types COMMA multi_type
| multi_type
;
multi_type:
INTV
| FLOATV
| PMCV
| STRINGV
| IDENTIFIER
| STRINGC
| '[' keylist ']'
;
sub_body:
/* empty */
| statements
;
pcc_sub_call:
PCC_BEGIN '\n'
pcc_args
opt_invocant
pcc_call
opt_label
pcc_results
PCC_END
;
opt_label:
/* empty */
| label '\n'
;
opt_invocant:
/* empty */
| INVOCANT var '\n'
;
sub_proto:
/* empty */
| sub_proto_list
;
sub_proto_list:
proto
| sub_proto_list proto
;
proto:
LOAD
| INIT
| MAIN
| IMMEDIATE
| POSTCOMP
| ANON
| NEED_LEX
| multi
| outer
| vtable
| method
| ns_entry_name
| instanceof
| subid
;
pcc_call:
PCC_CALL var COMMA var '\n'
| PCC_CALL var '\n'
| NCI_CALL var '\n'
| METH_CALL target '\n'
| METH_CALL STRINGC '\n'
| METH_CALL target COMMA var '\n'
| METH_CALL STRINGC COMMA var '\n'
;
pcc_args:
/* empty */
| pcc_args pcc_arg '\n'
;
pcc_arg:
ARG arg
;
pcc_results:
/* empty */
| pcc_results pcc_result '\n'
;
pcc_result:
RESULT target paramtype_list
| LOCAL type id_list_id
;
paramtype_list:
/* empty */
| paramtype_list paramtype
;
paramtype:
ADV_SLURPY
| ADV_OPTIONAL
| ADV_OPT_FLAG
| ADV_NAMED
| ADV_NAMED '(' STRINGC ')'
| ADV_NAMED '(' USTRINGC ')'
| UNIQUE_REG
| ADV_CALL_SIG
;
pcc_ret:
PCC_BEGIN_RETURN '\n'
pcc_returns
PCC_END_RETURN
| pcc_return_many
;
pcc_yield:
PCC_BEGIN_YIELD '\n'
pcc_yields
PCC_END_YIELD
;
pcc_returns:
/* empty */
| pcc_returns '\n'
| pcc_returns pcc_return '\n'
;
pcc_yields:
/* empty */
| pcc_yields '\n'
| pcc_yields pcc_set_yield '\n'
;
pcc_return:
SET_RETURN var argtype_list
;
pcc_set_yield:
SET_YIELD var argtype_list
;
pcc_return_many:
return_or_yield '('
var_returns ')'
;
return_or_yield:
RETURN
| YIELDT
;
var_returns:
/* empty */
| arg
| STRINGC ADV_ARROW var
| var_returns COMMA arg
| var_returns COMMA STRINGC ADV_ARROW var
;
statements:
statement
| statements statement
;
/* This is ugly. Because 'instruction' can start with PARAM and in the
* 'pcc_sub' rule, 'pcc_params' is followed by 'statement', we get a
* shift/reduce conflict on PARAM between reducing to the dummy
* rule and shifting the PARAM to be used as part
* of the 'pcc_params' (which is what we want). However, yacc syntax
* doesn't propagate precedence to the dummy rules, so we have to
* split out the action just so that we can assign it a precedence. */
helper_clear_state:
%prec LOW_PREC
;
statement:
helper_clear_state instruction
| MACRO '\n'
| FILECOMMENT
| LINECOMMENT
| location_directive
| annotate_directive
;
labels:
/* none */
| _labels
;
_labels:
_labels label
| label
;
label:
LABEL
;
instruction:
labels labeled_inst '\n'
| error '\n'
;
id_list :
id_list_id
| id_list COMMA id_list_id
;
id_list_id :
IDENTIFIER opt_unique_reg
;
opt_unique_reg:
/* empty */
| UNIQUE_REG
;
labeled_inst:
assignment
| conditional_statement
| LOCAL type id_list
| LEXICAL STRINGC COMMA target
| LEXICAL USTRINGC COMMA target
| CONST type IDENTIFIER '=' const
| pmc_const
| GLOBAL_CONST type IDENTIFIER '=' const
| TAILCALL sub_call
| GOTO label_op
| PARROT_OP vars
| PNULL var
| sub_call
| pcc_sub_call
| pcc_ret
| pcc_yield
| /* none */
;
type:
INTV
| FLOATV
| STRINGV
| PMCV
;
classname:
IDENTIFIER
;
assignment:
target '=' var
| target '=' un_op var
| target '=' var bin_op var
| target '=' var '[' keylist ']'
| target '[' keylist ']' '=' var
| target '=' 'new' classname '[' keylist ']'
/* Subroutine call the short way */
| target '=' sub_call
| '(' targetlist ')' '=' the_sub '(' arglist ')'
| get_results
| op_assign
| func_assign
| target '=' PNULL
;
un_op:
'!'
| '~'
| '-'
;
bin_op:
'-'
| '+'
| '*'
| '/'
| '%'
| FDIV
| POW
| CONCAT
| RELOP_EQ
| RELOP_NE
| RELOP_GT
| RELOP_GTE
| RELOP_LT
| RELOP_LTE
| SHIFT_LEFT
| SHIFT_RIGHT
| SHIFT_RIGHT_U
| LOG_AND
| LOG_OR
| LOG_XOR
| '&'
| '|'
| '~'
;
get_results:
GET_RESULTS '(' targetlist ')'
;
op_assign:
target assign_op var
;
assign_op:
PLUS_ASSIGN
| MINUS_ASSIGN
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| FDIV_ASSIGN
| CONCAT_ASSIGN
| BAND_ASSIGN
| BOR_ASSIGN
| BXOR_ASSIGN
| SHR_ASSIGN
| SHL_ASSIGN
| SHR_U_ASSIGN
;
func_assign:
target '=' PARROT_OP pasm_args
;
the_sub:
IDENTIFIER
| STRINGC
| USTRINGC
| target
| target DOT sub_label_op
| target DOT USTRINGC
| target DOT STRINGC
| target DOT target
;
sub_call:
the_sub '(' arglist ')'
;
arglist:
/* empty */
| arglist COMMA arg
| arg
| arglist COMMA STRINGC ADV_ARROW var
| var ADV_ARROW var
| STRINGC ADV_ARROW var
;
arg:
var argtype_list
;
argtype_list:
/* empty */
| argtype_list argtype
;
argtype:
ADV_FLAT
| ADV_NAMED
| ADV_CALL_SIG
| ADV_NAMED '(' USTRINGC ')'
| ADV_NAMED '(' STRINGC ')'
;
result:
target paramtype_list
;
targetlist:
targetlist COMMA result
| targetlist COMMA STRINGC ADV_ARROW target
| result
| STRINGC ADV_ARROW target
| /* empty */
;
conditional_statement:
if_statement
| unless_statement
;
unless_statement:
UNLESS var relop var GOTO label_op
| UNLESS PNULL var GOTO label_op
| UNLESS var comma_or_goto label_op
;
if_statement:
IF var comma_or_goto label_op
| IF var relop var GOTO label_op
| IF PNULL var GOTO label_op
;
comma_or_goto:
COMMA
| GOTO
;
relop:
RELOP_EQ
| RELOP_NE
| RELOP_GT
| RELOP_GTE
| RELOP_LT
| RELOP_LTE
;
target:
VAR
| reg
;
vars:
/* empty */
| _vars
;
_vars:
_vars COMMA _var_or_i
| _var_or_i
;
_var_or_i:
var_or_i
| target '[' keylist ']'
| '[' keylist_force ']'
;
sub_label_op_c:
sub_label_op
| STRINGC
| USTRINGC
;
sub_label_op:
IDENTIFIER
| PARROT_OP
;
label_op:
IDENTIFIER
| PARROT_OP
;
var_or_i:
label_op
| var
;
var:
target
| const
;
keylist:
_keylist
;
keylist_force:
_keylist
;
_keylist:
key
| _keylist ';' key
;
key:
var
;
reg:
IREG
| NREG
| SREG
| PREG
| REG
;
const:
INTC
| FLOATC
| STRINGC
| USTRINGC
;
/* The End */
%%
my %directives = (
".set_arg" => 'ARG',
".sub" => 'SUB',
".end" => 'ESUB',
".begin_call" => 'PCC_BEGIN',
".end_call" => 'PCC_END',
".call" => 'PCC_CALL',
".nci_call" => 'NCI_CALL',
".meth_call" => 'METH_CALL',
".invocant" => 'INVOCANT',
".begin_return" => 'PCC_BEGIN_RETURN',
".end_return" => 'PCC_END_RETURN',
".begin_yield" => 'PCC_BEGIN_YIELD',
".end_yield" => 'PCC_END_YIELD',
".get_result" => 'RESULT',
".get_results" => 'GET_RESULTS',
".yield" => 'YIELDT',
".set_yield" => 'SET_YIELD',
".return" => 'RETURN',
".set_return" => 'SET_RETURN',
".tailcall" => 'TAILCALL',
".local" => 'LOCAL',
".globalconst" => 'GLOBAL_CONST',
".param" => 'PARAM',
);
my %colons = (
":unique_reg" => 'UNIQUE_REG',
":instanceof" => 'SUB_INSTANCE_OF',
":subid" => 'SUBID',
":flat" => 'ADV_FLAT',
":slurpy" => 'ADV_SLURPY',
":optional" => 'ADV_OPTIONAL',
":opt_flag" => 'ADV_OPT_FLAG',
":named" => 'ADV_NAMED',
":invocant" => 'ADV_INVOCANT',
":call_sig" => 'ADV_CALL_SIG',
);
my %words = (
"goto" => 'GOTO',
"if" => 'IF',
"unless" => 'UNLESS',
"null" => 'PNULL',
"int" => 'INTV',
"num" => 'FLOATV',
"pmc" => 'PMCV',
"string" => 'STRINGV',
);
my %lexemes = (
"=>" => 'ADV_ARROW',
"<<" => 'SHIFT_LEFT',
">>" => 'SHIFT_RIGHT',
">>>" => 'SHIFT_RIGHT_U',
"&&" => 'LOG_AND',
"||" => 'LOG_OR',
"~~" => 'LOG_XOR',
"<" => 'RELOP_LT',
"<=" => 'RELOP_LTE',
">" => 'RELOP_GT',
">=" => 'RELOP_GTE',
"==" => 'RELOP_EQ',
"!=" => 'RELOP_NE',
"**" => 'POW',
"." => 'DOT',
"+=" => 'PLUS_ASSIGN',
"-=" => 'MINUS_ASSIGN',
"*=" => 'MUL_ASSIGN',
"/=" => 'DIV_ASSIGN',
"%=" => 'MOD_ASSIGN',
"//" => 'FDIV',
"//=" => 'FDIV_ASSIGN',
"&=" => 'BAND_ASSIGN',
"|=" => 'BOR_ASSIGN',
"~=" => 'BXOR_ASSIGN',
">>=" => 'SHR_ASSIGN',
"<<=" => 'SHL_ASSIGN',
">>>=" => 'SHR_U_ASSIGN',
".=" => 'CONCAT_ASSIGN',
);