# Copyright (C) 2005-2009, Parrot Foundation.
=head1 DESCRIPTION
PGE::OPTable - PGE operator precedence table and parser
=head1 Methods
=over 4
=cut
.namespace [ 'PGE';'OPTable' ]
.const int PGE_OPTABLE_EXPECT_TERM = 0x01
.const int PGE_OPTABLE_EXPECT_OPER = 0x02
.const int PGE_OPTABLE_EXPECT_START = 0x05
.const int PGE_OPTABLE_EMPTY = 0x00
.const int PGE_OPTABLE_TERM = 0x10
.const int PGE_OPTABLE_POSTFIX = 0x20
.const int PGE_OPTABLE_CLOSE = 0x30
.const int PGE_OPTABLE_PREFIX = 0x40
.const int PGE_OPTABLE_PRELIST = 0x50
.const int PGE_OPTABLE_INFIX = 0x60
.const int PGE_OPTABLE_TERNARY = 0x70
.const int PGE_OPTABLE_POSTCIRCUMFIX = 0x80
.const int PGE_OPTABLE_CIRCUMFIX = 0x90
.const int PGE_OPTABLE_STOP_SUB = -1
.include "cclass.pasm"
.sub '__onload' :load
.local pmc p6meta
p6meta = new 'P6metaclass'
p6meta.'new_class'('PGE::OPTable', 'parent'=>'Hash', 'attr'=>'%!key %!klen &!ws')
.local pmc sctable
sctable = new 'Hash'
set_global '%!sctable', sctable
'sctable'('term:', 'syncat'=>PGE_OPTABLE_TERM, 'expect'=>0x0201)
'sctable'('postfix:', 'syncat'=>PGE_OPTABLE_POSTFIX, 'expect'=>0x0202, 'arity'=>1)
'sctable'('close:', 'syncat'=>PGE_OPTABLE_CLOSE, 'expect'=>0x0202)
'sctable'('prefix:', 'syncat'=>PGE_OPTABLE_PREFIX, 'expect'=>0x0101, 'arity'=>1)
'sctable'('infix:', 'syncat'=>PGE_OPTABLE_INFIX, 'expect'=>0x0102, 'arity'=>2)
'sctable'('ternary:', 'syncat'=>PGE_OPTABLE_TERNARY, 'expect'=>0x0102, 'expectclose'=>0x0102, 'arity'=>3)
'sctable'('postcircumfix:', 'syncat'=>PGE_OPTABLE_POSTCIRCUMFIX, 'expect'=>0x0102, 'arity'=>2)
'sctable'('circumfix:', 'syncat'=>PGE_OPTABLE_CIRCUMFIX, 'expect'=>0x0101, 'arity'=>1)
.return ()
.end
=item C<sctable(string name, adverbs :slurpy :named)>
Adds (or replaces) a syntactic category's defaults.
=cut
.sub 'sctable'
.param string name
.param pmc adverbs :slurpy :named
.local pmc sctable
sctable = get_global '%!sctable'
unless null adverbs goto with_adverbs
adverbs = new 'Hash'
with_adverbs:
sctable[name] = adverbs
.return (adverbs)
.end
.sub "init" :vtable :method
.local pmc tokentable, keytable, klentable
tokentable = self
keytable = new 'Hash'
klentable = new 'Hash'
setattribute self, '%!key', keytable
setattribute self, '%!klen', klentable
.end
.sub 'newtok' :method
.param string name
.param pmc args :slurpy :named
.local string syncat, key
$I0 = index name, ':'
inc $I0
syncat = substr name, 0, $I0
key = substr name, $I0
.local pmc sctable, token
sctable = get_hll_global ['PGE';'OPTable'], '%!sctable'
$I0 = exists sctable[syncat]
if $I0 == 0 goto token_hash
token = sctable[syncat]
token = clone token
goto with_token
token_hash:
token = new 'Hash'
with_token:
token['name'] = name
# we don't replace existing tokens
.local pmc tokentable
tokentable = self
$I0 = exists tokentable[name]
if $I0 goto end
tokentable[name] = token
$P0 = iter args
args_loop:
unless $P0 goto args_end
$S1 = shift $P0
$P2 = $P0[$S1]
token[$S1] = $P2
goto args_loop
args_end:
## handle token word boundaries
unless key goto with_wb
$I0 = exists token['wb']
if $I0 goto with_wb
$I0 = length key
$I1 = find_not_cclass .CCLASS_WORD, key, 0, $I0
if $I1 < $I0 goto with_wb
token['wb'] = 1
with_wb:
## handle key scanning
unless key goto with_skipkey
$I0 = exists token['skipkey']
if $I0 goto with_skipkey
$P0 = token['parsed']
if null $P0 goto with_skipkey
token['skipkey'] = 1
with_skipkey:
$S0 = token['match']
if $S0 > '' goto with_match
token['match'] = 'PGE::Match'
with_match:
$S0 = token['equiv']
unless $S0 goto with_equiv
$S1 = tokentable[$S0;'precedence']
token['precedence'] = $S1
$S1 = tokentable[$S0;'assoc']
token['assoc'] = $S1
with_equiv:
$S0 = token['looser']
unless $S0 goto with_looser
$S0 = tokentable[$S0;'precedence']
$S0 = clone $S0
$S0 = replace $S0, -1, 0, '<'
token['precedence'] = $S0
with_looser:
$S0 = token['tighter']
unless $S0 goto with_tighter
$S0 = tokentable[$S0;'precedence']
$S0 = clone $S0
$S0 = replace $S0, -1, 0, '>'
token['precedence'] = $S0
with_tighter:
$I0 = exists token['precclose']
if $I0 goto with_precclose
$P0 = token['precedence']
token['precclose'] = $P0
with_precclose:
.local string keyclose
$I0 = index key, ' '
if $I0 < 0 goto with_close
$I1 = $I0 + 1
keyclose = substr key, $I1
key = substr key, 0, $I0
token['keyclose'] = keyclose
$S0 = concat 'close:', keyclose
$I0 = token['expectclose']
if $I0 goto with_expectclose
$I0 = 0x0202
with_expectclose:
$I1 = token['nows']
self.'newtok'($S0, 'equiv' => name, 'expect'=>$I0, 'nows'=>$I1)
with_close:
add_key:
.local pmc keytable, klentable
keytable = getattribute self, '%!key'
klentable = getattribute self, '%!klen'
$I1 = length key
$S0 = substr key, 0, 1
$I0 = klentable[$S0]
if $I0 >= $I1 goto add_key_1
klentable[$S0] = $I1
add_key_1:
$I0 = exists keytable[key]
if $I0 goto add_key_array
keytable[key] = token
goto add_key_end
add_key_array:
$P0 = keytable[key]
$I0 = does $P0, 'array'
if $I0 goto add_key_array_2
$P1 = new 'ResizablePMCArray'
push $P1, $P0
push $P1, token
keytable[key] = $P1
goto add_key_end
add_key_array_2:
push $P0, token
add_key_end:
.local string assoc
assoc = token['assoc']
if assoc > '' goto with_assoc
token['assoc'] = 'left'
with_assoc:
end:
.return (token)
.end
.sub 'parse' :method
.param pmc mob
.param pmc adverbs :slurpy :named
.local pmc tokentable, keytable, klentable
.local pmc tokenstack, operstack, termstack
.local string target
.local pmc mfrom, mpos
.local int pos, lastpos, wspos
.local int expect, nows
.local pmc ws
.local string key
.local pmc token, top, oper
.local pmc it
.local int tokencat, topcat
.local int circumnest
.local pmc cstack
cstack = new 'ResizableIntegerArray'
tokentable = self
keytable = getattribute self, '%!key'
klentable = getattribute self, '%!klen'
unless null adverbs goto with_adverbs
adverbs = new 'Hash'
with_adverbs:
.local pmc action
.local string rulename
action = adverbs['action']
if null action goto no_rulename
rulename = adverbs['rulename']
unless rulename goto have_rulename
$I0 = can action, rulename
if $I0 goto have_rulename
no_rulename:
rulename = ''
have_rulename:
## see if we have a 'stop' adverb. If so, then it is either
## a string to be matched directly or a sub(rule) to be called
## to check for a match.
.local int has_stop, has_stop_nows
.local string stop_str
.local pmc stop
has_stop = 0
$I0 = exists adverbs['stop']
if $I0 == 0 goto with_stop
stop = adverbs['stop']
has_stop = PGE_OPTABLE_STOP_SUB
$I0 = isa stop, 'Sub'
if $I0 goto with_stop
stop_str = stop
$S0 = substr stop_str, 0, 1
if $S0 != ' ' goto stop_str_nows
stop_str = substr stop_str, 1
has_stop = length stop_str
has_stop_nows = 0
goto with_stop
stop_str_nows:
has_stop = length stop_str
has_stop_nows = 1
with_stop:
## if we have a 'tighter' adverb, set
## tighter to the precedence of the op specified
.local string tighter
tighter = adverbs['tighter']
$I0 = exists tokentable[tighter]
if $I0 == 0 goto with_tighter
token = tokentable[tighter]
tighter = token['precedence']
with_tighter:
ws = getattribute self, '&!ws'
unless null ws goto have_ws
$I0 = can mob, 'ws'
unless $I0 goto have_ws
ws = find_method mob, 'ws'
have_ws:
tokenstack = new 'ResizablePMCArray'
operstack = new 'ResizablePMCArray'
termstack = new 'ResizablePMCArray'
$P0 = get_hll_global ['PGE'], 'Match'
(mob, pos, target, mfrom, mpos) = $P0.'new'(mob, adverbs :flat :named)
lastpos = length target
circumnest = 0
expect = PGE_OPTABLE_EXPECT_START
token_next:
## figure out what we're looking for
## if we're at the end of the string, end match
wspos = pos
if pos >= lastpos goto oper_not_found
## check for leading whitespace -- it may limit token candidates
if null ws goto token_next_ws
mpos = pos
$P0 = ws(mob)
unless $P0 goto token_next_1
pos = $P0.'to'()
goto token_next_1
token_next_ws:
pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
token_next_1:
## "nows" tokens are eligible if we don't have leading ws
nows = isne pos, wspos
check_stop:
## Check for a stop pattern or string. But don't check
## if we're in a circumfix.
if circumnest > 0 goto key_search
if has_stop == 0 goto key_search
if has_stop == PGE_OPTABLE_STOP_SUB goto check_stop_sub
$I0 = has_stop_nows & nows
if $I0 goto key_search
$S0 = substr target, pos, has_stop
if $S0 == stop_str goto oper_not_found
goto key_search
check_stop_sub:
mpos = wspos
$P0 = stop(mob)
if $P0 goto oper_not_found
## look through eligible tokens to find longest match
key_search:
## use the next character of input stream to limit search
key = substr target, pos, 1
$I0 = klentable[key]
key = substr target, pos, $I0
key_loop:
$I0 = exists keytable[key]
if $I0 == 0 goto key_next
token = keytable[key]
$I0 = does token, "array"
if $I0 goto key_array
local_branch cstack, token_match
if_null oper, key_next
if oper goto oper_found
goto key_next
key_array:
it = iter token
key_array_1:
unless it goto key_next
token = shift it
local_branch cstack, token_match
if_null oper, key_array_1
if oper goto oper_found
goto key_array_1
key_next:
if key == '' goto token_nows
key = chopn key, 1
goto key_loop
token_nows:
if pos == wspos goto oper_not_found
## try again, with the whitespace operators this time
pos = wspos
nows = 0
goto check_stop
oper_not_found:
## we were unable to find a valid token for the current expect state
## if we're not expecting a term, then end the match here
$I0 = expect & PGE_OPTABLE_EXPECT_TERM
if $I0 == 0 goto end
## otherwise, let's add a "dummy" term to the stack for reduction
oper = mob.'new'(mob)
push termstack, oper
## if the current operator doesn't allow nullterm, end match
unless tokenstack goto end
top = tokenstack[-1]
$I0 = top['nullterm']
if $I0 == 0 goto end
## it's a nullterm operator, so we can continue parsing
oper.'to'(pos)
expect = PGE_OPTABLE_EXPECT_OPER
goto token_next
oper_found:
## tighter: if we have an insufficiently tight token,
## treat it as not found.
if circumnest > 0 goto oper_found_1
$S0 = token['precedence']
if $S0 <= tighter goto oper_not_found
oper_found_1:
tokencat = token['syncat']
## this section processes according to the
## table at the end of this function
if tokencat == PGE_OPTABLE_TERM goto term_shift
if tokencat == PGE_OPTABLE_PREFIX goto oper_shift # (S1)
if tokencat == PGE_OPTABLE_CIRCUMFIX goto oper_shift # (S2)
$I0 = elements termstack
if $I0 > 0 goto shift_reduce
if tokencat != PGE_OPTABLE_PRELIST goto end
## The shift/reduce loop
shift_reduce:
$I0 = elements tokenstack
if $I0 > 0 goto shift_reduce_1
if tokencat == PGE_OPTABLE_CLOSE goto end # (E3)
topcat = PGE_OPTABLE_EMPTY
goto oper_shift # (S3)
shift_reduce_1:
top = tokenstack[-1]
topcat = top['syncat']
if topcat == PGE_OPTABLE_POSTFIX goto oper_reduce # (R4)
if tokencat == PGE_OPTABLE_CLOSE goto oper_close # (R5, C5)
if topcat >= PGE_OPTABLE_POSTCIRCUMFIX goto oper_shift # (S6)
$P0 = token['precedence']
$P1 = top['precclose']
if $P0 > $P1 goto oper_shift # (P)
if topcat != PGE_OPTABLE_TERNARY goto shift_reduce_2
if tokencat != PGE_OPTABLE_TERNARY goto err_ternary # (P/E)
goto oper_shift
shift_reduce_2:
if $P0 < $P1 goto oper_reduce
$P2 = top['assoc']
if $P2 == 'right' goto oper_shift # (P/A)
oper_reduce:
local_branch cstack, reduce
goto shift_reduce
oper_close:
## if the top operator isn't a circumfix, reduce it
## if the close token doesn't match circumfix close, end here
## else shift (fall-through)
if topcat < PGE_OPTABLE_TERNARY goto oper_reduce # (R5)
$S0 = top['keyclose']
if key != $S0 goto end
dec circumnest
oper_shift:
## shift operator onto the operator stack
push tokenstack, token
push operstack, oper
pos = oper.'to'()
## for circumfix ops, increase the circumfix nesting level
$I0 = isgt tokencat, PGE_OPTABLE_POSTCIRCUMFIX
circumnest += $I0
expect = token['expect']
expect = shr expect, 8
goto token_next
term_shift:
push termstack, oper
pos = oper.'to'()
expect = token['expect']
expect = shr expect, 8
goto token_next
## reduce top operation on stack
reduce:
top = pop tokenstack
$P1 = pop operstack
topcat = top['syncat']
if topcat == PGE_OPTABLE_CLOSE goto reduce_close
if topcat < PGE_OPTABLE_POSTCIRCUMFIX goto reduce_normal
## we have an unbalanced open, so error. remove the
## incomplete circumfixed term, and for circumfix: opers
## put a failed nullterm onto the termstack
wspos = -1
$P0 = pop termstack
if topcat != PGE_OPTABLE_CIRCUMFIX goto reduce_end
oper = mob.'new'(mob)
push termstack, oper
goto reduce_end
reduce_close:
top = pop tokenstack
$P1 = pop operstack
reduce_normal:
.local int arity
arity = top['arity']
reduce_args:
if arity < 1 goto reduce_list
$P2 = pop termstack
dec arity
unless $P2 goto reduce_backtrack
$P1[arity] = $P2
goto reduce_args
reduce_backtrack:
wspos = -1
if arity > 0 goto end
push termstack, $P2
goto reduce_end
reduce_list:
## combine matching list associative operations
$S0 = top['assoc']
if $S0 != 'list' goto reduce_saveterm
$S1 = $P1['type']
$S2 = $P2['type']
if $S1 != $S2 goto reduce_saveterm
$P0 = $P2.'list'()
$P1 = $P1[1]
push $P0, $P1
$P1 = $P2
reduce_saveterm:
unless rulename goto reduce_saveterm_1
($P0 :optional, $I0 :opt_flag) = action.rulename($P1, 'reduce')
unless $I0 goto reduce_saveterm_1
$P1.'!make'($P0)
reduce_saveterm_1:
push termstack, $P1
reduce_end:
local_return cstack
token_match:
mpos = pos
null oper
$I0 = token['expect']
$I0 = $I0 & expect
if $I0 == 0 goto token_match_end
$I0 = token['nows']
$I0 = $I0 & nows
if $I0 goto token_match_end
$I0 = exists token['parsed']
if $I0 goto token_match_sub
$I0 = length key
$I0 += pos
$I1 = token['wb']
unless $I1 goto token_match_key
$I1 = is_cclass .CCLASS_WORD, target, $I0
if $I1 goto token_match_end
token_match_key:
$S0 = token['match']
oper = mob.'new'(mob, 'grammar'=>$S0)
oper.'to'($I0)
goto token_match_success
token_match_sub:
$P0 = token['parsed']
mob['KEY'] = key
mpos = pos
$I0 = token['skipkey']
unless $I0 goto token_match_sub_1
$I0 = length key
mpos += $I0
token_match_sub_1:
oper = $P0(mob, 'action'=>action)
delete mob['KEY']
$P0 = oper.'from'()
$P0 = pos
token_match_success:
$P0 = token["name"]
$P0 = clone $P0
oper['type'] = $P0
oper['top'] = token
token_match_end:
local_return cstack
## At end, reduce any remaining tokens and return result term
end:
$I0 = elements tokenstack
if $I0 < 1 goto end_1
local_branch cstack, reduce
goto end
end_1:
mpos = -1
## if the termstack is empty, fail the match
## if the term is an invalid term, fail the match
$I0 = elements termstack
if $I0 < 1 goto end_all
$P0 = pop termstack
unless $P0 goto end_all
mob["expr"] = $P0
mpos = wspos
if wspos > 0 goto end_2
## somewhere we encountered an error that caused us to backtrack
## find the "real" ending position here
end_1a:
$I0 = $P0.'to'()
if $I0 <= wspos goto end_1b
wspos = $I0
mpos = $I0
end_1b:
$P0 = $P0[-1]
if null $P0 goto end_2
$I0 = isa $P0, ['PGE';'Match']
if $I0 goto end_1a
end_2:
unless rulename goto end_all
($P0 :optional, $I0 :opt_flag) = action.rulename(mob, 'end')
unless $I0 goto end_all
mob.'!make'($P0)
end_all:
.return (mob)
err_ternary:
$S1 = pos
$S0 = concat 'Ternary error at offset ', $S1
$S0 .= ", found '"
$S1 = substr target, pos, 1
$S0 .= $S1
$S0 .= "'"
die $S0
.end
### Miscellaneous Notes
#
# Here's the shift-reduce table used by the C<parse> method.
# The digits in the table map each state to the corresponding
# statement in the C<parse> method above.
#
# stack Current token
# ------- -----------------------------------------------------------------
# postfix close prefix prelist infix ternary postcir circfix
# empty S3 E3 S1 S3 S3 S3 S3 S2
# postfix R4 R4 X R4 R4 R4 R4 X
# close P R5 S1 P P P P P2
# prefix P R5 S1 P P P P S2
# prelist R5 S1 S2
# infix P R5 S1 P P/A P P S2
# ternary P/E C5 S1 P/E P/E P/A P/E S2
# postcir S6 C5 S1 S6 S6 S6 S6 S2
# circfix S6 C5 S1 S6 S6 S6 S6 S2
#
# Legend:
# S# = shift -- push operator onto token stack
# R# = reduce -- pop operator from token stack, and fill it with
# the appropriate number of arguments (arity) from the term stack.
# Then put the operator token onto the term stack. Reducing a
# close token requires popping two operators from the token
# stack. Reducing a lone ternary operator is a parse error
# (its close token must be present).
# P = precedence -- compare the relative precedence of the top
# token in the token stack with the current token.
# If current is tighter than top, shift.
# If current is looser than top, reduce.
# P/A = precedence with associativity -- for tokens with equal
# precedence, use the associativity of the top token in the
# token stack, shift if it's right associative, reduce otherwise.
# P/E = higher precedence only -- shift if the current token has
# higher precedence than the top token on the stack, otherwise
# it's a parse error.
# C = close -- If the current token is an appropriate closing
# token for the top operator on the token stack, then shift.
# Otherwise, it's an unbalanced closing token.
# X = unreachable combination
# E = either the end of the parse, or a parse error (probably
# to be determined by the caller)
#
=back
=cut
# Local Variables:
# mode: pir
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4 ft=pir: