The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (C) 2007-2008, Parrot Foundation.

=head1 NAME

PCT::Grammar - base grammar with useful rules

=head1 SYNOPSIS

    grammar MyGrammar is PCT::Grammar;

    rule abc { [ word | <panic: word not found> ] }

    rule quote {
        [ \' <string_literal: '> \'
        | \" <string_literal: "> \"
        ]
    }

=head1 DESCRIPTION

This file implements C<PCT::Grammar>, which is a basic grammar object
with a few useful methods for parsing thrown in.

=head2 Methods

=over 4

=item panic(match [, message, ...] )

Throws an exception at the current point of the match, with message
as part of the exception payload.  The message doesn't end with
a newline, then the line number and offset of the match are
also included.

=cut

.namespace [ 'PCT';'Grammar' ]

.sub 'onload' :anon :init :load
    load_bytecode 'PGE.pbc'
    load_bytecode 'PGE/Util.pbc'
    .local pmc p6meta
    p6meta = new 'P6metaclass'
    p6meta.'new_class'('PCT::Grammar', 'parent'=>'PGE::Grammar')
    $P0 = split '::', 'PCT::Grammar'
    $P0 = get_class $P0
    $P1 = get_hll_global ['PGE';'Util'], 'die'
    $P0.'add_method'('panic', $P1)
    .return ()
.end


=item FAILGOAL($goal [, 'dba'=>dba])

Invoked when goal matching fails to find the goal.  Builds an appropriate
error message and delegates the rest to C<panic>.

=cut

.sub 'FAILGOAL' :method
    .param string goal
    .param pmc options         :named :slurpy
    .local string dba
    dba = options['dba']
    if dba goto have_dba
    ##  if no dba supplied, use the name of the caller sub
    $P0 = getinterp
    $P0 = $P0['sub';1]
    dba = $P0
  have_dba:
    .tailcall self.'panic'("Unable to parse ", dba, "; couldn't find final ", goal)
.end


=item item()

Here we overload the item() method from PGE::Match to
throw an exception if a result object hasn't been set.

=cut

.sub 'ast' :method
    .local pmc obj
    obj = getattribute self, '$!ast'
    unless null obj goto end
    die "No result object"
  end:
    .return (obj)
.end


=item ww()

Special-purpose rule to return true if we're in the middle
of a word -- i.e., if the previous and next character are
both "word characters".  This is roughly equivalent to
C<< <?after \w><?before \w> >> except it's much quicker.
In particular, C<< <!ww> >> can be used by :sigspace rules
to enforce whitespace between lexical words.

=cut

.include 'cclass.pasm'

.sub 'ww' :method
    .param pmc adverbs         :slurpy :named
    .local pmc mob
    .local int pos
    .local string target
    $P0 = get_hll_global ['PGE'], 'Match'
    (mob, pos, target) = $P0.'new'(self)
    if pos == 0 goto fail
    $I0 = is_cclass .CCLASS_WORD, target, pos
    unless $I0 goto fail
    $I1 = pos - 1
    $I0 = is_cclass .CCLASS_WORD, target, $I1
    unless $I0 goto fail
    mob.'to'(pos)
  fail:
    .return (mob)
.end


.sub 'string_literal' :method
    .param string stop
    .param pmc adverbs         :slurpy :named

    ##  create a new match object, get the new match position
    .local pmc mob
    .local int pos, lastpos, stoplen
    .local string target, escapechars
    (mob, pos, target) = self.'new'(self)
    lastpos = length target
    stoplen = length stop
    $S0 = substr stop, 0, 1
    escapechars = concat "\\", $S0

    ##  leave space for close delimiter
    lastpos -= stoplen

    ##  now initialize and loop through target
  literal_init:
    .local string literal, litchar
    literal = ''

  literal_loop:
    ##  if we're beyond the last possible position, fail
    if pos > lastpos goto fail

    ##  if ending delimiter, then we're done
    $S0 = substr target, pos, stoplen
    if $S0 == stop goto literal_end
    if pos >= lastpos goto fail

    ##  get next character in literal
    litchar = substr target, pos, 1
    inc pos

    ##  add non-escape characters to literal
    if litchar != "\\" goto add_litchar

    ##  look at the next character, if it's always escaped, add it and
    ##  move on
    .local string escaped
    escaped = substr target, pos, 1
    $I0 = index escapechars, escaped
    if $I0 < 0 goto interpolated_escape
    inc pos
    literal .= escaped
    goto literal_loop

  interpolated_escape:
    ##  if not double-quoted delim, no interpolation
    if stop != '"' goto add_litchar
    litchar = escaped
    inc pos
    $I0 = index "abefnrt0xdo", litchar
    if $I0 < 0 goto add_litchar

    ##  if it's one of "xdo", then handle that specially
    if $I0 >= 8 goto literal_xdo

    litchar = substr "\a\b\e\f\n\r\t\0", $I0, 1
    goto add_litchar

  literal_xdo:
    ##  handle \x, \d, and \o escapes.  start by converting
    ##  the 'o', 'd', or 'x' into 8, 10, or 16 (yes, it's hack
    ##  but it works).  Then loop through the characters that
    ##  follow to compute the integer value of the codepoint,
    ##  and add that codepoint to our literal.
    .local int base, codepoint, isbracketed
    base = index '        o d     x', litchar
    codepoint = 0
    $S0 = substr target, pos, 1
    isbracketed = iseq $S0, '['
    pos += isbracketed
  literal_xdo_char_loop:
    $S0 = substr target, pos, 1
    $I0 = index '0123456789abcdef', $S0
    if $I0 < 0 goto literal_xdo_char_end
    if $I0 >= base goto literal_xdo_char_end
    codepoint *= base
    codepoint += $I0
    inc pos
    goto literal_xdo_char_loop
  literal_xdo_char_end:
    $S1 = chr codepoint
    literal = concat literal, $S1
    unless isbracketed goto literal_xdo_end
    if $S0 == ']' goto literal_xdo_end
    if $S0 != ',' goto fail
    inc pos
    codepoint = 0
    goto literal_xdo_char_loop
  literal_xdo_end:
    pos += isbracketed
    goto literal_loop

  add_litchar:
    literal .= litchar
    goto literal_loop

  literal_end:
    mob.'to'(pos)
    mob.'!make'(literal)
    .return (mob)

  fail:
    mob.'to'(-1)
    .return (mob)
.end

# Local Variables:
#   mode: pir
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4 ft=pir: