lib/MarpaX/Languages/ECMAScript/AST/Grammar/ECMAScript_262_5/Pattern.pm

use strict;
use warnings FATAL => 'all';

package MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern;
use parent qw/MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Base/;
use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern::Semantics;
use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses;
use SUPER;
use Carp qw/croak/;
use Scalar::Util qw/blessed/;

# ABSTRACT: ECMAScript-262, Edition 5, pattern grammar written in Marpa BNF

our $VERSION = '0.020'; # VERSION


#
# Note that this grammar is NOT supposed to be injected in Program
#
our $grammar_content = do {local $/; <DATA>};


sub new {
    my ($class, $optionsp) = @_;

    # A missing options hash is treated exactly like an empty one.
    $optionsp = {} unless defined $optionsp;

    # Semantics package: the caller's choice when the key is present (even if
    # its value is undef), otherwise "<class>::Semantics".
    my $semantics_package;
    if (exists $optionsp->{semantics_package}) {
        $semantics_package = $optionsp->{semantics_package};
    }
    else {
        $semantics_package = join('::', $class, 'Semantics');
    }

    # Object construction is delegated to the parent class through the SUPER
    # module; note that the options are not forwarded.
    my $self = $class->SUPER();

    # Remember the semantics package, and start with an empty list of
    # disjunction left-parenthesis positions.
    $self->{_semantics_package} = $semantics_package;
    $self->{_lparen}            = [];

    return $self;
}


sub make_grammar_content {
    # Class method: the invocant plays no role, the result is always the
    # text slurped from this file's DATA section into $grammar_content.
    return $grammar_content;
}


sub lparen {
    my $self = shift;

    # Hand back the stored reference itself (not a copy) to the array of
    # recorded left-parenthesis positions.
    return $self->{_lparen};
}


sub recce_option {
    my $self = shift;

    # Start from the option hash supplied by the parent class ...
    my $recce_options = $self->SUPER();

    # ... and force the semantics package selected at construction time.
    $recce_options->{semantics_package} = $self->{_semantics_package};

    return $recce_options;
}


sub parse {
    my ($self, $source, $impl) = @_;

    # Every parse starts with a fresh, empty list of left-parenthesis
    # positions; the event callback fills it.
    $self->{_lparen} = [];

    # Event handling handed over to the parent's parse(): the callback and the
    # arguments it is to be given (presumably prepended to the parent's own
    # callback arguments -- to be confirmed in the Base class).
    my %event_handling = (
        callback     => \&_eventCallback,
        callbackargs => [ $self ],
    );

    return $self->SUPER($source, $impl, \%event_handling);
}

sub _eventCallback {
  my ($self, $source, $pos, $max, $impl) = @_;

  #
  # Walk through the events reported by the recognizer. The only one of
  # interest is the completion event of the capturing '(' lexeme; any other
  # event is skipped.
  #
  for my $event (@{$impl->events()}) {
    my ($event_name) = @{$event};
    next if $event_name ne 'LPAREN_ATOM_DISJUNCTION$';
    #
    # The event fires right after the '(' has been read, so the parenthesis
    # itself sits one position before $pos.
    #
    push(@{$self->{_lparen}}, $pos - 1);
  }

  #
  # The position is returned unchanged: it is exactly where SLIF stopped
  # because of the event.
  #
  return $pos;
}


sub value {
  my $self = shift;
  my $impl = shift;

  #
  # Expose the recorded left-parenthesis locations through a package variable
  # for the duration of the parent's value() call only: this is how they
  # become visible when Marpa calls the semantics package's new(). The
  # previous content of the variable is restored when this method returns.
  #
  local $MarpaX::Languages::ECMAScript::AST::Grammar::Pattern::lparen = $self->{_lparen};

  return $self->SUPER($impl);
}



1;

=pod

=encoding UTF-8

=head1 NAME

MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern - ECMAScript-262, Edition 5, pattern grammar written in Marpa BNF

=head1 VERSION

version 0.020

=head1 SYNOPSIS

    use strict;
    use warnings FATAL => 'all';
    use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern;

    my $grammar = MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern->new();

    my $grammar_content = $grammar->content();
    my $grammar_option = $grammar->grammar_option();
    my $recce_option = $grammar->recce_option();

=head1 DESCRIPTION

This module describes the ECMAScript 262, Edition 5 pattern grammar written in Marpa BNF, as of L<http://www.ecma-international.org/publications/standards/Ecma-262.htm>, section 15.10.1. This module inherits the methods from the MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Base package.

=head1 SUBROUTINES/METHODS

=head2 new($optionsp)

$optionsp is a reference to hash that may contain the following key/value pair:

=over

=item semantics_package

As per Marpa::R2, the semantics package is used when resolving action names to fully qualified Perl names. This package must support and behave as documented in the Semantics package (cf. SEE ALSO).

=back

=head2 make_grammar_content($class)

Returns the grammar. Note that this grammar is not supposed to be injected in the Program's grammar.

=head2 lparen($self)

Returns a reference to the array of offsets, in the source, of the left parentheses that open capturing groups, as recorded since the last call to parse().

=head2 recce_option($self)

Returns option for Marpa::R2::Scanless::R->new(), returned as a reference to a hash.

=head2 parse($self, $source, $impl)

Parse the source given as $source using implementation $impl.

=head2 value($self, $impl)

Returns the parse tree (unique) value. $impl is the recognizer instance for the grammar. Will raise an InternalError exception if there is no parse tree value, or more than one parse tree value. Please note that this method explicitly destroys the recognizer using $impl->destroy_R. The value itself is an AST where every string is a perl string. This method overrides MarpaX::Languages::ECMAScript::AST::Grammar::Base::value() because the positions of the disjunctions' left parentheses are localized, so that they are visible while the value is computed.

This method explicitly sets a localized MarpaX::Languages::ECMAScript::AST::Grammar::Pattern::lparen variable, which is a reference to an array of the locations of all disjunction left parentheses in the regular expression perl string.

=head1 SEE ALSO

L<MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Base>

L<MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern::Semantics>

=head1 AUTHOR

Jean-Damien Durand <jeandamiendurand@free.fr>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by Jean-Damien Durand.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__DATA__
# =================================
# ECMAScript Script Pattern grammar
# =================================
#
:start ::= Pattern
:default ::= action => [values]
lexeme default = action => [start,length,value] forgiving => 1

Pattern ::=
      Disjunction                             action => _Pattern_Disjunction

Disjunction ::=
      Alternative                             action => _Disjunction_Alternative
    | Alternative '|' Disjunction             action => _Disjunction_Alternative_OR_Disjunction

Alternative ::=                               action => _Alternative
Alternative ::= Alternative Term              action => _Alternative_Alternative_Term

Term ::=
      Assertion                               action => _Term_Assertion
    | Atom                                    action => _Term_Atom
    | Atom Quantifier                         action => _Term_Atom_Quantifier

Assertion ::=
      '^'                                     action => _Assertion_Caret
    | '$'                                     action => _Assertion_Dollar
    | '\b'                                    action => _Assertion_b
    | '\B'                                    action => _Assertion_B
    | '(?=' Disjunction ')'                   action => _Assertion_DisjunctionPositiveLookAhead
    | '(?!' Disjunction ')'                   action => _Assertion_DisjunctionNegativeLookAhead

Quantifier ::=
      QuantifierPrefix                        action => _Quantifier_QuantifierPrefix
    | QuantifierPrefix '?'                    action => _Quantifier_QuantifierPrefix_QuestionMark

QuantifierPrefix ::=
      '*'                                     action => _QuantifierPrefix_Star
    | '+'                                     action => _QuantifierPrefix_Plus
    | '?'                                     action => _QuantifierPrefix_QuestionMark
    | '{' DecimalDigits '}'                   action => _QuantifierPrefix_DecimalDigits
    | '{' DecimalDigits ',}'                  action => _QuantifierPrefix_DecimalDigits_Comma
    | '{' DecimalDigits ',' DecimalDigits '}' action => _QuantifierPrefix_DecimalDigits_DecimalDigits

Atom ::=
      PatternCharacter                        action => _Atom_PatternCharacter
    | '.'                                     action => _Atom_Dot
    | '\' AtomEscape                          action => _Atom_Backslash_AtomEscape
    | CharacterClass                          action => _Atom_Backslash_CharacterClass
    | LPAREN_ATOM_DISJUNCTION Disjunction ')' action => _Atom_Lparen_Disjunction_Rparen
    | '(?:' Disjunction ')'                   action => _Atom_nonCapturingDisjunction

PatternCharacter ~
      [\p{IsPatternCharacter}]

AtomEscape ::=
      DecimalEscape                           action => _AtomEscape_DecimalEscape
    | CharacterEscape                         action => _AtomEscape_CharacterEscape
    | CharacterClassEscape                    action => _AtomEscape_CharacterClassEscape

CharacterEscape ::=
      ControlEscape                           action => _CharacterEscape_ControlEscape
    | 'c' ControlLetter                       action => _CharacterEscape_ControlLetter
    | HexEscapeSequence                       action => _CharacterEscape_HexEscapeSequence
    | UnicodeEscapeSequence                   action => _CharacterEscape_UnicodeEscapeSequence
    | IdentityEscape                          action => _CharacterEscape_IdentityEscape

ControlEscape ~
      [fnrtv]

ControlLetter ~
      [a-zA-Z]

#
# What means "SourceCharacterbut not IdentifierPart":
# - first this is a SourceCharacter, i.e. one single (utf8) character
# - IdentifierPart is
#    [\p{IsUnicodeLetter}]
#  | '$'
#  | '_'
#  | '\' _UnicodeEscapeSequence       ELIMINATED: more than one character
#  | [\p{IsUnicodeCombiningMark }]
#  | [\p{IsUnicodeDigit}]
#  | [\p{IsUnicodeConnectorPunctuation}]
#  | _ZWNJ
#  | _ZWJ
#
# So this mean:
#  +IsSourceCharacter
#  -IsUnicodeLetter
#  -'$'
#  -'_'
#  -IsUnicodeCombiningMark
#  -IsUnicodeDigit
#  -IsUnicodeConnectorPunctuation

#IdentityEscape ::=
#      SourceCharacterbut not IdentifierPart
#    | <ZWJ>
#    | <ZWNJ>

IdentityEscape ~
       [\p{IsIdentityEscape}]

DecimalEscape ::= # Lookahead not in decimal digit is automatic
    DecimalIntegerLiteral                           action => _DecimalEscape_DecimalIntegerLiteral

DecimalIntegerLiteral ::= _DecimalIntegerLiteral    action => _DecimalIntegerLiteral

_DecimalIntegerLiteral ~
    '0'
  | _NonZeroDigit
  | _NonZeroDigit __DecimalDigits

__DecimalDigits ~ [\p{IsDecimalDigit}]+
_DecimalDigits  ~ __DecimalDigits
DecimalDigits ::= _DecimalDigits                    action => _DecimalDigits

_NonZeroDigit      ~ [\p{IsNonZeroDigit}]

CharacterClassEscape ::=
      [dDsSwW]                                      action => _CharacterClassEscape

CharacterClass ::=
      '[' ClassRanges ']'                           action => _CharacterClass_ClassRanges
    | '[^' ClassRanges ']'                          action => _CharacterClass_CaretClassRanges

ClassRanges ::=                                     action => _ClassRanges
ClassRanges ::=
  NonemptyClassRanges                               action => _ClassRanges_NonemptyClassRanges

NonemptyClassRanges ::=
      ClassAtom                                     action => _NonemptyClassRanges_ClassAtom
    | ClassAtom NonemptyClassRangesNoDash           action => _NonemptyClassRanges_ClassAtom_NonemptyClassRangesNoDash
    | ClassAtom '-' ClassAtom ClassRanges           action => _NonemptyClassRanges_ClassAtom_ClassAtom_ClassRanges

NonemptyClassRangesNoDash ::=
      ClassAtom                                     action => _NonemptyClassRangesNoDash_ClassAtom
    | ClassAtomNoDash NonemptyClassRangesNoDash     action => _NonemptyClassRangesNoDash_ClassAtomNoDash_NonemptyClassRangesNoDash
    | ClassAtomNoDash '-' ClassAtom ClassRanges     action => _NonemptyClassRangesNoDash_ClassAtomNoDash_ClassAtom_ClassRanges

ClassAtom ::=
      '-'                                           action => _ClassAtom_Dash
    | ClassAtomNoDash                               action => _ClassAtom_ClassAtomNoDash

ClassAtomNoDash ::=
      OneChar                                       action => _ClassAtomNoDash_OneChar
    | '\' ClassEscape                               action => _ClassAtomNoDash_ClassEscape

ClassEscape ::=
      DecimalEscape                                 action => _ClassEscape_DecimalEscape
    | 'b'                                           action => _ClassEscape_b
    | CharacterEscape                               action => _ClassEscape_CharacterEscape
    | CharacterClassEscape                          action => _ClassEscape_CharacterClassEscape

HexEscapeSequence ::= 'x' _HexDigit _HexDigit                         action => _HexEscapeSequence

UnicodeEscapeSequence ::= 'u' _HexDigit _HexDigit _HexDigit _HexDigit action => _UnicodeEscapeSequence

_HexDigit              ~ [\p{IsHexDigit}]

OneChar                ~ [\p{IsSourceCharacterButNotOneOfBackslashOrRbracketOrMinus}]

:lexeme ~ <LPAREN_ATOM_DISJUNCTION> pause => after event => 'LPAREN_ATOM_DISJUNCTION$'
LPAREN_ATOM_DISJUNCTION ~ '('
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)