The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# This software is copyright (c) 2011 by Jeffrey Kegler
# This is free software; you can redistribute it and/or modify it
# under the same terms as the Perl 5 programming language system
# itself.

package Marpa::HTML;

use 5.010;
use strict;
use warnings;

use vars qw( $VERSION $STRING_VERSION );
$VERSION        = '0.112000';
$STRING_VERSION = $VERSION;
{
## no critic (BuiltinFunctions::ProhibitStringyEval)
## no critic (ValuesAndExpressions::RequireConstantVersion)
    $VERSION = eval $VERSION;
}

use Marpa::HTML::Version;

our @EXPORT_OK;
use base qw(Exporter);
BEGIN { @EXPORT_OK = qw(html); }

package Marpa::HTML::Internal;

use Carp;
use HTML::PullParser;
use HTML::Entities qw(decode_entities);
use HTML::Tagset ();

# versions below must be coordinated with
# those required in Build.PL
BEGIN {
    my $using_xs = eval {
        require Marpa::XS::Installed;
        defined $Marpa::XS::Installed::VERSION
            and $Marpa::XS::Installed::VERSION >= $Marpa::HTML::MARPA_XS_VERSION;
    };
    if ($using_xs) {
        require Marpa::XS;
        Marpa::XS->VERSION($Marpa::HTML::MARPA_XS_VERSION);    # double check
        $Marpa::HTML::MARPA_MODULE = 'Marpa::XS';
        no strict 'refs';
        *Marpa::HTML::Recognizer::new = \&Marpa::XS::Recognizer::new;
        *Marpa::HTML::Grammar::new    = \&Marpa::XS::Grammar::new;
    } ## end if ($using_xs)
    else {
        require Marpa::PP;
        Marpa::PP->VERSION($Marpa::HTML::MARPA_PP_VERSION);
        $Marpa::HTML::MARPA_MODULE = 'Marpa::PP';
        no strict 'refs';
        *Marpa::HTML::Recognizer::new = \&Marpa::PP::Recognizer::new;
        *Marpa::HTML::Grammar::new    = \&Marpa::PP::Grammar::new;
    } ## end else [ if ($using_xs) ]
} ## end BEGIN

# use Smart::Comments '-ENV';

### <where> Using smart comments ...

use English qw( -no_match_vars );

use Marpa::HTML::Offset qw(
    :package=Marpa::HTML::Internal::TDesc
    TYPE
    START_TOKEN
    END_TOKEN
);

use Marpa::HTML::Offset qw(
    :package=Marpa::HTML::Internal::TDesc::Element
    TYPE
    START_TOKEN
    END_TOKEN
    VALUE
    NODE_DATA
);

%Marpa::HTML::PULL_PARSER_OPTIONS = (
    start       => q{'S',line,column,offset,offset_end,tagname,attr},
    end         => q{'E',line,column,offset,offset_end,tagname},
    text        => q{'T',line,column,offset,offset_end,is_cdata},
    comment     => q{'C',line,column,offset,offset_end},
    declaration => q{'D',line,column,offset,offset_end},
    process     => q{'PI',line,column,offset,offset_end},

    # options that default on
    unbroken_text => 1,
);

use Marpa::HTML::Offset qw(
    :package=Marpa::HTML::Internal::Token
    TYPE
    LINE
    COL
    =COLUMN
    START_OFFSET
    END_OFFSET
    TAGNAME
    =IS_CDATA
    ATTR
);

use Marpa::HTML::Callback;

sub per_element_handlers {
    my ( $element, $user_handlers ) = @_;
    return {} if not $element;
    return {} if not $user_handlers;
    my $wildcard_handlers    = $user_handlers->{ANY} // {};
    my %handlers             = %{$wildcard_handlers};
    my $per_element_handlers = $user_handlers->{$element} // {};
    @handlers{ keys %{$per_element_handlers} } =
        values %{$per_element_handlers};
    return \%handlers;
} ## end sub per_element_handlers

sub tdesc_list_to_literal {
    my ( $self, $tdesc_list ) = @_;

    my $text     = q{};
    my $document = $self->{document};
    my $tokens   = $self->{tokens};
    TDESC: for my $tdesc ( @{$tdesc_list} ) {
        given ( $tdesc->[Marpa::HTML::Internal::TDesc::TYPE] ) {
            when ('POINT') { break; }
            when ('VALUED_SPAN') {
                if (defined(
                        my $value =
                            $tdesc
                            ->[Marpa::HTML::Internal::TDesc::Element::VALUE]
                    )
                    )
                {
                    $text .= $value;
                    break;    # next TDESC;
                } ## end if ( defined( my $value = $tdesc->[...]))

                # next TDESC if no first token id
                #<<< As of 2009-11-22 perltidy cycles on this code
                break
                    if not defined( my $first_token_id = $tdesc
                        ->[ Marpa::HTML::Internal::TDesc::START_TOKEN ] );
                #>>>

                # next TDESC if no last token id
                #<<< As of 2009-11-22 perltidy cycles on this code
                break
                    if not defined( my $last_token_id =
                        $tdesc->[Marpa::HTML::Internal::TDesc::END_TOKEN] );
                #>>>

                my $offset =
                    $tokens->[$first_token_id]
                    ->[Marpa::HTML::Internal::Token::START_OFFSET];
                my $end_offset =
                    $tokens->[$last_token_id]
                    ->[Marpa::HTML::Internal::Token::END_OFFSET];
                $text .= substr ${$document}, $offset,
                    ( $end_offset - $offset );
            } ## end when ('VALUED_SPAN')
            when ('UNVALUED_SPAN') {
                my $first_token_id =
                    $tdesc->[Marpa::HTML::Internal::TDesc::START_TOKEN];
                my $last_token_id =
                    $tdesc->[Marpa::HTML::Internal::TDesc::END_TOKEN];
                my $offset =
                    $tokens->[$first_token_id]
                    ->[Marpa::HTML::Internal::Token::START_OFFSET];
                my $end_offset =
                    $tokens->[$last_token_id]
                    ->[Marpa::HTML::Internal::Token::END_OFFSET];

                $text .= substr ${$document}, $offset,
                    ( $end_offset - $offset );
            } ## end when ('UNVALUED_SPAN')
            default {
                Carp::croak(qq{Internal error: unknown tdesc type "$_"});
            }
        } ## end given
    } ## end for my $tdesc ( @{$tdesc_list} )
    return \$text;
} ## end sub tdesc_list_to_literal

# Convert a list of text descriptions to text
sub default_top_handler {
    my ( $dummy, @tdesc_lists ) = @_;
    my $self = $Marpa::HTML::Internal::PARSE_INSTANCE;
    my @tdesc_list = map { @{$_} } grep {defined} @tdesc_lists;
    return tdesc_list_to_literal( $self, \@tdesc_list );

} ## end sub default_top_handler

sub wrap_user_top_handler {
    my ($user_handler) = @_;
    return sub {
        my ( $dummy, @tdesc_lists ) = @_;
        my @tdesc_list = map { @{$_} } grep {defined} @tdesc_lists;
        local $Marpa::HTML::Internal::TDESC_LIST = \@tdesc_list;
        local $Marpa::HTML::Internal::PER_NODE_DATA =
            { pseudoclass => 'TOP' };
        return scalar $user_handler->();
    };
} ## end sub wrap_user_top_handler

# Convert a list of text descriptions to a
# single, shortened text description
sub create_tdesc_handler {
    my ( $self, $element ) = @_;
    my $handlers_by_class =
        per_element_handlers( $element,
        ( $self ? $self->{user_handlers_by_class} : {} ) );
    my $handlers_by_id =
        per_element_handlers( $element,
        ( $self ? $self->{user_handlers_by_id} : {} ) );

    return sub {
        my ( $dummy, @tdesc_lists ) = @_;

        my @tdesc_list = map { @{$_} } grep {defined} @tdesc_lists;
        local $Marpa::HTML::Internal::TDESC_LIST = \@tdesc_list;

        my @token_ids = sort { $a <=> $b } grep {defined} map {
            @{$_}[
                Marpa::HTML::Internal::TDesc::START_TOKEN,
                Marpa::HTML::Internal::TDesc::END_TOKEN
                ]
        } @tdesc_list;

        my $first_token_id_in_node = $token_ids[0];
        my $last_token_id_in_node  = $token_ids[-1];
        my $per_node_data          = {
            element        => $element,
            first_token_id => $first_token_id_in_node,
            last_token_id  => $last_token_id_in_node,
        };

        if ( $tdesc_list[0]->[Marpa::HTML::Internal::TDesc::TYPE] ne 'POINT' )
        {
            $per_node_data->{start_tag_token_id} = $first_token_id_in_node;
        }

        if ($tdesc_list[-1]->[Marpa::HTML::Internal::TDesc::TYPE] ne 'POINT' )
        {
            $per_node_data->{end_tag_token_id} = $last_token_id_in_node;
        }

        local $Marpa::HTML::Internal::PER_NODE_DATA = $per_node_data;

        my $self           = $Marpa::HTML::Internal::PARSE_INSTANCE;
        my $trace_fh       = $self->{trace_fh};
        my $trace_handlers = $self->{trace_handlers};

        my $tokens = $self->{tokens};

        my $user_handler;
        GET_USER_HANDLER: {
            if ( my $id = Marpa::HTML::id() ) {
                if ( $user_handler = $handlers_by_id->{$id} ) {
                    if ($trace_handlers) {
                        say {$trace_fh}
                            "Resolved to user handler by element ($element) and id ($id)"
                            or Carp::croak("Cannot print: $ERRNO");
                    }
                    last GET_USER_HANDLER;
                } ## end if ( $user_handler = $handlers_by_id->{$id} )
            } ## end if ( my $id = Marpa::HTML::id() )
            if ( my $class = Marpa::HTML::class() ) {
                if ( $user_handler = $handlers_by_class->{$class} ) {
                    if ($trace_handlers) {
                        say {$trace_fh}
                            "Resolved to user handler by element ($element) and class ($class)"
                            or Carp::croak("Cannot print: $ERRNO");
                    }
                    last GET_USER_HANDLER;
                } ## end if ( $user_handler = $handlers_by_class->{$class} )
            } ## end if ( my $class = Marpa::HTML::class() )
            $user_handler = $handlers_by_class->{ANY};
            if ( $trace_handlers and $user_handler ) {
                say {$trace_fh} +(
                    defined $element
                    ? "Resolved to user handler by element ($element)"
                    : 'Resolved to default user handler'
                ) or Carp::croak("Cannot print: $ERRNO");
            } ## end if ( $trace_handlers and $user_handler )
        } ## end GET_USER_HANDLER:

        if ( defined $user_handler ) {

            # scalar context needed for the user handler
            # because so that a bare return returns undef
            # and not an empty list.
            return [
                [   VALUED_SPAN => $first_token_id_in_node,
                    $last_token_id_in_node, ( scalar $user_handler->() ),
                    $per_node_data
                ]
            ];
        } ## end if ( defined $user_handler )

        my $doc          = $self->{doc};
        my @tdesc_result = ();

        my $first_token_id_in_current_span;
        my $last_token_id_in_current_span;

        TDESC: for my $tdesc ( @tdesc_list, ['FINAL'] ) {

            my $next_tdesc;
            my $first_token_id;
            my $last_token_id;
            PARSE_TDESC: {
                my $ref_type = ref $tdesc;
                if ( not $ref_type or $ref_type ne 'ARRAY' ) {
                    $next_tdesc = $tdesc;
                    last PARSE_TDESC;
                }
                given ( $tdesc->[Marpa::HTML::Internal::TDesc::TYPE] ) {
                    when ('POINT') { break; }
                    when ('VALUED_SPAN') {
                        if (not defined(
                                my $value = $tdesc->[
                                    Marpa::HTML::Internal::TDesc::Element::VALUE
                                ]
                            )
                            )
                        {
                            #<<< As of 2009-11-22 pertidy cycles on this
                            $first_token_id = $tdesc->[
                                Marpa::HTML::Internal::TDesc::START_TOKEN ];
                            $last_token_id =
                                $tdesc
                                ->[ Marpa::HTML::Internal::TDesc::END_TOKEN
                                ];
                            #>>>
                            break;    # last PARSE_TDESC;
                        } ## end if ( not defined( my $value = $tdesc->[ ...]))
                        $next_tdesc = $tdesc;
                    } ## end when ('VALUED_SPAN')
                    when ('FINAL') {
                        $next_tdesc = $tdesc;
                    }
                    when ('UNVALUED_SPAN') {
                        $first_token_id = $tdesc
                            ->[Marpa::HTML::Internal::TDesc::START_TOKEN];
                        $last_token_id =
                            $tdesc->[Marpa::HTML::Internal::TDesc::END_TOKEN];
                    } ## end when ('UNVALUED_SPAN')
                    default {
                        Carp::croak("Unknown text description type: $_");
                    }
                } ## end given
            } ## end PARSE_TDESC:

            if ( defined $first_token_id and defined $last_token_id ) {
                if ( defined $first_token_id_in_current_span ) {
                    if ( $first_token_id
                        <= $last_token_id_in_current_span + 1 )
                    {
                        $last_token_id_in_current_span = $last_token_id;
                        next TDESC;
                    } ## end if ( $first_token_id <= ...)
                    push @tdesc_result,
                        [
                        'UNVALUED_SPAN',
                        $first_token_id_in_current_span,
                        $last_token_id_in_current_span
                        ];
                } ## end if ( defined $first_token_id_in_current_span )
                $first_token_id_in_current_span = $first_token_id;
                $last_token_id_in_current_span  = $last_token_id;
                next TDESC;
            } ## end if ( defined $first_token_id and defined $last_token_id)

            if ( defined $next_tdesc ) {
                if ( defined $first_token_id_in_current_span ) {
                    push @tdesc_result,
                        [
                        'UNVALUED_SPAN',
                        $first_token_id_in_current_span,
                        $last_token_id_in_current_span
                        ];

                    $first_token_id_in_current_span =
                        $last_token_id_in_current_span = undef;
                } ## end if ( defined $first_token_id_in_current_span )
                my $ref_type = ref $next_tdesc;

                last TDESC
                    if $ref_type eq 'ARRAY'
                        and $next_tdesc->[Marpa::HTML::Internal::TDesc::TYPE]
                        eq 'FINAL';
                push @tdesc_result, $next_tdesc;
            } ## end if ( defined $next_tdesc )

        } ## end for my $tdesc ( @tdesc_list, ['FINAL'] )

        return \@tdesc_result;
    };
} ## end sub create_tdesc_handler

sub wrap_user_tdesc_handler {
    my ( $user_handler, $per_node_data ) = @_;

    return sub {
        my ( $dummy, @tdesc_lists ) = @_;
        my @tdesc_list = map { @{$_} } grep {defined} @tdesc_lists;
        local $Marpa::HTML::Internal::TDESC_LIST = \@tdesc_list;
        my @token_ids = sort { $a <=> $b } grep {defined} map {
            @{$_}[
                Marpa::HTML::Internal::TDesc::START_TOKEN,
                Marpa::HTML::Internal::TDesc::END_TOKEN
                ]
        } @tdesc_list;

        my $first_token_id = $token_ids[0];
        my $last_token_id  = $token_ids[-1];
        $per_node_data //= {};
        $per_node_data->{first_token_id} = $first_token_id;
        $per_node_data->{last_token_id}  = $last_token_id;
        local $Marpa::HTML::Internal::PER_NODE_DATA = $per_node_data;

        # scalar context needed for the user handler
        # because so that a bare return returns undef
        # and not an empty list.
        return [
            [   VALUED_SPAN => $first_token_id,
                $last_token_id, ( scalar $user_handler->() ),
                $per_node_data
            ]
        ];

    };
} ## end sub wrap_user_tdesc_handler

sub earleme_to_linecol {
    my ( $self, $token_offset ) = @_;
    my $html_parser_tokens = $self->{tokens};

    # Special start of file for undefined offset
    if ( not defined $token_offset ) {
        return ( 1, 0 );
    }

    # Special case needed for a token offset after the last
    # token.  This happens with the EOF.
    if ( $token_offset < 0 or $token_offset > $#{$html_parser_tokens} ) {
        $token_offset = $#{$html_parser_tokens};
    }

    return @{ $html_parser_tokens->[$token_offset] }[
        Marpa::HTML::Internal::Token::LINE,
        Marpa::HTML::Internal::Token::COLUMN,
    ];

} ## end sub earleme_to_linecol

sub earleme_to_offset {

    my ( $self, $token_offset ) = @_;
    my $html_parser_tokens = $self->{tokens};

    # Special start of file for undefined offset
    if ( not defined $token_offset ) {
        return 0;
    }

    # Special case needed for a token offset after the last
    # token.  This happens with the EOF.
    my $offset;
    if ( $token_offset < 0 or $token_offset > $#{$html_parser_tokens} ) {
        $offset = length ${ $self->{document} };
    }
    else {
        $offset =
            $html_parser_tokens->[$token_offset]
            ->[Marpa::HTML::Internal::Token::END_OFFSET];
    }
    return $offset;

} ## end sub earleme_to_offset

my %ARGS = (
    start       => q{'S',offset,offset_end,tagname,attr},
    end         => q{'E',offset,offset_end,tagname},
    text        => q{'T',offset,offset_end,is_cdata},
    process     => q{'PI',offset,offset_end},
    comment     => q{'C',offset,offset_end},
    declaration => q{'D',offset,offset_end},

    # options that default on
    unbroken_text => 1,
);

sub add_handler {
    my ( $self, $handler_description ) = @_;
    my $ref_type = ref $handler_description || 'not a reference';
    Carp::croak(
        "Long form handler description should be ref to hash, but it is $ref_type"
    ) if $ref_type ne 'HASH';
    my $element     = delete $handler_description->{element};
    my $id          = delete $handler_description->{id};
    my $class       = delete $handler_description->{class};
    my $pseudoclass = delete $handler_description->{pseudoclass};
    my $action      = delete $handler_description->{action};
    Carp::croak(
        'Unknown option(s) in Long form handler description: ',
        ( join q{ }, keys %{$handler_description} )
    ) if scalar keys %{$handler_description};

    Carp::croak('Handler action must be CODE ref')
        if ref $action ne 'CODE';

    $element = ( not $element or $element eq q{*} ) ? 'ANY' : lc $element;
    if ( defined $pseudoclass ) {
        $self->{user_handlers_by_pseudoclass}->{$element}->{$pseudoclass} =
            $action;
        return 1;
    }

    if ( defined $id ) {
        $self->{user_handlers_by_id}->{$element}->{ lc $id } = $action;
        return 1;
    }
    $class = defined $class ? lc $class : 'ANY';
    $self->{user_handlers_by_class}->{$element}->{$class} = $action;
    return 1;
} ## end sub add_handler

sub add_handlers_from_hashes {
    my ( $self, $handler_specs ) = @_;
    my $ref_type = ref $handler_specs || 'not a reference';
    Carp::croak("handlers arg must must be ref to ARRAY, it is $ref_type")
        if $ref_type ne 'ARRAY';
    for my $handler_spec ( keys %{$handler_specs} ) {
        add_handler( $self, $handler_spec );
    }
    return 1;
} ## end sub add_handlers_from_hashes

sub add_handlers {
    my ( $self, $handler_specs ) = @_;
    HANDLER_SPEC: for my $specifier ( keys %{$handler_specs} ) {
        my ( $element, $id, $class, $pseudoclass );
        my $action = $handler_specs->{$specifier};
        ( $element, $id ) = ( $specifier =~ /\A ([^#]*) [#] (.*) \z/xms )
            or ( $element, $class ) =
               ( $specifier =~ /\A ([^.]*) [.] (.*) \z/xms )
            or ( $element, $pseudoclass ) =
            ( $specifier =~ /\A ([^:]*) [:] (.*) \z/xms )
            or $element = $specifier;
        if ($pseudoclass
            and not $pseudoclass ~~ [
                qw(TOP PI DECL COMMENT PROLOG TRAILER WHITESPACE CDATA PCDATA CRUFT)
            ]
            )
        {
            Carp::croak( qq{pseudoclass "$pseudoclass" is not known:\n},
                "Specifier was $specifier\n" );
        } ## end if ( $pseudoclass and not $pseudoclass ~~ [ ...])
        if ( $pseudoclass and $element ) {
            Carp::croak(
                qq{pseudoclass "$pseudoclass" may not have an element specified:\n},
                "Specifier was $specifier\n"
            );
        } ## end if ( $pseudoclass and $element )
        add_handler(
            $self,
            {   element     => $element,
                id          => $id,
                class       => $class,
                pseudoclass => $pseudoclass,
                action      => $action
            }
        );
    } ## end for my $specifier ( keys %{$handler_specs} )

    return 1;
} ## end sub add_handlers

# If we factor this package, this will be the constructor.
## no critic (Subroutines::RequireArgUnpacking)
sub create {

    ## use critic
    my $self = {};
    $self->{trace_fh} = \*STDERR;
    ARG: for my $arg (@_) {
        my $ref_type = ref $arg || 'not a reference';
        if ( $ref_type eq 'HASH' ) {
            Marpa::HTML::Internal::add_handlers( $self, $arg );
            next ARG;
        }
        Carp::croak("Argument must be hash or refs to hash: it is $ref_type")
            if $ref_type ne 'REF';
        my $option_hash = ${$arg};
        $ref_type = ref $option_hash || 'not a reference';
        Carp::croak(
            "Argument must be hash or refs to hash: it is ref to $ref_type")
            if $ref_type ne 'HASH';
        OPTION: for my $option ( keys %{$option_hash} ) {
            if ( $option eq 'handlers' ) {
                add_handlers_from_hashes( $self, $option_hash->{$option} );
            }
            if (not $option ~~ [
                    qw(trace_fh trace_values trace_handlers trace_actions
                        trace_conflicts trace_ambiguity trace_rules trace_QDFA
                        trace_earley_sets trace_terminals trace_cruft)
                ]
                )
            {
                Carp::croak("unknown option: $option");
            } ## end if ( not $option ~~ [ ...])
            $self->{$option} = $option_hash->{$option};
        } ## end for my $option ( keys %{$option_hash} )
    } ## end for my $arg (@_)
    return $self;
} ## end sub create

# block_element is for block-level ONLY elements.
# head is for anything legal inside the HTML header.
# Note that isindex can be both a head element and
# and block level element in the body.
# ISINDEX is classified as a header_element
%Marpa::HTML::Internal::ELEMENT_TYPE = (
    (   map { $_ => 'block_element' }
            qw(
            h1 h2 h3 h4 h5 h6
            ul ol dir menu
            pre
            p dl div center
            noscript noframes
            blockquote form hr
            table fieldset address
            )
    ),
    (   map { $_ => 'header_element' }
            qw(
            script style meta link object title isindex base
            )
    ),
    ( map { $_ => 'list_item_element' } qw( li dd dt ) ),
    ( map { $_ => 'table_cell_element' } qw( td th ) ),
    ( map { $_ => 'table_row_element' } qw( tr ) ),
);

@Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS = qw(
    E_html
    E_body
    S_table
    E_head
    E_table
    E_tbody
    E_tr
    E_td
    S_td
    S_tr
    S_tbody
    S_head
    S_body
    S_html
);

%Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS = ();
for my $rank ( 0 .. $#Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS ) {
    $Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS{
        $Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS[$rank] } = $rank;
}

%Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY = ();
{
    my $hierarchy = <<'END_OF_STRING';
th td
tr
col
caption colgroup tfoot thead tbody
table
body head
html
END_OF_STRING

    my $iota = 0;
    my @hierarchy;
    for my $level ( split /\n/xms, $hierarchy ) {
        push @hierarchy,
            map { ( "S_$_" => $iota, "E_$_" => $iota ) }
            ( split q{ }, $level );
        $iota++;
    } ## end for my $level ( split /\n/xms, $hierarchy )
    %Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY = @hierarchy;
    $Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY{EOF} =
        $Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY{E_tbody};
}

# This display set to be ignored
# until the HTML::Implementation doc
# is ready.

# Marpa::HTML::Display
# name: HTML BNF
# ignore: 1
# start-after-line: END_OF_BNF
# end-before-line: '^END_OF_BNF$'

my $BNF = <<'END_OF_BNF';
cruft ::= CRUFT
comment ::= C
pi ::= PI
decl ::= D
pcdata ::= PCDATA
cdata ::= CDATA
whitespace ::= WHITESPACE
SGML_item ::= comment
SGML_item ::= pi
SGML_item ::= decl
SGML_flow_item ::= SGML_item
SGML_flow_item ::= whitespace
SGML_flow_item ::= cruft
SGML_flow ::= SGML_flow_item*
document ::= prolog ELE_html trailer EOF
prolog ::= SGML_flow
trailer ::= SGML_flow
ELE_html ::= S_html Contents_html E_html
Contents_html ::= SGML_flow ELE_head SGML_flow ELE_body SGML_flow
ELE_head ::= S_head Contents_head E_head
Contents_head ::= head_item*
ELE_body ::= S_body flow E_body
ELE_table ::= S_table table_flow E_table
ELE_tbody ::= S_tbody table_section_flow E_tbody
ELE_tr ::= S_tr table_row_flow E_tr
ELE_td ::= S_td flow E_td
flow ::= flow_item*
flow_item ::= cruft
flow_item ::= SGML_item
flow_item ::= ELE_table
flow_item ::= list_item_element
flow_item ::= header_element
flow_item ::= block_element
flow_item ::= inline_element
flow_item ::= whitespace
flow_item ::= cdata
flow_item ::= pcdata
head_item ::= header_element
head_item ::= cruft
head_item ::= whitespace
head_item ::= SGML_item
inline_flow ::= inline_flow_item*
inline_flow_item ::= pcdata_flow_item
inline_flow_item ::= inline_element
pcdata_flow ::= pcdata_flow_item*
pcdata_flow_item ::= cdata
pcdata_flow_item ::= pcdata
pcdata_flow_item ::= cruft
pcdata_flow_item ::= whitespace
pcdata_flow_item ::= SGML_item
Contents_select ::= select_flow_item*
select_flow_item ::= ELE_optgroup
select_flow_item ::= ELE_option
select_flow_item ::= SGML_flow_item
Contents_optgroup ::= optgroup_flow_item*
optgroup_flow_item ::= ELE_option
optgroup_flow_item ::= SGML_flow_item
list_item_flow ::= list_item_flow_item*
list_item_flow_item ::= cruft
list_item_flow_item ::= SGML_item
list_item_flow_item ::= header_element
list_item_flow_item ::= block_element
list_item_flow_item ::= inline_element
list_item_flow_item ::= whitespace
list_item_flow_item ::= cdata
list_item_flow_item ::= pcdata
Contents_colgroup ::= colgroup_flow_item*
colgroup_flow_item ::= ELE_col
colgroup_flow_item ::= SGML_flow_item
table_row_flow ::= table_row_flow_item*
table_row_flow_item ::= ELE_th
table_row_flow_item ::= ELE_td
table_row_flow_item ::= SGML_flow_item
table_section_flow ::= table_section_flow_item*
table_section_flow_item ::= table_row_element
table_section_flow_item ::= SGML_flow_item
table_row_element ::= ELE_tr
table_flow ::= table_flow_item*
table_flow_item ::= ELE_colgroup
table_flow_item ::= ELE_thead
table_flow_item ::= ELE_tfoot
table_flow_item ::= ELE_tbody
table_flow_item ::= ELE_caption
table_flow_item ::= ELE_col
table_flow_item ::= SGML_flow_item
empty ::=
END_OF_BNF

@Marpa::HTML::Internal::CORE_RULES = ();

my %handler = (
    cruft      => '!CRUFT_handler',
    comment    => '!COMMENT_handler',
    pi         => '!PI_handler',
    decl       => '!DECL_handler',
    document   => '!TOP_handler',
    whitespace => '!WHITESPACE_handler',
    pcdata     => '!PCDATA_handler',
    cdata      => '!CDATA_handler',
    prolog     => '!PROLOG_handler',
    trailer    => '!TRAILER_handler',
);

for my $bnf_production ( split /\n/xms, $BNF ) {
    my $sequence = ( $bnf_production =~ s/ [*] \s* $//xms );
    $bnf_production =~ s/ \s* [:][:][=] \s* / /xms;
    my @symbols         = ( split q{ }, $bnf_production );
    my $lhs             = shift @symbols;
    my %rule_descriptor = (
        lhs => $lhs,
        rhs => \@symbols,
    );
    if ($sequence) {
        $rule_descriptor{min} = 0;
    }
    if ( my $handler = $handler{$lhs} ) {
        $rule_descriptor{action} = $handler;
    }
    elsif ( $lhs =~ /^ELE_/xms ) {
        $rule_descriptor{action} = "!$lhs";
    }
    push @Marpa::HTML::Internal::CORE_RULES, \%rule_descriptor;
} ## end for my $bnf_production ( split /\n/xms, $BNF )

@Marpa::HTML::Internal::CORE_TERMINALS =
    qw(C D PI CRUFT CDATA PCDATA WHITESPACE EOF );

push @Marpa::HTML::Internal::CORE_TERMINALS,
    keys %Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS;

no strict 'refs';
*{'Marpa::HTML::Internal::default_action'} = create_tdesc_handler();
use strict;

%Marpa::HTML::Internal::EMPTY_ELEMENT = map { $_ => 1 } qw(
    area base basefont br col frame hr
    img input isindex link meta param);

%Marpa::HTML::Internal::CONTENTS = (
    'p'        => 'inline_flow',
    'select'   => 'Contents_select',
    'option'   => 'pcdata_flow',
    'optgroup' => 'Contents_optgroup',
    'dt'       => 'inline_flow',
    'dd'       => 'list_item_flow',
    'li'       => 'list_item_flow',
    'colgroup' => 'Contents_colgroup',
    'thead'    => 'table_section_flow',
    'tfoot'    => 'table_section_flow',
    'tbody'    => 'table_section_flow',
    'table'    => 'table_flow',
    ( map { $_ => 'empty' } keys %Marpa::HTML::Internal::EMPTY_ELEMENT ),
);

sub parse {
    my ( $self, $document_ref ) = @_;

    my %start_tags = ();
    my %end_tags   = ();

    Carp::croak(
        "parse() already run on this object\n",
        'For a new parse, create a new object'
    ) if $self->{document};

    my $trace_cruft     = $self->{trace_cruft};
    my $trace_terminals = $self->{trace_terminals} // 0;
    my $trace_conflicts = $self->{trace_conflicts};
    my $trace_fh        = $self->{trace_fh};
    my $ref_type        = ref $document_ref;
    Carp::croak('Arg to parse() must be ref to string')
        if not $ref_type
            or $ref_type ne 'SCALAR'
            or not defined ${$document_ref};

    my %pull_parser_args;
    my $document = $pull_parser_args{doc} = $self->{document} = $document_ref;
    my $pull_parser =
        HTML::PullParser->new( %pull_parser_args,
        %Marpa::HTML::PULL_PARSER_OPTIONS )
        || Carp::croak('Could not create pull parser');

    my @tokens = ();

    my %terminals = map { $_ => 1 } @Marpa::HTML::Internal::CORE_TERMINALS;
    my %optional_terminals = %Marpa::HTML::Internal::CORE_OPTIONAL_TERMINALS;
    my @html_parser_tokens = ();
    my @marpa_tokens       = ();
    HTML_PARSER_TOKEN:
    while ( my $html_parser_token = $pull_parser->get_token ) {
        my ( $token_type, $line, $column, $offset, $offset_end ) =
            @{$html_parser_token};

        # If it's a virtual token from HTML::Parser,
        # pretend it never existed.
        # We figure out where the missing tags are,
        # and HTML::Parser's guesses are not helpful.
        next HTML_PARSER_TOKEN if $offset_end <= $offset;

        my $token_number = scalar @html_parser_tokens;
        push @html_parser_tokens, $html_parser_token;

        given ($token_type) {
            when ('T') {
                my $is_cdata = $html_parser_token
                    ->[Marpa::HTML::Internal::Token::IS_CDATA];
                push @marpa_tokens,
                    [
                    (   substr(
                            ${$document}, $offset,
                            ( $offset_end - $offset )
                            ) =~ / \A \s* \z /xms ? 'WHITESPACE'
                        : $is_cdata ? 'CDATA'
                        : 'PCDATA'
                    ),
                    [ [ 'UNVALUED_SPAN', $token_number, $token_number ] ],
                    ];
            } ## end when ('T')
            when ('S') {
                my $tag_name = $html_parser_token
                    ->[Marpa::HTML::Internal::Token::TAGNAME];
                $start_tags{$tag_name}++;
                my $terminal = "S_$tag_name";
                $terminals{$terminal}++;
                push @marpa_tokens,
                    [
                    $terminal,
                    [ [ 'UNVALUED_SPAN', $token_number, $token_number ] ],
                    ];
            } ## end when ('S')
            when ('E') {
                my $tag_name = $html_parser_token
                    ->[Marpa::HTML::Internal::Token::TAGNAME];
                $end_tags{$tag_name}++;
                my $terminal = "E_$tag_name";
                $terminals{$terminal}++;
                push @marpa_tokens,
                    [
                    $terminal,
                    [ [ 'UNVALUED_SPAN', $token_number, $token_number ] ],
                    ];
            } ## end when ('E')
            when ( [qw(C D)] ) {
                push @marpa_tokens,
                    [
                    $_, [ [ 'UNVALUED_SPAN', $token_number, $token_number ] ],
                    ];
            } ## end when ( [qw(C D)] )
            when ( ['PI'] ) {
                push @marpa_tokens,
                    [
                    $_, [ [ 'UNVALUED_SPAN', $token_number, $token_number ] ],
                    ];
            } ## end when ( ['PI'] )
            default { Carp::croak("Unprovided-for event: $_") }
        } ## end given
    } ## end while ( my $html_parser_token = $pull_parser->get_token)

    # Points AFTER the last HTML
    # Parser token.
    # The other logic needs to be ready for this.
    push @marpa_tokens, [ 'EOF', [ ['POINT'] ] ];

    $pull_parser = undef;    # conserve memory

    my @rules     = @Marpa::HTML::Internal::CORE_RULES;
    my @terminals = keys %terminals;

    my %pseudoclass_element_actions = ();
    my %element_actions             = ();

    # Special cases which are dealt with elsewhere.
    # As of now the only special cases are elements with optional
    # start and end tags
    for my $special_element (qw(html head body table tbody tr td)) {
        delete $start_tags{$special_element};
        $element_actions{"!ELE_$special_element"} = $special_element;
    }

    ELEMENT: for ( keys %start_tags ) {
        my $start_tag    = "S_$_";
        my $end_tag      = "E_$_";
        my $contents     = $Marpa::HTML::Internal::CONTENTS{$_} // 'flow';
        my $element_type = $Marpa::HTML::Internal::ELEMENT_TYPE{$_}
            // 'inline_element';

        push @rules,
            {
            lhs => $element_type,
            rhs => ["ELE_$_"],
            },
            {
            lhs    => "ELE_$_",
            rhs    => [ $start_tag, $contents, $end_tag ],
            action => "!ELE_$_",
            };

        # There may be no
        # end tag in the input.
        # This silences the warning.
        if ( not $terminals{$end_tag} ) {
            push @terminals, $end_tag;
            $terminals{$end_tag}++;
        }

        # Make each new optional terminal the highest ranking
        $optional_terminals{$end_tag} = keys %optional_terminals;

        $element_actions{"!ELE_$_"} = $_;
    } ## end for ( keys %start_tags )

    # The question is where to put cruft -- in the current element,
    # or at a higher level.  As a first step, we set up a system of
    # levels for specific elements, going from the lowest, where no
    # cruft is allowed, to the highest, where everything is
    # acceptable as cruft, if only because it has nowhere else to go.

    # First step, set up the levels, using specific elements.
    # Some of these elements will are stand-ins for large category.
    # For example, the HR element stands in for those elements
    # such as empty elements,
    # which tolerate zero cruft, while SPAN stands in for
    # inline elements and DIV stands in for the class of
    # block-level elements

    my %ok_as_cruft = ();
    DECIDE_CRUFT_TREATMENT: {
        my %level             = ();
        my @elements_by_level = (
            [qw( HR HEAD )],
            [qw( SPAN OPTION )],
            [qw( LI OPTGROUP DD DT )],
            [qw( DIR MENU )],
            [qw( DIV )],
            [qw( UL OL DL )],
            [qw( TH TD )],
            [qw( TR )],
            [qw( COL )],
            [qw( CAPTION COLGROUP THEAD TFOOT TBODY )],
            [qw( TABLE )],
            [qw( BODY )],
            [qw( HTML )],
        );

        # EOF comes after everything -- it is
        # the highest level of all
        $level{EOF} = scalar @elements_by_level;

        # Assign levels to the end tags of the elements
        # in the above table.
        for my $level ( 0 .. $#elements_by_level ) {
            for my $element ( @{ $elements_by_level[$level] } ) {
                $level{ 'S_' . lc $element } = $level{ 'E_' . lc $element } =
                    $level;
            }
        } ## end for my $level ( 0 .. $#elements_by_level )

        my $no_cruft_allowed = $level{E_hr};
        my $block_level      = $level{E_div};
        my $inline_level     = $level{E_span};

        # Now that we have set out the structure of levels
        # fill it in for all the terminals we have yet to
        # define.
        TERMINAL:
        for my $terminal ( grep { not defined $level{$_} }
            ( @terminals, keys %optional_terminals ) )
        {

            # With the exception of EOF,
            # only tags can have levels because only they really
            # tell us anyting about "state" --
            # whether we are awaiting something
            # or are inside something.
            if ( $terminal !~ /^[SE]_/xms ) {
                $level{$terminal} = $no_cruft_allowed;
                next TERMINAL;
            }
            my $element = substr $terminal, 2;
            if ( $Marpa::HTML::Internal::EMPTY_ELEMENT{$element} ) {
                $level{$terminal} = $no_cruft_allowed;
                next TERMINAL;
            }

            my $element_type = $Marpa::HTML::Internal::ELEMENT_TYPE{$element};
            if ( defined $element_type
                and $element_type ~~ [qw(block_element header_element)] )
            {
                $level{$terminal} = $block_level;
                next TERMINAL;
            } ## end if ( defined $element_type and $element_type ~~ [...])

            $level{$terminal} = $inline_level;

        } ## end for my $terminal ( grep { not defined $level{$_} } ( ...))

        EXPECTED_TERMINAL:
        for my $expected_terminal ( keys %optional_terminals ) {

            # Regardless of levels, allow no cruft before a start tag.
            # Start whatever it is, then deal with the cruft.
            next EXPECTED_TERMINAL if $expected_terminal =~ /^S_/xms;

            # For end tags, use the levels
            TERMINAL: for my $actual_terminal (@terminals) {
                $ok_as_cruft{$expected_terminal}{$actual_terminal} =
                    $level{$actual_terminal} < $level{$expected_terminal};
            }
        } ## end for my $expected_terminal ( keys %optional_terminals )

    } ## end DECIDE_CRUFT_TREATMENT:

    my $grammar = Marpa::HTML::Grammar->new(
        {   rules           => \@rules,
            start           => 'document',
            terminals       => \@terminals,
            inaccessible_ok => 1,
            unproductive_ok => 1,
            default_action  => 'Marpa::HTML::Internal::default_action',
            strip           => 0,
        }
    );
    $grammar->precompute();

    if ( $self->{trace_rules} ) {
        say {$trace_fh} $grammar->show_rules()
            or Carp::croak("Cannot print: $ERRNO");
    }
    if ( $self->{trace_QDFA} ) {
        say {$trace_fh} $grammar->show_QDFA()
            or Carp::croak("Cannot print: $ERRNO");
    }

    my $recce = Marpa::HTML::Recognizer->new(
        {   grammar           => $grammar,
            trace_terminals   => $self->{trace_terminals},
            trace_earley_sets => $self->{trace_earley_sets},
            mode              => 'stream',
        }
    );

    $self->{recce}  = $recce;
    $self->{tokens} = \@html_parser_tokens;

    # These variables track virtual start tokens as
    # a protection against infinite loops.
    my %start_virtuals_used           = ();
    my $earleme_of_last_start_virtual = -1;

    my $marpa_token = shift @marpa_tokens;
    RECCE_RESPONSE: while ( defined $marpa_token ) {

        my $read_result = $recce->read( @{$marpa_token} );
        if ( defined $read_result ) {
            $marpa_token = shift @marpa_tokens;
            next RECCE_RESPONSE;
        }

        my $actual_terminal = $marpa_token->[0];
        if ($trace_terminals) {
            say {$trace_fh} 'Literal Token not accepted: ', $actual_terminal
                or Carp::croak("Cannot print: $ERRNO");
        }

        my $virtual_token_to_add;

        FIND_VIRTUAL_TOKEN: {
            my $virtual_terminal;
            my @virtuals_expected =
                sort { $optional_terminals{$a} <=> $optional_terminals{$b} }
                grep { defined $optional_terminals{$_} }
                @{ $recce->terminals_expected() };
            if ($trace_conflicts) {
                say {$trace_fh} 'Conflict of virtual choices'
                    or Carp::croak("Cannot print: $ERRNO");
                say {$trace_fh} "Actual Token is $actual_terminal"
                    or Carp::croak("Cannot print: $ERRNO");
                say {$trace_fh} +( scalar @virtuals_expected ),
                    ' virtual terminals expected: ', join q{ },
                    @virtuals_expected
                    or Carp::croak("Cannot print: $ERRNO");
            } ## end if ($trace_conflicts)

            LOOKAHEAD_VIRTUAL_TERMINAL:
            while ( my $candidate = pop @virtuals_expected ) {

                # Start an implied table only if the next token is one which
                # can only occur inside a table
                if ( $candidate eq 'S_table' ) {
                    if (not $actual_terminal ~~ [
                            qw(
                                S_caption S_col S_colgroup S_thead S_tfoot
                                S_tbody S_tr S_th S_td
                                E_caption E_col E_colgroup E_thead E_tfoot
                                E_tbody E_tr E_th E_td
                                E_table
                                )
                        ]
                        )
                    {
                        next LOOKAHEAD_VIRTUAL_TERMINAL;
                    } ## end if ( not $actual_terminal ~~ [ qw(...)])

                    # The above test implies the others below, so
                    # this virtual table start terminal is OK.
                    $virtual_terminal = $candidate;
                    last LOOKAHEAD_VIRTUAL_TERMINAL;
                } ## end if ( $candidate eq 'S_table' )

                # For other than <table>, we are permissive.
                # Unless the lookahead gives us
                # a specific reason to
                # reject the virtual terminal, we accept it.

                # No need to check lookahead, unless we are starting
                # an element
                if ( $candidate !~ /^S_/xms ) {
                    $virtual_terminal = $candidate;
                    last LOOKAHEAD_VIRTUAL_TERMINAL;
                }

#<<< no perltidy cycles as of 12 Mar 2010

                my $candidate_level =
                    $Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY{
                    $candidate };

#>>>
                # If the candidate is not part of the hierarchy, no need to check
                # lookahead
                if ( not defined $candidate_level ) {
                    $virtual_terminal = $candidate;
                    last LOOKAHEAD_VIRTUAL_TERMINAL;
                }

                my $actual_terminal_level =
                    $Marpa::HTML::Internal::VIRTUAL_TOKEN_HIERARCHY{
                    $actual_terminal};

                # If the actual terminal is not part of the hierarchy, no need to check
                # lookahead, either
                if ( not defined $actual_terminal_level ) {
                    $virtual_terminal = $candidate;
                    last LOOKAHEAD_VIRTUAL_TERMINAL;
                }

                # Here we are trying to deal with a higher-level element's
                # start or end, by starting a new lower level element.
                # This won't work, because we'll have to close it
                # immediately with another virtual terminal.
                # At best this means useless, empty elements.
                # At worst, it means an infinite loop where
                # empty lower-level elements are repeatedly added.
                #
                next LOOKAHEAD_VIRTUAL_TERMINAL
                    if $candidate_level <= $actual_terminal_level;

                $virtual_terminal = $candidate;
                last LOOKAHEAD_VIRTUAL_TERMINAL;

            } ## end while ( my $candidate = pop @virtuals_expected )

            if ($trace_terminals) {
                say {$trace_fh} 'Converting Token: ', $actual_terminal
                    or Carp::croak("Cannot print: $ERRNO");
                if ( defined $virtual_terminal ) {
                    say {$trace_fh} 'Candidate as Virtual Token: ',
                        $virtual_terminal
                        or Carp::croak("Cannot print: $ERRNO");
                }
            } ## end if ($trace_terminals)

            # Depending on the expected (optional or virtual)
            # terminal and the actual
            # terminal, we either want to add the actual one as cruft, or add
            # the virtual one to move on in the parse.

            if ( $trace_terminals > 1 and defined $virtual_terminal ) {
                say {$trace_fh}
                    "OK as cruft when expecting $virtual_terminal: ",
                    join q{ }, keys %{ $ok_as_cruft{$virtual_terminal} }
                    or Carp::croak("Cannot print: $ERRNO");
            } ## end if ( $trace_terminals > 1 and defined $virtual_terminal)

            last FIND_VIRTUAL_TOKEN if not defined $virtual_terminal;
            last FIND_VIRTUAL_TOKEN
                if $ok_as_cruft{$virtual_terminal}{$actual_terminal};

            CHECK_FOR_INFINITE_LOOP: {

                # It is sufficient to check for start tags.
                # Just ending things will never cause an infinite loop.
                last CHECK_FOR_INFINITE_LOOP if $virtual_terminal !~ /^S_/xms;

                # Are we at the same earleme as we were when the last
                # virtual start was added?  If not, no problem.
                # But we need to reinitialize.
                my $current_earleme = $recce->current_earleme();
                if ( $current_earleme != $earleme_of_last_start_virtual ) {
                    $earleme_of_last_start_virtual = $current_earleme;
                    %start_virtuals_used           = ();
                    last CHECK_FOR_INFINITE_LOOP;
                }

                # Is this the first time we've added this start
                # terminal?  If so, we're OK.
                last CHECK_FOR_INFINITE_LOOP
                    if $start_virtuals_used{$virtual_terminal}++ <= 1;

                # Attempt to add duplicate.
                # Give up on adding virtual at this location,
                # and warn the user.
                ( my $tagname = $virtual_terminal ) =~ s/^S_//xms;
                say {$trace_fh}
                    "Warning: attempt to add <$tagname> twice at the same place"
                    or Carp::croak("Cannot print: $ERRNO");
                last FIND_VIRTUAL_TOKEN;

            } ## end CHECK_FOR_INFINITE_LOOP:

            my $tdesc_list = $marpa_token->[1];
            my $first_tdesc_start_token =
                $tdesc_list->[0]->[Marpa::HTML::Internal::TDesc::START_TOKEN];
            $virtual_token_to_add = [
                $virtual_terminal, [ [ 'POINT', $first_tdesc_start_token ] ]
            ];

        } ## end FIND_VIRTUAL_TOKEN:

        if ( defined $virtual_token_to_add ) {
            $recce->read( @{$virtual_token_to_add} );
            next RECCE_RESPONSE;
        }

        # If we didn't find a token to add, add the
        # current physical token as CRUFT.

        if ($trace_terminals) {
            say {$trace_fh} 'Adding actual token as cruft: ', $actual_terminal
                or Carp::croak("Cannot print: $ERRNO");
        }

        # Cruft tokens are not virtual.
        # They are the real things, hacked up.
        $marpa_token->[0] = 'CRUFT';
        if ($trace_cruft) {
            my ( $line, $col ) =
                earleme_to_linecol( $self, $recce->current_earleme() );

            # HTML::Parser uses one-based line numbers,
            # but zero-based column numbers
            # The convention (in vi and cut) is that
            # columns are also one-based.
            $col++;

            say {$trace_fh} qq{Cruft at line $line, column $col: "},
                ${ tdesc_list_to_literal( $self, $marpa_token->[1] ) }, q{"}
                or Carp::croak("Cannot print: $ERRNO");
        } ## end if ($trace_cruft)

    } ## end while ( defined $marpa_token )

    if ($trace_terminals) {
        say {$trace_fh} 'at end of tokens'
            or Carp::croak("Cannot print: $ERRNO");
    }

    $recce->end_input();

    my %closure = ();
    {
        my $user_top_handler =
            $self->{user_handlers_by_pseudoclass}->{ANY}->{TOP};
        $closure{'!TOP_handler'} =
            defined $user_top_handler
            ? wrap_user_top_handler($user_top_handler)
            : \&Marpa::HTML::Internal::default_top_handler;
    } ## end if ( defined( my $user_top_handler = $self->{...}))

    if ( defined $self->{user_handlers_by_class}->{ANY}->{ANY} ) {
        $closure{'!DEFAULT_ELE_handler'} =
            $self->{user_handlers_by_class}->{ANY}->{ANY};
    }

    PSEUDO_CLASS:
    for my $pseudoclass (
        qw(PI DECL COMMENT PROLOG TRAILER WHITESPACE CDATA PCDATA CRUFT))
    {
        my $pseudoclass_action =
            $self->{user_handlers_by_pseudoclass}->{ANY}->{$pseudoclass};
        my $pseudoclass_action_name = "!$pseudoclass" . '_handler';
        if ($pseudoclass_action) {
            $closure{$pseudoclass_action_name} =
                wrap_user_tdesc_handler( $pseudoclass_action,
                { pseudoclass => $pseudoclass } );
            next PSEUDO_CLASS;
        } ## end if ($pseudoclass_action)
        $closure{$pseudoclass_action_name} =
            \&Marpa::HTML::Internal::default_action;
    } ## end for my $pseudoclass (...)

    while ( my ( $element_action, $element ) = each %element_actions ) {
        $closure{$element_action} = create_tdesc_handler( $self, $element );
    }

    ELEMENT_ACTION:
    while ( my ( $element_action, $data ) =
        each %pseudoclass_element_actions )
    {

        # As of now, there are
        # no per-element pseudo-classes, and since I can't regression test
        # this logic any more, I'm commenting it out.
        Carp::croak('per-element pseudo-classes not implemented');

        # my ( $pseudoclass, $element ) = @{$data};
        # my $pseudoclass_action =
        #    $self->{user_handlers_by_pseudoclass}->{$element}
        #    ->{$pseudoclass}
        #    // $self->{user_handlers_by_pseudoclass}->{ANY}->{$pseudoclass};
        # if ( defined $pseudoclass_action ) {
        #    $pseudoclass_action =
        #        wrap_user_tdesc_handler($pseudoclass_action);
        # }
        # $pseudoclass_action //= \&Marpa::HTML::Internal::default_action;
        # $closure{$element_action} = $pseudoclass_action;
    } ## end while ( my ( $element_action, $data ) = each ...)

    my $value = do {
        local $Marpa::HTML::Internal::PARSE_INSTANCE = $self;
        local $Marpa::HTML::INSTANCE                 = {};
        $recce->value(
            {   trace_values  => $self->{trace_values},
                trace_actions => $self->{trace_actions},
                closures      => \%closure,
            }
        );
    };
    Carp::croak('No parse: evaler returned undef') if not defined $value;
    return ${$value};

} ## end sub parse

sub Marpa::HTML::html {
    my ( $document_ref, @args ) = @_;
    my $html = Marpa::HTML::Internal::create(@args);
    return Marpa::HTML::Internal::parse( $html, $document_ref );
}

1;