The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
# Copyright 2014 Jeffrey Kegler
# This file is part of Marpa::R2.  Marpa::R2 is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::R2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::R2.  If not, see
# http://www.gnu.org/licenses/.

use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
use Marpa::R2::HTML;
use List::Util qw(sum);
use Fatal qw(open close);
use Getopt::Long;

sub usage {
    say {*STDERR} "$PROGRAM_NAME html_score [uri|file]" or die "say failed: $ERRNO";
    exit 1;
}

my $html_flag = 0;
my $help_flag = 0;
my $dump_config_flag     = 0;
my $dump_AHFA_flag       = 0;
my $trace_terminals_flag = 0;
my $trace_cruft_flag     = 0;
my $trace_values_flag    = 0;
my $compile_flag;
usage() unless GetOptions(
    'html' => \$html_flag,
    'help' => \$help_flag,

    # undocumented
    'dump-config'     => \$dump_config_flag,
    'dump-AHFA'       => \$dump_AHFA_flag,
    'compile=s'       => \$compile_flag,
    'trace-terminals' => \$trace_terminals_flag,
    'trace-cruft'     => \$trace_cruft_flag,
    'trace-values'    => \$trace_values_flag,
);
usage() unless $help_flag or 1 >= scalar @ARGV;

my $locator = shift;
my $document;
GET_DOCUMENT: {
    if ( not defined $locator ) {
        $locator = 'STDIN';    # for possible display, later
        local $RS = undef;
        ## no critic(InputOutput::ProhibitExplicitStdin)
        $document = <STDIN>;
        last GET_DOCUMENT;
    } ## end if ( not defined $locator )
    if ( $locator =~ /\A [[:alnum:]]+ [:] /xms ) {
        require WWW::Mechanize;
        my $mech = WWW::Mechanize->new( autocheck => 1 );
        $mech->get($locator);
        $document = $mech->content;
        undef $mech;
        last GET_DOCUMENT;
    } ## end if ( $locator =~ /\A [[:alnum:]]+ [:] /xms )
    {
        local $RS = undef;
        open my $fh, q{<}, $locator;
        $document = <$fh>;
        close $fh;
    }
} ## end GET_DOCUMENT:

sub calculate_max_depths {
    my ($descendant_data) = @_;
    my %return_depths = ( ANY => 0 );
    for my $child_value ( grep { ref $_ } map { $_->[0] } @{$descendant_data} ) {
        my $depths = $child_value->{depths};
        CHILD_TAGNAME: for my $child_tagname ( keys %{$depths} ) {
            my $depth = $depths->{$child_tagname};
            if ( $depth > ( $return_depths{$child_tagname} // 0 ) ) {
                $return_depths{$child_tagname} = $depth;
            }
            if ( $depth > $return_depths{ANY} ) {
                $return_depths{ANY} = $depth;
            }
        } ## end for my $child_tagname ( keys %{$depths} )
    } ## end for my $child_value ( grep { ref $_ } map { $_->[0] }...)
    return \%return_depths;
} ## end sub calculate_max_depths

sub calculate_length {
    my ($descendant_data) = @_;
    my $length = 0;
    for my $descendant_data ( @{$descendant_data} ) {
        my ( $value, $literal ) = @{$descendant_data};
        my $this_length;
        if (defined $value) {
           $this_length = $value->{length};
        } else {
           (my $no_whitespace_literal = $literal) =~ s/\s//xmsg;
          $this_length = length $no_whitespace_literal;
        }
        $length += $this_length;
    } ## end for my $descendant_data ( @{$descendant_data} )
    return $length;
} ## end sub calculate_length

my %flags = (
    trace_terminals => $trace_terminals_flag,
    trace_cruft     => $trace_cruft_flag,
    trace_values    => $trace_values_flag,
    dump_config     => $dump_config_flag,
    dump_AHFA     => $dump_AHFA_flag,
);
if (defined $compile_flag) {
  open my $fh, q{<}, $compile_flag;
  my $source = join q{}, <$fh>;
  close $fh;
  $flags{compile} = \$source;
}

my ( $instance, $value ) = @{
    Marpa::R2::HTML::html(
        \$document,
        {   ':COMMENT' => sub { return { depths => {}, length => 0 } },
            q{*}       => sub {
                my $descendant_data =
                    Marpa::R2::HTML::descendants('value,literal');
                my $tagname = Marpa::R2::HTML::tagname();
                my $length  = calculate_length($descendant_data);
                $Marpa::R2::HTML::INSTANCE->{count}->{$tagname}++;
                $Marpa::R2::HTML::INSTANCE->{length}->{$tagname} += $length;
                my $return_depths = calculate_max_depths($descendant_data);
                ( $return_depths->{$tagname} //= 0 )++;
                $return_depths->{ANY}++;
                return {
                    depths => $return_depths,
                    length => $length,
                };
            },
            ':CRUFT' => sub {
                my $descendant_data =
                    Marpa::R2::HTML::descendants('value,literal');
                my $return_depths = { '[CRUFT]' => 1 };
                my $length = calculate_length($descendant_data);
                $Marpa::R2::HTML::INSTANCE->{count}->{'[CRUFT]'}++;
                $Marpa::R2::HTML::INSTANCE->{length}->{'[CRUFT]'} += $length;
                return {
                    depths => $return_depths,
                    length => $length,
                };
            },
            ':TOP' => sub {
                my $descendant_data =
                    Marpa::R2::HTML::descendants('value,literal');
                return [
                    $Marpa::R2::HTML::INSTANCE,
                    {   depths => calculate_max_depths($descendant_data),
                        length => calculate_length($descendant_data),
                    },
                ];
            },
        },
        \\%flags
    ),
    };

my $length_by_element = $instance->{length};
my $count_by_element = $instance->{count};
my $html_length = $length_by_element->{html};
my $total_lengths = List::Util::sum values %{ $length_by_element };
my $complexity = 0;
if ( $html_length >= 1 ) {
    $complexity = sprintf "%.3f",
        ( $total_lengths / ( $html_length * log($html_length) ) );
}
my $max_depths = $value->{depths};
my $max_element_depth = $max_depths->{ANY};
delete $max_depths->{ANY};

if ($html_flag) {
print qq{<table cellpadding="3" border="1">}
    . qq{<thead>\n}
    . qq{<tr><th colspan="5">$locator</tr>\n}
    . qq{<tr><th colspan="5">Complexity Score = $complexity</tr>\n}
    . qq{<tr><th colspan="5">Maximum Depth = $max_element_depth</tr>\n}
    . qq{<tr>}
    . qq{<th>Element}
    . qq{<th>Maximum<br>Nesting}
    . qq{<th>Number of<br>Elements}
    . qq{<th>Size in<br>Characters</th>}
    . qq{<th>Average<br>Size</th>}
    . qq{</tr>\n}
    . qq{</thead>\n};
} else {
    say $locator;
    say "Complexity Score = ", $complexity;
    say "Maximum Depth = ", $max_element_depth;
    printf "%11s%11s%11s%11s%11s\n", q{}, 'Maximum ', 'Number of', 'Size in  ', 'Average';
    printf "%11s%11s%11s%11s%11s\n", 'Element ', 'Nesting ', 'Elements ', 'Characters', 'Size  ';
}

for my $element ( sort keys %{$max_depths} ) {
    my $count = $count_by_element->{$element};
    my $size  = $length_by_element->{$element};
    my $average = $count ? int( $size / $count ) : q{-};
    if ($html_flag) {
    print join q{},
        q{<tr>},
        qq{<td>$element</td>},
        q{<td align="right">}, $max_depths->{$element}, q{</td>},
        qq{<td align="right">$count</td>},
        qq{<td align="right">$size</td>},
        qq{<td align="right">$average</td>},
        "</tr>\n";
    } else {
        printf "%-11s%11d%11d%11d%11d\n", $element, $max_depths->{$element}, $count, $size, $average;
    }
} ## end for my $element ( sort keys %{$max_depths} )

$html_flag and print qq{</table>\n};

exit 0;

# vim: set expandtab shiftwidth=4: