The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# Marpa::R3 is Copyright (C) 2018, Jeffrey Kegler.
#
# This module is free software; you can redistribute it and/or modify it
# under the same terms as Perl 5.10.1. For more details, see the full text
# of the licenses in the directory LICENSES.
#
# This program is distributed in the hope that it will be
# useful, but it is provided "as is" and without any express
# or implied warranties. For details, see the full text of
# of the licenses in the directory LICENSES.

# This utility searches for mismatched braces --
# curly, square and round.

use 5.010001;

use strict;
use warnings;
use Marpa::R3;
use Data::Dumper;
use English qw( -no_match_vars );
use Getopt::Long ();
use POSIX qw(setlocale LC_ALL);

POSIX::setlocale(LC_ALL, "C");

use Test::More;

sub usage {
    die "Usage: $PROGRAM_NAME < file\n",
        "For testing: $PROGRAM_NAME --test\n";
}

our $TESTING = 1;
my $verbose = 0;
usage()
    if
    not Getopt::Long::GetOptions( verbose => \$verbose, 'test!' => \$TESTING );
usage() if @ARGV;

Test::More::plan tests => 5 if $TESTING;

my $dsl = << '=== GRAMMAR ===';
lexeme default = action => [ name, value ] # to add token names to ast

text ::= pieces
pieces ::= piece*
piece ::= filler | balanced

balanced ::= 
    lparen pieces rparen
  | lcurly pieces rcurly
  | lsquare pieces rsquare

# x5b is left square bracket
# x5d is right square bracket
filler ~ [^(){}\x5B\x5D]+

lparen ~ '('
rparen ~ ')'
lcurly ~ '{'
rcurly ~ '}'
lsquare ~ '['
rsquare ~ ']'

=== GRAMMAR ===

my %literal_match = ();
for my $pair (qw= () [] {} =) {
    my ( $left, $right ) = split //xms, $pair;
    $literal_match{$left}  = $right;
    $literal_match{$right} = $left;
}
my %closing_char_by_name = (
    rcurly  => '}',
    rsquare => ']',
    rparen  => ')',
);

my $g = Marpa::R3::Grammar->new(
    {
        source => \($dsl),
        ## Ask Marpa to generate an event on rejection
        rejection => 'event',
    }
);

my @tests = ();

if ($TESTING) {
    @tests = (
        [ 'z}ab)({[]})))(([]))zz',                   '1{ 4( 11( 12(' ],
        [ '9\090]{[][][9]89]8[][]90]{[]\{}{}09[]}[', '5[ 16} 16[ 24[ 39]' ],
        [ '([]([])([]([]',                           '13) 13) 13)' ],
        [ '([([([([',    '8] 8) 8] 8) 8] 8) 8] 8)' ],
        [ '({]-[(}-[{)', '2} 2) 2[ 6) 6] 6{ 10} 10] 10(' ],
    );
    for my $test (@tests) {
        my ( $string, $expected_result ) = @{$test};
        my $fixes = q{};
        test( $g, $string, \$fixes );
        diagnostic( "Input: ", substr( $string, 0, 60 ) ) if $verbose;
        my $description = qq{Result of "} . ( substr $string, 0, 60 ) . q{"};
        Test::More::is( $fixes, $expected_result, $description );
    } ## end for my $test (@tests)
} ## end if ($TESTING)
else {
    local $RS = undef;
    my $input = <>;
    my $actual_result = test( $g, $input );
    if ( not scalar @{$actual_result} ) {
        say '=== All brackets balanced ===';
    }
    else {
        my $divider = "\n" . ( '=' x 20 ) . "\n";
        say join $divider, @{$actual_result};
    }
} ## end else [ if ($TESTING) ]

sub diagnostic {
    if ($TESTING) {
        Test::More::diag(@_);
    }
    else {
        say {*STDERR} @_;
    }
} ## end sub diagnostic

sub marked_line {
    my ( $string, $column1, $column2 ) = @_;
    my $max_line_length = 60;
    $max_line_length = $column1 if $column1 > $max_line_length;
    $max_line_length = $column2
        if defined $column2 and $column2 > $max_line_length;

    # $pos_column is always the last of the two columns
    my $output_line = substr $string, 0, $max_line_length;
    my $nl_pos = index $output_line, "\n";
    $output_line = substr $output_line, 0, $nl_pos;
    my $pointer_line = ( q{ } x $column1 ) . q{^};
    if ( defined $column2 and $column2 > $column1 ) {
        my $further_offset = $column2 - $column1;
        $pointer_line .= ( q{ } x ( $further_offset - 1 ) ) . q{^};
    }
    return join "\n", $output_line, $pointer_line;
} ## end sub marked_line

sub test {
    my ( $grammar, $string, $fixes ) = @_;
    my @problems = ();
    my @fixes    = ();
    diagnostic( "Input: ", substr( $string, 0, 60 ) ) if $verbose;

    # state $recce_debug_args = { trace_terminals => 1, trace_values => 1 };
    state $recce_debug_args = {};

    my $rejection_is_fatal = undef;
    my $stalled            = undef;

    my $recce = Marpa::R3::Recognizer->new(
        {
            grammar        => $grammar,
            event_handlers => {
                "'rejected" => sub () {
                    die "Rejection at end of string"
                      if $rejection_is_fatal;
                    $stalled = 1;
                    'pause';
                }
            }
        },
        $recce_debug_args
    );

    my $main_block = $recce->block_new( \$string );
    my $pos        = 0;

    my %blk_by_bracket = ();
    for my $char ( keys %literal_match ) {
        $blk_by_bracket{$char} = $recce->block_new( \$char );
    }

    # I define a local closure for each of the main cases.
    # After they are defined, the main loop will call them

    # Local closure to
    # deal with the main case --
    # that where we want to read more input from the
    # main block
    # Returns Perl true on success,
    # Perl false if the case is inapplicable
    my $main_block_read = sub {
        return if $stalled;
        return if $pos >= length $string;
        $rejection_is_fatal = undef;
        $recce->block_set($main_block);
        $recce->block_move( $pos, -1 );
        $pos = $recce->block_read();
        return 1;
    };

    # Local closure to
    # deal with the case of a missing close bracket
    #
    # Returns Perl true on success,
    # Perl false and undef if the case is inapplicable
    # Perl false and an error message on failure
    my $missing_close_bracket_handle = sub {
        # Find, at random, one of the expected tokens that is a closing bracket.
        # (There should be only one.)
        my ($token_literal) =
          grep { defined }
          map  { $closing_char_by_name{$_} } @{ $recce->terminals_expected() };

        # The case is inapplicable if there is no closing bracket expected
        ## no critic (Subroutines::ProhibitExplicitReturnUndef)
        return undef, undef if not $token_literal;
        ## use critic

        # If there is an missing close bracket,
        # use Ruby Slippers to close it,
        # report the fix,
        # and start the read loop again.
        my $token_blk = $blk_by_bracket{$token_literal};
        $rejection_is_fatal = 1;
        $stalled = undef;
        $recce->block_set($token_blk);
        $recce->block_move( 0, -1 );
        $recce->block_read();
        $recce->block_set($main_block);

        # We've created a properly bracketed span of the input, using
        # the Ruby Slippers token.  Use Marpa's tables to find its
        # beginning.
        my ($opening_bracket) = $recce->last_completed('balanced');
        my ( $bracket_block, $bracket_l0_pos ) =
          $recce->g1_to_block_first($opening_bracket);
        my ( $line, $column ) =
          $recce->line_column( $bracket_block, $bracket_l0_pos );
        my $opening_column0 = $bracket_l0_pos - ( $column - 1 );

        my $problem = q{};
        my $this_block;
        ($this_block, $pos) = $recce->block_progress();
        my ( $pos_line, $pos_column ) = $recce->line_column($this_block, $pos);
        if ( $line == $pos_line ) {

            # Report a missing close bracket for cases contained in
            # a single text line
            $problem = join "\n",
              "* Line $line, column $column: Missing close $token_literal, "
              . "problem detected at line $pos_line, column $pos_column",
              marked_line(
                substr( $string, $pos - ( $pos_column - 1 ) ),
                $column - 1,
                $pos_column - 1
              );
        } ## end if ( $line == $pos_line )
        else {
            # Report a missing close bracket for cases that span
            # two or more text lines
            $problem = join "\n",
                "* Line $line, column $column: No matching bracket for "
              . q{'}
              . $literal_match{$token_literal} . q{', },
              marked_line( substr( $string, $opening_column0 ), $column - 1 ),
              , "*   Problem detected at line $pos_line, column $pos_column:",
              marked_line( substr( $string, $pos - ( $pos_column - 1 ), ),
                $pos_column - 1 );
        } ## end else [ if ( $line == $pos_line ) ]

        # Add our report to the list of problems.
        push @problems, [ $line, $column, $problem ];
        push @fixes, "$pos$token_literal" if $fixes;
        diagnostic(
            "* Line $line, column $column: Missing close $token_literal, ",
            "problem detected at line $pos_line, column $pos_column"
        ) if $verbose;
        return 1;
    };

    # Local closure:
    # Deal with # an extra close bracket,
    # one which does not match any
    # opening bracket.
    # We will use the Ruby Slippers to insert
    # an open bracket to fix the problem.
    #
    # Returns Perl true on success,
    # Perl false and undef if the case is inapplicable
    # Perl false and an error message on failure
    my $extra_close_bracket_handle = sub {

        ## no critic (Subroutines::ProhibitExplicitReturnUndef)
        return undef, undef if $pos >= length $string;
        ## use critic

        # The only remaining possibility is the opposite issue:
        my $nextchar = substr $string, $pos, 1;
        my $token_literal = $literal_match{$nextchar};

        # If the next character in input is not an close bracket,
        # something strange has happened.
        # All we can do is abend.
        ## no critic (Subroutines::ProhibitExplicitReturnUndef)
        return undef, "Rejection at pos $pos: ", substr( $string, $pos, 10 )
          if not defined $token_literal;
        ## use critic

        my $token_blk = $blk_by_bracket{$token_literal};

        $stalled = undef;
        $rejection_is_fatal = 1;
        $recce->block_set($token_blk);
        $recce->block_move( 0, -1 );
        $recce->block_read();
        $recce->block_set($main_block);

        # Used for testing
        push @fixes, "$pos$token_literal" if $fixes;

        # We've done the Ruby Slippers thing, and are ready to
        # continue reading.
        # But first we want to report the error.

        my ( $pos_line, $pos_column ) = $recce->line_column($main_block, $pos);

        # Report the error if it was a case of a missing open bracket.
        my $problem = join "\n",
          "* Line $pos_line, column $pos_column: Missing open $token_literal",
          marked_line( ( substr $string, $pos - ( $pos_column - 1 ) ),
            $pos_column - 1 );
        push @problems, [ $pos_line, $pos_column, $problem ];
        diagnostic(
            "* Line $pos_line, column $pos_column: Missing open $token_literal"
        ) if $verbose;
        return 1;
    };

    # While we have unread input or unclosed brackets ...
  MAIN_LOOP: while (1) {

        # Try to read from the main block
        my ($ok, $error) = $main_block_read->();
        next MAIN_LOOP if $ok;

        # Next, try to deal with unclosed brackets, if any
        ($ok, $error) = $missing_close_bracket_handle->();
        next MAIN_LOOP if $ok;

        # If we are here, we have unread input,
        # but no brackets to close.
        # The only possibility left is that
        # we have an extra close bracket in
        # the input.
        # To fix it, we "Ruby Slippers" up an opening bracket to
        # match it.
        ($ok, $error) = $extra_close_bracket_handle->();
        next MAIN_LOOP if $ok;
        last MAIN_LOOP if not $error;
        die $error;
    }

    # For testing
    if ( ref $fixes ) {
        ${$fixes} = join " ", @fixes;
    }

    # The problems do not necessarily occur in lexical order.
    # Sort them so that they can be reported that way.
    my @sorted_problems =
      sort { $a->[0] <=> $b->[0] or $a->[1] <=> $b->[1] } @problems;
    my @result = map { $_->[-1] } @sorted_problems;
    return \@result;

}

# vim: expandtab shiftwidth=4: