The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Grind out a lot of combinatoric tests for folding.

binmode STDOUT, ":utf8";

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
    require Config; import Config;
    skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX");
}

use charnames ":full";

my $DEBUG = 0;  # Outputs extra information for debugging this .t

use strict;
use warnings;
use Encode;
use POSIX;

# Special-cased characters in the .c's that we want to make sure get tested.
my %be_sure_to_test = (
        "\xDF" => 1, # LATIN_SMALL_LETTER_SHARP_S
        "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
        "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
        "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
        "\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
        "\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
    );


# Tests both unicode and not, so make sure not implicitly testing unicode
no feature 'unicode_strings';

# Case-insensitive matching is a large and complicated issue.  Perl does not
# implement it fully, properly.  For example, it doesn't include normalization
# as part of the equation.  To test every conceivable combination is clearly
# impossible; these tests are mostly drawn from visual inspection of the code
# and experience, trying to exercise all areas.

# There are three basic ranges of characters that Perl may treat differently:
# 1) Invariants under utf8 which on ASCII-ish machines are ASCII, and are
#    referred to here as ASCII.  On EBCDIC machines, the non-ASCII invariants
#    are all controls that fold to themselves.
my $ASCII = 1;

# 2) Other characters that fit into a byte but are different in utf8 than not;
#    here referred to, taking some liberties, as Latin1.
my $Latin1 = 2;

# 3) Characters that won't fit in a byte; here referred to as Unicode
my $Unicode = 3;

# Within these basic groups are equivalence classes that testing any character
# in is likely to lead to the same results as any other character.  This is
# used to cut down the number of tests needed, unless PERL_RUN_SLOW_TESTS is
# set.
my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};

# Additionally parts of this test run a lot of subtests, outputting the
# resulting TAP can be expensive so the tests are summarised internally. The
# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full
# output for debugging purposes.

sub range_type {
    my $ord = ord shift;

    return $ASCII if $ord < 128;
    return $Latin1 if $ord < 256;
    return $Unicode;
}

sub numerically {
    return $a <=> $b
}

my $list_all_tests = $ENV{PERL_DEBUG_FULL_TEST} || $DEBUG;
$| = 1 if $list_all_tests;

# Significant time is saved by not outputting each test but grouping the
# output into subtests
my $okays;          # Number of ok's in current subtest
my $this_iteration; # Number of possible tests in current subtest
my $count=0;        # Number of subtests = number of total tests

sub run_test($$$) {
    my ($test, $todo, $debug) = @_;

    $debug = "" unless $DEBUG;
    my $res = eval $test;

    if (!$res || $list_all_tests) {
      # Failed or debug; output the result
      $count++;
      ok($res, "$test; $debug");
    } else {
      # Just count the test as passed
      $okays++;
    }
    $this_iteration++;
}

my %has_test_by_participants;   # Makes sure has tests for each range and each
                                # number of characters that fold to the same
                                # thing
my %has_test_by_byte_count; # Makes sure has tests for each combination of
                            # n bytes folds to m bytes

my %tests; # The set of tests.
# Each key is a code point that folds to something else.
# Each value is a list of things that the key folds to.  If the 'thing' is a
# single code point, it is that ordinal.  If it is a multi-char fold, it is an
# ordered list of the code points in that fold.  Here's an example for 'S':
#  '83' => [ 115, 383 ]
#
# And one for a multi-char fold: \xDF
#  223 => [
#            [  # 'ss'
#                83,
#                83
#            ],
#            [  # 'SS'
#                115,
#                115
#            ],
#            [  # LATIN SMALL LETTER LONG S
#                383,
#                383
#            ],
#          7838 # LATIN_CAPITAL_LETTER_SHARP_S
#        ],

my %inverse_folds;  # keys are strings of the folded-to;
                    # values are lists of characters that fold to them

sub add_test($@) {
    my ($to, @from) = @_;

    # Called to cause the input to be tested by adding to %tests.  @from is
    # the list of characters that fold to the string $to.  @from should be
    # sorted so the lowest code point is first....
    # The input is in string form; %tests uses code points, so have to
    # convert.

    my $to_chars = length $to;
    my @test_to;        # List of tests for $to

    if ($to_chars == 1) {
        @test_to = ord $to;
    }
    else {
        push @test_to, [ map { ord $_ } split "", $to ];

        # For multi-char folds, we also test that things that can fold to each
        # individual character in the fold also work.  If we were testing
        # comprehensively, we would try every combination of upper and lower
        # case in the fold, but it will have to suffice to avoid running
        # forever to make sure that each thing that folds to these is tested
        # at least once.  Because of complement matching ([^...]), we need to
        # do both the folded, and the folded-from.
        # We first look at each character in the multi-char fold, and save how
        # many characters fold to it; and also the maximum number of such
        # folds
        my @folds_to_count;     # 0th char in fold is index 0 ...
        my $max_folds_to = 0;

        for (my $i = 0; $i < $to_chars; $i++) {
            my $to_char = substr($to, $i, 1);
            if (exists $inverse_folds{$to_char}) {
                $folds_to_count[$i] = scalar @{$inverse_folds{$to_char}};
                $max_folds_to = $folds_to_count[$i] if $max_folds_to < $folds_to_count[$i];
            }
            else {
                $folds_to_count[$i] = 0;
            }
        }

        # We will need to generate as many tests as the maximum number of
        # folds, so that each fold will have at least one test.
        # For example, consider character X which folds to the three character
        # string 'xyz'.  If 2 things fold to x (X and x), 4 to y (Y, Y'
        # (Y-prime), Y'' (Y-prime-prime), and y), and 1 thing to z (itself), 4
        # tests will be generated:
        #   xyz
        #   XYz
        #   xY'z
        #   xY''z
        for (my $i = 0; $i < $max_folds_to; $i++) {
            my @this_test_to;   # Assemble a single test

            # For each character in the multi-char fold ...
            for (my $j = 0; $j < $to_chars; $j++) {
                my $this_char = substr($to, $j, 1);

                # Use its corresponding inverse fold, if available.
                if ($i < $folds_to_count[$j]) {
                    push @this_test_to, ord $inverse_folds{$this_char}[$i];
                }
                else {  # Or else itself.
                    push @this_test_to, ord $this_char;
                }
            }

            # Add this test to the list
            push @test_to, [ @this_test_to ];
        }

        # Here, have assembled all the tests for the multi-char fold.  Sort so
        # lowest code points are first for consistency and aesthetics in
        # output.  We know there are at least two characters in the fold, but
        # I haven't bothered to worry about sorting on an optional third
        # character if the first two are identical.
        @test_to = sort { ($a->[0] == $b->[0])
                           ? $a->[1] <=> $b->[1]
                           : $a->[0] <=> $b->[0]
                        } @test_to;
    }


    # This test is from n bytes to m bytes.  Record that so won't try to add
    # another test that does the same.
    use bytes;
    my $to_bytes = length $to;
    foreach my $from_map (@from) {
        $has_test_by_byte_count{length $from_map}{$to_bytes} = $to;
    }
    no bytes;

    my $ord_smallest_from = ord shift @from;
    if (exists $tests{$ord_smallest_from}) {
        die "There are already tests for $ord_smallest_from"
    };

    # Add in the fold tests,
    push @{$tests{$ord_smallest_from}}, @test_to;

    # Then any remaining froms in the equivalence class.
    push @{$tests{$ord_smallest_from}}, map { ord $_ } @from;
}

# Get the Unicode rules and construct inverse mappings from them

use Unicode::UCD;
my $file="../lib/unicore/CaseFolding.txt";

# Use the Unicode data file if we are on an ASCII platform (which its data is
# for), and it is in the modern format (starting in Unicode 3.1.0) and it is
# available.  This avoids being affected by potential bugs introduced by other
# layers of Perl
if (ord('A') == 65
    && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0
    && open my $fh, "<", $file)
{
    while (<$fh>) {
        chomp;

        # Lines look like (though without the initial '#')
        #0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE

        # Get rid of comments, ignore blank or comment-only lines
        my $line = $_ =~ s/ (?: \s* \# .* )? $ //rx;
        next unless length $line;
        my ($hex_from, $fold_type, @hex_folded) = split /[\s;]+/, $line;

        next if $fold_type =~ / ^ [IT] $/x; # Perl doesn't do Turkish folding
        next if $fold_type eq 'S';  # If Unicode's tables are correct, the F
                                    # should be a superset of S

        my $folded_str = pack ("U0U*", map { hex $_ } @hex_folded);
        push @{$inverse_folds{$folded_str}}, chr hex $hex_from;
    }
}
else {  # Here, can't use the .txt file: read the Unicode rules file and
        # construct inverse mappings from it

    my ($invlist_ref, $invmap_ref, undef, $default)
                                    = Unicode::UCD::prop_invmap('Case_Folding');
    for my $i (0 .. @$invlist_ref - 1 - 1) {
        next if $invmap_ref->[$i] == $default;

        # Make into an array if not so already, so can treat uniformly below
        $invmap_ref->[$i] = [ $invmap_ref->[$i] ] if ! ref $invmap_ref->[$i];

        # Each subsequent element of the range requires adjustment of +1 from
        # the previous element
        my $adjust = -1;
        for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) {
            $adjust++;
            my $folded_str
                        = pack "U0U*", map { $_ + $adjust } @{$invmap_ref->[$i]};
            #note (sprintf "%d: %04X: %s", __LINE__, $j, join " ",
            #    map { sprintf "%04X", $_  + $adjust } @{$invmap_ref->[$i]});
            push @{$inverse_folds{$folded_str}}, chr $j;
        }
    }
}

# Analyze the data and generate tests to get adequate test coverage.  We sort
# things so that smallest code points are done first.
TO:
foreach my $to (sort { (length $a == length $b)
                        ? $a cmp $b
                        : length $a <=> length $b
                    } keys %inverse_folds)
{

    # Within each fold, sort so that the smallest code points are done first
    @{$inverse_folds{$to}} = sort { $a cmp $b } @{$inverse_folds{$to}};
    my @from = @{$inverse_folds{$to}};

    # Just add it to the tests if doing complete coverage
    if (! $skip_apparently_redundant) {
        add_test($to, @from);
        next TO;
    }

    my $to_chars = length $to;
    my $to_range_type = range_type(substr($to, 0, 1));

    # If this is required to be tested, do so.  We check for these first, as
    # they will take up slots of byte-to-byte combinations that we otherwise
    # would have to have other tests to get.
    foreach my $from_map (@from) {
        if (exists $be_sure_to_test{$from_map}) {
            add_test($to, @from);
            next TO;
        }
    }

    # If the fold contains heterogeneous range types, is suspect and should be
    # tested.
    if ($to_chars > 1) {
        foreach my $char (split "", $to) {
            if (range_type($char) != $to_range_type) {
                add_test($to, @from);
                next TO;
            }
        }
    }

    # If the mapping crosses range types, is suspect and should be tested
    foreach my $from_map (@from) {
        if (range_type($from_map) != $to_range_type) {
            add_test($to, @from);
            next TO;
        }
    }

    # Here, all components of the mapping are in the same range type.  For
    # single character folds, we test one case in each range type that has 2
    # particpants, 3 particpants, etc.
    if ($to_chars == 1) {
        if (! exists $has_test_by_participants{scalar @from}{$to_range_type}) {
            add_test($to, @from);
            $has_test_by_participants{scalar @from}{$to_range_type} = $to;
            next TO;
        }
    }

    # We also test all combinations of mappings from m to n bytes.  This is
    # because the regex optimizer cares.  (Don't bother worrying about that
    # Latin1 chars will occupy a different number of bytes under utf8, as
    # there are plenty of other cases that catch these byte numbers.)
    use bytes;
    my $to_bytes = length $to;
    foreach my $from_map (@from) {
        if (! exists $has_test_by_byte_count{length $from_map}{$to_bytes}) {
            add_test($to, @from);
            next TO;
        }
    }
}

# For each range type, test additionally a character that folds to itself
add_test(chr 0x3A, chr 0x3A);
add_test(chr 0xF7, chr 0xF7);
add_test(chr 0x2C7, chr 0x2C7);

# To cut down on the number of tests
my $has_tested_aa_above_latin1;
my $has_tested_latin1_aa;
my $has_tested_ascii_aa;
my $has_tested_l_above_latin1;
my $has_tested_above_latin1_l;
my $has_tested_ascii_l;
my $has_tested_above_latin1_d;
my $has_tested_ascii_d;
my $has_tested_non_latin1_d;
my $has_tested_above_latin1_a;
my $has_tested_ascii_a;
my $has_tested_non_latin1_a;

# For use by pairs() in generating combinations
sub prefix {
    my $p = shift;
    map [ $p, $_ ], @_
}

# Returns all ordered combinations of pairs of elements from the input array.
# It doesn't return pairs like (a, a), (b, b).  Change the slice to an array
# to do that.  This was just to have fewer tests.
sub pairs (@) {
    #print __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
    map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
}

my @charsets = qw(d u a aa);
if($Config{d_setlocale}) {
    my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
    if ($current_locale eq 'C') {
        require locale; import locale;

        # Some implementations don't have the 128-255 range characters all
        # mean nothing under the C locale (an example being VMS).  This is
        # legal, but since we don't know what the right answers should be,
        # skip the locale tests in that situation.
        for my $i (128 .. 255) {
            my $char = chr($i);
            goto untestable_locale if uc($char) ne $char || lc($char) ne $char;
        }
        push @charsets, 'l';
      untestable_locale:
    }
}

# Finally ready to do the tests
foreach my $test (sort { numerically } keys %tests) {

  my $previous_target;
  my $previous_pattern;
  my @pairs = pairs(sort numerically $test, @{$tests{$test}});

  # Each fold can be viewed as a closure of all the characters that
  # participate in it.  Look at each possible pairing from a closure, with the
  # first member of the pair the target string to match against, and the
  # second member forming the pattern.  Thus each fold member gets tested as
  # the string, and the pattern with every other member in the opposite role.
  while (my $pair = shift @pairs) {
    my ($target, $pattern) = @$pair;

    # When testing a char that doesn't fold, we can get the same
    # permutation twice; so skip all but the first.
    next if $previous_target
            && $previous_target == $target
            && $previous_pattern == $pattern;
    ($previous_target, $previous_pattern) = ($target, $pattern);

    # Each side may be either a single char or a string.  Extract each into an
    # array (perhaps of length 1)
    my @target, my @pattern;
    @target = (ref $target) ? @$target : $target;
    @pattern = (ref $pattern) ? @$pattern : $pattern;

    # We are testing just folds to/from a single character.  If our pairs
    # happens to generate multi/multi, skip.
    next if @target > 1 && @pattern > 1;

    # Have to convert non-utf8 chars to native char set
    @target = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @target;
    @pattern = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @pattern;

    # Get in hex form.
    my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
    my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;

    my $target_above_latin1 = grep { $_ > 255 } @target;
    my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
    my $target_has_ascii = grep { $_ < 128 } @target;
    my $pattern_has_ascii = grep { $_ < 128 } @pattern;
    my $target_only_ascii = ! grep { $_ > 127 } @target;
    my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
    my $target_has_latin1 = grep { $_ < 256 } @target;
    my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
    my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
    my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
    my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];

    # We don't test multi-char folding into other multi-chars.  We are testing
    # a code point that folds to or from other characters.  Find the single
    # code point for diagnostic purposes.  (If both are single, choose the
    # target string)
    my $ord = @target == 1 ? $target[0] : $pattern[0];
    my $progress = sprintf "%04X: \"%s\" and /%s/",
                            $test,
                            join("", @x_target),
                            join("", @x_pattern);
    #note $progress;

    # Now grind out tests, using various combinations.
    foreach my $charset (@charsets) {
      $okays = 0;
      $this_iteration = 0;

      # To cut down somewhat on the enormous quantity of tests this currently
      # runs, skip some for some of the character sets whose results aren't
      # likely to differ from others.  But run all tests on the code points
      # that don't fold, plus one other set in each range group.
      if (! $is_self) {

        # /aa should only affect things with folds in the ASCII range.  But, try
        # it on one set in the other ranges just to make sure it doesn't break
        # them.
        if ($charset eq 'aa') {
          if (! $target_has_ascii && ! $pattern_has_ascii) {
            if ($target_above_latin1 || $pattern_above_latin1) {
              next if defined $has_tested_aa_above_latin1
                      && $has_tested_aa_above_latin1 != $test;
              $has_tested_aa_above_latin1 = $test;
            }
            next if defined $has_tested_latin1_aa
                    && $has_tested_latin1_aa != $test;
            $has_tested_latin1_aa = $test;
          }
          elsif ($target_only_ascii && $pattern_only_ascii) {

              # And, except for one set just to make sure, skip tests
              # where both elements in the pair are ASCII.  If one works for
              # aa, the others are likely too.  This skips tests where the
              # fold is from non-ASCII to ASCII, but this part of the test
              # is just about the ASCII components.
              next if defined $has_tested_ascii_l
                      && $has_tested_ascii_l != $test;
              $has_tested_ascii_l = $test;
          }
        }
        elsif ($charset eq 'l') {

          # For l, don't need to test beyond one set those things that are
          # all above latin1, because unlikely to have different successes
          # than /u
          if (! $target_has_latin1 && ! $pattern_has_latin1) {
            next if defined $has_tested_above_latin1_l
                    && $has_tested_above_latin1_l != $test;
            $has_tested_above_latin1_l = $test;
          }
          elsif ($target_only_ascii && $pattern_only_ascii) {

              # And, except for one set just to make sure, skip tests
              # where both elements in the pair are ASCII.  This is
              # essentially the same reasoning as above for /aa.
              next if defined $has_tested_ascii_l
                      && $has_tested_ascii_l != $test;
              $has_tested_ascii_l = $test;
          }
        }
        elsif ($charset eq 'd') {
          # Similarly for d.  Beyond one test (besides self) each, we  don't
          # test pairs that are both ascii; or both above latin1, or are
          # combinations of ascii and above latin1.
          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
            if ($target_has_ascii && $pattern_has_ascii) {
              next if defined $has_tested_ascii_d
                      && $has_tested_ascii_d != $test;
              $has_tested_ascii_d = $test
            }
            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
              next if defined $has_tested_above_latin1_d
                      && $has_tested_above_latin1_d != $test;
              $has_tested_above_latin1_d = $test;
            }
            else {
              next if defined $has_tested_non_latin1_d
                      && $has_tested_non_latin1_d != $test;
              $has_tested_non_latin1_d = $test;
            }
          }
        }
        elsif ($charset eq 'a') {
          # Similarly for a.  This should match identically to /u, so wasn't
          # tested at all until a bug was found that was thereby missed.
          # As a compromise, beyond one test (besides self) each, we  don't
          # test pairs that are both ascii; or both above latin1, or are
          # combinations of ascii and above latin1.
          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
            if ($target_has_ascii && $pattern_has_ascii) {
              next if defined $has_tested_ascii_a
                      && $has_tested_ascii_a != $test;
              $has_tested_ascii_a = $test
            }
            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
              next if defined $has_tested_above_latin1_a
                      && $has_tested_above_latin1_a != $test;
              $has_tested_above_latin1_a = $test;
            }
            else {
              next if defined $has_tested_non_latin1_a
                      && $has_tested_non_latin1_a != $test;
              $has_tested_non_latin1_a = $test;
            }
          }
        }
      }

      foreach my $utf8_target (0, 1) {    # Both utf8 and not, for
                                          # code points < 256
        my $upgrade_target = "";

        # These must already be in utf8 because the string to match has
        # something above latin1.  So impossible to test if to not to be in
        # utf8; and otherwise, no upgrade is needed.
        next if $target_above_latin1 && ! $utf8_target;
        $upgrade_target = ' utf8::upgrade($c);' if ! $target_above_latin1 && $utf8_target;

        foreach my $utf8_pattern (0, 1) {
          next if $pattern_above_latin1 && ! $utf8_pattern;

          # Our testing of 'l' uses the POSIX locale, which is ASCII-only
          my $uni_semantics = $charset ne 'l' && ($utf8_target || $charset eq 'u' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/);
          my $upgrade_pattern = "";
          $upgrade_pattern = ' utf8::upgrade($p);' if ! $pattern_above_latin1 && $utf8_pattern;

          my $lhs = join "", @x_target;
          my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
          my @rhs = @x_pattern;
          my $rhs = join "", @rhs;
          my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
                            || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
                            || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);

          # Do simple tests of referencing capture buffers, named and
          # numbered.
          my $op = '=~';
          $op = '!~' if $should_fail;

          my $todo = 0;  # No longer any todo's
          my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
          run_test($eval, $todo, "");

          $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
          run_test($eval, $todo, "");

          if ($lhs ne $rhs) {
            $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
            run_test($eval, "", "");

            $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
            run_test($eval, "", "");
          }

          # See if works on what could be a simple trie.
          $eval = "my \$c = \"$lhs\"; my \$p = qr/$rhs|xyz/i$charset;$upgrade_target$upgrade_pattern \$c $op \$p";
          run_test($eval, "", "");

          # Check that works when the folded character follows something that
          # is quantified.  This test knows the regex code internals to the
          # extent that it knows this is a potential problem, and that there
          # are three different types of quantifiers generated: 1) The thing
          # being quantified matches a single character; 2) it matches more
          # than one character, but is fixed width; 3) it can match a variable
          # number of characters.  (It doesn't know that case 3 shouldn't
          # matter, since it doesn't do anything special for the character
          # following the quantifier; nor that some of the different
          # quantifiers execute the same underlying code, as these tests are
          # quick, and this insulates these tests from changes in the
          # implementation.)
          for my $quantifier ('?', '??', '*', '*?', '+', '+?', '{1,2}', '{1,2}?') {
            $eval = "my \$c = \"_$lhs\"; my \$p = qr/(?$charset:.$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
            run_test($eval, "", "");
            $eval = "my \$c = \"__$lhs\"; my \$p = qr/(?$charset:(?:..)$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
            run_test($eval, "", "");
            $eval = "my \$c = \"__$lhs\"; my \$p = qr/(?$charset:(?:.|\\R)$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
            run_test($eval, "", "");
          }

          foreach my $bracketed (0, 1) {   # Put rhs in [...], or not
            next if $bracketed && @pattern != 1;    # bracketed makes these
                                                    # or's instead of a sequence
            foreach my $optimize_bracketed (0, 1) {
                next if $optimize_bracketed && ! $bracketed;
            foreach my $inverted (0,1) {
                next if $inverted && ! $bracketed;  # inversion only valid in [^...]
                next if $inverted && @target != 1;  # [perl #89750] multi-char
                                                    # not valid in [^...]

              # In some cases, add an extra character that doesn't fold, and
              # looks ok in the output.
              my $extra_char = "_";
              foreach my $prepend ("", $extra_char) {
                foreach my $append ("", $extra_char) {

                  # Assemble the rhs.  Put each character in a separate
                  # bracketed if using charclasses.  This creates a stress on
                  # the code to span a match across multiple elements
                  my $rhs = "";
                  foreach my $rhs_char (@rhs) {
                      $rhs .= '[' if $bracketed;
                      $rhs .= '^' if $inverted;
                      $rhs .=  $rhs_char;

                      # Add a character to the class, so class doesn't get
                      # optimized out, unless we are testing that optimization
                      $rhs .= '_' if $optimize_bracketed;
                      $rhs .= ']' if $bracketed;
                  }

                  # Add one of: no capturing parens
                  #             a single set
                  #             a nested set
                  # Use quantifiers and extra variable width matches inside
                  # them to keep some optimizations from happening
                  foreach my $parend (0, 1, 2) {
                    my $interior = (! $parend)
                                    ? $rhs
                                    : ($parend == 1)
                                        ? "(${rhs},?)"
                                        : "((${rhs})+,?)";
                    foreach my $quantifier ("", '?', '*', '+', '{1,3}') {

                      # Perhaps should be TODOs, as are unimplemented, but
                      # maybe will never be implemented
                      next if @pattern != 1 && $quantifier;

                      # A ? or * quantifier normally causes the thing to be
                      # able to match a null string
                      my $quantifier_can_match_null = $quantifier eq '?' || $quantifier eq '*';

                      # But since we only quantify the last character in a
                      # multiple fold, the other characters will have width,
                      # except if we are quantifying the whole rhs
                      my $can_match_null = $quantifier_can_match_null && (@rhs == 1 || $parend);

                      foreach my $l_anchor ("", '^') { # '\A' didn't change result)
                        foreach my $r_anchor ("", '$') { # '\Z', '\z' didn't change result)

                          # The folded part can match the null string if it
                          # isn't required to have width, and there's not
                          # something on one or both sides that force it to.
                          my $both_sides = ($l_anchor && $r_anchor) || ($l_anchor && $append) || ($r_anchor && $prepend) || ($prepend && $append);
                          my $must_match = ! $can_match_null || $both_sides;
                          # for performance, but doing this missed many failures
                          #next unless $must_match;
                          my $quantified = "(?$charset:$l_anchor$prepend$interior${quantifier}$append$r_anchor)";
                          my $op;
                          if ($must_match && $should_fail)  {
                              $op = 0;
                          } else {
                              $op = 1;
                          }
                          $op = ! $op if $must_match && $inverted;

                          if ($inverted && @target > 1) {
                            # When doing an inverted match against a
                            # multi-char target, and there is not something on
                            # the left to anchor the match, if it shouldn't
                            # succeed, skip, as what will happen (when working
                            # correctly) is that it will match the first
                            # position correctly, and then be inverted to not
                            # match; then it will go to the second position
                            # where it won't match, but get inverted to match,
                            # and hence succeeding.
                            next if ! ($l_anchor || $prepend) && ! $op;

                            # Can't ever match for latin1 code points non-uni
                            # semantics that have a inverted multi-char fold
                            # when there is something on both sides and the
                            # quantifier isn't such as to span the required
                            # width, which is 2 or 3.
                            $op = 0 if $ord < 255
                                       && ! $uni_semantics
                                       && $both_sides
                                       && ( ! $quantifier || $quantifier eq '?')
                                       && $parend < 2;

                            # Similarly can't ever match when inverting a multi-char
                            # fold for /aa and the quantifier isn't sufficient
                            # to allow it to span to both sides.
                            $op = 0 if $target_has_ascii && $charset eq 'aa' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;

                            # Or for /l
                            $op = 0 if $target_has_latin1 && $charset eq 'l' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
                          }


                          my $desc = "my \$c = \"$prepend$lhs$append\"; "
                                   . "my \$p = qr/$quantified/i;"
                                   . "$upgrade_target$upgrade_pattern "
                                   . "\$c " . ($op ? "=~" : "!~") . " \$p; ";
                          if ($DEBUG) {
                            $desc .= (
                             "; uni_semantics=$uni_semantics, "
                             . "should_fail=$should_fail, "
                             . "bracketed=$bracketed, "
                             . "prepend=$prepend, "
                             . "append=$append, "
                             . "parend=$parend, "
                             . "quantifier=$quantifier, "
                             . "l_anchor=$l_anchor, "
                             . "r_anchor=$r_anchor; "
                             . "pattern_above_latin1=$pattern_above_latin1; "
                             . "utf8_pattern=$utf8_pattern"
                            );
                          }

                          my $c = "$prepend$lhs_str$append";
                          my $p = qr/$quantified/i;
                          utf8::upgrade($c) if length($upgrade_target);
                          utf8::upgrade($p) if length($upgrade_pattern);
                          my $res = $op ? ($c =~ $p): ($c !~ $p);

                          if (!$res || $list_all_tests) {
                            # Failed or debug; output the result
                            $count++;
                            ok($res, "test $count - $desc");
                          } else {
                            # Just count the test as passed
                            $okays++;
                          }
                          $this_iteration++;
                        }
                      }
                    }
                  }
                }
              }
            }
          }
          }
        }
      }
      unless($list_all_tests) {
        $count++;
        is $okays, $this_iteration, "$okays subtests ok for"
          . " /$charset,"
          . ' target="' . join("", @x_target) . '",'
          . ' pat="' . join("", @x_pattern) . '"';
      }
    }
  }
}

plan($count);

1