# Grind out a lot of combinatoric tests for folding.
binmode STDOUT, ":utf8";
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
require './test.pl';
skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX");
}
use charnames ":full";
my $DEBUG = 0; # Outputs extra information for debugging this .t
use strict;
use warnings;
use Encode;
use POSIX;
# Special-cased characters in the .c's that we want to make sure get tested.
my %be_sure_to_test = (
"\xDF" => 1, # LATIN_SMALL_LETTER_SHARP_S
"\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
"\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
"\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
"\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
"\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
);
# Tests both unicode and not, so make sure not implicitly testing unicode
no feature 'unicode_strings';
# Case-insensitive matching is a large and complicated issue. Perl does not
# implement it fully, properly. For example, it doesn't include normalization
# as part of the equation. To test every conceivable combination is clearly
# impossible; these tests are mostly drawn from visual inspection of the code
# and experience, trying to exercise all areas.
# There are three basic ranges of characters that Perl may treat differently:
# 1) Invariants under utf8 which on ASCII-ish machines are ASCII, and are
# referred to here as ASCII. On EBCDIC machines, the non-ASCII invariants
# are all controls that fold to themselves.
my $ASCII = 1;
# 2) Other characters that fit into a byte but are different in utf8 than not;
# here referred to, taking some liberties, as Latin1.
my $Latin1 = 2;
# 3) Characters that won't fit in a byte; here referred to as Unicode
my $Unicode = 3;
# Within these basic groups are equivalence classes that testing any character
# in is likely to lead to the same results as any other character. This is
# used to cut down the number of tests needed, unless PERL_RUN_SLOW_TESTS is
# set.
my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};
# Additionally parts of this test run a lot of subtests, outputting the
# resulting TAP can be expensive so the tests are summarised internally. The
# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full
# output for debugging purposes.
sub range_type {
my $ord = ord shift;
return $ASCII if $ord < 128;
return $Latin1 if $ord < 256;
return $Unicode;
}
sub numerically {
return $a <=> $b
}
# Significant time is saved by not outputting each test but grouping the
# output into subtests
my $okays; # Number of ok's in current subtest
my $this_iteration; # Number of possible tests in current subtest
my $count=0; # Number of subtests = number of total tests
sub run_test($$$) {
my ($test, $todo, $debug) = @_;
$debug = "" unless $DEBUG;
my $res = eval $test;
if (!$res || $ENV{PERL_DEBUG_FULL_TEST}) {
# Failed or debug; output the result
$count++;
ok($res, "$test; $debug");
} else {
# Just count the test as passed
$okays++;
}
$this_iteration++;
}
my %has_test_by_participants; # Makes sure has tests for each range and each
# number of characters that fold to the same
# thing
my %has_test_by_byte_count; # Makes sure has tests for each combination of
# n bytes folds to m bytes
my %tests; # The set of tests.
# Each key is a code point that folds to something else.
# Each value is a list of things that the key folds to. If the 'thing' is a
# single code point, it is that ordinal. If it is a multi-char fold, it is an
# ordered list of the code points in that fold. Here's an example for 'S':
# '83' => [ 115, 383 ]
#
# And one for a multi-char fold: \xDF
# 223 => [
# [ # 'ss'
# 83,
# 83
# ],
# [ # 'SS'
# 115,
# 115
# ],
# [ # LATIN SMALL LETTER LONG S
# 383,
# 383
# ],
# 7838 # LATIN_CAPITAL_LETTER_SHARP_S
# ],
my %inverse_folds; # keys are strings of the folded-to;
# values are lists of characters that fold to them
sub add_test($@) {
my ($to, @from) = @_;
# Called to cause the input to be tested by adding to %tests. @from is
# the list of characters that fold to the string $to. @from should be
# sorted so the lowest code point is first....
# The input is in string form; %tests uses code points, so have to
# convert.
my $to_chars = length $to;
my @test_to; # List of tests for $to
if ($to_chars == 1) {
@test_to = ord $to;
}
else {
push @test_to, [ map { ord $_ } split "", $to ];
# For multi-char folds, we also test that things that can fold to each
# individual character in the fold also work. If we were testing
# comprehensively, we would try every combination of upper and lower
# case in the fold, but it will have to suffice to avoid running
# forever to make sure that each thing that folds to these is tested
# at least once. Because of complement matching ([^...]), we need to
# do both the folded, and the folded-from.
# We first look at each character in the multi-char fold, and save how
# many characters fold to it; and also the maximum number of such
# folds
my @folds_to_count; # 0th char in fold is index 0 ...
my $max_folds_to = 0;
for (my $i = 0; $i < $to_chars; $i++) {
my $to_char = substr($to, $i, 1);
if (exists $inverse_folds{$to_char}) {
$folds_to_count[$i] = scalar @{$inverse_folds{$to_char}};
$max_folds_to = $folds_to_count[$i] if $max_folds_to < $folds_to_count[$i];
}
else {
$folds_to_count[$i] = 0;
}
}
# We will need to generate as many tests as the maximum number of
# folds, so that each fold will have at least one test.
# For example, consider character X which folds to the three character
# string 'xyz'. If 2 things fold to x (X and x), 4 to y (Y, Y'
# (Y-prime), Y'' (Y-prime-prime), and y), and 1 thing to z (itself), 4
# tests will be generated:
# xyz
# XYz
# xY'z
# xY''z
for (my $i = 0; $i < $max_folds_to; $i++) {
my @this_test_to; # Assemble a single test
# For each character in the multi-char fold ...
for (my $j = 0; $j < $to_chars; $j++) {
my $this_char = substr($to, $j, 1);
# Use its corresponding inverse fold, if available.
if ($i < $folds_to_count[$j]) {
push @this_test_to, ord $inverse_folds{$this_char}[$i];
}
else { # Or else itself.
push @this_test_to, ord $this_char;
}
}
# Add this test to the list
push @test_to, [ @this_test_to ];
}
# Here, have assembled all the tests for the multi-char fold. Sort so
# lowest code points are first for consistency and aesthetics in
# output. We know there are at least two characters in the fold, but
# I haven't bothered to worry about sorting on an optional third
# character if the first two are identical.
@test_to = sort { ($a->[0] == $b->[0])
? $a->[1] <=> $b->[1]
: $a->[0] <=> $b->[0]
} @test_to;
}
# This test is from n bytes to m bytes. Record that so won't try to add
# another test that does the same.
use bytes;
my $to_bytes = length $to;
foreach my $from_map (@from) {
$has_test_by_byte_count{length $from_map}{$to_bytes} = $to;
}
no bytes;
my $ord_smallest_from = ord shift @from;
if (exists $tests{$ord_smallest_from}) {
die "There are already tests for $ord_smallest_from"
};
# Add in the fold tests,
push @{$tests{$ord_smallest_from}}, @test_to;
# Then any remaining froms in the equivalence class.
push @{$tests{$ord_smallest_from}}, map { ord $_ } @from;
}
# Read the Unicode rules file and construct inverse mappings from it
my $file="../lib/unicore/CaseFolding.txt";
open my $fh, "<", $file or die "Failed to read '$file': $!";
while (<$fh>) {
chomp;
# Lines look like (though without the initial '#')
#0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
my ($line, $comment) = split / \s+ \# \s+ /x, $_;
next if $line eq "" || $line =~ /^#/;
my ($hex_from, $fold_type, @hex_folded) = split /[\s;]+/, $line;
next if $fold_type eq 'T'; # Perl doesn't do Turkish folding
next if $fold_type eq 'S'; # If Unicode's tables are correct, the F
# should be a superset of S
my $folded_str = pack ("U0U*", map { hex $_ } @hex_folded);
push @{$inverse_folds{$folded_str}}, chr hex $hex_from;
}
# Analyze the data and generate tests to get adequate test coverage. We sort
# things so that smallest code points are done first.
TO:
foreach my $to (sort { (length $a == length $b)
? $a cmp $b
: length $a <=> length $b
} keys %inverse_folds) {
# Within each fold, sort so that the smallest code points are done first
@{$inverse_folds{$to}} = sort { $a cmp $b } @{$inverse_folds{$to}};
my @from = @{$inverse_folds{$to}};
# Just add it to the tests if doing complete coverage
if (! $skip_apparently_redundant) {
add_test($to, @from);
next TO;
}
my $to_chars = length $to;
my $to_range_type = range_type(substr($to, 0, 1));
# If this is required to be tested, do so. We check for these first, as
# they will take up slots of byte-to-byte combinations that we otherwise
# would have to have other tests to get.
foreach my $from_map (@from) {
if (exists $be_sure_to_test{$from_map}) {
add_test($to, @from);
next TO;
}
}
# If the fold contains heterogeneous range types, is suspect and should be
# tested.
if ($to_chars > 1) {
foreach my $char (split "", $to) {
if (range_type($char) != $to_range_type) {
add_test($to, @from);
next TO;
}
}
}
# If the mapping crosses range types, is suspect and should be tested
foreach my $from_map (@from) {
if (range_type($from_map) != $to_range_type) {
add_test($to, @from);
next TO;
}
}
# Here, all components of the mapping are in the same range type. For
# single character folds, we test one case in each range type that has 2
# particpants, 3 particpants, etc.
if ($to_chars == 1) {
if (! exists $has_test_by_participants{scalar @from}{$to_range_type}) {
add_test($to, @from);
$has_test_by_participants{scalar @from}{$to_range_type} = $to;
next TO;
}
}
# We also test all combinations of mappings from m to n bytes. This is
# because the regex optimizer cares. (Don't bother worrying about that
# Latin1 chars will occupy a different number of bytes under utf8, as
# there are plenty of other cases that catch these byte numbers.)
use bytes;
my $to_bytes = length $to;
foreach my $from_map (@from) {
if (! exists $has_test_by_byte_count{length $from_map}{$to_bytes}) {
add_test($to, @from);
next TO;
}
}
}
# For each range type, test additionally a character that folds to itself
add_test(chr 0x3A, chr 0x3A);
add_test(chr 0xF7, chr 0xF7);
add_test(chr 0x2C7, chr 0x2C7);
# To cut down on the number of tests
my $has_tested_aa_above_latin1;
my $has_tested_latin1_aa;
my $has_tested_ascii_aa;
my $has_tested_l_above_latin1;
my $has_tested_above_latin1_l;
my $has_tested_ascii_l;
my $has_tested_above_latin1_d;
my $has_tested_ascii_d;
my $has_tested_non_latin1_d;
my $has_tested_above_latin1_a;
my $has_tested_ascii_a;
my $has_tested_non_latin1_a;
# For use by pairs() in generating combinations
sub prefix {
my $p = shift;
map [ $p, $_ ], @_
}
# Returns all ordered combinations of pairs of elements from the input array.
# It doesn't return pairs like (a, a), (b, b). Change the slice to an array
# to do that. This was just to have fewer tests.
sub pairs (@) {
#print __LINE__, ": ", join(" XXX ", @_), "\n";
map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
}
my @charsets = qw(d u a aa);
my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
if ($current_locale eq 'C') {
use locale;
# Some locale implementations don't have the range 128-255 characters all
# mean nothing. Skip the locale tests in that situation.
for my $i (128 .. 255) {
my $char = chr($i);
goto bad_locale if uc($char) ne $char || lc($char) ne $char;
}
push @charsets, 'l';
bad_locale:
}
# Finally ready to do the tests
foreach my $test (sort { numerically } keys %tests) {
my $previous_target;
my $previous_pattern;
my @pairs = pairs(sort numerically $test, @{$tests{$test}});
# Each fold can be viewed as a closure of all the characters that
# participate in it. Look at each possible pairing from a closure, with the
# first member of the pair the target string to match against, and the
# second member forming the pattern. Thus each fold member gets tested as
# the string, and the pattern with every other member in the opposite role.
while (my $pair = shift @pairs) {
my ($target, $pattern) = @$pair;
# When testing a char that doesn't fold, we can get the same
# permutation twice; so skip all but the first.
next if $previous_target
&& $previous_target == $target
&& $previous_pattern == $pattern;
($previous_target, $previous_pattern) = ($target, $pattern);
# Each side may be either a single char or a string. Extract each into an
# array (perhaps of length 1)
my @target, my @pattern;
@target = (ref $target) ? @$target : $target;
@pattern = (ref $pattern) ? @$pattern : $pattern;
# We are testing just folds to/from a single character. If our pairs
# happens to generate multi/multi, skip.
next if @target > 1 && @pattern > 1;
# Have to convert non-utf8 chars to native char set
@target = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @target;
@pattern = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @pattern;
# Get in hex form.
my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;
my $target_above_latin1 = grep { $_ > 255 } @target;
my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
my $target_has_ascii = grep { $_ < 128 } @target;
my $pattern_has_ascii = grep { $_ < 128 } @pattern;
my $target_only_ascii = ! grep { $_ > 127 } @target;
my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
my $target_has_latin1 = grep { $_ < 256 } @target;
my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
# We don't test multi-char folding into other multi-chars. We are testing
# a code point that folds to or from other characters. Find the single
# code point for diagnostic purposes. (If both are single, choose the
# target string)
my $ord = @target == 1 ? $target[0] : $pattern[0];
my $progress = sprintf "%04X: \"%s\" and /%s/",
$test,
join("", @x_target),
join("", @x_pattern);
#note $progress;
# Now grind out tests, using various combinations.
foreach my $charset (@charsets) {
$okays = 0;
$this_iteration = 0;
# To cut down somewhat on the enormous quantity of tests this currently
# runs, skip some for some of the character sets whose results aren't
# likely to differ from others. But run all tests on the code points
# that don't fold, plus one other set in each range group.
if (! $is_self) {
# /aa should only affect things with folds in the ASCII range. But, try
# it on one set in the other ranges just to make sure it doesn't break
# them.
if ($charset eq 'aa') {
if (! $target_has_ascii && ! $pattern_has_ascii) {
if ($target_above_latin1 || $pattern_above_latin1) {
next if defined $has_tested_aa_above_latin1
&& $has_tested_aa_above_latin1 != $test;
$has_tested_aa_above_latin1 = $test;
}
next if defined $has_tested_latin1_aa
&& $has_tested_latin1_aa != $test;
$has_tested_latin1_aa = $test;
}
elsif ($target_only_ascii && $pattern_only_ascii) {
# And, except for one set just to make sure, skip tests
# where both elements in the pair are ASCII. If one works for
# aa, the others are likely too. This skips tests where the
# fold is from non-ASCII to ASCII, but this part of the test
# is just about the ASCII components.
next if defined $has_tested_ascii_l
&& $has_tested_ascii_l != $test;
$has_tested_ascii_l = $test;
}
}
elsif ($charset eq 'l') {
# For l, don't need to test beyond one set those things that are
# all above latin1, because unlikely to have different successes
# than /u
if (! $target_has_latin1 && ! $pattern_has_latin1) {
next if defined $has_tested_above_latin1_l
&& $has_tested_above_latin1_l != $test;
$has_tested_above_latin1_l = $test;
}
elsif ($target_only_ascii && $pattern_only_ascii) {
# And, except for one set just to make sure, skip tests
# where both elements in the pair are ASCII. This is
# essentially the same reasoning as above for /aa.
next if defined $has_tested_ascii_l
&& $has_tested_ascii_l != $test;
$has_tested_ascii_l = $test;
}
}
elsif ($charset eq 'd') {
# Similarly for d. Beyond one test (besides self) each, we don't
# test pairs that are both ascii; or both above latin1, or are
# combinations of ascii and above latin1.
if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
if ($target_has_ascii && $pattern_has_ascii) {
next if defined $has_tested_ascii_d
&& $has_tested_ascii_d != $test;
$has_tested_ascii_d = $test
}
elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
next if defined $has_tested_above_latin1_d
&& $has_tested_above_latin1_d != $test;
$has_tested_above_latin1_d = $test;
}
else {
next if defined $has_tested_non_latin1_d
&& $has_tested_non_latin1_d != $test;
$has_tested_non_latin1_d = $test;
}
}
}
elsif ($charset eq 'a') {
# Similarly for a. This should match identically to /u, so wasn't
# tested at all until a bug was found that was thereby missed.
# As a compromise, beyond one test (besides self) each, we don't
# test pairs that are both ascii; or both above latin1, or are
# combinations of ascii and above latin1.
if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
if ($target_has_ascii && $pattern_has_ascii) {
next if defined $has_tested_ascii_a
&& $has_tested_ascii_a != $test;
$has_tested_ascii_a = $test
}
elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
next if defined $has_tested_above_latin1_a
&& $has_tested_above_latin1_a != $test;
$has_tested_above_latin1_a = $test;
}
else {
next if defined $has_tested_non_latin1_a
&& $has_tested_non_latin1_a != $test;
$has_tested_non_latin1_a = $test;
}
}
}
}
foreach my $utf8_target (0, 1) { # Both utf8 and not, for
# code points < 256
my $upgrade_target = "";
# These must already be in utf8 because the string to match has
# something above latin1. So impossible to test if to not to be in
# utf8; and otherwise, no upgrade is needed.
next if $target_above_latin1 && ! $utf8_target;
$upgrade_target = ' utf8::upgrade($c);' if ! $target_above_latin1 && $utf8_target;
foreach my $utf8_pattern (0, 1) {
next if $pattern_above_latin1 && ! $utf8_pattern;
# Our testing of 'l' uses the POSIX locale, which is ASCII-only
my $uni_semantics = $charset ne 'l' && ($utf8_target || $charset eq 'u' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/);
my $upgrade_pattern = "";
$upgrade_pattern = ' utf8::upgrade($p);' if ! $pattern_above_latin1 && $utf8_pattern;
my $lhs = join "", @x_target;
my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
my @rhs = @x_pattern;
my $rhs = join "", @rhs;
my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
|| ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
|| ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);
# Do simple tests of referencing capture buffers, named and
# numbered.
my $op = '=~';
$op = '!~' if $should_fail;
my $todo = 0; # No longer any todo's
my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
run_test($eval, $todo, "");
$eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
run_test($eval, $todo, "");
if ($lhs ne $rhs) {
$eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
run_test($eval, "", "");
$eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
run_test($eval, "", "");
}
# See if works on what could be a simple trie.
$eval = "my \$c = \"$lhs\"; my \$p = qr/$rhs|xyz/i$charset;$upgrade_target$upgrade_pattern \$c $op \$p";
run_test($eval, "", "");
foreach my $bracketed (0, 1) { # Put rhs in [...], or not
next if $bracketed && @pattern != 1; # bracketed makes these
# or's instead of a sequence
foreach my $inverted (0,1) {
next if $inverted && ! $bracketed; # inversion only valid in [^...]
next if $inverted && @target != 1; # [perl #89750] multi-char
# not valid in [^...]
# In some cases, add an extra character that doesn't fold, and
# looks ok in the output.
my $extra_char = "_";
foreach my $prepend ("", $extra_char) {
foreach my $append ("", $extra_char) {
# Assemble the rhs. Put each character in a separate
# bracketed if using charclasses. This creates a stress on
# the code to span a match across multiple elements
my $rhs = "";
foreach my $rhs_char (@rhs) {
$rhs .= '[' if $bracketed;
$rhs .= '^' if $inverted;
$rhs .= $rhs_char;
# Add a character to the class, so class doesn't get
# optimized out
$rhs .= '_]' if $bracketed;
}
# Add one of: no capturing parens
# a single set
# a nested set
# Use quantifiers and extra variable width matches inside
# them to keep some optimizations from happening
foreach my $parend (0, 1, 2) {
my $interior = (! $parend)
? $rhs
: ($parend == 1)
? "(${rhs},?)"
: "((${rhs})+,?)";
foreach my $quantifier ("", '?', '*', '+', '{1,3}') {
# Perhaps should be TODOs, as are unimplemented, but
# maybe will never be implemented
next if @pattern != 1 && $quantifier;
# A ? or * quantifier normally causes the thing to be
# able to match a null string
my $quantifier_can_match_null = $quantifier eq '?' || $quantifier eq '*';
# But since we only quantify the last character in a
# multiple fold, the other characters will have width,
# except if we are quantifying the whole rhs
my $can_match_null = $quantifier_can_match_null && (@rhs == 1 || $parend);
foreach my $l_anchor ("", '^') { # '\A' didn't change result)
foreach my $r_anchor ("", '$') { # '\Z', '\z' didn't change result)
# The folded part can match the null string if it
# isn't required to have width, and there's not
# something on one or both sides that force it to.
my $both_sides = ($l_anchor && $r_anchor) || ($l_anchor && $append) || ($r_anchor && $prepend) || ($prepend && $append);
my $must_match = ! $can_match_null || $both_sides;
# for performance, but doing this missed many failures
#next unless $must_match;
my $quantified = "(?$charset:$l_anchor$prepend$interior${quantifier}$append$r_anchor)";
my $op;
if ($must_match && $should_fail) {
$op = 0;
} else {
$op = 1;
}
$op = ! $op if $must_match && $inverted;
if ($inverted && @target > 1) {
# When doing an inverted match against a
# multi-char target, and there is not something on
# the left to anchor the match, if it shouldn't
# succeed, skip, as what will happen (when working
# correctly) is that it will match the first
# position correctly, and then be inverted to not
# match; then it will go to the second position
# where it won't match, but get inverted to match,
# and hence succeeding.
next if ! ($l_anchor || $prepend) && ! $op;
# Can't ever match for latin1 code points non-uni
# semantics that have a inverted multi-char fold
# when there is something on both sides and the
# quantifier isn't such as to span the required
# width, which is 2 or 3.
$op = 0 if $ord < 255
&& ! $uni_semantics
&& $both_sides
&& ( ! $quantifier || $quantifier eq '?')
&& $parend < 2;
# Similarly can't ever match when inverting a multi-char
# fold for /aa and the quantifier isn't sufficient
# to allow it to span to both sides.
$op = 0 if $target_has_ascii && $charset eq 'aa' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
# Or for /l
$op = 0 if $target_has_latin1 && $charset eq 'l' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
}
my $desc = "my \$c = \"$prepend$lhs$append\"; "
. "my \$p = qr/$quantified/i;"
. "$upgrade_target$upgrade_pattern "
. "\$c " . ($op ? "=~" : "!~") . " \$p; ";
if ($DEBUG) {
$desc .= (
"; uni_semantics=$uni_semantics, "
. "should_fail=$should_fail, "
. "bracketed=$bracketed, "
. "prepend=$prepend, "
. "append=$append, "
. "parend=$parend, "
. "quantifier=$quantifier, "
. "l_anchor=$l_anchor, "
. "r_anchor=$r_anchor; "
. "pattern_above_latin1=$pattern_above_latin1; "
. "utf8_pattern=$utf8_pattern"
);
}
my $c = "$prepend$lhs_str$append";
my $p = qr/$quantified/i;
utf8::upgrade($c) if length($upgrade_target);
utf8::upgrade($p) if length($upgrade_pattern);
my $res = $op ? ($c =~ $p): ($c !~ $p);
if (!$res || $ENV{PERL_DEBUG_FULL_TEST}) {
# Failed or debug; output the result
$count++;
ok($res, $desc);
} else {
# Just count the test as passed
$okays++;
}
$this_iteration++;
}
}
}
}
}
}
}
}
}
}
unless($ENV{PERL_DEBUG_FULL_TEST}) {
$count++;
is $okays, $this_iteration, "$okays subtests ok for"
. " /$charset,"
. ' target="' . join("", @x_target) . '",'
. ' pat="' . join("", @x_pattern) . '"';
}
}
}
}
plan($count);
1