use warnings;
use strict;
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
require './test.pl';
}
plan(20736); # Determined by experimentation
# In this section, test the upper/lower/title case mappings for all characters
# 0-255.
# First compute the case mappings without resorting to the functions we're
# testing.
# Initialize the arrays so each $i maps to itself.
my @posix_to_upper;
for my $i (0 .. 255) {
$posix_to_upper[$i] = chr($i);
}
my @posix_to_lower
= my @posix_to_title
= my @latin1_to_upper
= my @latin1_to_lower
= my @latin1_to_title
= @posix_to_upper;
# Override the elements in the to_lower arrays that have different standard
# lower case mappings. (standard meaning they are 32 numbers apart)
for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
my $upper_ord = ord_latin1_to_native $i;
my $lower_ord = ord_latin1_to_native($i + 32);
$latin1_to_lower[$upper_ord] = chr($lower_ord);
next if $i > 127;
$posix_to_lower[$upper_ord] = chr($lower_ord);
}
# Same for upper and title
for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
my $lower_ord = ord_latin1_to_native $i;
my $upper_ord = ord_latin1_to_native($i - 32);
$latin1_to_upper[$lower_ord] = chr($upper_ord);
$latin1_to_title[$lower_ord] = chr($upper_ord);
next if $i > 127;
$posix_to_upper[$lower_ord] = chr($upper_ord);
$posix_to_title[$lower_ord] = chr($upper_ord);
}
# Override the abnormal cases.
$latin1_to_upper[ord_latin1_to_native 0xB5] = chr(0x39C);
$latin1_to_title[ord_latin1_to_native 0xB5] = chr(0x39C);
$latin1_to_upper[ord_latin1_to_native 0xDF] = 'SS';
$latin1_to_title[ord_latin1_to_native 0xDF] = 'Ss';
$latin1_to_upper[ord_latin1_to_native 0xFF] = chr(0x178);
$latin1_to_title[ord_latin1_to_native 0xFF] = chr(0x178);
my $repeat = 25; # Length to make strings.
# Create hashes of strings in several ranges, both for uc and lc.
my %posix;
$posix{'uc'} = 'A' x $repeat;
$posix{'lc'} = 'a' x $repeat ;
my %cyrillic;
$cyrillic{'uc'} = chr(0x42F) x $repeat;
$cyrillic{'lc'} = chr(0x44F) x $repeat;
my %latin1;
$latin1{'uc'} = chr(ord_latin1_to_native 0xD8) x $repeat;
$latin1{'lc'} = chr(ord_latin1_to_native 0xF8) x $repeat;
my %empty;
$empty{'lc'} = $empty{'uc'} = "";
# Loop so prefix each character being tested with nothing, and the various
# strings; then loop for suffixes of those strings as well.
for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
for my $i (0 .. 255) { # For each possible posix or latin1 character
my $cp = sprintf "U+%04X", $i;
# First try using latin1 (Unicode) semantics.
use feature "unicode_strings";
my $phrase = 'in uni8bit';
my $char = chr($i);
my $pre_lc = $prefix->{'lc'};
my $pre_uc = $prefix->{'uc'};
my $post_lc = $suffix->{'lc'};
my $post_uc = $suffix->{'uc'};
my $to_upper = $pre_lc . $char . $post_lc;
my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
my $to_lower = $pre_uc . $char . $post_uc;
my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
is (uc($to_upper), $expected_upper,
display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
is (lc($to_lower), $expected_lower,
display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
if ($pre_uc eq "") { # Title case if null prefix.
my $expected_title = $latin1_to_title[$i] . $post_lc;
is (ucfirst($to_upper), $expected_title,
display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
is (lcfirst($to_lower), $expected_lcfirst,
display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
}
# Then try with posix semantics.
no feature "unicode_strings";
$phrase = 'no uni8bit';
# These don't contribute anything in this case.
next if $suffix == \%cyrillic;
next if $suffix == \%latin1;
next if $prefix == \%cyrillic;
next if $prefix == \%latin1;
$expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
$expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
is (uc($to_upper), $expected_upper,
display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
is (lc($to_lower), $expected_lower,
display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
if ($pre_uc eq "") {
my $expected_title = $posix_to_title[$i] . $post_lc;
is (ucfirst($to_upper), $expected_title,
display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
is (lcfirst($to_lower), $expected_lcfirst,
display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
}
}
}
}
# In this section test that \w, \s, and \b (and complements) work correctly.
# These are the only character classes affected by this pragma. Above ASCII
# range Latin-1 characters are in \w and \s iff the pragma is on.
# Construct the expected full Latin1 values without using anything we're
# testing. All these were determined manually by looking in the manual.
# Boolean: is w[$i] a \w character?
my @w = (0) x 256;
for my $i ( 0x30 .. 0x39, # 0-9
0x41 .. 0x5a, # A-Z
0x61 .. 0x7a, # a-z
0x5F, # _
0xAA, # FEMININE ORDINAL INDICATOR
0xB5, # MICRO SIGN
0xBA, # MASCULINE ORDINAL INDICATOR
0xC0 .. 0xD6, # various
0xD8 .. 0xF6, # various
0xF8 .. 0xFF, # various
)
{
$w[ord_latin1_to_native $i] = 1;
}
# Boolean: is s[$i] a \s character?
my @s = (0) x 256;
$s[ord_latin1_to_native 0x09] = 1; # Tab
$s[ord_latin1_to_native 0x0A] = 1; # LF
$s[ord_latin1_to_native 0x0B] = 1; # VT
$s[ord_latin1_to_native 0x0C] = 1; # FF
$s[ord_latin1_to_native 0x0D] = 1; # CR
$s[ord_latin1_to_native 0x20] = 1; # SPACE
$s[ord_latin1_to_native 0x85] = 1; # NEL
$s[ord_latin1_to_native 0xA0] = 1; # NO BREAK SPACE
for my $i (0 .. 255) {
my $char = chr($i);
my $hex_i = sprintf "%02X", $i;
foreach my $which (\@s, \@w) {
my $basic_name;
if ($which == \@s) {
$basic_name = 's';
} else {
$basic_name = 'w'
}
# Test \w \W \s \S
foreach my $complement (0, 1) {
my $name = '\\' . (($complement) ? uc($basic_name) : $basic_name);
# in and out of [...]
foreach my $charclass (0, 1) {
# And like [^...] or just plain [...]
foreach my $complement_class (0, 1) {
next if ! $charclass && $complement_class;
# Start with the boolean as to if the character is in the
# class, and then complement as needed.
my $expect_success = $which->[$i];
$expect_success = ! $expect_success if $complement;
$expect_success = ! $expect_success if $complement_class;
my $test = $name;
$test = "^$test" if $complement_class;
$test = "[$test]" if $charclass;
$test = "^$test\$";
use feature 'unicode_strings';
my $prefix = "in uni8bit; Verify chr(0x$hex_i)";
if ($expect_success) {
like($char, qr/$test/, display("$prefix =~ qr/$test/"));
} else {
unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
}
no feature 'unicode_strings';
$prefix = "no uni8bit; Verify chr(0x$hex_i)";
# With the legacy, nothing above 128 should be in the
# class
if ($i >= 128) {
$expect_success = 0;
$expect_success = ! $expect_success if $complement;
$expect_success = ! $expect_success if $complement_class;
}
if ($expect_success) {
like($char, qr/$test/, display("$prefix =~ qr/$test/"));
} else {
unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
}
}
}
}
}
# Similarly for \b and \B.
foreach my $complement (0, 1) {
my $name = '\\' . (($complement) ? 'B' : 'b');
my $expect_success = ! $w[$i]; # \b is complement of \w
$expect_success = ! $expect_success if $complement;
my $string = "a$char";
my $test = "(^a$name\\x{$hex_i}\$)";
use feature 'unicode_strings';
my $prefix = "in uni8bit; Verify $string";
if ($expect_success) {
like($string, qr/$test/, display("$prefix =~ qr/$test/"));
} else {
unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));
}
no feature 'unicode_strings';
$prefix = "no uni8bit; Verify $string";
if ($i >= 128) {
$expect_success = 1;
$expect_success = ! $expect_success if $complement;
}
if ($expect_success) {
like($string, qr/$test/, display("$prefix =~ qr/$test/"));
like($string, qr/$test/, display("$prefix =~ qr/$test/"));
} else {
unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));
}
}
}